From cdb56a63f7eef34e89b045fc8bcae8d326bbdb19 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Sep 2024 12:46:49 +0300 Subject: [PATCH 01/16] drm/xe/pciids: separate ARL and MTL PCI IDs Avoid including PCI IDs for one platform to the PCI IDs of another. It's more clear to deal with them completely separately at the PCI ID macro level. Reviewed-by: Shekhar Chauhan Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/a30cb0da7694a8eccceba66d676ac59aa0e96176.1725443121.git.jani.nikula@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 1 + include/drm/intel/xe_pciids.h | 13 ++++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 14fe08e40c98..942817e5a79e 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -388,6 +388,7 @@ static const struct pci_device_id pciidlist[] = { XE_RPLS_IDS(INTEL_VGA_DEVICE, &adl_s_desc), XE_DG1_IDS(INTEL_VGA_DEVICE, &dg1_desc), XE_ATS_M_IDS(INTEL_VGA_DEVICE, &ats_m_desc), + XE_ARL_IDS(INTEL_VGA_DEVICE, &mtl_desc), XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc), diff --git a/include/drm/intel/xe_pciids.h b/include/drm/intel/xe_pciids.h index 7a9a7d0a89ca..79001afa7d27 100644 --- a/include/drm/intel/xe_pciids.h +++ b/include/drm/intel/xe_pciids.h @@ -176,16 +176,19 @@ XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\ XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) -/* MTL / ARL */ +/* ARL */ +#define XE_ARL_IDS(MACRO__, ...) \ + MACRO__(0x7D41, ## __VA_ARGS__), \ + MACRO__(0x7D51, ## __VA_ARGS__), \ + MACRO__(0x7D67, ## __VA_ARGS__), \ + MACRO__(0x7DD1, ## __VA_ARGS__) + +/* MTL */ #define XE_MTL_IDS(MACRO__, ...) 
\ MACRO__(0x7D40, ## __VA_ARGS__), \ - MACRO__(0x7D41, ## __VA_ARGS__), \ MACRO__(0x7D45, ## __VA_ARGS__), \ - MACRO__(0x7D51, ## __VA_ARGS__), \ MACRO__(0x7D55, ## __VA_ARGS__), \ MACRO__(0x7D60, ## __VA_ARGS__), \ - MACRO__(0x7D67, ## __VA_ARGS__), \ - MACRO__(0x7DD1, ## __VA_ARGS__), \ MACRO__(0x7DD5, ## __VA_ARGS__) #define XE_LNL_IDS(MACRO__, ...) \ -- 2.50.1 From 5ea28f921a1cd8e722ddfd9cc0cb92b8e37b5adb Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 4 Sep 2024 17:52:31 +0300 Subject: [PATCH 02/16] drm/xe: use IS_ENABLED() instead of defined() on config options Prefer IS_ENABLED() instead of defined() for checking whether a kconfig option is enabled. Reviewed-by: Badal Nilawar Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240904145231.3902289-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c | 2 +- drivers/gpu/drm/xe/xe_guc.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 0c64d3b3155e..3a9653e33481 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -468,7 +468,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, mem->bus.offset += vram->io_start; mem->bus.is_iomem = true; -#if !defined(CONFIG_X86) +#if !IS_ENABLED(CONFIG_X86) mem->bus.caching = ttm_write_combined; #endif return 0; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c index f3ddcbefc6bc..2ed5b6780d30 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_debugfs.c @@ -33,7 +33,7 @@ static const struct drm_info_list vf_info[] = { .show = xe_gt_debugfs_simple_show, .data = xe_gt_sriov_vf_print_version, }, -#if defined(CONFIG_DRM_XE_DEBUG) || defined(CONFIG_DRM_XE_DEBUG_SRIOV) +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) || IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) { 
"runtime_regs", .show = xe_gt_debugfs_simple_show, diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index c67d4807f37d..5599464013bd 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -583,7 +583,7 @@ static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc) * extreme thermal throttling. And a system that is that hot during boot is probably * dead anyway! */ -#if defined(CONFIG_DRM_XE_DEBUG) +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) #define GUC_LOAD_RETRY_LIMIT 20 #else #define GUC_LOAD_RETRY_LIMIT 3 -- 2.50.1 From c2bf07dd0bbce1f318b73b525e21fbc6d67a3a94 Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Fri, 6 Sep 2024 12:41:25 +0530 Subject: [PATCH 03/16] drm/xe/xe_gt_idle: modify powergate enable condition Modify powergate enable condition based on the type of GT or presence of media engines. Also have a copy of the value written to powergate enable register. v2: add condition to enable render or media powergating (Badal) v3: fix commit message (Shekhar) fix kernel-doc Signed-off-by: Riana Tauro Reviewed-by: Shekhar Chauhan Reviewed-by: Badal Nilawar Link: https://patchwork.freedesktop.org/patch/msgid/20240906071126.28078-2-riana.tauro@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_idle.c | 26 ++++++++++++++++++-------- drivers/gpu/drm/xe/xe_gt_idle_types.h | 2 ++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 67aba4140510..3924f9f3d0a5 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -98,7 +98,8 @@ static u64 get_residency_ms(struct xe_gt_idle *gtidle, u64 cur_residency) void xe_gt_idle_enable_pg(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - u32 pg_enable; + struct xe_gt_idle *gtidle = &gt->gtidle; + u32 vcs_mask, vecs_mask; int i, j; if (IS_SRIOV_VF(xe)) @@ -110,12 +111,19 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) xe_device_assert_mem_access(gt_to_xe(gt)); - 
pg_enable = RENDER_POWERGATE_ENABLE | MEDIA_POWERGATE_ENABLE; + vcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_DECODE); + vecs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE); + + if (vcs_mask || vecs_mask) + gtidle->powergate_enable = MEDIA_POWERGATE_ENABLE; + + if (!xe_gt_is_media_type(gt)) + gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { if ((gt->info.engine_mask & BIT(i))) - pg_enable |= (VDN_HCP_POWERGATE_ENABLE(j) | - VDN_MFXVDENC_POWERGATE_ENABLE(j)); + gtidle->powergate_enable |= (VDN_HCP_POWERGATE_ENABLE(j) | + VDN_MFXVDENC_POWERGATE_ENABLE(j)); } XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); @@ -128,20 +136,22 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) xe_mmio_write32(gt, RENDER_POWERGATE_IDLE_HYSTERESIS, 25); } - xe_mmio_write32(gt, POWERGATE_ENABLE, pg_enable); + xe_mmio_write32(gt, POWERGATE_ENABLE, gtidle->powergate_enable); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); } void xe_gt_idle_disable_pg(struct xe_gt *gt) { + struct xe_gt_idle *gtidle = &gt->gtidle; + if (IS_SRIOV_VF(gt_to_xe(gt))) return; xe_device_assert_mem_access(gt_to_xe(gt)); - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); - - xe_mmio_write32(gt, POWERGATE_ENABLE, 0); + gtidle->powergate_enable = 0; + XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); + xe_mmio_write32(gt, POWERGATE_ENABLE, gtidle->powergate_enable); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); } diff --git a/drivers/gpu/drm/xe/xe_gt_idle_types.h b/drivers/gpu/drm/xe/xe_gt_idle_types.h index f99b447534f3..b8b297a3f884 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle_types.h +++ b/drivers/gpu/drm/xe/xe_gt_idle_types.h @@ -23,6 +23,8 @@ enum xe_gt_idle_state { struct xe_gt_idle { /** @name: name */ char name[16]; + /** @powergate_enable: copy of powergate enable bits */ + u32 powergate_enable; /** @residency_multiplier: residency multiplier in ns */ u32 
residency_multiplier; /** @cur_residency: raw driver copy of idle residency */ -- 2.50.1 From 0914c1e45d3a1a747faeebae27ba197d7ba41f94 Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Fri, 6 Sep 2024 12:41:26 +0530 Subject: [PATCH 04/16] drm/xe/xe_gt_idle: add debugfs entry for powergating info Coarse Powergating is a power saving technique where Render and Media can be power-gated independently irrespective of the rest of the GT. For debug purposes, it is useful to expose the powergating information. v2: move to debugfs add details to commit message add per-slice status for media define reg bits in descending order (Matt Roper) v3: fix return statement fix kernel-doc use loop for media slices use helper function for status (Michal) v4: add pg prefix do not wake GT if in C6 (Badal) Signed-off-by: Riana Tauro Reviewed-by: Badal Nilawar Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240906071126.28078-3-riana.tauro@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 8 +++ drivers/gpu/drm/xe/xe_gt_debugfs.c | 13 ++++ drivers/gpu/drm/xe/xe_gt_idle.c | 91 ++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_idle.h | 2 + 4 files changed, 114 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 0d1a4a9f4e11..cbead3f75fad 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -341,6 +341,14 @@ #define CTC_SOURCE_DIVIDE_LOGIC REG_BIT(0) #define FORCEWAKE_RENDER XE_REG(0xa278) + +#define POWERGATE_DOMAIN_STATUS XE_REG(0xa2a0) +#define MEDIA_SLICE3_AWAKE_STATUS REG_BIT(4) +#define MEDIA_SLICE2_AWAKE_STATUS REG_BIT(3) +#define MEDIA_SLICE1_AWAKE_STATUS REG_BIT(2) +#define RENDER_AWAKE_STATUS REG_BIT(1) +#define MEDIA_SLICE0_AWAKE_STATUS REG_BIT(0) + #define FORCEWAKE_MEDIA_VDBOX(n) XE_REG(0xa540 + (n) * 4) #define FORCEWAKE_MEDIA_VEBOX(n) XE_REG(0xa560 + (n) * 4) #define FORCEWAKE_GSC XE_REG(0xa618) diff --git 
a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 8f95d3a5949b..cbc43973ff7e 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -15,6 +15,7 @@ #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_gt_mcr.h" +#include "xe_gt_idle.h" #include "xe_gt_sriov_pf_debugfs.h" #include "xe_gt_sriov_vf_debugfs.h" #include "xe_gt_stats.h" @@ -109,6 +110,17 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p) return 0; } +static int powergate_info(struct xe_gt *gt, struct drm_printer *p) +{ + int ret; + + xe_pm_runtime_get(gt_to_xe(gt)); + ret = xe_gt_idle_pg_print(gt, p); + xe_pm_runtime_put(gt_to_xe(gt)); + + return ret; +} + static int force_reset(struct xe_gt *gt, struct drm_printer *p) { xe_pm_runtime_get(gt_to_xe(gt)); @@ -288,6 +300,7 @@ static const struct drm_info_list debugfs_list[] = { {"topology", .show = xe_gt_debugfs_simple_show, .data = topology}, {"steering", .show = xe_gt_debugfs_simple_show, .data = steering}, {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt}, + {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info}, {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds}, {"pat", .show = xe_gt_debugfs_simple_show, .data = pat}, diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 3924f9f3d0a5..85a35ed153a3 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -53,6 +53,11 @@ pc_to_xe(struct xe_guc_pc *pc) return gt_to_xe(gt); } +static inline const char *str_up_down(bool v) +{ + return v ? 
"up" : "down"; +} + static const char *gt_idle_state_to_string(enum xe_gt_idle_state state) { switch (state) { @@ -155,6 +160,92 @@ void xe_gt_idle_disable_pg(struct xe_gt *gt) XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); } +/** + * xe_gt_idle_pg_print - Xe powergating info + * @gt: GT object + * @p: drm_printer. + * + * This function prints the powergating information + * + * Return: 0 on success, negative error code otherwise + */ +int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_gt_idle *gtidle = &gt->gtidle; + struct xe_device *xe = gt_to_xe(gt); + enum xe_gt_idle_state state; + u32 pg_enabled, pg_status = 0; + u32 vcs_mask, vecs_mask; + int err, n; + /* + * Media Slices + * + * Slice 0: VCS0, VCS1, VECS0 + * Slice 1: VCS2, VCS3, VECS1 + * Slice 2: VCS4, VCS5, VECS2 + * Slice 3: VCS6, VCS7, VECS3 + */ + static const struct { + u64 engines; + u32 status_bit; + } media_slices[] = { + {(BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS1) | + BIT(XE_HW_ENGINE_VECS0)), MEDIA_SLICE0_AWAKE_STATUS}, + + {(BIT(XE_HW_ENGINE_VCS2) | BIT(XE_HW_ENGINE_VCS3) | + BIT(XE_HW_ENGINE_VECS1)), MEDIA_SLICE1_AWAKE_STATUS}, + + {(BIT(XE_HW_ENGINE_VCS4) | BIT(XE_HW_ENGINE_VCS5) | + BIT(XE_HW_ENGINE_VECS2)), MEDIA_SLICE2_AWAKE_STATUS}, + + {(BIT(XE_HW_ENGINE_VCS6) | BIT(XE_HW_ENGINE_VCS7) | + BIT(XE_HW_ENGINE_VECS3)), MEDIA_SLICE3_AWAKE_STATUS}, + }; + + if (xe->info.platform == XE_PVC) { + drm_printf(p, "Power Gating not supported\n"); + return 0; + } + + state = gtidle->idle_status(gtidle_to_pc(gtidle)); + pg_enabled = gtidle->powergate_enable; + + /* Do not wake the GT to read powergating status */ + if (state != GT_IDLE_C6) { + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return err; + + pg_enabled = xe_mmio_read32(gt, POWERGATE_ENABLE); + pg_status = xe_mmio_read32(gt, POWERGATE_DOMAIN_STATUS); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT)); + } + + if (gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK) { + drm_printf(p, 
"Render Power Gating Enabled: %s\n", + str_yes_no(pg_enabled & RENDER_POWERGATE_ENABLE)); + + drm_printf(p, "Render Power Gate Status: %s\n", + str_up_down(pg_status & RENDER_AWAKE_STATUS)); + } + + vcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_DECODE); + vecs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE); + + /* Print media CPG status only if media is present */ + if (vcs_mask || vecs_mask) { + drm_printf(p, "Media Power Gating Enabled: %s\n", + str_yes_no(pg_enabled & MEDIA_POWERGATE_ENABLE)); + + for (n = 0; n < ARRAY_SIZE(media_slices); n++) + if (gt->info.engine_mask & media_slices[n].engines) + drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n, + str_up_down(pg_status & media_slices[n].status_bit)); + } + return 0; +} + static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buff) { diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h index 554447b5d46d..4455a6501cb0 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.h +++ b/drivers/gpu/drm/xe/xe_gt_idle.h @@ -8,6 +8,7 @@ #include "xe_gt_idle_types.h" +struct drm_printer; struct xe_gt; int xe_gt_idle_init(struct xe_gt_idle *gtidle); @@ -15,5 +16,6 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt); void xe_gt_idle_disable_c6(struct xe_gt *gt); void xe_gt_idle_enable_pg(struct xe_gt *gt); void xe_gt_idle_disable_pg(struct xe_gt *gt); +int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p); #endif /* _XE_GT_IDLE_H_ */ -- 2.50.1 From cb85e39dc5d1717fab82810984cce0e54712a3c2 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 5 Sep 2024 10:02:15 -0400 Subject: [PATCH 05/16] drm/xe: Suppress missing outer rpm protection warning Do not raise a WARN if we are likely within suspending or resuming path. This is likely this false positive: rpm_status: 0000:03:00.0 status=RPM_SUSPENDING console: xe_bo_evict_all (called from suspend) xe_sched_job_create: dev=0000:03:00.0, ... xe_sched_job_exec: dev=0000:03:00.0, ... 
xe_pm_runtime_put: dev=0000:03:00.0, ... xe_sched_job_run: dev=0000:03:00.0, ... rpm_usage: 0000:03:00.0 flags-0 cnt-2 ... rpm_usage: 0000:03:00.0 flags-0 cnt-2 ... rpm_usage: 0000:03:00.0 flags-0 cnt-2 ... console: xe 0000:03:00.0: [drm] Missing outer runtime PM protection console: xe_guc_ct_send+0x15/0x50 [xe] console: guc_exec_queue_run_job+0x1509/0x3950 [xe] [snip] console: drm_sched_run_job_work+0x649/0xc20 At this point, BOs are getting evicted from VRAM with rpm usage-counter = 2, but rpm status = SUSPENDING. The xe->pm_callback_task won't be equal 'current' because this call is coming from a work queue. So, pm_runtime_get_if_active() will be called and return 0 because rpm status != ACTIVE (but equal SUSPENDING or RESUMING). v2: Still get the reference even on non suspending/resuming path (Jonathan, Brost). Cc: Matthew Brost Cc: Matthew Auld Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240905140215.56404-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index e518557e0eec..9c59a30d7646 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -595,6 +595,18 @@ bool xe_pm_runtime_get_if_in_use(struct xe_device *xe) return pm_runtime_get_if_in_use(xe->drm.dev) > 0; } +/* + * Very unreliable! Should only be used to suppress the false positive case + * in the missing outer rpm protection warning. 
+ */ +static bool xe_pm_suspending_or_resuming(struct xe_device *xe) +{ + struct device *dev = xe->drm.dev; + + return dev->power.runtime_status == RPM_SUSPENDING || + dev->power.runtime_status == RPM_RESUMING; +} + /** * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming * @xe: xe device instance @@ -611,8 +623,11 @@ void xe_pm_runtime_get_noresume(struct xe_device *xe) ref = xe_pm_runtime_get_if_in_use(xe); - if (drm_WARN(&xe->drm, !ref, "Missing outer runtime PM protection\n")) + if (!ref) { pm_runtime_get_noresume(xe->drm.dev); + drm_WARN(&xe->drm, !xe_pm_suspending_or_resuming(xe), + "Missing outer runtime PM protection\n"); + } } /** -- 2.50.1 From 249df8cbecf0ab4877eab66cae857748631831a9 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Sun, 1 Sep 2024 07:42:27 +0300 Subject: [PATCH 06/16] drm/xe: fix missing 'xe_vm_put' Fix memleak caused by missing xe_vm_put Fixes: 852856e3b6f6 ("drm/xe: Use reserved copy engine for user binds on faulting devices") Signed-off-by: Dafna Hirschfeld Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240901044227.1177211-1-dhirschfeld@habana.ai Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_exec_queue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 5a9cbc97f0be..7f28b7fc68d5 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -223,8 +223,10 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, gt->usm.reserved_bcs_instance, false); - if (!hwe) + if (!hwe) { + xe_vm_put(migrate_vm); return ERR_PTR(-EINVAL); + } q = xe_exec_queue_create(xe, migrate_vm, BIT(hwe->logical_instance), 1, hwe, -- 2.50.1 From b43723f864618be715646d6164469f5e4d77aa7e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 4 Sep 2024 09:22:37 -0700 Subject: [PATCH 07/16] drm/xe: Cleanup has_flat_ccs handling The flag is set in 
XE_HP_FEATURES, but then overridden in all but one xe_graphics_desc. Make it set only where needed. Reviewed-by: Jonathan Cavitt Reviewed-by: Matt Roper Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240904162238.2831202-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 942817e5a79e..add735369f8f 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -103,7 +103,6 @@ static const struct xe_graphics_desc graphics_xelpp = { #define XE_HP_FEATURES \ .has_range_tlb_invalidation = true, \ - .has_flat_ccs = true, \ .dma_mask_size = 46, \ .va_bits = 48, \ .vm_max_level = 3 @@ -120,6 +119,8 @@ static const struct xe_graphics_desc graphics_xehpg = { XE_HP_FEATURES, .vram_flags = XE_VRAM_FLAGS_NEED64K, + + .has_flat_ccs = 1, }; static const struct xe_graphics_desc graphics_xehpc = { @@ -145,7 +146,6 @@ static const struct xe_graphics_desc graphics_xehpc = { .has_asid = 1, .has_atomic_enable_pte_bit = 1, - .has_flat_ccs = 0, .has_usm = 1, }; @@ -156,7 +156,6 @@ static const struct xe_graphics_desc graphics_xelpg = { BIT(XE_HW_ENGINE_CCS0), XE_HP_FEATURES, - .has_flat_ccs = 0, }; #define XE2_GFX_FEATURES \ -- 2.50.1 From 0c841e47d8d94bd0f5444a25052d86c0b1f2c2e8 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 4 Sep 2024 09:22:38 -0700 Subject: [PATCH 08/16] drm/xe: Update runtime detection of has_flat_ccs It's confusing to have a *set* function that actually probes the hardware rather than receiving a parameter. Rename it to *probe* along with prefix removal and comment in the relevant places that the has_flat_ccs flag may be overridden in runtime. While at it, fix the mixed declaration of struct xe_gt. 
Reviewed-by: Matt Roper Reviewed-by: Himal Prasad Ghimiray Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240904162238.2831202-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 8 +++++--- drivers/gpu/drm/xe/xe_pci.c | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1a0d7fdd094b..449b85035d3a 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -588,15 +588,17 @@ int xe_device_probe_early(struct xe_device *xe) return 0; } -static int xe_device_set_has_flat_ccs(struct xe_device *xe) +static int probe_has_flat_ccs(struct xe_device *xe) { + struct xe_gt *gt; u32 reg; int err; + /* Always enabled/disabled, no runtime check to do */ if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs) return 0; - struct xe_gt *gt = xe_root_mmio_gt(xe); + gt = xe_root_mmio_gt(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) @@ -688,7 +690,7 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err; - err = xe_device_set_has_flat_ccs(xe); + err = probe_has_flat_ccs(xe); if (err) goto err; diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index add735369f8f..8f8e28894546 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -679,7 +679,10 @@ static int xe_info_init(struct xe_device *xe, xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit; if (xe->info.platform != XE_PVC) xe->info.has_device_atomics_on_smem = 1; + + /* Runtime detection may change this later */ xe->info.has_flat_ccs = graphics_desc->has_flat_ccs; + xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation; xe->info.has_usm = graphics_desc->has_usm; -- 2.50.1 From ceb29504dd80ebdfc09adb942c9ef8d12d4612ca Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 5 Sep 2024 20:25:07 -0700 Subject: [PATCH 09/16] drm/xe: 
Fix arg to pci_iomap() Commit 2d8865b27724 ("drm/xe: Move BAR definitions to dedicated file") moved the BAR definition to the header, but replaced the wrong arg in the pci_iomap() function - the last arg is actually the length, not the BAR. Luckily GTTMMADR_BAR == 0, so it still works. Fix the argument to avoid confusion. Cc: Michal Wajdeczko Reviewed-by: Michal Wajdeczko Reviewed-by: Alan Previn Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20240906032507.2952859-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_mmio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 3fd462fda625..4aae30880bc6 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -157,15 +157,14 @@ int xe_mmio_init(struct xe_device *xe) { struct xe_tile *root_tile = xe_device_get_root_tile(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - const int mmio_bar = 0; /* * Map the entire BAR. * The first 16MB of the BAR, belong to the root tile, and include: * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB). */ - xe->mmio.size = pci_resource_len(pdev, mmio_bar); - xe->mmio.regs = pci_iomap(pdev, mmio_bar, GTTMMADR_BAR); + xe->mmio.size = pci_resource_len(pdev, GTTMMADR_BAR); + xe->mmio.regs = pci_iomap(pdev, GTTMMADR_BAR, 0); if (xe->mmio.regs == NULL) { drm_err(&xe->drm, "failed to map registers\n"); return -EIO; -- 2.50.1 From 3fe62f7bfd1b1bf829d53c39c76fa2efc8a5c108 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 6 Sep 2024 13:56:09 -0700 Subject: [PATCH 10/16] drm/xe: Mark reserved engines in snapshot MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When printing /gt*/hw_engines, it's useful to mark what engines are reserved so it doesn't mislead developers while debugging. 
Cc: José Roberto de Souza Reviewed-by: Matthew Brost Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240906205609.3131330-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_hw_engine.c | 3 +++ drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index c9c3beb3ce8d..ce180faf2592 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -903,6 +903,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt), hwe->domain); snapshot->mmio_base = hwe->mmio_base; + snapshot->kernel_reserved = xe_hw_engine_is_reserved(hwe); /* no more VF accessible data below this point */ if (IS_SRIOV_VF(gt_to_xe(hwe->gt))) @@ -1025,6 +1026,8 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, snapshot->logical_instance); drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n", snapshot->forcewake.domain, snapshot->forcewake.ref); + drm_printf(p, "\tReserved: %s\n", + str_yes_no(snapshot->kernel_reserved)); drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam); drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga); drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n", diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 8be6d420ece4..be60edb3e673 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -173,6 +173,8 @@ struct xe_hw_engine_snapshot { } forcewake; /** @mmio_base: MMIO base address of this hw engine*/ u32 mmio_base; + /** @kernel_reserved: Engine reserved, can't be used by userspace */ + bool kernel_reserved; /** @reg: Useful MMIO register snapshot */ struct { /** @reg.ring_execlist_status: RING_EXECLIST_STATUS */ -- 2.50.1 From 
f2710d95724ebbfa35d6d4b82017eeab70994509 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sat, 7 Sep 2024 00:03:48 +0200 Subject: [PATCH 11/16] drm/xe: Don't keep stale pointer to bo->ggtt_node When we fail to map a BO in the GGTT, we release our GGTT node placeholder, but leave stale bo->ggtt_node pointer to it, which triggers an assert immediately followed by a crash, due to UAF: [ ] xe 0000:00:02.0: [drm] Assertion `bo->ggtt_node->base.size == bo->size` failed! [ ] WARNING: CPU: 4 PID: 126 at drivers/gpu/drm/xe/xe_ggtt.c:689 xe_ggtt_remove_bo+0x1d9/0x250 [xe] [ ] RIP: 0010:xe_ggtt_remove_bo+0x1d9/0x250 [xe] [ ] Call Trace: [ ] [ ] ? __warn+0x88/0x190 [ ] ? xe_ggtt_remove_bo+0x1d9/0x250 [xe] [ ] ? report_bug+0x1c3/0x1d0 [ ] ? handle_bug+0x42/0x70 [ ] ? exc_invalid_op+0x14/0x70 [ ] ? asm_exc_invalid_op+0x16/0x20 [ ] ? xe_ggtt_remove_bo+0x1d9/0x250 [xe] [ ] ? xe_ggtt_remove_bo+0x1d9/0x250 [xe] [ ] xe_ttm_bo_destroy+0x11f/0x260 [xe] [ ] ? ttm_bo_release+0x31c/0x350 [ttm] [ ] ? __mutex_unlock_slowpath+0x35/0x270 [ ] __xe_bo_create_locked+0x4a0/0x550 [xe] [ ] ? mark_held_locks+0x49/0x80 [ ] xe_bo_create_pin_map_at+0x37/0x200 [xe] [ ] xe_bo_create_pin_map+0x11/0x20 [xe] While around, for similar reason, also don't keep an error pointer if we fail to allocate ggtt_node placeholder. 
Fixes: 34e804220f69 ("drm/xe: Make xe_ggtt_node struct independent") Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240906220348.1836-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_ggtt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index f3fca5565d32..2895f154654c 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -619,16 +619,19 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, bo->ggtt_node = xe_ggtt_node_init(ggtt); if (IS_ERR(bo->ggtt_node)) { err = PTR_ERR(bo->ggtt_node); + bo->ggtt_node = NULL; goto out; } mutex_lock(&ggtt->lock); err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size, alignment, 0, start, end, 0); - if (err) + if (err) { xe_ggtt_node_fini(bo->ggtt_node); - else + bo->ggtt_node = NULL; + } else { xe_ggtt_map_bo(ggtt, bo); + } mutex_unlock(&ggtt->lock); if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE) -- 2.50.1 From 1c129ed07de47684ff2471e32b52fa823533aa06 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 9 Sep 2024 20:25:08 +0000 Subject: [PATCH 12/16] drm/xe: fix build warning with CONFIG_PM=n The 'runtime_status' field is an implementation detail of the power management code, so a device driver should not normally touch this: drivers/gpu/drm/xe/xe_pm.c: In function 'xe_pm_suspending_or_resuming': drivers/gpu/drm/xe/xe_pm.c:606:26: error: 'struct dev_pm_info' has no member named 'runtime_status' 606 | return dev->power.runtime_status == RPM_SUSPENDING || | ^ drivers/gpu/drm/xe/xe_pm.c:607:27: error: 'struct dev_pm_info' has no member named 'runtime_status' 607 | dev->power.runtime_status == RPM_RESUMING; | ^ drivers/gpu/drm/xe/xe_pm.c:608:1: error: control reaches end of non-void function [-Werror=return-type] Add an #ifdef check to avoid the build regression. 
Fixes: cb85e39dc5d1 ("drm/xe: Suppress missing outer rpm protection warning") Reviewed-by: Rodrigo Vivi Signed-off-by: Arnd Bergmann Link: https://patchwork.freedesktop.org/patch/msgid/20240909202521.1018439-1-arnd@kernel.org Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 9c59a30d7646..a3d1509066f7 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -601,10 +601,14 @@ bool xe_pm_runtime_get_if_in_use(struct xe_device *xe) */ static bool xe_pm_suspending_or_resuming(struct xe_device *xe) { +#ifdef CONFIG_PM struct device *dev = xe->drm.dev; return dev->power.runtime_status == RPM_SUSPENDING || dev->power.runtime_status == RPM_RESUMING; +#else + return false; +#endif } /** -- 2.50.1 From 5a90b60db5e6765367d9bb2c03f66b14d72946d2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Hellstr=C3=B6m?= Date: Mon, 9 Sep 2024 10:56:54 +0200 Subject: [PATCH 13/16] drm/xe: Add a xe_bo subtest for shrinking / swapping MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add a subtest that tries to allocate twice the amount of buffer object memory available, write data to it and then read all the data back verifying data integrity. In order to be able to do this on systems that have no or not enough swap-space available, allocate some memory as purgeable, and introduce a function to purge such memory from the TTM swap_notify path. this test is intended to add test coverage to the current bo swap path and upcoming shrinking path. The test has previously been part of the xe bo shrinker series. v2: - Skip test if the execution time is expected to be too long. - Minor code cleanups. v3: - Print random seed. 
(Matthew Auld) Cc: Rodrigo Vivi Cc: Matthew Brost Cc: Matthew Auld Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20240909085654.5064-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/tests/xe_bo.c | 239 +++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_bo.c | 32 ++++- 2 files changed, 270 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 8dac069483e8..7d3fd720478b 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -6,6 +6,12 @@ #include #include +#include +#include +#include + +#include + #include "tests/xe_kunit_helpers.h" #include "tests/xe_pci_test.h" #include "tests/xe_test.h" @@ -358,9 +364,242 @@ static void xe_bo_evict_kunit(struct kunit *test) evict_test_run_device(xe); } +struct xe_bo_link { + struct list_head link; + struct xe_bo *bo; + u32 val; +}; + +#define XE_BO_SHRINK_SIZE ((unsigned long)SZ_64M) + +static int shrink_test_fill_random(struct xe_bo *bo, struct rnd_state *state, + struct xe_bo_link *link) +{ + struct iosys_map map; + int ret = ttm_bo_vmap(&bo->ttm, &map); + size_t __maybe_unused i; + + if (ret) + return ret; + + for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) { + u32 val = prandom_u32_state(state); + + iosys_map_wr(&map, i, u32, val); + if (i == 0) + link->val = val; + } + + ttm_bo_vunmap(&bo->ttm, &map); + return 0; +} + +static bool shrink_test_verify(struct kunit *test, struct xe_bo *bo, + unsigned int bo_nr, struct rnd_state *state, + struct xe_bo_link *link) +{ + struct iosys_map map; + int ret = ttm_bo_vmap(&bo->ttm, &map); + size_t i; + bool failed = false; + + if (ret) { + KUNIT_FAIL(test, "Error mapping bo %u for content check.\n", bo_nr); + return true; + } + + for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) { + u32 val = prandom_u32_state(state); + + if (iosys_map_rd(&map, i, u32) != val) { + KUNIT_FAIL(test, "Content not preserved, bo %u 
offset 0x%016llx", + bo_nr, (unsigned long long)i); + kunit_info(test, "Failed value is 0x%08x, recorded 0x%08x\n", + (unsigned int)iosys_map_rd(&map, i, u32), val); + if (i == 0 && val != link->val) + kunit_info(test, "Looks like PRNG is out of sync.\n"); + failed = true; + break; + } + } + + ttm_bo_vunmap(&bo->ttm, &map); + + return failed; +} + +/* + * Try to create system bos corresponding to twice the amount + * of available system memory to test shrinker functionality. + * If no swap space is available to accommodate the + * memory overcommit, mark bos purgeable. + */ +static int shrink_test_run_device(struct xe_device *xe) +{ + struct kunit *test = kunit_get_current_test(); + LIST_HEAD(bos); + struct xe_bo_link *link, *next; + struct sysinfo si; + size_t ram, ram_and_swap, purgeable, alloced, to_alloc, limit; + unsigned int interrupted = 0, successful = 0, count = 0; + struct rnd_state prng; + u64 rand_seed; + bool failed = false; + + rand_seed = get_random_u64(); + prandom_seed_state(&prng, rand_seed); + kunit_info(test, "Random seed is 0x%016llx.\n", + (unsigned long long)rand_seed); + + /* Skip if execution time is expected to be too long. */ + + limit = SZ_32G; + /* IGFX with flat CCS needs to copy when swapping / shrinking */ + if (!IS_DGFX(xe) && xe_device_has_flat_ccs(xe)) + limit = SZ_16G; + + si_meminfo(&si); + ram = (size_t)si.freeram * si.mem_unit; + if (ram > limit) { + kunit_skip(test, "Too long expected execution time.\n"); + return 0; + } + to_alloc = ram * 2; + + ram_and_swap = ram + get_nr_swap_pages() * PAGE_SIZE; + if (to_alloc > ram_and_swap) + purgeable = to_alloc - ram_and_swap; + purgeable += purgeable / 5; + + kunit_info(test, "Free ram is %lu bytes. 
Will allocate twice of that.\n", + (unsigned long)ram); + for (alloced = 0; alloced < to_alloc; alloced += XE_BO_SHRINK_SIZE) { + struct xe_bo *bo; + unsigned int mem_type; + struct xe_ttm_tt *xe_tt; + + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) { + KUNIT_FAIL(test, "Unexpected link allocation failure\n"); + failed = true; + break; + } + + INIT_LIST_HEAD(&link->link); + + /* We can create bos using WC caching here. But it is slower. */ + bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE, + DRM_XE_GEM_CPU_CACHING_WB, + XE_BO_FLAG_SYSTEM); + if (IS_ERR(bo)) { + if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) && + bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS)) + KUNIT_FAIL(test, "Error creating bo: %pe\n", bo); + kfree(link); + failed = true; + break; + } + xe_bo_lock(bo, false); + xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm); + + /* + * Allocate purgeable bos first, because if we do it the + * other way around, they may not be subject to swapping... + */ + if (alloced < purgeable) { + xe_tt->purgeable = true; + bo->ttm.priority = 0; + } else { + int ret = shrink_test_fill_random(bo, &prng, link); + + if (ret) { + xe_bo_unlock(bo); + xe_bo_put(bo); + KUNIT_FAIL(test, "Error filling bo with random data: %pe\n", + ERR_PTR(ret)); + kfree(link); + failed = true; + break; + } + } + + mem_type = bo->ttm.resource->mem_type; + xe_bo_unlock(bo); + link->bo = bo; + list_add_tail(&link->link, &bos); + + if (mem_type != XE_PL_TT) { + KUNIT_FAIL(test, "Bo in incorrect memory type: %u\n", + bo->ttm.resource->mem_type); + failed = true; + } + cond_resched(); + if (signal_pending(current)) + break; + } + + /* + * Read back and destroy bos. Reset the pseudo-random seed to get an + * identical pseudo-random number sequence for readback. 
+ */ + prandom_seed_state(&prng, rand_seed); + list_for_each_entry_safe(link, next, &bos, link) { + static struct ttm_operation_ctx ctx = {.interruptible = true}; + struct xe_bo *bo = link->bo; + struct xe_ttm_tt *xe_tt; + int ret; + + count++; + if (!signal_pending(current) && !failed) { + bool purgeable, intr = false; + + xe_bo_lock(bo, NULL); + + /* xe_tt->purgeable is cleared on validate. */ + xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm); + purgeable = xe_tt->purgeable; + do { + ret = ttm_bo_validate(&bo->ttm, &tt_placement, &ctx); + if (ret == -EINTR) + intr = true; + } while (ret == -EINTR && !signal_pending(current)); + + if (!ret && !purgeable) + failed = shrink_test_verify(test, bo, count, &prng, link); + + xe_bo_unlock(bo); + if (ret) { + KUNIT_FAIL(test, "Validation failed: %pe\n", + ERR_PTR(ret)); + failed = true; + } else if (intr) { + interrupted++; + } else { + successful++; + } + } + xe_bo_put(link->bo); + list_del(&link->link); + kfree(link); + } + kunit_info(test, "Readbacks interrupted: %u successful: %u\n", + interrupted, successful); + + return 0; +} + +static void xe_bo_shrink_kunit(struct kunit *test) +{ + struct xe_device *xe = test->priv; + + shrink_test_run_device(xe); +} + static struct kunit_case xe_bo_tests[] = { KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param), KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param), + KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param, + {.speed = KUNIT_SPEED_SLOW}), {} }; diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 3a9653e33481..0dc8ebcf8389 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -283,6 +283,8 @@ struct xe_ttm_tt { struct device *dev; struct sg_table sgt; struct sg_table *sg; + /** @purgeable: Whether the content of the pages of @ttm is purgeable. 
*/ + bool purgeable; }; static int xe_tt_map_sg(struct ttm_tt *tt) @@ -761,7 +763,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, if (xe_rpm_reclaim_safe(xe)) { /* * We might be called through swapout in the validation path of - * another TTM device, so unconditionally acquire rpm here. + * another TTM device, so acquire rpm here. */ xe_pm_runtime_get(xe); } else { @@ -1082,6 +1084,33 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo) } } +static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx) +{ + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + + if (ttm_bo->ttm) { + struct ttm_placement place = {}; + int ret = ttm_bo_validate(ttm_bo, &place, ctx); + + drm_WARN_ON(&xe->drm, ret); + } +} + +static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo) +{ + struct ttm_operation_ctx ctx = { + .interruptible = false + }; + + if (ttm_bo->ttm) { + struct xe_ttm_tt *xe_tt = + container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm); + + if (xe_tt->purgeable) + xe_ttm_bo_purge(ttm_bo, &ctx); + } +} + const struct ttm_device_funcs xe_ttm_funcs = { .ttm_tt_create = xe_ttm_tt_create, .ttm_tt_populate = xe_ttm_tt_populate, @@ -1094,6 +1123,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { .release_notify = xe_ttm_bo_release_notify, .eviction_valuable = ttm_bo_eviction_valuable, .delete_mem_notify = xe_ttm_bo_delete_mem_notify, + .swap_notify = xe_ttm_bo_swap_notify, }; static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) -- 2.50.1 From 9db969b36b2fbca13ad4088aff725ebd5e8142f5 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Wed, 4 Sep 2024 15:43:33 +0530 Subject: [PATCH 14/16] drm/xe/xe2hpg: Add Wa_15016589081 Wa_15016589081 applies to xe2_hpg renderCS V2(Gustavo) - rename bit macro Signed-off-by: Tejas Upadhyay Reviewed-by: Gustavo Sousa Reviewed-by: Himal Prasad Ghimiray Link: 
https://patchwork.freedesktop.org/patch/msgid/20240904101333.2049655-1-tejas.upadhyay@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index cbead3f75fad..cf21de3adca6 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -105,6 +105,7 @@ #define CHICKEN_RASTER_1 XE_REG_MCR(0x6204, XE_REG_OPTION_MASKED) #define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) +#define DIS_CLIP_NEGATIVE_BOUNDING_BOX REG_BIT(6) #define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED) #define TBIMR_FAST_CLIP REG_BIT(5) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 28b7f95b6c2f..d424992514a4 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -733,6 +733,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, + { XE_RTP_NAME("15016589081"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) + }, {} }; -- 2.50.1 From 474f64cb988a410db8a0b779d6afdaa2a7fc5759 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 5 Sep 2024 17:00:49 +0200 Subject: [PATCH 15/16] drm/xe: Fix missing conversion to xe_display_pm_runtime_resume This error path was missed when converting away from xe_display_pm_resume with second argument. 
Fixes: 66a0f6b9f5fc ("drm/xe/display: handle HPD polling in display runtime suspend/resume") Cc: Arun R Murthy Cc: Vinod Govindapillai Signed-off-by: Maarten Lankhorst Reviewed-by: Lucas De Marchi Reviewed-by: Vinod Govindapillai Link: https://patchwork.freedesktop.org/patch/msgid/20240905150052.174895-2-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/xe/xe_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index a3d1509066f7..7cf2160fe040 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -416,7 +416,7 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_display_pm_suspend_late(xe); out: if (err) - xe_display_pm_resume(xe, true); + xe_display_pm_runtime_resume(xe); xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return err; -- 2.50.1 From f90491d4b64e302e940133103d3d9908e70e454f Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 5 Sep 2024 17:00:50 +0200 Subject: [PATCH 16/16] drm/xe: Remove runtime argument from display s/r functions The previous change ensures that pm_suspend is only called when suspending or resuming. This ensures no further bugs like those in the previous commit. 
Signed-off-by: Maarten Lankhorst Reviewed-by: Lucas De Marchi Reviewed-by: Vinod Govindapillai Link: https://patchwork.freedesktop.org/patch/msgid/20240905150052.174895-3-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/xe/display/xe_display.c | 53 +++++++++++++++---------- drivers/gpu/drm/xe/display/xe_display.h | 8 ++-- drivers/gpu/drm/xe/xe_pm.c | 6 +-- 3 files changed, 39 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 75736faf2a80..1c25c4f6a53b 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -309,18 +309,7 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe) } /* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */ -void xe_display_pm_runtime_suspend(struct xe_device *xe) -{ - if (!xe->info.probe_display) - return; - - if (xe->d3cold.allowed) - xe_display_pm_suspend(xe, true); - - intel_hpd_poll_enable(xe); -} - -void xe_display_pm_suspend(struct xe_device *xe, bool runtime) +static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime) { struct intel_display *display = &xe->display; bool s2idle = suspend_to_idle(); @@ -355,26 +344,31 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) intel_dmc_suspend(xe); } -void xe_display_pm_suspend_late(struct xe_device *xe) +void xe_display_pm_suspend(struct xe_device *xe) +{ + __xe_display_pm_suspend(xe, false); +} + +void xe_display_pm_runtime_suspend(struct xe_device *xe) { - bool s2idle = suspend_to_idle(); if (!xe->info.probe_display) return; - intel_power_domains_suspend(xe, s2idle); + if (xe->d3cold.allowed) + __xe_display_pm_suspend(xe, true); - intel_display_power_suspend_late(xe); + intel_hpd_poll_enable(xe); } -void xe_display_pm_runtime_resume(struct xe_device *xe) +void xe_display_pm_suspend_late(struct xe_device *xe) { + bool s2idle = suspend_to_idle(); if (!xe->info.probe_display) return; - 
intel_hpd_poll_disable(xe); + intel_power_domains_suspend(xe, s2idle); - if (xe->d3cold.allowed) - xe_display_pm_resume(xe, true); + intel_display_power_suspend_late(xe); } void xe_display_pm_resume_early(struct xe_device *xe) @@ -387,7 +381,7 @@ void xe_display_pm_resume_early(struct xe_device *xe) intel_power_domains_resume(xe); } -void xe_display_pm_resume(struct xe_device *xe, bool runtime) +static void __xe_display_pm_resume(struct xe_device *xe, bool runtime) { struct intel_display *display = &xe->display; @@ -421,6 +415,23 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime) intel_power_domains_enable(xe); } +void xe_display_pm_resume(struct xe_device *xe) +{ + __xe_display_pm_resume(xe, false); +} + +void xe_display_pm_runtime_resume(struct xe_device *xe) +{ + if (!xe->info.probe_display) + return; + + intel_hpd_poll_disable(xe); + + if (xe->d3cold.allowed) + __xe_display_pm_resume(xe, true); +} + + static void display_device_remove(struct drm_device *dev, void *arg) { struct xe_device *xe = arg; diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h index 53d727fd792b..bed55fd26f30 100644 --- a/drivers/gpu/drm/xe/display/xe_display.h +++ b/drivers/gpu/drm/xe/display/xe_display.h @@ -34,10 +34,10 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir); void xe_display_irq_reset(struct xe_device *xe); void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt); -void xe_display_pm_suspend(struct xe_device *xe, bool runtime); +void xe_display_pm_suspend(struct xe_device *xe); void xe_display_pm_suspend_late(struct xe_device *xe); void xe_display_pm_resume_early(struct xe_device *xe); -void xe_display_pm_resume(struct xe_device *xe, bool runtime); +void xe_display_pm_resume(struct xe_device *xe); void xe_display_pm_runtime_suspend(struct xe_device *xe); void xe_display_pm_runtime_resume(struct xe_device *xe); @@ -65,10 +65,10 @@ static inline void xe_display_irq_enable(struct 
xe_device *xe, u32 gu_misc_iir) static inline void xe_display_irq_reset(struct xe_device *xe) {} static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {} -static inline void xe_display_pm_suspend(struct xe_device *xe, bool runtime) {} +static inline void xe_display_pm_suspend(struct xe_device *xe) {} static inline void xe_display_pm_suspend_late(struct xe_device *xe) {} static inline void xe_display_pm_resume_early(struct xe_device *xe) {} -static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {} +static inline void xe_display_pm_resume(struct xe_device *xe) {} static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {} static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {} diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 7cf2160fe040..33eb039053e4 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -123,7 +123,7 @@ int xe_pm_suspend(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); - xe_display_pm_suspend(xe, false); + xe_display_pm_suspend(xe); /* FIXME: Super racey... */ err = xe_bo_evict_all(xe); @@ -133,7 +133,7 @@ int xe_pm_suspend(struct xe_device *xe) for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); if (err) { - xe_display_pm_resume(xe, false); + xe_display_pm_resume(xe); goto err; } } @@ -187,7 +187,7 @@ int xe_pm_resume(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_resume(gt); - xe_display_pm_resume(xe, false); + xe_display_pm_resume(xe); err = xe_bo_restore_user(xe); if (err) -- 2.50.1