From 8c0aff7d92e2be25717669eb65a81a89740a24f2 Mon Sep 17 00:00:00 2001 From: Satyanarayana K V P Date: Mon, 24 Feb 2025 15:58:06 +0530 Subject: [PATCH 01/16] drm/xe/pf: Create a link between PF and VF devices MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When both PF and VF devices are enabled on the host, they resume simultaneously during system resume. However, the PF must finish provisioning the VF before any VFs can successfully resume. Establish a parent-child device link between the PF and VF devices to ensure the correct order of resumption. V4 -> V5: - Added missing break in the error condition. V3 -> V4: - Made xe_pci_pf_get_vf_dev() as a static function and updated input parameter types. - Updated xe_sriov_warn() to xe_sriov_abort() when VF device cannot be found. V2 -> V3: - Added function documentation for xe_pci_pf_get_vf_dev(). - Added assertion if not called from PF. V1 -> V2: - Added a helper function to get VF pci_dev. - Updated xe_sriov_notice() to xe_sriov_warn() if vf pci_dev is not found. Signed-off-by: Satyanarayana K V P Cc: Michał Wajdeczko Cc: Michał Winiarski Cc: Piotr Piórkowski Reviewed-by: Piotr Piorkowski Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20250224102807.11065-2-satyanarayana.k.v.p@intel.com --- drivers/gpu/drm/xe/xe_pci_sriov.c | 51 +++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c index aaceee748287..09ee8a06fe2e 100644 --- a/drivers/gpu/drm/xe/xe_pci_sriov.c +++ b/drivers/gpu/drm/xe/xe_pci_sriov.c @@ -62,6 +62,55 @@ static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs) xe_gt_sriov_pf_control_trigger_flr(gt, n); } +static struct pci_dev *xe_pci_pf_get_vf_dev(struct xe_device *xe, unsigned int vf_id) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + + xe_assert(xe, IS_SRIOV_PF(xe)); + + /* caller must use pci_dev_put() */ + return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus), + pdev->bus->number, + pci_iov_virtfn_devfn(pdev, vf_id)); +} + +static void pf_link_vfs(struct xe_device *xe, int num_vfs) +{ + struct pci_dev *pdev_pf = to_pci_dev(xe->drm.dev); + struct device_link *link; + struct pci_dev *pdev_vf; + unsigned int n; + + /* + * When both PF and VF devices are enabled on the host, during system + * resume they are resuming in parallel. + * + * But PF has to complete the provision of VF first to allow any VFs to + * successfully resume. + * + * Create a parent-child device link between PF and VF devices that will + * enforce correct resume order. 
+	 */
+	for (n = 1; n <= num_vfs; n++) {
+		pdev_vf = xe_pci_pf_get_vf_dev(xe, n - 1);
+
+		/* unlikely, something weird is happening, abort */
+		if (!pdev_vf) {
+			xe_sriov_err(xe, "Cannot find VF%u device, aborting link%s creation!\n",
+				     n, str_plural(num_vfs));
+			break;
+		}
+
+		link = device_link_add(&pdev_vf->dev, &pdev_pf->dev,
+				       DL_FLAG_AUTOREMOVE_CONSUMER);
+		/* unlikely and harmless, continue with other VFs */
+		if (!link)
+			xe_sriov_notice(xe, "Failed linking VF%u\n", n);
+
+		pci_dev_put(pdev_vf);
+	}
+}
+
 static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -92,6 +141,8 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
 	if (err < 0)
 		goto failed;
 
+	pf_link_vfs(xe, num_vfs);
+
 	xe_sriov_info(xe, "Enabled %u of %u VF%s\n",
 		      num_vfs, total_vfs, str_plural(total_vfs));
 	return num_vfs;
-- 
2.50.1

From ba757a65d2a28d46a8ccf50538f4f05036983f1b Mon Sep 17 00:00:00 2001
From: Satyanarayana K V P
Date: Mon, 24 Feb 2025 15:58:07 +0530
Subject: [PATCH 02/16] drm/xe/vf: Retry sending MMIO request to GUC on timeout error
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Add support to allow retrying the sending of MMIO requests from the VF
to the GuC in the event of an error.

During the suspend/resume process, VFs begin resuming only after the PF
has resumed. Although the PF resumes, the GuC reset and provisioning
occur later in a separate worker process. When there are a large number
of VFs, some may attempt to resume before the PF has completed its
provisioning. Therefore, if an MMIO request from a VF fails during this
period, we retry sending the request up to GUC_RESET_VF_STATE_RETRY_MAX
times, which is set to a maximum of 10 attempts.

Signed-off-by: Satyanarayana K V P
Cc: Michał Wajdeczko
Cc: Michał Winiarski
Cc: Piotr Piórkowski
Reviewed-by: Piotr Piorkowski
Signed-off-by: Michal Wajdeczko
Link: https://patchwork.freedesktop.org/patch/msgid/20250224102807.11065-3-satyanarayana.k.v.p@intel.com
---
 drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 4831549da319..a439261bf4d7 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -47,12 +47,19 @@ static int guc_action_vf_reset(struct xe_guc *guc)
 	return ret > 0 ? -EPROTO : ret;
 }
 
+#define GUC_RESET_VF_STATE_RETRY_MAX	10
 static int vf_reset_guc_state(struct xe_gt *gt)
 {
+	unsigned int retry = GUC_RESET_VF_STATE_RETRY_MAX;
 	struct xe_guc *guc = &gt->uc.guc;
 	int err;
 
-	err = guc_action_vf_reset(guc);
+	do {
+		err = guc_action_vf_reset(guc);
+		if (!err || err != -ETIMEDOUT)
+			break;
+	} while (--retry);
+
 	if (unlikely(err))
 		xe_gt_sriov_err(gt, "Failed to reset GuC state (%pe)\n", ERR_PTR(err));
 	return err;
-- 
2.50.1

From 25d434cef791e03cf40680f5441b576c639bfa84 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Thu, 27 Feb 2025 10:13:00 +0000
Subject: [PATCH 03/16] drm/xe: Fix GT "for each engine" workarounds

Any rules using engine matching are currently broken because RTP
processing happens too early in init, before the list of hardware
engines has been initialised.

Fix this by moving workaround processing to later in the driver probe
sequence, to just before the processed list is used for the first time.
Looking at the debugfs gt0/workarounds on ADL-P, we notice that
14011060649 should be present. Before the patch we see:

  GT Workarounds
    14011059788
    14015795083

And with the patch:

  GT Workarounds
    14011060649
    14011059788
    14015795083

Signed-off-by: Tvrtko Ursulin
Cc: Lucas De Marchi
Cc: Matt Roper
Cc: stable@vger.kernel.org # v6.11+
Reviewed-by: Lucas De Marchi
Link: https://patchwork.freedesktop.org/patch/msgid/20250227101304.46660-2-tvrtko.ursulin@igalia.com
Signed-off-by: Lucas De Marchi
---
 drivers/gpu/drm/xe/xe_gt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 5bd8dfdce300..90ff51ad76a6 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -362,9 +362,7 @@ int xe_gt_init_early(struct xe_gt *gt)
 	if (err)
 		return err;
 
-	xe_wa_process_gt(gt);
 	xe_wa_process_oob(gt);
-	xe_tuning_process_gt(gt);
 	xe_force_wake_init_gt(gt, gt_to_fw(gt));
 	spin_lock_init(&gt->global_invl_lock);
 
@@ -451,6 +449,8 @@ static int all_fw_domain_init(struct xe_gt *gt)
 	}
 
 	xe_gt_mcr_set_implicit_defaults(gt);
+	xe_wa_process_gt(gt);
+	xe_tuning_process_gt(gt);
 	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
 
 	err = xe_gt_clock_init(gt);
-- 
2.50.1

From d9b5d83c5a4d720af6ddbefe2825c78f0325a3fd Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Thu, 27 Feb 2025 10:13:01 +0000
Subject: [PATCH 04/16] drm/xe/xelp: Move Wa_16011163337 from tunings to workarounds

The workaround database specifies 16011163337 as a workaround, so let's
move it there.

Signed-off-by: Tvrtko Ursulin
Cc: Lucas De Marchi
Cc: Matt Roper
Cc: Gustavo Sousa
Reviewed-by: Lucas De Marchi
Link: https://patchwork.freedesktop.org/patch/msgid/20250227101304.46660-3-tvrtko.ursulin@igalia.com
Signed-off-by: Lucas De Marchi
---
 drivers/gpu/drm/xe/xe_tuning.c | 8 --------
 drivers/gpu/drm/xe/xe_wa.c     | 7 +++++++
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index d449de0fb6ec..3c78f3d71559 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -97,14 +97,6 @@ static const struct xe_rtp_entry_sr engine_tunings[] = {
 };
 
 static const struct xe_rtp_entry_sr lrc_tunings[] = {
-	{ XE_RTP_NAME("Tuning: ganged timer, also known as 16011163337"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
-	  /* read verification is ignored due to 1608008084. */
-	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(FF_MODE2,
-						FF_MODE2_GS_TIMER_MASK,
-						FF_MODE2_GS_TIMER_224))
-	},
-
 	/* DG2 */
 
 	{ XE_RTP_NAME("Tuning: L3 cache"),
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index b797c863bbe5..3e32fd4353a0 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -629,6 +629,13 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 };
 
 static const struct xe_rtp_entry_sr lrc_was[] = {
+	{ XE_RTP_NAME("16011163337"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  /* read verification is ignored due to 1608008084. */
+	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(FF_MODE2,
+						FF_MODE2_GS_TIMER_MASK,
+						FF_MODE2_GS_TIMER_224))
+	},
 	{ XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
 	  XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN3,
-- 
2.50.1

From 96f18263140266d737e931530cb759d14858b0df Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Thu, 27 Feb 2025 10:13:02 +0000
Subject: [PATCH 05/16] drm/xe/xelp: Add Wa_1604555607

According to the i915 code base, and as confirmed in the workaround
database, apart from setting the GS timer, all XeLP platforms should
also set the TDS timer.

Signed-off-by: Tvrtko Ursulin
References: 2b5298b0aa09 ("drm/i915/gen12: Add recommended hardware tuning value")
Cc: Lucas De Marchi
Cc: Matt Roper
Cc: Gustavo Sousa
Reviewed-by: Matt Roper
Reviewed-by: Lucas De Marchi
Link: https://patchwork.freedesktop.org/patch/msgid/20250227101304.46660-4-tvrtko.ursulin@igalia.com
Signed-off-by: Lucas De Marchi
---
 drivers/gpu/drm/xe/xe_wa.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 3e32fd4353a0..55eb453f4b1f 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -636,6 +636,13 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 						FF_MODE2_GS_TIMER_MASK,
 						FF_MODE2_GS_TIMER_224))
 	},
+	{ XE_RTP_NAME("1604555607"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), ENGINE_CLASS(RENDER)),
+	  /* read verification is ignored due to 1608008084. */
+	  XE_RTP_ACTIONS(FIELD_SET_NO_READ_MASK(FF_MODE2,
+						FF_MODE2_TDS_TIMER_MASK,
+						FF_MODE2_TDS_TIMER_128))
+	},
 	{ XE_RTP_NAME("1409342910, 14010698770, 14010443199, 1408979724, 1409178076, 1409207793, 1409217633, 1409252684, 1409347922, 1409142259"),
 	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210)),
 	  XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN3,
-- 
2.50.1

From 4f122372579d28e5ac74f3c222c173466ae5951d Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Thu, 27 Feb 2025 10:13:03 +0000
Subject: [PATCH 06/16] drm/xe/xelp: L3 recommended hashing mask

According to the i915 codebase, xe failed to set the recommended
performance tuning for L3 hashing, which is applicable to all legacy
XeLP platforms. Let's add it.

v2:
 * Rename prefixes to XELP_.
 * Tweak version end point.
v3:
 * Add bspec tag.
 * Tweak version range.
v4:
 * Move from LRC to engine tunings list.
v5:
 * Drop L3 Cache Control comment.
Bspec: 31870 Signed-off-by: Tvrtko Ursulin References: c46c5fb725be ("drm/i915/gen12: Apply recommended L3 hashing mask") Cc: Lucas De Marchi Cc: Matt Roper Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20250227101304.46660-5-tvrtko.ursulin@igalia.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 4 +++- drivers/gpu/drm/xe/xe_tuning.c | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d08dd437172f..da1f198ac107 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -364,10 +364,12 @@ #define FORCEWAKE_MEDIA_VEBOX(n) XE_REG(0xa560 + (n) * 4) #define FORCEWAKE_GSC XE_REG(0xa618) +#define XELP_GARBCNTL XE_REG(0xb004) +#define XELP_BUS_HASH_CTL_BIT_EXC REG_BIT(7) + #define XEHPC_LNCFMISCCFGREG0 XE_REG_MCR(0xb01c, XE_REG_OPTION_MASKED) #define XEHPC_OVRLSCCC REG_BIT(0) -/* L3 Cache Control */ #define LNCFCMOCS_REG_COUNT 32 #define XELP_LNCFCMOCS(i) XE_REG(0xb020 + (i) * 4) #define XEHP_LNCFCMOCS(i) XE_REG_MCR(0xb020 + (i) * 4) diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 3c78f3d71559..551c2f308e1c 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -88,6 +88,11 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { }; static const struct xe_rtp_entry_sr engine_tunings[] = { + { XE_RTP_NAME("Tuning: L3 Hashing Mask"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1210), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(CLR(XELP_GARBCNTL, XELP_BUS_HASH_CTL_BIT_EXC)) + }, { XE_RTP_NAME("Tuning: Set Indirect State Override"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274), ENGINE_CLASS(RENDER)), -- 2.50.1 From 067a974fd8a9ea43f97ca184e2768b583f2f8c44 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 27 Feb 2025 10:13:04 +0000 Subject: [PATCH 07/16] drm/xe: Add performance tunings to debugfs Add a list of active tunings to debugfs, analogous to the existing list of workarounds. Rationale being that it seems to make sense to either have both or none. 
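For illustration, the new file is expected to mirror the existing
workarounds file. On a hypothetical platform the output would look
roughly as below (the path follows the existing gt0/workarounds entry;
the tuning names are real entries from the tables touched in this
series, but the list is not exhaustive):

  $ cat /sys/kernel/debug/dri/0/gt0/tunings
  GT Tunings
          ...
  Engine Tunings
          Tuning: L3 Hashing Mask
          Tuning: Set Indirect State Override
  LRC Tunings
          Tuning: L3 cache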
Signed-off-by: Tvrtko Ursulin Cc: Lucas De Marchi Cc: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20250227101304.46660-6-tvrtko.ursulin@igalia.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 4 ++ drivers/gpu/drm/xe/xe_gt_debugfs.c | 11 ++++++ drivers/gpu/drm/xe/xe_gt_types.h | 10 +++++ drivers/gpu/drm/xe/xe_tuning.c | 59 ++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_tuning.h | 3 ++ 5 files changed, 87 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 90ff51ad76a6..10a9e3c72b36 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -362,6 +362,10 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; + err = xe_tuning_init(gt); + if (err) + return err; + xe_wa_process_oob(gt); xe_force_wake_init_gt(gt, gt_to_fw(gt)); diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index e7792858b1e4..2d63a69cbfa3 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -30,6 +30,7 @@ #include "xe_reg_sr.h" #include "xe_reg_whitelist.h" #include "xe_sriov.h" +#include "xe_tuning.h" #include "xe_uc_debugfs.h" #include "xe_wa.h" @@ -217,6 +218,15 @@ static int workarounds(struct xe_gt *gt, struct drm_printer *p) return 0; } +static int tunings(struct xe_gt *gt, struct drm_printer *p) +{ + xe_pm_runtime_get(gt_to_xe(gt)); + xe_tuning_dump(gt, p); + xe_pm_runtime_put(gt_to_xe(gt)); + + return 0; +} + static int pat(struct xe_gt *gt, struct drm_printer *p) { xe_pm_runtime_get(gt_to_xe(gt)); @@ -300,6 +310,7 @@ static const struct drm_info_list debugfs_list[] = { {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info}, {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds}, + {"tunings", .show = xe_gt_debugfs_simple_show, .data = tunings}, {"pat", .show = xe_gt_debugfs_simple_show, .data = pat}, {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs}, {"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc}, diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index f67474e06fb3..e3cfb026ac88 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -413,6 +413,16 @@ struct xe_gt { bool oob_initialized; } wa_active; + /** @tuning_active: keep track of active tunings */ + struct { + /** @tuning_active.gt: bitmap with active GT tunings */ + unsigned long *gt; + /** @tuning_active.engine: bitmap with active engine tunings */ + unsigned long *engine; + /** @tuning_active.lrc: bitmap with active LRC tunings */ + unsigned long *lrc; + } tuning_active; + /** @user_engines: engines present in GT and available to userspace */ struct { /** diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 551c2f308e1c..77bc958f5a42 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -7,6 +7,8 @@ #include +#include + #include "regs/xe_gt_regs.h" #include "xe_gt_types.h" #include "xe_platform_types.h" @@ -140,10 +142,44 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = { {} }; +/** + * xe_tuning_init - initialize gt with tunings bookkeeping + * @gt: GT instance to initialize + * + * Returns 0 for success, negative error code otherwise. 
+ */ +int xe_tuning_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + size_t n_lrc, n_engine, n_gt, total; + unsigned long *p; + + n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_tunings)); + n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_tunings)); + n_lrc = BITS_TO_LONGS(ARRAY_SIZE(lrc_tunings)); + total = n_gt + n_engine + n_lrc; + + p = drmm_kzalloc(&xe->drm, sizeof(*p) * total, GFP_KERNEL); + if (!p) + return -ENOMEM; + + gt->tuning_active.gt = p; + p += n_gt; + gt->tuning_active.engine = p; + p += n_engine; + gt->tuning_active.lrc = p; + + return 0; +} +ALLOW_ERROR_INJECTION(xe_tuning_init, ERRNO); /* See xe_pci_probe() */ + void xe_tuning_process_gt(struct xe_gt *gt) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt); + xe_rtp_process_ctx_enable_active_tracking(&ctx, + gt->tuning_active.gt, + ARRAY_SIZE(gt_tunings)); xe_rtp_process_to_sr(&ctx, gt_tunings, >->reg_sr); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); @@ -152,6 +188,9 @@ void xe_tuning_process_engine(struct xe_hw_engine *hwe) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); + xe_rtp_process_ctx_enable_active_tracking(&ctx, + hwe->gt->tuning_active.engine, + ARRAY_SIZE(engine_tunings)); xe_rtp_process_to_sr(&ctx, engine_tunings, &hwe->reg_sr); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine); @@ -168,5 +207,25 @@ void xe_tuning_process_lrc(struct xe_hw_engine *hwe) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); + xe_rtp_process_ctx_enable_active_tracking(&ctx, + hwe->gt->tuning_active.lrc, + ARRAY_SIZE(lrc_tunings)); xe_rtp_process_to_sr(&ctx, lrc_tunings, &hwe->reg_lrc); } + +void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p) +{ + size_t idx; + + drm_printf(p, "GT Tunings\n"); + for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings)) + drm_printf_indent(p, 1, "%s\n", gt_tunings[idx].name); + + drm_printf(p, "\nEngine Tunings\n"); + for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings)) + drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name); + + drm_printf(p, "\nLRC Tunings\n"); + for_each_set_bit(idx, gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings)) + drm_printf_indent(p, 1, "%s\n", lrc_tunings[idx].name); +} diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h index 4f9c3ac3b516..dd0d3ccc9c65 100644 --- a/drivers/gpu/drm/xe/xe_tuning.h +++ b/drivers/gpu/drm/xe/xe_tuning.h @@ -6,11 +6,14 @@ #ifndef _XE_TUNING_ #define _XE_TUNING_ +struct drm_printer; struct xe_gt; struct xe_hw_engine; +int xe_tuning_init(struct xe_gt *gt); void xe_tuning_process_gt(struct xe_gt *gt); void xe_tuning_process_engine(struct xe_hw_engine *hwe); void xe_tuning_process_lrc(struct xe_hw_engine *hwe); +void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p); #endif -- 2.50.1 From 5488bec96bccbd87335921338f8dc38b87db7d2c Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Fri, 28 Feb 2025 12:32:24 +0530 Subject: [PATCH 08/16] drm/xe/uapi: Use hint for guc to set GT frequency Allow user to provide a low latency hint. When set, KMD sends a hint to GuC which results in special handling for that process. SLPC will ramp the GT frequency aggressively every time it switches to this process. We need to enable the use of SLPC Compute strategy during init, but it will apply only to processes that set this bit during process creation. 
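For reference, userspace opts in per exec queue at creation time. A
minimal sketch, mirroring the example added to the uapi kerneldoc in
this patch (instance setup omitted; to_user_pointer() as in that
example):

  struct drm_xe_exec_queue_create exec_queue_create = {
          .flags = DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT,
          .extensions = 0,
          .vm_id = vm,
          .num_bb_per_exec = 1,
          .num_eng_per_bb = 1,
          .instances = to_user_pointer(&instance),
  };
  ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &exec_queue_create);

Userspace can probe for support via the
DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY bit in the config query, also
added here.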
Improvements with this approach are shown below.

Before:

:~$ NEOReadDebugKeys=1 EnableDirectSubmission=0 clpeak --kernel-latency
Platform: Intel(R) OpenCL Graphics
  Device: Intel(R) Graphics [0xe20b]
    Driver version  : 24.52.0 (Linux x64)
    Compute units   : 160
    Clock frequency : 2850 MHz
    Kernel launch latency : 283.16 us

After:

:~$ NEOReadDebugKeys=1 EnableDirectSubmission=0 clpeak --kernel-latency
Platform: Intel(R) OpenCL Graphics
  Device: Intel(R) Graphics [0xe20b]
    Driver version  : 24.52.0 (Linux x64)
    Compute units   : 160
    Clock frequency : 2850 MHz
    Kernel launch latency : 63.38 us

Compute PR: https://github.com/intel/compute-runtime/pull/794
Mesa PR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33214
IGT PR: https://patchwork.freedesktop.org/patch/639989/

V10(Lucas):
  - Remove doc from drm-uapi.rst
v9(Vinay):
  - remove extra line, align commit message
v8(Vinay):
  - Add separate example for using low latency hint
v7(Jose):
  - Update UMD PR
  - applicable to all gpus
V6:
  - init flags, remove redundant flags check (MAuld)
V5:
  - Move uapi doc to documentation and GuC ABI specific change (Rodrigo)
  - Modify logic to restrict exec queue flags (MAuld)
V4:
  - To make it clear, don't use exec queue word (Vinay)
  - Correct typo in description of flag (Jose/Vinay)
  - rename set_strategy api and replace ctx with exec queue (Vinay)
  - Start with 0th bit to identify user flags (Jose)
V3:
  - Convert user flag to kernel internal flag and use (Oak)
  - Support query config for use to check kernel support (Jose)
  - Don't need to take runtime pm (Vinay)
V2:
  - DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT 1 planned for other hint (Szymon)
  - Add motivation to description (Lucas)

Acked-by: Lucas De Marchi
Reviewed-by: Vinay Belgaumkar
Link: https://patchwork.freedesktop.org/patch/msgid/20250228070224.739295-2-tejas.upadhyay@intel.com
Signed-off-by: Tejas Upadhyay
---
 drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h |  3 +++
 drivers/gpu/drm/xe/xe_exec_queue.c            | 10 ++++++---
 drivers/gpu/drm/xe/xe_exec_queue_types.h      |  2 ++
 drivers/gpu/drm/xe/xe_guc_pc.c                | 16 ++++++++++++++
 drivers/gpu/drm/xe/xe_guc_submit.c            |  8 +++++++
 drivers/gpu/drm/xe/xe_query.c                 |  2 ++
 include/uapi/drm/xe_drm.h                     | 21 ++++++++++++++++++-
 7 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
index 85abe4f09ae2..b28c8fa061f7 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
@@ -174,6 +174,9 @@ struct slpc_task_state_data {
 	};
 } __packed;
 
+#define SLPC_CTX_FREQ_REQ_IS_COMPUTE		REG_BIT(28)
+#define SLPC_OPTIMIZED_STRATEGY_COMPUTE		REG_BIT(0)
+
 struct slpc_shared_data_header {
 	/* Total size in bytes of this shared buffer.
*/ u32 size; diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 23a9f519ce1c..7c5c003d3c40 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -604,11 +604,12 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, struct xe_tile *tile; struct xe_exec_queue *q = NULL; u32 logical_mask; + u32 flags = 0; u32 id; u32 len; int err; - if (XE_IOCTL_DBG(xe, args->flags) || + if (XE_IOCTL_DBG(xe, args->flags & ~DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) || XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; @@ -625,6 +626,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) return -EINVAL; + if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) + flags |= EXEC_QUEUE_FLAG_LOW_LATENCY; + if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { if (XE_IOCTL_DBG(xe, args->width != 1) || XE_IOCTL_DBG(xe, args->num_placements != 1) || @@ -633,8 +637,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, for_each_tile(tile, xe, id) { struct xe_exec_queue *new; - u32 flags = EXEC_QUEUE_FLAG_VM; + flags |= EXEC_QUEUE_FLAG_VM; if (id) flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD; @@ -680,7 +684,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, } q = xe_exec_queue_create(xe, vm, logical_mask, - args->width, hwe, 0, + args->width, hwe, flags, args->extensions); up_read(&vm->lock); xe_vm_put(vm); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 6eb7ff091534..cc1cffb5c87f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -85,6 +85,8 @@ struct xe_exec_queue { #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(3) /* kernel exec_queue only, set priority to highest level */ #define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(4) +/* flag to indicate low latency hint to guc */ +#define EXEC_QUEUE_FLAG_LOW_LATENCY BIT(5) /** * @flags: flags for this exec queue, should statically setup aside from ban diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 02409eedb914..25040efa043f 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -995,6 +995,17 @@ out: return ret; } +static int pc_action_set_strategy(struct xe_guc_pc *pc, u32 val) +{ + int ret = 0; + + ret = pc_action_set_param(pc, + SLPC_PARAM_STRATEGIES, + val); + + return ret; +} + /** * xe_guc_pc_start - Start GuC's Power Conservation component * @pc: Xe_GuC_PC instance @@ -1054,6 +1065,11 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) } ret = pc_action_setup_gucrc(pc, GUCRC_FIRMWARE_CONTROL); + if (ret) + goto out; + + /* Enable SLPC Optimized Strategy for compute */ + ret = pc_action_set_strategy(pc, SLPC_OPTIMIZED_STRATEGY_COMPUTE); out: xe_force_wake_put(gt_to_fw(gt), fw_ref); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index b6a2dd742ebd..b95934055f72 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -15,6 +15,7 @@ #include #include "abi/guc_actions_abi.h" +#include "abi/guc_actions_slpc_abi.h" #include "abi/guc_klvs_abi.h" #include "regs/xe_lrc_layout.h" #include "xe_assert.h" @@ -400,6 +401,7 @@ static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) 
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY) +MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY) #undef MAKE_EXEC_QUEUE_POLICY_ADD static const int xe_exec_queue_prio_to_guc[] = { @@ -414,14 +416,20 @@ static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) struct exec_queue_policy policy; enum xe_exec_queue_priority prio = q->sched_props.priority; u32 timeslice_us = q->sched_props.timeslice_us; + u32 slpc_exec_queue_freq_req = 0; u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); + if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY) + slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE; + __guc_exec_queue_policy_start_klv(&policy, q->guc->id); __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us); __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us); + __guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy, + slpc_exec_queue_freq_req); xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, __guc_exec_queue_policy_action_size(&policy), 0, 0); diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 781dd21682e5..ce2a2767de1a 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -340,6 +340,8 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query) if (xe_device_get_root_tile(xe)->mem.vram.usable_size) config->info[DRM_XE_QUERY_CONFIG_FLAGS] = DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM; + config->info[DRM_XE_QUERY_CONFIG_FLAGS] |= + DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY; config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 76a462fae05f..d1f0018342b6 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -393,6 +393,8 @@ struct drm_xe_query_mem_regions { * * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM - Flag is set if the device * has usable VRAM + * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY - Flag is set if the device + * has low latency hint support * - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment * required by this device, typically SZ_4K or SZ_64K * - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address @@ -409,6 +411,7 @@ struct drm_xe_query_config { #define DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID 0 #define DRM_XE_QUERY_CONFIG_FLAGS 1 #define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM (1 << 0) + #define DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY (1 << 1) #define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2 #define DRM_XE_QUERY_CONFIG_VA_BITS 3 #define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4 @@ -1205,6 +1208,21 @@ struct drm_xe_vm_bind { * }; * ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &exec_queue_create); * + * Allow users to provide a hint to kernel for cases demanding low latency + * profile. Please note it will have impact on power consumption. 
User can + * indicate low latency hint with flag while creating exec queue as + * mentioned below, + * + * struct drm_xe_exec_queue_create exec_queue_create = { + * .flags = DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT, + * .extensions = 0, + * .vm_id = vm, + * .num_bb_per_exec = 1, + * .num_eng_per_bb = 1, + * .instances = to_user_pointer(&instance), + * }; + * ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &exec_queue_create); + * */ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 @@ -1223,7 +1241,8 @@ struct drm_xe_exec_queue_create { /** @vm_id: VM to use for this exec queue */ __u32 vm_id; - /** @flags: MBZ */ +#define DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT (1 << 0) + /** @flags: flags to use for this exec queue */ __u32 flags; /** @exec_queue_id: Returned exec queue ID */ -- 2.50.1 From 03c346d4d0d85d210d549d43c8cfb3dfb7f20e0a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 28 Feb 2025 08:30:55 +0100 Subject: [PATCH 09/16] drm/xe/vm: Validate userptr during gpu vma prefetching MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit If a userptr vma subject to prefetching was already invalidated or invalidated during the prefetch operation, the operation would repeatedly return -EAGAIN which would typically cause an infinite loop. Validate the userptr to ensure this doesn't happen. v2: - Don't fallthrough from UNMAP to PREFETCH (Matthew Brost) Fixes: 5bd24e78829a ("drm/xe/vm: Subclass userptr vmas") Fixes: 617eebb9c480 ("drm/xe: Fix array of binds") Cc: Matthew Brost Cc: # v6.9+ Suggested-by: Matthew Brost Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250228073058.59510-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_vm.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 996000f2424e..6fdc17be619e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2306,8 +2306,17 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, break; } case DRM_GPUVA_OP_UNMAP: + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); + break; case DRM_GPUVA_OP_PREFETCH: - /* FIXME: Need to skip some prefetch ops */ + vma = gpuva_to_vma(op->base.prefetch.va); + + if (xe_vma_is_userptr(vma)) { + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); + if (err) + return err; + } + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); break; default: -- 2.50.1 From fcc20a4c752214b3e25632021c57d7d1d71ee1dd Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 28 Feb 2025 08:30:56 +0100 Subject: [PATCH 10/16] drm/xe/vm: Fix a misplaced #endif MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Fix a (harmless) misplaced #endif leading to declarations appearing multiple times. 
Fixes: 0eb2a18a8fad ("drm/xe: Implement VM snapshot support for BO's and userptr") Cc: Maarten Lankhorst Cc: José Roberto de Souza Cc: # v6.12+ Signed-off-by: Thomas Hellström Reviewed-by: Lucas De Marchi Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20250228073058.59510-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_vm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index f66075f8a6fe..7c8e39049223 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -282,9 +282,9 @@ static inline void vm_dbg(const struct drm_device *dev, const char *format, ...) { /* noop */ } #endif -#endif struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm); void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); +#endif -- 2.50.1 From 100a5b8dadfca50d91d9a4c9fc01431b42a25cab Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 28 Feb 2025 08:30:57 +0100 Subject: [PATCH 11/16] drm/xe: Fix fault mode invalidation with unbind MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Fix fault mode invalidation racing with unbind leading to the PTE zapping potentially traversing an invalid page-table tree. Do this by holding the notifier lock across PTE zapping. This might transfer any contention waiting on the notifier seqlock read side to the notifier lock read side, but that shouldn't be a major problem. At the same time get rid of the open-coded invalidation in the bind code by relying on the notifier even when the vma bind is not yet committed. Finally let userptr invalidation call a dedicated xe_vm function performing a full invalidation. 
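With the change, the notifier callback reduces to roughly the following
(matching the reworked vma_userptr_invalidate() below):

  down_write(&vm->userptr.notifier_lock);
  mmu_interval_set_seq(mni, cur_seq);
  __vma_userptr_invalidate(vm, uvma);	/* list move, fence waits, PTE zap */
  up_write(&vm->userptr.notifier_lock);

so in fault mode the PTE zapping done by xe_vm_invalidate_vma() now runs
entirely under the notifier lock.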
Fixes: e8babb280b5e ("drm/xe: Convert multiple bind ops into single job") Cc: Thomas Hellström Cc: Matthew Brost Cc: Matthew Auld Cc: # v6.12+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250228073058.59510-4-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_pt.c | 38 ++++---------- drivers/gpu/drm/xe/xe_vm.c | 85 +++++++++++++++++++++----------- drivers/gpu/drm/xe/xe_vm.h | 8 +++ drivers/gpu/drm/xe/xe_vm_types.h | 4 +- 4 files changed, 75 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 1ddcc7e79a93..12a627a23eb4 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1213,42 +1213,22 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, return 0; uvma = to_userptr_vma(vma); - notifier_seq = uvma->userptr.notifier_seq; + if (xe_pt_userptr_inject_eagain(uvma)) + xe_vma_userptr_force_invalidate(uvma); - if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm)) - return 0; + notifier_seq = uvma->userptr.notifier_seq; if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq) && - !xe_pt_userptr_inject_eagain(uvma)) + notifier_seq)) return 0; - if (xe_vm_in_fault_mode(vm)) { + if (xe_vm_in_fault_mode(vm)) return -EAGAIN; - } else { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - - if (xe_vm_in_preempt_fence_mode(vm)) { - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - } - } + /* + * Just continue the operation since exec or rebind worker + * will take care of rebinding. + */ return 0; } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 6fdc17be619e..dd422ac95dc0 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -580,51 +580,26 @@ out_unlock_outer: trace_xe_vm_rebind_worker_exit(vm); } -static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, - const struct mmu_notifier_range *range, - unsigned long cur_seq) +static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma) { - struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier); - struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr); + struct xe_userptr *userptr = &uvma->userptr; struct xe_vma *vma = &uvma->vma; - struct xe_vm *vm = xe_vma_vm(vma); struct dma_resv_iter cursor; struct dma_fence *fence; long err; - xe_assert(vm->xe, xe_vma_is_userptr(vma)); - trace_xe_vma_userptr_invalidate(vma); - - if (!mmu_notifier_range_blockable(range)) - return false; - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "NOTIFIER: addr=0x%016llx, range=0x%016llx", - xe_vma_start(vma), xe_vma_size(vma)); - - down_write(&vm->userptr.notifier_lock); - mmu_interval_set_seq(mni, cur_seq); - - /* No need to stop gpu access if the userptr is not yet bound. */ - if (!userptr->initial_bind) { - up_write(&vm->userptr.notifier_lock); - return true; - } - /* * Tell exec and rebind worker they need to repin and rebind this * userptr. 
*/ if (!xe_vm_in_fault_mode(vm) && - !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) { + !(vma->gpuva.flags & XE_VMA_DESTROYED)) { spin_lock(&vm->userptr.invalidated_lock); list_move_tail(&userptr->invalidate_link, &vm->userptr.invalidated); spin_unlock(&vm->userptr.invalidated_lock); } - up_write(&vm->userptr.notifier_lock); - /* * Preempt fences turn into schedule disables, pipeline these. * Note that even in fault mode, we need to wait for binds and @@ -642,11 +617,35 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, false, MAX_SCHEDULE_TIMEOUT); XE_WARN_ON(err <= 0); - if (xe_vm_in_fault_mode(vm)) { + if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { err = xe_vm_invalidate_vma(vma); XE_WARN_ON(err); } +} +static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier); + struct xe_vma *vma = &uvma->vma; + struct xe_vm *vm = xe_vma_vm(vma); + + xe_assert(vm->xe, xe_vma_is_userptr(vma)); + trace_xe_vma_userptr_invalidate(vma); + + if (!mmu_notifier_range_blockable(range)) + return false; + + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "NOTIFIER: addr=0x%016llx, range=0x%016llx", + xe_vma_start(vma), xe_vma_size(vma)); + + down_write(&vm->userptr.notifier_lock); + mmu_interval_set_seq(mni, cur_seq); + + __vma_userptr_invalidate(vm, uvma); + up_write(&vm->userptr.notifier_lock); trace_xe_vma_userptr_invalidate_complete(vma); return true; @@ -656,6 +655,34 @@ static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { .invalidate = vma_userptr_invalidate, }; +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +/** + * xe_vma_userptr_force_invalidate() - force invalidate a userptr + * @uvma: The userptr vma to invalidate + * + * Perform a forced userptr invalidation for testing purposes. + */ +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + /* Protect against concurrent userptr pinning */ + lockdep_assert_held(&vm->lock); + /* Protect against concurrent notifiers */ + lockdep_assert_held(&vm->userptr.notifier_lock); + /* + * Protect against concurrent instances of this function and + * the critical exec sections + */ + xe_vm_assert_held(vm); + + if (!mmu_interval_read_retry(&uvma->userptr.notifier, + uvma->userptr.notifier_seq)) + uvma->userptr.notifier_seq -= 2; + __vma_userptr_invalidate(vm, uvma); +} +#endif + int xe_vm_userptr_pin(struct xe_vm *vm) { struct xe_userptr_vma *uvma, *next; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 7c8e39049223..f5d835271350 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -287,4 +287,12 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm); void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); + +#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) +void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma); +#else +static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) +{ +} +#endif #endif diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 52467b9b5348..1fe79bf23b6b 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -228,8 +228,8 @@ struct xe_vm { * up for revalidation. 
Protected from access with the * @invalidated_lock. Removing items from the list * additionally requires @lock in write mode, and adding - * items to the list requires the @userptr.notifer_lock in - * write mode. + * items to the list requires either the @userptr.notifer_lock in + * write mode, OR @lock in write mode. */ struct list_head invalidated; } userptr; -- 2.50.1 From 6f39b0c5ef0385eae586760d10b9767168037aa5 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 28 Feb 2025 08:30:58 +0100 Subject: [PATCH 12/16] drm/xe: Add staging tree for VM binds MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Concurrent VM bind staging and zapping of PTEs from a userptr notifier do not work because the view of PTEs is not stable. VM binds cannot acquire the notifier lock during staging, as memory allocations are required. To resolve this race condition, use a staging tree for VM binds that is committed only under the userptr notifier lock during the final step of the bind. This ensures a consistent view of the PTEs in the userptr notifier. A follow up may only use staging for VM in fault mode as this is the only mode in which the above race exists. v3: - Drop zap PTE change (Thomas) - s/xe_pt_entry/xe_pt_entry_staging (Thomas) Suggested-by: Thomas Hellström Cc: Fixes: e8babb280b5e ("drm/xe: Convert multiple bind ops into single job") Fixes: a708f6501c69 ("drm/xe: Update PT layer with better error handling") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20250228073058.59510-5-thomas.hellstrom@linux.intel.com Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_pt.c | 58 +++++++++++++++++++++++---------- drivers/gpu/drm/xe/xe_pt_walk.c | 3 +- drivers/gpu/drm/xe/xe_pt_walk.h | 4 +++ 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 12a627a23eb4..dc24baa84092 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -28,6 +28,8 @@ struct xe_pt_dir { struct xe_pt pt; /** @children: Array of page-table child nodes */ struct xe_ptw *children[XE_PDES]; + /** @staging: Array of page-table staging nodes */ + struct xe_ptw *staging[XE_PDES]; }; #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) @@ -48,9 +50,10 @@ static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) return container_of(pt, struct xe_pt_dir, pt); } -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) +static struct xe_pt * +xe_pt_entry_staging(struct xe_pt_dir *pt_dir, unsigned int index) { - return container_of(pt_dir->children[index], struct xe_pt, base); + return container_of(pt_dir->staging[index], struct xe_pt, base); } static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, @@ -125,6 +128,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, } pt->bo = bo; pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; + pt->base.staging = level ? 
as_xe_pt_dir(pt)->staging : NULL; if (vm->xef) xe_drm_client_add_bo(vm->xef->client, pt->bo); @@ -206,8 +210,8 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, + if (xe_pt_entry_staging(pt_dir, i)) + xe_pt_destroy(xe_pt_entry_staging(pt_dir, i), flags, deferred); } } @@ -376,8 +380,10 @@ xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, /* Continue building a non-connected subtree. */ struct iosys_map *map = &parent->bo->vmap; - if (unlikely(xe_child)) + if (unlikely(xe_child)) { parent->base.children[offset] = &xe_child->base; + parent->base.staging[offset] = &xe_child->base; + } xe_pt_write(xe_walk->vm->xe, map, offset, pte); parent->num_live++; @@ -614,6 +620,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, .ops = &xe_pt_stage_bind_ops, .shifts = xe_normal_pt_shifts, .max_level = XE_PT_HIGHEST_LEVEL, + .staging = true, }, .vm = xe_vma_vm(vma), .tile = tile, @@ -873,7 +880,7 @@ static void xe_pt_cancel_bind(struct xe_vma *vma, } } -static void xe_pt_commit_locks_assert(struct xe_vma *vma) +static void xe_pt_commit_prepare_locks_assert(struct xe_vma *vma) { struct xe_vm *vm = xe_vma_vm(vma); @@ -885,6 +892,16 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma) xe_vm_assert_held(vm); } +static void xe_pt_commit_locks_assert(struct xe_vma *vma) +{ + struct xe_vm *vm = xe_vma_vm(vma); + + xe_pt_commit_prepare_locks_assert(vma); + + if (xe_vma_is_userptr(vma)) + lockdep_assert_held_read(&vm->userptr.notifier_lock); +} + static void xe_pt_commit(struct xe_vma *vma, struct xe_vm_pgtable_update *entries, u32 num_entries, struct llist_head *deferred) @@ -895,13 +912,17 @@ static void xe_pt_commit(struct xe_vma *vma, for (i = 0; i < num_entries; i++) { struct xe_pt *pt = entries[i].pt; + struct xe_pt_dir *pt_dir; if (!pt->level) continue; + pt_dir = as_xe_pt_dir(pt); for (j = 0; j < entries[i].qwords; j++) { struct xe_pt *oldpte = entries[i].pt_entries[j].pt; + int j_ = j + entries[i].ofs; + pt_dir->children[j_] = pt_dir->staging[j_]; xe_pt_destroy(oldpte, xe_vma_vm(vma)->flags, deferred); } } @@ -913,7 +934,7 @@ static void xe_pt_abort_bind(struct xe_vma *vma, { int i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = num_entries - 1; i >= 0; --i) { struct xe_pt *pt = entries[i].pt; @@ -928,10 +949,10 @@ static void xe_pt_abort_bind(struct xe_vma *vma, pt_dir = as_xe_pt_dir(pt); for (j = 0; j < entries[i].qwords; j++) { u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = xe_pt_entry(pt_dir, j_); + struct xe_pt *newpte = xe_pt_entry_staging(pt_dir, j_); struct xe_pt *oldpte = entries[i].pt_entries[j].pt; - pt_dir->children[j_] = oldpte ? &oldpte->base : 0; + pt_dir->staging[j_] = oldpte ? 
&oldpte->base : 0; xe_pt_destroy(newpte, xe_vma_vm(vma)->flags, NULL); } } @@ -943,7 +964,7 @@ static void xe_pt_commit_prepare_bind(struct xe_vma *vma, { u32 i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = 0; i < num_entries; i++) { struct xe_pt *pt = entries[i].pt; @@ -961,10 +982,10 @@ static void xe_pt_commit_prepare_bind(struct xe_vma *vma, struct xe_pt *newpte = entries[i].pt_entries[j].pt; struct xe_pt *oldpte = NULL; - if (xe_pt_entry(pt_dir, j_)) - oldpte = xe_pt_entry(pt_dir, j_); + if (xe_pt_entry_staging(pt_dir, j_)) + oldpte = xe_pt_entry_staging(pt_dir, j_); - pt_dir->children[j_] = &newpte->base; + pt_dir->staging[j_] = &newpte->base; entries[i].pt_entries[j].pt = oldpte; } } @@ -1494,6 +1515,7 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, .ops = &xe_pt_stage_unbind_ops, .shifts = xe_normal_pt_shifts, .max_level = XE_PT_HIGHEST_LEVEL, + .staging = true, }, .tile = tile, .modified_start = xe_vma_start(vma), @@ -1535,7 +1557,7 @@ static void xe_pt_abort_unbind(struct xe_vma *vma, { int i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = num_entries - 1; i >= 0; --i) { struct xe_vm_pgtable_update *entry = &entries[i]; @@ -1548,7 +1570,7 @@ static void xe_pt_abort_unbind(struct xe_vma *vma, continue; for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) - pt_dir->children[j] = + pt_dir->staging[j] = entries[i].pt_entries[j - entry->ofs].pt ? &entries[i].pt_entries[j - entry->ofs].pt->base : NULL; } @@ -1561,7 +1583,7 @@ xe_pt_commit_prepare_unbind(struct xe_vma *vma, { int i, j; - xe_pt_commit_locks_assert(vma); + xe_pt_commit_prepare_locks_assert(vma); for (i = 0; i < num_entries; ++i) { struct xe_vm_pgtable_update *entry = &entries[i]; @@ -1575,8 +1597,8 @@ xe_pt_commit_prepare_unbind(struct xe_vma *vma, pt_dir = as_xe_pt_dir(pt); for (j = entry->ofs; j < entry->ofs + entry->qwords; j++) { entry->pt_entries[j - entry->ofs].pt = - xe_pt_entry(pt_dir, j); - pt_dir->children[j] = NULL; + xe_pt_entry_staging(pt_dir, j); + pt_dir->staging[j] = NULL; } } } diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c index b8b3d2aea492..be602a763ff3 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.c +++ b/drivers/gpu/drm/xe/xe_pt_walk.c @@ -74,7 +74,8 @@ int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level, u64 addr, u64 end, struct xe_pt_walk *walk) { pgoff_t offset = xe_pt_offset(addr, level, walk); - struct xe_ptw **entries = parent->children ? parent->children : NULL; + struct xe_ptw **entries = walk->staging ? (parent->staging ?: NULL) : + (parent->children ?: NULL); const struct xe_pt_walk_ops *ops = walk->ops; enum page_walk_action action; struct xe_ptw *child; diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h index 5ecc4d2f0f65..5c02c244f7de 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.h +++ b/drivers/gpu/drm/xe/xe_pt_walk.h @@ -11,12 +11,14 @@ /** * struct xe_ptw - base class for driver pagetable subclassing. * @children: Pointer to an array of children if any. + * @staging: Pointer to an array of staging if any. * * Drivers could subclass this, and if it's a page-directory, typically * embed an array of xe_ptw pointers. */ struct xe_ptw { struct xe_ptw **children; + struct xe_ptw **staging; }; /** @@ -41,6 +43,8 @@ struct xe_pt_walk { * as shared pagetables. 
 	 */
 	bool shared_pt_mode;
+	/** @staging: Walk staging PT structure */
+	bool staging;
 };
 
 /**
-- 
2.50.1

From bbe2b06b55bc061c8fcec034ed26e88287f39143 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Thomas=20Hellstr=C3=B6m?=
Date: Tue, 4 Mar 2025 18:33:40 +0100
Subject: [PATCH 13/16] drm/xe/hmm: Style- and include fixes
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Add a proper #ifndef guard around the xe_hmm.h header and proper
spacing, and, since the documentation mostly follows the kerneldoc
format, make it kerneldoc. Also prepare for upcoming -stable fixes.

Fixes: 81e058a3e7fd ("drm/xe: Introduce helper to populate userptr")
Cc: Oak Zeng
Cc: stable@vger.kernel.org # v6.10+
Signed-off-by: Thomas Hellström
Reviewed-by: Matthew Auld
Acked-by: Matthew Brost
Link: https://patchwork.freedesktop.org/patch/msgid/20250304173342.22009-2-thomas.hellstrom@linux.intel.com
---
 drivers/gpu/drm/xe/xe_hmm.c | 9 +++------
 drivers/gpu/drm/xe/xe_hmm.h | 5 +++++
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
index 2e4ae61567d8..6ddcf88d8a39 100644
--- a/drivers/gpu/drm/xe/xe_hmm.c
+++ b/drivers/gpu/drm/xe/xe_hmm.c
@@ -19,11 +19,10 @@ static u64 xe_npages_in_range(unsigned long start, unsigned long end)
 	return (end - start) >> PAGE_SHIFT;
 }
 
-/*
+/**
  * xe_mark_range_accessed() - mark a range is accessed, so core mm
  * have such information for memory eviction or write back to
  * hard disk
- *
  * @range: the range to mark
  * @write: if write to this range, we mark pages in this range
  * as dirty
@@ -43,11 +42,10 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write)
 	}
 }
 
-/*
+/**
 * xe_build_sg() - build a scatter gather table for all the physical pages/pfn
 * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table
 * and will be used to program GPU page table later.
- *
 * @xe: the xe device who will access the dma-address in sg table
 * @range: the hmm range that we build the sg table from. range->hmm_pfns[]
 * has the pfn numbers of pages that back up this hmm address range.
@@ -112,9 +110,8 @@ free_pages:
 	return ret;
 }
 
-/*
+/**
 * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr
- *
 * @uvma: the userptr vma which hold the scatter gather table
 *
 * With function xe_userptr_populate_range, we allocate storage of
diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h
index 909dc2bdcd97..9602cb7d976d 100644
--- a/drivers/gpu/drm/xe/xe_hmm.h
+++ b/drivers/gpu/drm/xe/xe_hmm.h
@@ -3,9 +3,14 @@
  * Copyright © 2024 Intel Corporation
  */
 
+#ifndef _XE_HMM_H_
+#define _XE_HMM_H_
+
 #include
 
 struct xe_userptr_vma;
 
 int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked);
+
 void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma);
+#endif
-- 
2.50.1

From ea3e66d280ce2576664a862693d1da8fd324c317 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Thomas=20Hellstr=C3=B6m?=
Date: Tue, 4 Mar 2025 18:33:41 +0100
Subject: [PATCH 14/16] drm/xe/hmm: Don't dereference struct page pointers without notifier lock
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

The pfns that we obtain from hmm_range_fault() point to pages that we
don't have a reference on, and the guarantee that they are still in the
cpu page-tables is that the notifier lock must be held and the notifier
seqno is still valid. So while building the sg table and marking the
pages accessed / dirty we need to hold this lock with a validated seqno.
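Expressed as code, the required pattern is the one the new
xe_alloc_sg() below applies before touching the pfns (sketch; error
paths as in the patch):

  ret = down_read_interruptible(&vm->userptr.notifier_lock);
  if (ret)
          return ret;
  if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) {
          up_read(&vm->userptr.notifier_lock);
          return -EAGAIN;	/* stale seqno, caller repins */
  }
  /* pages backing range->hmm_pfns[] may be dereferenced here */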
However, the lock is reclaim tainted, which makes
sg_alloc_table_from_pages_segment() unusable, since it internally
allocates memory. Instead build the sg-table manually. For the
non-iommu case this might lead to fewer coalesces, but if that's a
problem it can be fixed up later in the resource cursor code. For the
iommu case, the whole sg-table may still be coalesced to a single
contiguous device va region.

This avoids marking pages that we don't own dirty and accessed, and it
also avoids dereferencing struct pages that we don't own.

v2:
- Use assert to check whether hmm pfns are valid (Matthew Auld)
- Take into account that large pages may cross range boundaries
  (Matthew Auld)

v3:
- Don't unnecessarily check for a non-freed sg-table. (Matthew Auld)
- Add a missing up_read() in an error path. (Matthew Auld)

Fixes: 81e058a3e7fd ("drm/xe: Introduce helper to populate userptr")
Cc: Oak Zeng
Cc: stable@vger.kernel.org # v6.10+
Signed-off-by: Thomas Hellström
Reviewed-by: Matthew Auld
Acked-by: Matthew Brost
Link: https://patchwork.freedesktop.org/patch/msgid/20250304173342.22009-3-thomas.hellstrom@linux.intel.com
---
 drivers/gpu/drm/xe/xe_hmm.c | 112 +++++++++++++++++++++++++++---------
 1 file changed, 86 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
index 6ddcf88d8a39..be284b852307 100644
--- a/drivers/gpu/drm/xe/xe_hmm.c
+++ b/drivers/gpu/drm/xe/xe_hmm.c
@@ -42,6 +42,42 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write)
 	}
 }
 
+static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st,
+		       struct hmm_range *range, struct rw_semaphore *notifier_sem)
+{
+	unsigned long i, npages, hmm_pfn;
+	unsigned long num_chunks = 0;
+	int ret;
+
+	/* HMM docs says this is needed. */
+	ret = down_read_interruptible(notifier_sem);
+	if (ret)
+		return ret;
+
+	if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) {
+		up_read(notifier_sem);
+		return -EAGAIN;
+	}
+
+	npages = xe_npages_in_range(range->start, range->end);
+	for (i = 0; i < npages;) {
+		unsigned long len;
+
+		hmm_pfn = range->hmm_pfns[i];
+		xe_assert(xe, hmm_pfn & HMM_PFN_VALID);
+
+		len = 1UL << hmm_pfn_to_map_order(hmm_pfn);
+
+		/* If order > 0 the page may extend beyond range->start */
+		len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1);
+		i += len;
+		num_chunks++;
+	}
+	up_read(notifier_sem);
+
+	return sg_alloc_table(st, num_chunks, GFP_KERNEL);
+}
+
 /**
 * xe_build_sg() - build a scatter gather table for all the physical pages/pfn
 * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table
@@ -50,6 +86,7 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write)
 * @range: the hmm range that we build the sg table from. range->hmm_pfns[]
 * has the pfn numbers of pages that back up this hmm address range.
 * @st: pointer to the sg table.
+ * @notifier_sem: The xe notifier lock.
 * @write: whether we write to this range. This decides dma map direction
 * for system pages.
If write we map it bi-diretional; otherwise * DMA_TO_DEVICE @@ -76,38 +113,41 @@ static void xe_mark_range_accessed(struct hmm_range *range, bool write) * Returns 0 if successful; -ENOMEM if fails to allocate memory */ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, - struct sg_table *st, bool write) + struct sg_table *st, + struct rw_semaphore *notifier_sem, + bool write) { + unsigned long npages = xe_npages_in_range(range->start, range->end); struct device *dev = xe->drm.dev; - struct page **pages; - u64 i, npages; - int ret; + struct scatterlist *sgl; + struct page *page; + unsigned long i, j; - npages = xe_npages_in_range(range->start, range->end); - pages = kvmalloc_array(npages, sizeof(*pages), GFP_KERNEL); - if (!pages) - return -ENOMEM; + lockdep_assert_held(notifier_sem); - for (i = 0; i < npages; i++) { - pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]); - xe_assert(xe, !is_device_private_page(pages[i])); - } + i = 0; + for_each_sg(st->sgl, sgl, st->nents, j) { + unsigned long hmm_pfn, size; - ret = sg_alloc_table_from_pages_segment(st, pages, npages, 0, npages << PAGE_SHIFT, - xe_sg_segment_size(dev), GFP_KERNEL); - if (ret) - goto free_pages; + hmm_pfn = range->hmm_pfns[i]; + page = hmm_pfn_to_page(hmm_pfn); + xe_assert(xe, !is_device_private_page(page)); + + size = 1UL << hmm_pfn_to_map_order(hmm_pfn); + size -= page_to_pfn(page) & (size - 1); + i += size; - ret = dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); - if (ret) { - sg_free_table(st); - st = NULL; + if (unlikely(j == st->nents - 1)) { + if (i > npages) + size -= (i - npages); + sg_mark_end(sgl); + } + sg_set_page(sgl, page, size << PAGE_SHIFT, 0); } + xe_assert(xe, i == npages); -free_pages: - kvfree(pages); - return ret; + return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); } /** @@ -237,16 +277,36 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, if (ret) goto free_pfns; - ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, write); + ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock); if (ret) goto free_pfns; + ret = down_read_interruptible(&vm->userptr.notifier_lock); + if (ret) + goto free_st; + + if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) { + ret = -EAGAIN; + goto out_unlock; + } + + ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, + &vm->userptr.notifier_lock, write); + if (ret) + goto out_unlock; + xe_mark_range_accessed(&hmm_range, write); userptr->sg = &userptr->sgt; userptr->notifier_seq = hmm_range.notifier_seq; + up_read(&vm->userptr.notifier_lock); + kvfree(pfns); + return 0; +out_unlock: + up_read(&vm->userptr.notifier_lock); +free_st: + sg_free_table(&userptr->sgt); free_pfns: kvfree(pfns); return ret; } - -- 2.50.1 From ba767b9d01a2c552d76cf6f46b125d50ec4147a6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 4 Mar 2025 18:33:42 +0100 Subject: [PATCH 15/16] drm/xe/userptr: Unmap userptrs in the mmu notifier MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit If userptr pages are freed after a call to the xe mmu notifier, the device will not be blocked out from theoretically accessing these pages unless they are also unmapped from the iommu, and this violates some aspects of the iommu-imposed security. Ensure that userptrs are unmapped in the mmu notifier to mitigate this. 
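A minimal sketch of that idea, assuming a dedicated mutex that serializes the notifier-side unmap against (re)population; the reason for unmapping only, rather than freeing, is explained next, and all identifiers here are illustrative:

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/mutex.h>

/*
 * Sketch only: dma-unmap (but do not free) a userptr sg-table from the
 * mmu notifier. The mutex serializes this against concurrent
 * (re)population; "mapped" records whether a dma mapping exists.
 */
struct example_userptr {
	struct sg_table sgt;
	struct sg_table *sg;		/* NULL until populated */
	struct mutex unmap_mutex;
	bool mapped;			/* protected by unmap_mutex */
};

static void example_userptr_unmap(struct device *dev,
				  struct example_userptr *up, bool write)
{
	mutex_lock(&up->unmap_mutex);
	if (up->sg && up->mapped)
		dma_unmap_sgtable(dev, up->sg,
				  write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0);
	up->mapped = false;
	mutex_unlock(&up->unmap_mutex);
	/* The sg_table itself is deliberately left allocated. */
}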
A naive attempt would try to free the sg table, but the sg table itself may be accessed by a concurrent bind operation, so settle for only unmapping. v3: - Update lockdep asserts. - Fix a typo (Matthew Auld) Fixes: 81e058a3e7fd ("drm/xe: Introduce helper to populate userptr") Cc: Oak Zeng Cc: Matthew Auld Cc: <stable@vger.kernel.org> # v6.10+ Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Acked-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250304173342.22009-4-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_hmm.c | 51 ++++++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_hmm.h | 2 ++ drivers/gpu/drm/xe/xe_vm.c | 4 +++ drivers/gpu/drm/xe/xe_vm_types.h | 4 +++ 4 files changed, 52 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c index be284b852307..392102515f3d 100644 --- a/drivers/gpu/drm/xe/xe_hmm.c +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -150,6 +150,45 @@ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); } +static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vm *vm = xe_vma_vm(&uvma->vma); + + lockdep_assert_held_write(&vm->lock); + lockdep_assert_held(&vm->userptr.notifier_lock); + + mutex_lock(&userptr->unmap_mutex); + xe_assert(vm->xe, !userptr->mapped); + userptr->mapped = true; + mutex_unlock(&userptr->unmap_mutex); +} + +void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vma *vma = &uvma->vma; + bool write = !xe_vma_read_only(vma); + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_device *xe = vm->xe; + + if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) && + !lockdep_is_held_type(&vm->lock, 0) && + !(vma->gpuva.flags & XE_VMA_DESTROYED)) { + /* Don't unmap in exec critical section. */ + xe_vm_assert_held(vm); + /* Don't unmap while mapping the sg. */ + lockdep_assert_held(&vm->lock); + } + + mutex_lock(&userptr->unmap_mutex); + if (userptr->sg && userptr->mapped) + dma_unmap_sgtable(xe->drm.dev, userptr->sg, + write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); + userptr->mapped = false; + mutex_unlock(&userptr->unmap_mutex); +} + /** * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr * @uvma: the userptr vma which hold the scatter gather table @@ -161,16 +200,9 @@ static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma) { struct xe_userptr *userptr = &uvma->userptr; - struct xe_vma *vma = &uvma->vma; - bool write = !xe_vma_read_only(vma); - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_device *xe = vm->xe; - struct device *dev = xe->drm.dev; - - xe_assert(xe, userptr->sg); - dma_unmap_sgtable(dev, userptr->sg, - write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); + xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg); + xe_hmm_userptr_unmap(uvma); sg_free_table(userptr->sg); userptr->sg = NULL; } @@ -297,6 +329,7 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, xe_mark_range_accessed(&hmm_range, write); userptr->sg = &userptr->sgt; + xe_hmm_userptr_set_mapped(uvma); userptr->notifier_seq = hmm_range.notifier_seq; up_read(&vm->userptr.notifier_lock); kvfree(pfns); diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h index 9602cb7d976d..0ea98d8e7bbc 100644 --- a/drivers/gpu/drm/xe/xe_hmm.h +++ b/drivers/gpu/drm/xe/xe_hmm.h @@ -13,4 +13,6 @@ struct xe_userptr_vma; int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked); void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma); + +void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma); #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index dd422ac95dc0..3dbd3d38008a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -621,6 +621,8 @@ static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uv err = xe_vm_invalidate_vma(vma); XE_WARN_ON(err); } + + xe_hmm_userptr_unmap(uvma); } static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, @@ -1039,6 +1041,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, INIT_LIST_HEAD(&userptr->invalidate_link); INIT_LIST_HEAD(&userptr->repin_link); vma->gpuva.gem.offset = bo_offset_or_userptr; + mutex_init(&userptr->unmap_mutex); err = mmu_interval_notifier_insert(&userptr->notifier, current->mm, @@ -1080,6 +1083,7 @@ static void xe_vma_destroy_late(struct xe_vma *vma) * them anymore */ mmu_interval_notifier_remove(&userptr->notifier); + mutex_destroy(&userptr->unmap_mutex); xe_vm_put(vm); } else if (xe_vma_is_null(vma)) { xe_vm_put(vm); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 1fe79bf23b6b..eca73c4197d4 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -59,12 +59,16 @@ struct xe_userptr { struct sg_table *sg; /** @notifier_seq: notifier sequence number */ unsigned long notifier_seq; + /** @unmap_mutex: Mutex protecting dma-unmapping */ + struct mutex unmap_mutex; /** * @initial_bind: user pointer has been bound at least once. * write: vm->userptr.notifier_lock in read mode and vm->resv held. * read: vm->userptr.notifier_lock in write mode or vm->resv held. */ bool initial_bind; + /** @mapped: Whether the @sgt sg-table is dma-mapped. Protected by @unmap_mutex. */ + bool mapped; #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) u32 divisor; #endif -- 2.50.1 From c8f33a6fa64735015032cbb2bc5300b93f3f709c Mon Sep 17 00:00:00 2001 From: Gustavo Sousa Date: Fri, 21 Feb 2025 15:51:40 -0300 Subject: [PATCH 16/16] drm/xe: Set IP names in functions handling IP version In an upcoming change, we will handle setting graphics_name and media_name differently for GMDID-based IPs. As such, let's make both handle_pre_gmdid() and handle_gmdid() functions responsible for initializing those fields. While now we have both doing essentially the same thing with respect to those fields, handle_pre_gmdid() will diverge soon. 
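For reference, the verx100 encoding that both helpers populate alongside the IP names is simply ver * 100 + rel, as the diff below shows; a trivial, illustrative helper (not part of the patch):

/* Illustrative only: the verx100 encoding used by xe->info. */
static inline unsigned int verx100(unsigned int ver, unsigned int rel)
{
	return ver * 100 + rel;	/* e.g. ver 12, rel 55 -> 1255 */
}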
Reviewed-by: Matt Roper Signed-off-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20250221-xe-unify-ip-descriptors-v2-1-5bc0c6d0c13f@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_pci.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 8b6658b214be..eb81b1a3b40a 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -559,9 +559,14 @@ static void handle_pre_gmdid(struct xe_device *xe, const struct xe_media_desc *media) { xe->info.graphics_verx100 = graphics->ver * 100 + graphics->rel; + xe->info.graphics_name = graphics->name; - if (media) + if (media) { xe->info.media_verx100 = media->ver * 100 + media->rel; + xe->info.media_name = media->name; + } else { + xe->info.media_name = "none"; + } } @@ -583,6 +588,7 @@ static void handle_gmdid(struct xe_device *xe, if (ver == graphics_ip_map[i].ver) { xe->info.graphics_verx100 = ver; *graphics = graphics_ip_map[i].ip; + xe->info.graphics_name = (*graphics)->name; break; } @@ -593,8 +599,9 @@ static void handle_gmdid(struct xe_device *xe, ver / 100, ver % 100); } - read_gmdid(xe, GMDID_MEDIA, &ver, media_revid); + xe->info.media_name = "none"; + read_gmdid(xe, GMDID_MEDIA, &ver, media_revid); /* Media may legitimately be fused off / not present */ if (ver == 0) return; @@ -603,6 +610,7 @@ static void handle_gmdid(struct xe_device *xe, if (ver == media_ip_map[i].ver) { xe->info.media_verx100 = ver; *media = media_ip_map[i].ip; + xe->info.media_name = (*media)->name; break; } @@ -693,9 +701,6 @@ static int xe_info_init(struct xe_device *xe, if (!graphics_desc) return -ENODEV; - xe->info.graphics_name = graphics_desc->name; - xe->info.media_name = media_desc ? media_desc->name : "none"; - xe->info.vram_flags = graphics_desc->vram_flags; xe->info.va_bits = graphics_desc->va_bits; xe->info.vm_max_level = graphics_desc->vm_max_level; -- 2.50.1