From 121b214cdf10d4129b64f2b1f31807154c74ae55 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 13 Feb 2025 11:28:59 -0800 Subject: [PATCH 01/16] drm/xe: Fix error handling in xe_irq_install() When devm_add_action_or_reset() fails, it already calls the function passed as parameter and that function is already free'ing the irqs. Drop the goto and just return. The caller, xe_device_probe(), should also do the same thing instead of wrongly doing `goto err` and calling the unrelated xe_display_fini() function. Fixes: 14d25d8d684d ("drm/xe: change old msi irq api to a new one") Reviewed-by: Rodrigo Vivi Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20250213192909.996148-3-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_irq.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index bf092e6391c7..5362d3174b06 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -775,19 +775,7 @@ int xe_irq_install(struct xe_device *xe) xe_irq_postinstall(xe); - err = devm_add_action_or_reset(xe->drm.dev, irq_uninstall, xe); - if (err) - goto free_irq_handler; - - return 0; - -free_irq_handler: - if (xe_device_has_msix(xe)) - xe_irq_msix_free(xe); - else - xe_irq_msi_free(xe); - - return err; + return devm_add_action_or_reset(xe->drm.dev, irq_uninstall, xe); } static void xe_irq_msi_synchronize_irq(struct xe_device *xe) -- 2.51.0 From 0bcf41171c64234e79eb3552d00f0aad8a47e8d3 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 13 Feb 2025 11:29:00 -0800 Subject: [PATCH 02/16] drm/xe: Fix xe_tile_init_noalloc() error propagation Propagate the error to the caller so initialization properly stops if sysfs creation fails. 
Reviewed-by: Francois Dugast Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20250213192909.996148-4-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_tile.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index d9a7a04ff652..d29658ff4dd4 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -168,9 +168,7 @@ int xe_tile_init_noalloc(struct xe_tile *tile) xe_wa_apply_tile_workarounds(tile); - err = xe_tile_sysfs_init(tile); - - return 0; + return xe_tile_sysfs_init(tile); } int xe_tile_init(struct xe_tile *tile) -- 2.51.0 From ff57025c358603555f1e0ae0d50282a460433594 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 13 Feb 2025 11:29:01 -0800 Subject: [PATCH 03/16] drm/xe: Stop ignoring errors from xe_ttm_stolen_mgr_init() Make sure to differentiate normal behavior, e.g. there's no stolen, from allocation errors or failure to initialize lower layers. 
Reviewed-by: Francois Dugast Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20250213192909.996148-5-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 4 +++- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 17 +++++++++-------- drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h | 2 +- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1084beef27e3..2e934ed02713 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -836,7 +836,9 @@ int xe_device_probe(struct xe_device *xe) } /* Allocate and map stolen after potential VRAM resize */ - xe_ttm_stolen_mgr_init(xe); + err = xe_ttm_stolen_mgr_init(xe); + if (err) + return err; /* * Now that GT is initialized (TTM in particular), diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index d414421f8c13..d9c9d2547aad 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -207,17 +207,16 @@ static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) #endif } -void xe_ttm_stolen_mgr_init(struct xe_device *xe) +int xe_ttm_stolen_mgr_init(struct xe_device *xe) { - struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct xe_ttm_stolen_mgr *mgr; u64 stolen_size, io_size; int err; - if (!mgr) { - drm_dbg_kms(&xe->drm, "Stolen mgr init failed\n"); - return; - } + mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return -ENOMEM; if (IS_SRIOV_VF(xe)) stolen_size = 0; @@ -230,7 +229,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) if (!stolen_size) { drm_dbg_kms(&xe->drm, "No stolen memory support\n"); - return; + return 0; } /* @@ -246,7 +245,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) io_size, PAGE_SIZE); if (err) { drm_dbg_kms(&xe->drm, "Stolen 
mgr init failed: %i\n", err); - return; + return err; } drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n", @@ -254,6 +253,8 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) if (io_size) mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size); + + return 0; } u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h index 1777245ff810..8e877d1e839b 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.h @@ -12,7 +12,7 @@ struct ttm_resource; struct xe_bo; struct xe_device; -void xe_ttm_stolen_mgr_init(struct xe_device *xe); +int xe_ttm_stolen_mgr_init(struct xe_device *xe); int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem); bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe); u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset); -- 2.51.0 From c0aeb90b28b88fa2eedef4eae4bd649de6fc2a3e Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 13 Feb 2025 11:29:02 -0800 Subject: [PATCH 04/16] drm/xe: Remove leftover pxp comment Not being able to initialize pxp is fatal if the platform is expected to have it. Update comment after commit 9c9dc9ba4a00 ("drm/xe/pxp: Fail the load if PXP fails to initialize"). 
Cc: Daniele Ceraolo Spurio Reviewed-by: Daniele Ceraolo Spurio Link: https://patchwork.freedesktop.org/patch/msgid/20250213192909.996148-6-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 2e934ed02713..8203c80faca5 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -878,7 +878,6 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err_fini_oa; - /* A PXP init failure is not fatal */ err = xe_pxp_init(xe); if (err) goto err_fini_display; -- 2.51.0 From ff6cd29b690b11fff7d1d998852fc6eeb02bed73 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 13 Feb 2025 11:29:03 -0800 Subject: [PATCH 05/16] drm/xe: Cleanup unwind of gt initialization MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The only thing in xe_gt_remove() that really needs to happen on the device remove callback is the xe_uc_remove(). That's because of the following call chain: xe_gt_remove() xe_uc_remove() xe_gsc_remove() xe_gsc_proxy_remove() Move xe_gsc_proxy_remove() to be handled as a xe_device_remove_action, so it's recorded when it should run during device removal. The rest can be handled normally by devm infra. Besides removing the deep call chain above, xe_device_probe() doesn't have to unwind the gt loop and it's also more in line with the xe_device_probe() style. 
Cc: Daniele Ceraolo Spurio Cc: Rodrigo Vivi Cc: Thomas Hellström Reviewed-by: Daniele Ceraolo Spurio Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20250213192909.996148-7-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 21 +---------- drivers/gpu/drm/xe/xe_gsc.c | 9 ----- drivers/gpu/drm/xe/xe_gsc.h | 1 - drivers/gpu/drm/xe/xe_gsc_proxy.c | 63 ++++++++++++++----------------- drivers/gpu/drm/xe/xe_gsc_proxy.h | 1 - drivers/gpu/drm/xe/xe_gsc_types.h | 1 + drivers/gpu/drm/xe/xe_gt.c | 35 ++++++++--------- drivers/gpu/drm/xe/xe_gt.h | 1 - drivers/gpu/drm/xe/xe_uc.c | 13 ------- drivers/gpu/drm/xe/xe_uc.h | 1 - 10 files changed, 47 insertions(+), 99 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 8203c80faca5..398fad6c5365 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -750,7 +750,6 @@ int xe_device_probe(struct xe_device *xe) struct xe_tile *tile; struct xe_gt *gt; int err; - u8 last_gt; u8 id; xe->probing = true; @@ -861,18 +860,16 @@ int xe_device_probe(struct xe_device *xe) return err; for_each_gt(gt, xe, id) { - last_gt = id; - err = xe_gt_init(gt); if (err) - goto err_fini_gt; + return err; } xe_heci_gsc_init(xe); err = xe_oa_init(xe); if (err) - goto err_fini_gt; + return err; err = xe_display_init(xe); if (err) @@ -911,14 +908,6 @@ err_fini_display: err_fini_oa: xe_oa_fini(xe); -err_fini_gt: - for_each_gt(gt, xe, id) { - if (id < last_gt) - xe_gt_remove(gt); - else - break; - } - return err; } @@ -987,9 +976,6 @@ static void xe_device_remove_display(struct xe_device *xe) void xe_device_remove(struct xe_device *xe) { - struct xe_gt *gt; - u8 id; - xe_oa_unregister(xe); xe_device_remove_display(xe); @@ -998,9 +984,6 @@ void xe_device_remove(struct xe_device *xe) xe_heci_gsc_fini(xe); - for_each_gt(gt, xe, id) - xe_gt_remove(gt); - xe_device_call_remove_actions(xe); } diff --git 
a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 1eb791ddc375..fd41113f8572 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -555,15 +555,6 @@ void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) flush_work(&gsc->work); } -/** - * xe_gsc_remove() - Clean up the GSC structures before driver removal - * @gsc: the GSC uC - */ -void xe_gsc_remove(struct xe_gsc *gsc) -{ - xe_gsc_proxy_remove(gsc); -} - /* * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a * GSC engine reset by writing a notification bit in the GS1 register and then diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index e282b9ef6ec4..d99f66c38075 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -17,7 +17,6 @@ int xe_gsc_init(struct xe_gsc *gsc); int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); void xe_gsc_load_start(struct xe_gsc *gsc); -void xe_gsc_remove(struct xe_gsc *gsc); void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec); void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 24cc6a4f9a96..31c90577faf0 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -423,6 +423,34 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) return 0; } +static void xe_gsc_proxy_remove(void *arg) +{ + struct xe_gsc *gsc = arg; + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); + unsigned int fw_ref = 0; + + if (!gsc->proxy.component_added) + return; + + /* disable HECI2 IRQs */ + xe_pm_runtime_get(xe); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); + if (!fw_ref) + xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n"); + + /* try do disable irq even if forcewake failed */ + gsc_proxy_irq_toggle(gsc, false); + + 
xe_force_wake_put(gt_to_fw(gt), fw_ref); + xe_pm_runtime_put(xe); + + xe_gsc_wait_for_worker_completion(gsc); + + component_del(xe->drm.dev, &xe_gsc_proxy_component_ops); + gsc->proxy.component_added = false; +} + /** * xe_gsc_proxy_init() - init objects and MEI component required by GSC proxy * @gsc: the GSC uC @@ -462,40 +490,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc) gsc->proxy.component_added = true; - /* the component must be removed before unload, so can't use drmm for cleanup */ - - return 0; -} - -/** - * xe_gsc_proxy_remove() - remove the GSC proxy MEI component - * @gsc: the GSC uC - */ -void xe_gsc_proxy_remove(struct xe_gsc *gsc) -{ - struct xe_gt *gt = gsc_to_gt(gsc); - struct xe_device *xe = gt_to_xe(gt); - unsigned int fw_ref = 0; - - if (!gsc->proxy.component_added) - return; - - /* disable HECI2 IRQs */ - xe_pm_runtime_get(xe); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); - if (!fw_ref) - xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n"); - - /* try do disable irq even if forcewake failed */ - gsc_proxy_irq_toggle(gsc, false); - - xe_force_wake_put(gt_to_fw(gt), fw_ref); - xe_pm_runtime_put(xe); - - xe_gsc_wait_for_worker_completion(gsc); - - component_del(xe->drm.dev, &xe_gsc_proxy_component_ops); - gsc->proxy.component_added = false; + return xe_device_add_action_or_reset(xe, xe_gsc_proxy_remove, gsc); } /** diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h index c511ade6b863..fdef56995cd4 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.h +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h @@ -12,7 +12,6 @@ struct xe_gsc; int xe_gsc_proxy_init(struct xe_gsc *gsc); bool xe_gsc_proxy_init_done(struct xe_gsc *gsc); -void xe_gsc_proxy_remove(struct xe_gsc *gsc); int xe_gsc_proxy_start(struct xe_gsc *gsc); int xe_gsc_proxy_request_handler(struct xe_gsc *gsc); diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h index 5926de20214c..97c056656df0 100644 --- 
a/drivers/gpu/drm/xe/xe_gsc_types.h +++ b/drivers/gpu/drm/xe/xe_gsc_types.h @@ -13,6 +13,7 @@ #include #include "xe_uc_fw_types.h" +#include "xe_device_types.h" struct xe_bo; struct xe_exec_queue; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 9fb8f1e678dc..c33040278e1a 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -141,26 +141,6 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) xe_force_wake_put(gt_to_fw(gt), fw_ref); } -/** - * xe_gt_remove() - Clean up the GT structures before driver removal - * @gt: the GT object - * - * This function should only act on objects/structures that must be cleaned - * before the driver removal callback is complete and therefore can't be - * deferred to a drmm action. - */ -void xe_gt_remove(struct xe_gt *gt) -{ - int i; - - xe_uc_remove(&gt->uc); - - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(&gt->fence_irq[i]); - - xe_gt_disable_host_l2_vram(gt); -} - static void gt_reset_worker(struct work_struct *w); static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) @@ -583,6 +563,17 @@ out_fw: return err; } +static void xe_gt_fini(void *arg) +{ + struct xe_gt *gt = arg; + int i; + + for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) + xe_hw_fence_irq_finish(&gt->fence_irq[i]); + + xe_gt_disable_host_l2_vram(gt); +} + int xe_gt_init(struct xe_gt *gt) { int err; @@ -595,6 +586,10 @@ int xe_gt_init(struct xe_gt *gt) xe_hw_fence_irq_init(&gt->fence_irq[i]); } + err = devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, xe_gt_fini, gt); + if (err) + return err; + err = xe_gt_pagefault_init(gt); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index e504cc33ade4..187fa6490eaf 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -54,7 +54,6 @@ int xe_gt_resume(struct xe_gt *gt); void xe_gt_reset_async(struct xe_gt *gt); void xe_gt_sanitize(struct xe_gt *gt); int xe_gt_sanitize_freq(struct xe_gt *gt); 
-void xe_gt_remove(struct xe_gt *gt); /** * xe_gt_wait_for_reset - wait for gt's async reset to finalize. diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 0d073a9987c2..d8167e818280 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -288,19 +288,6 @@ int xe_uc_suspend(struct xe_uc *uc) return xe_guc_suspend(&uc->guc); } -/** - * xe_uc_remove() - Clean up the UC structures before driver removal - * @uc: the UC object - * - * This function should only act on objects/structures that must be cleaned - * before the driver removal callback is complete and therefore can't be - * deferred to a drmm action. - */ -void xe_uc_remove(struct xe_uc *uc) -{ - xe_gsc_remove(&uc->gsc); -} - /** * xe_uc_declare_wedged() - Declare UC wedged * @uc: the UC object diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 506517c11333..3813c1ede450 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -20,7 +20,6 @@ void xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); -void xe_uc_remove(struct xe_uc *uc); void xe_uc_declare_wedged(struct xe_uc *uc); #endif -- 2.51.0 From f5ebe80e32f809a52d4f562602f791c350c4a204 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 13 Feb 2025 11:29:04 -0800 Subject: [PATCH 06/16] drm/xe: Cleanup extra calls to xe_hw_fence_irq_finish() Now that xe_gt_remove is handled entirely by xe_gt, it's clear there are some extra calls to xe_hw_fence_irq_finish() that aren't necessary. Neither all_fw_domain_init() nor gt_fw_domain_init() needs to do that since it's handled by the caller on any error. 
Reviewed-by: Rodrigo Vivi Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20250213192909.996148-8-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index c33040278e1a..bd16ca070dd2 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -389,13 +389,11 @@ static void dump_pat_on_error(struct xe_gt *gt) static int gt_fw_domain_init(struct xe_gt *gt) { unsigned int fw_ref; - int err, i; + int err; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) { - err = -ETIMEDOUT; - goto err_hw_fence_irq; - } + if (!fw_ref) + return -ETIMEDOUT; if (!xe_gt_is_media_type(gt)) { err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); @@ -436,9 +434,6 @@ static int gt_fw_domain_init(struct xe_gt *gt) err_force_wake: dump_pat_on_error(gt); xe_force_wake_put(gt_to_fw(gt), fw_ref); -err_hw_fence_irq: - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(&gt->fence_irq[i]); return err; } @@ -446,7 +441,7 @@ err_hw_fence_irq: static int all_fw_domain_init(struct xe_gt *gt) { unsigned int fw_ref; - int err, i; + int err; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { @@ -524,8 +519,6 @@ static int all_fw_domain_init(struct xe_gt *gt) err_force_wake: xe_force_wake_put(gt_to_fw(gt), fw_ref); - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(&gt->fence_irq[i]); return err; } -- 2.51.0 From d3f557d52e2d1be48adf89a6c1e47cc8728b9054 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 13 Feb 2025 11:29:05 -0800 Subject: [PATCH 07/16] drm/xe/oa: Move fini to xe_oa Like done with other functions, cleanup the error handling in xe_device_probe() by moving the OA fini to be handled by xe_oa itself, which relies on devm to call the cleanup function. 
Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20250213192909.996148-9-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 7 +---- drivers/gpu/drm/xe/xe_oa.c | 48 +++++++++++++++++----------------- drivers/gpu/drm/xe/xe_oa.h | 1 - 3 files changed, 25 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 398fad6c5365..d0b1c280ddd3 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -873,7 +873,7 @@ int xe_device_probe(struct xe_device *xe) err = xe_display_init(xe); if (err) - goto err_fini_oa; + return err; err = xe_pxp_init(xe); if (err) @@ -905,9 +905,6 @@ int xe_device_probe(struct xe_device *xe) err_fini_display: xe_display_driver_remove(xe); -err_fini_oa: - xe_oa_fini(xe); - return err; } @@ -980,8 +977,6 @@ void xe_device_remove(struct xe_device *xe) xe_device_remove_display(xe); - xe_oa_fini(xe); - xe_heci_gsc_fini(xe); xe_device_call_remove_actions(xe); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index fa873f3d0a9d..2c640185bdec 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -2641,6 +2641,27 @@ static void xe_oa_init_supported_formats(struct xe_oa *oa) } } +static int destroy_config(int id, void *p, void *data) +{ + xe_oa_config_put(p); + + return 0; +} + +static void xe_oa_fini(void *arg) +{ + struct xe_device *xe = arg; + struct xe_oa *oa = &xe->oa; + + if (!oa->xe) + return; + + idr_for_each(&oa->metrics_idr, destroy_config, oa); + idr_destroy(&oa->metrics_idr); + + oa->xe = NULL; +} + /** * xe_oa_init - OA initialization during device probe * @xe: @xe_device @@ -2672,31 +2693,10 @@ int xe_oa_init(struct xe_device *xe) } xe_oa_init_supported_formats(oa); - return 0; -exit: - oa->xe = NULL; - return ret; -} -static int destroy_config(int id, void *p, void *data) -{ - xe_oa_config_put(p); - return 0; -} - -/** - * xe_oa_fini - OA 
de-initialization during device remove - * @xe: @xe_device - */ -void xe_oa_fini(struct xe_device *xe) -{ - struct xe_oa *oa = &xe->oa; - - if (!oa->xe) - return; - - idr_for_each(&oa->metrics_idr, destroy_config, oa); - idr_destroy(&oa->metrics_idr); + return devm_add_action_or_reset(xe->drm.dev, xe_oa_fini, xe); +exit: oa->xe = NULL; + return ret; } diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h index 87a38820c317..eb36ce250c61 100644 --- a/drivers/gpu/drm/xe/xe_oa.h +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -15,7 +15,6 @@ struct xe_gt; struct xe_hw_engine; int xe_oa_init(struct xe_device *xe); -void xe_oa_fini(struct xe_device *xe); void xe_oa_register(struct xe_device *xe); void xe_oa_unregister(struct xe_device *xe); int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); -- 2.51.0 From 00f6a86c3c5ec14fc0b51cd7b4662817067c652b Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 13 Feb 2025 11:29:06 -0800 Subject: [PATCH 08/16] drm/xe: Move drm_dev_unplug() out of display function This is not really display-related and needed for any sequence on driver removal that has to interact with drm_dev_enter()/drm_dev_exit(). Just remove xe_device_remove_display() and inline it in the single caller to make clear this is not done only for display. 
Cc: Rodrigo Vivi Cc: Jani Nikula Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20250213192909.996148-10-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index d0b1c280ddd3..6d01932f934c 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -963,20 +963,16 @@ int xe_device_add_action_or_reset(struct xe_device *xe, return 0; } -static void xe_device_remove_display(struct xe_device *xe) +void xe_device_remove(struct xe_device *xe) { xe_display_unregister(xe); drm_dev_unplug(&xe->drm); + xe_display_driver_remove(xe); -} -void xe_device_remove(struct xe_device *xe) -{ xe_oa_unregister(xe); - xe_device_remove_display(xe); - xe_heci_gsc_fini(xe); xe_device_call_remove_actions(xe); -- 2.51.0 From 960d71044eee8d7ca407ea272989de34f0e718f3 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 13 Feb 2025 11:29:07 -0800 Subject: [PATCH 09/16] drm/xe/oa: Handle errors in xe_oa_register() Let xe_oa_unregister() be handled by devm infra since it's only putting the kobject. Also, since kobject_create_and_add may fail, handle the error accordingly. 
Reviewed-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20250213192909.996148-11-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 14 ++++++++------ drivers/gpu/drm/xe/xe_oa.c | 30 +++++++++++++++--------------- drivers/gpu/drm/xe/xe_oa.h | 3 +-- 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 6d01932f934c..89a85f193f3a 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -877,15 +877,17 @@ int xe_device_probe(struct xe_device *xe) err = xe_pxp_init(xe); if (err) - goto err_fini_display; + goto err_remove_display; err = drm_dev_register(&xe->drm, 0); if (err) - goto err_fini_display; + goto err_remove_display; xe_display_register(xe); - xe_oa_register(xe); + err = xe_oa_register(xe); + if (err) + goto err_unregister_display; xe_pmu_register(&xe->pmu); @@ -902,7 +904,9 @@ int xe_device_probe(struct xe_device *xe) return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); -err_fini_display: +err_unregister_display: + xe_display_unregister(xe); +err_remove_display: xe_display_driver_remove(xe); return err; @@ -971,8 +975,6 @@ void xe_device_remove(struct xe_device *xe) xe_display_driver_remove(xe); - xe_oa_unregister(xe); - xe_heci_gsc_fini(xe); xe_device_call_remove_actions(xe); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 2c640185bdec..d89e6cabf5a5 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -2423,36 +2423,36 @@ err_unlock: return ret; } +static void xe_oa_unregister(void *arg) +{ + struct xe_oa *oa = arg; + + if (!oa->metrics_kobj) + return; + + kobject_put(oa->metrics_kobj); + oa->metrics_kobj = NULL; +} + /** * xe_oa_register - Xe OA registration * @xe: @xe_device * * Exposes the metrics sysfs directory upon completion of module initialization */ -void xe_oa_register(struct xe_device *xe) +int 
xe_oa_register(struct xe_device *xe) { struct xe_oa *oa = &xe->oa; if (!oa->xe) - return; + return 0; oa->metrics_kobj = kobject_create_and_add("metrics", &xe->drm.primary->kdev->kobj); -} - -/** - * xe_oa_unregister - Xe OA de-registration - * @xe: @xe_device - */ -void xe_oa_unregister(struct xe_device *xe) -{ - struct xe_oa *oa = &xe->oa; - if (!oa->metrics_kobj) - return; + return -ENOMEM; - kobject_put(oa->metrics_kobj); - oa->metrics_kobj = NULL; + return devm_add_action_or_reset(xe->drm.dev, xe_oa_unregister, oa); } static u32 num_oa_units_per_gt(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h index eb36ce250c61..e510826f9efc 100644 --- a/drivers/gpu/drm/xe/xe_oa.h +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -15,8 +15,7 @@ struct xe_gt; struct xe_hw_engine; int xe_oa_init(struct xe_device *xe); -void xe_oa_register(struct xe_device *xe); -void xe_oa_unregister(struct xe_device *xe); +int xe_oa_register(struct xe_device *xe); int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); -- 2.51.0 From 6b5506158f902b3d427f76b0c243d025de40b333 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 13 Feb 2025 11:29:08 -0800 Subject: [PATCH 10/16] drm/xe/pmu: Fail probe if xe_pmu_register() fails Now that previous callers in xe_device_probe() are handling the errors, that can be done for xe_pmu_register() as well. 
Cc: Riana Tauro Cc: Vinay Belgaumkar Reviewed-by: Tejas Upadhyay Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20250213192909.996148-12-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 89a85f193f3a..6718b7c2d1ea 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -889,7 +889,9 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err_unregister_display; - xe_pmu_register(&xe->pmu); + err = xe_pmu_register(&xe->pmu); + if (err) + goto err_unregister_display; xe_debugfs_register(xe); -- 2.51.0 From 62fbc75b28a7a2e8619c575d2a0acad595345ed1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 13 Feb 2025 11:29:09 -0800 Subject: [PATCH 11/16] drm/xe/hwmon: Stop ignoring errors on probe Not registering hwmon because it's not available (SRIOV_VF and DGFX) is different from failing the initialization. Handle the errors appropriately. 
Cc: Badal Nilawar Cc: Karthik Poosa Reviewed-by: Raag Jadav Reviewed-by: Badal Nilawar Link: https://patchwork.freedesktop.org/patch/msgid/20250213192909.996148-13-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 4 +++- drivers/gpu/drm/xe/xe_hwmon.c | 31 ++++++++++++++++--------------- drivers/gpu/drm/xe/xe_hwmon.h | 4 ++-- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 6718b7c2d1ea..91525299494e 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -895,7 +895,9 @@ int xe_device_probe(struct xe_device *xe) xe_debugfs_register(xe); - xe_hwmon_register(xe); + err = xe_hwmon_register(xe); + if (err) + goto err_unregister_display; for_each_gt(gt, xe, id) xe_gt_sanitize_freq(gt); diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 7f327e334212..48d80ffdf7bb 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -839,10 +839,9 @@ static const struct hwmon_chip_info hwmon_chip_info = { }; static void -xe_hwmon_get_preregistration_info(struct xe_device *xe) +xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) { - struct xe_mmio *mmio = xe_root_tile_mmio(xe); - struct xe_hwmon *hwmon = xe->hwmon; + struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe); long energy; u64 val_sku_unit = 0; int channel; @@ -876,33 +875,34 @@ static void xe_hwmon_mutex_destroy(void *arg) mutex_destroy(&hwmon->hwmon_lock); } -void xe_hwmon_register(struct xe_device *xe) +int xe_hwmon_register(struct xe_device *xe) { struct device *dev = xe->drm.dev; struct xe_hwmon *hwmon; + int ret; /* hwmon is available only for dGfx */ if (!IS_DGFX(xe)) - return; + return 0; /* hwmon is not available on VFs */ if (IS_SRIOV_VF(xe)) - return; + return 0; hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); if (!hwmon) - return; - - xe->hwmon = hwmon; + return -ENOMEM; 
mutex_init(&hwmon->hwmon_lock); - if (devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon)) - return; + ret = devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon); + if (ret) + return ret; /* There's only one instance of hwmon per device */ hwmon->xe = xe; + xe->hwmon = hwmon; - xe_hwmon_get_preregistration_info(xe); + xe_hwmon_get_preregistration_info(hwmon); drm_dbg(&xe->drm, "Register xe hwmon interface\n"); @@ -910,11 +910,12 @@ void xe_hwmon_register(struct xe_device *xe) hwmon->hwmon_dev = devm_hwmon_device_register_with_info(dev, "xe", hwmon, &hwmon_chip_info, hwmon_groups); - if (IS_ERR(hwmon->hwmon_dev)) { - drm_warn(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev); + drm_err(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev); xe->hwmon = NULL; - return; + return PTR_ERR(hwmon->hwmon_dev); } + + return 0; } diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h index c42a1de2cd7a..d02c1bfe8c0a 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.h +++ b/drivers/gpu/drm/xe/xe_hwmon.h @@ -11,9 +11,9 @@ struct xe_device; #if IS_REACHABLE(CONFIG_HWMON) -void xe_hwmon_register(struct xe_device *xe); +int xe_hwmon_register(struct xe_device *xe); #else -static inline void xe_hwmon_register(struct xe_device *xe) { }; +static inline int xe_hwmon_register(struct xe_device *xe) { return 0; }; #endif #endif /* _XE_HWMON_H_ */ -- 2.51.0 From 611160b02a40ce3f60ab94eea85b394dca1cafd2 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 11 Feb 2025 16:50:34 +0100 Subject: [PATCH 12/16] drm/xe/pf: Release all VFs configs on device removal MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit If we try to manually provision VFs using debugfs and then we try to unload the driver, we will see complaints like: [ ] Memory manager not clean during takedown. 
[ ] RIP: 0010:drm_mm_takedown+0x3f/0x100 [ ] [drm:drm_mm_takedown] *ERROR* node [fedff000 + 00001000]: inserted at drm_mm_insert_node_in_range+0x2bd/0x520 xe_ggtt_node_insert+0x52/0x90 [xe] pf_provision_vf_ggtt+0x1fa/0xac0 [xe] xe_gt_sriov_pf_config_set_ggtt+0x79/0x7a0 [xe] ggtt_set+0x53/0x80 [xe] simple_attr_write_xsigned.isra.0+0xd2/0x150 simple_attr_write+0x14/0x30 debugfs_attr_write+0x4e/0x80 [ ] xe 0000:00:02.0: [drm] *ERROR* GT0: GUC ID manager unclean (1/65535) [ ] xe 0000:00:02.0: [drm] GT0: total 65535 [ ] xe 0000:00:02.0: [drm] GT0: used 1 [ ] xe 0000:00:02.0: [drm] GT0: range 65534..65534 (1) [ ] xe 0000:00:02.0: [drm] *ERROR* GT0: GuC doorbells manager unclean (1/256) [ ] xe 0000:00:02.0: [drm] GT0: count: 256 [ ] xe 0000:00:02.0: [drm] GT0: available range: 1..255 (255) [ ] xe 0000:00:02.0: [drm] GT0: available total: 255 [ ] xe 0000:00:02.0: [drm] GT0: reserved range: 0..0 (1) [ ] xe 0000:00:02.0: [drm] GT0: reserved total: 1 This could be easily fixed by adding a config release action.
Signed-off-by: Michal Wajdeczko Cc: Piotr Piórkowski Reviewed-by: Piotr Piórkowski Link: https://patchwork.freedesktop.org/patch/msgid/20250211155034.1028-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 6 +++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 29 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h | 1 + 3 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index d66478deab98..c08efca6420e 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -89,6 +89,12 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) */ int xe_gt_sriov_pf_init(struct xe_gt *gt) { + int err; + + err = xe_gt_sriov_pf_config_init(gt); + if (err) + return err; + return xe_gt_sriov_pf_migration_init(gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 88bd9d97ba5c..10be109bf357 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -2356,6 +2356,35 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return err; } +static void fini_config(void *arg) +{ + struct xe_gt *gt = arg; + struct xe_device *xe = gt_to_xe(gt); + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = 1; n <= total_vfs; n++) + pf_release_vf_config(gt, n); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); +} + +/** + * xe_gt_sriov_pf_config_init - Initialize SR-IOV configuration data. + * @gt: the &xe_gt + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_gt_sriov_pf_config_init(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_PF(xe)); + + return devm_add_action_or_reset(xe->drm.dev, fini_config, gt); +} + /** * xe_gt_sriov_pf_config_restart - Restart SR-IOV configurations after a GT reset. * @gt: the &xe_gt diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index f894e9d4abba..513e6512a575 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -63,6 +63,7 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_init(struct xe_gt *gt); void xe_gt_sriov_pf_config_restart(struct xe_gt *gt); int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p); -- 2.51.0 From b5fa0913b56cedf651884d47bac3f1cf6e7e5092 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 13 Feb 2025 11:38:38 +0530 Subject: [PATCH 13/16] drm/xe: Fix typo in xe_job_ptrs %s/uinitialized/uninitialized/gc Reviewed-by: Satyanarayana K V P Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20250213060838.32493-1-tejas.upadhyay@intel.com Signed-off-by: Tejas Upadhyay --- drivers/gpu/drm/xe/xe_sched_job_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index d942b20a9f29..dbf260dded8d 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -18,9 +18,9 @@ struct dma_fence_chain; * struct xe_job_ptrs - Per hw engine instance data */ struct xe_job_ptrs { - /** @lrc_fence: Pre-allocated uinitialized lrc fence.*/ + /** @lrc_fence: Pre-allocated uninitialized lrc fence.*/ struct dma_fence *lrc_fence; - /** @chain_fence: Pre-allocated ninitialized fence chain node. 
*/ + /** @chain_fence: Pre-allocated uninitialized fence chain node. */ struct dma_fence_chain *chain_fence; /** @batch_addr: Batch buffer address. */ u64 batch_addr; -- 2.51.0 From 339adeb10472a34bb74624958b031e490531d37c Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 17 Jan 2025 12:53:05 +0100 Subject: [PATCH 14/16] drm/xe/display: Clarify XE_IOCTL_DBG message This should make it easier to understand from userspace why importing BO fails. Reviewed-by: Stuart Summers Link: https://patchwork.freedesktop.org/patch/msgid/20250117115305.53113-1-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/display/intel_fb_bo.c | 4 ++-- drivers/gpu/drm/xe/xe_bo.c | 16 ++++++++++++++++ drivers/gpu/drm/xe/xe_bo.h | 1 + 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c index 4d209ebc26c2..b91eec05ce57 100644 --- a/drivers/gpu/drm/xe/display/intel_fb_bo.c +++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c @@ -50,10 +50,10 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, /* * XE_BO_FLAG_SCANOUT should ideally be set at creation, or is * automatically set when creating FB. We cannot change caching - * mode when the boect is VM_BINDed, so we can only set + * mode when the bo is VM_BINDed, so we can only set * coherency with display when unbound. */ - if (XE_IOCTL_DBG(xe, !list_empty(&bo->ttm.base.gpuva.list))) { + if (XE_IOCTL_DBG(xe, xe_bo_is_vm_bound(bo))) { ttm_bo_unreserve(&bo->ttm); ret = -EINVAL; goto err; diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 78d09c5ed26d..25761924a8b4 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -128,6 +128,22 @@ bool xe_bo_is_stolen_devmem(struct xe_bo *bo) GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270; } +/** + * xe_bo_is_vm_bound - check if BO has any mappings through VM_BIND + * @bo: The BO + * + * Check if a given bo is bound through VM_BIND. 
This requires the + * reservation lock for the BO to be held. + * + * Returns: boolean + */ +bool xe_bo_is_vm_bound(struct xe_bo *bo) +{ + xe_bo_assert_held(bo); + + return !list_empty(&bo->ttm.base.gpuva.list); +} + static bool xe_bo_is_user(struct xe_bo *bo) { return bo->flags & XE_BO_FLAG_USER; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index f09b9315721b..a25340949415 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -241,6 +241,7 @@ bool mem_type_is_vram(u32 mem_type); bool xe_bo_is_vram(struct xe_bo *bo); bool xe_bo_is_stolen(struct xe_bo *bo); bool xe_bo_is_stolen_devmem(struct xe_bo *bo); +bool xe_bo_is_vm_bound(struct xe_bo *bo); bool xe_bo_has_single_placement(struct xe_bo *bo); uint64_t vram_region_gpu_offset(struct ttm_resource *res); -- 2.51.0 From 5bee1e2de39fe41be132ee389529407212894582 Mon Sep 17 00:00:00 2001 From: Ilia Levi Date: Thu, 30 Jan 2025 12:50:56 +0200 Subject: [PATCH 15/16] drm/xe: s/xe_mmio_init/xe_mmio_probe_early Rename so that xe_mmio_init() can be used in subsequent patches to initialize an instance of struct xe_mmio. 
Signed-off-by: Ilia Levi Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20250130105057.136586-1-ilia.levi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 2 +- drivers/gpu/drm/xe/xe_mmio.c | 6 +++--- drivers/gpu/drm/xe/xe_mmio.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 91525299494e..06ccff145050 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -692,7 +692,7 @@ int xe_device_probe_early(struct xe_device *xe) { int err; - err = xe_mmio_init(xe); + err = xe_mmio_probe_early(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index d321a21aacf0..3aed849a128b 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -60,7 +60,7 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) /* * Nothing to be done as tile 0 has already been setup earlier with the - * entire BAR mapped - see xe_mmio_init() + * entire BAR mapped - see xe_mmio_probe_early() */ if (xe->info.tile_count == 1) return; @@ -74,7 +74,7 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) /* * Although the per-tile mmio regs are not yet initialized, this * is fine as it's going to the root tile's mmio, that's - * guaranteed to be initialized earlier in xe_mmio_init() + * guaranteed to be initialized earlier in xe_mmio_probe_early() */ mtcfg = xe_mmio_read64_2x32(mmio, XEHP_MTCFG_ADDR); tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; @@ -122,7 +122,7 @@ static void mmio_fini(void *arg) root_tile->mmio.regs = NULL; } -int xe_mmio_init(struct xe_device *xe) +int xe_mmio_probe_early(struct xe_device *xe) { struct xe_tile *root_tile = xe_device_get_root_tile(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 
8a46f4006a84..b32e7ee4b23e 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -11,7 +11,7 @@ struct xe_device; struct xe_reg; -int xe_mmio_init(struct xe_device *xe); +int xe_mmio_probe_early(struct xe_device *xe); int xe_mmio_probe_tiles(struct xe_device *xe); u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg); -- 2.51.0 From eb79d71e506a1caeb0dedd1bab0e6899e8e74f5b Mon Sep 17 00:00:00 2001 From: Ilia Levi Date: Thu, 13 Feb 2025 11:35:59 +0200 Subject: [PATCH 16/16] drm/xe: Add xe_mmio_init() initialization function Add a convenience function for minimal initialization of struct xe_mmio. This function also validates that the entirety of the provided mmio region is usable with struct xe_reg. v2: Modify commit message, add kernel doc, refactor assert (Michal) v3: Fix off-by-one bug, add clarifying macro (Michal) v4: Derive bitfield width from size (Michal) Signed-off-by: Ilia Levi Reviewed-by: Michal Wajdeczko Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20250213093559.204652-1-ilia.levi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_reg_defs.h | 14 +++++++++++- drivers/gpu/drm/xe/xe_gt.c | 7 +++--- drivers/gpu/drm/xe/xe_mmio.c | 32 ++++++++++++++++++--------- drivers/gpu/drm/xe/xe_mmio.h | 2 ++ 4 files changed, 39 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index 89716172fbb8..c39aab843e35 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -7,9 +7,21 @@ #define _XE_REG_DEFS_H_ #include <linux/build_bug.h> +#include <linux/log2.h> +#include <linux/sizes.h> #include "compat-i915-headers/i915_reg_defs.h" +/** + * XE_REG_ADDR_MAX - The upper limit on MMIO register address + * + * This macro specifies the upper limit (not inclusive) on MMIO register offset + * supported by struct xe_reg and functions based on struct xe_mmio. + * + * Currently this is defined as 4 MiB.
+ */ +#define XE_REG_ADDR_MAX SZ_4M + /** * struct xe_reg - Register definition * @@ -21,7 +33,7 @@ struct xe_reg { union { struct { /** @addr: address */ - u32 addr:22; + u32 addr:const_ilog2(XE_REG_ADDR_MAX); /** * @masked: register is "masked", with upper 16bits used * to identify the bits that are updated on the lower diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index bd16ca070dd2..650a0ee56e97 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -626,10 +626,9 @@ int xe_gt_init(struct xe_gt *gt) void xe_gt_mmio_init(struct xe_gt *gt) { struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = tile_to_xe(tile); - gt->mmio.regs = tile->mmio.regs; - gt->mmio.regs_size = tile->mmio.regs_size; - gt->mmio.tile = tile; + xe_mmio_init(&gt->mmio, tile, tile->mmio.regs, tile->mmio.regs_size); if (gt->info.type == XE_GT_TYPE_MEDIA) { gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET; @@ -639,7 +638,7 @@ void xe_gt_mmio_init(struct xe_gt *gt) gt->mmio.adj_limit = 0; } - if (IS_SRIOV_VF(gt_to_xe(gt))) + if (IS_SRIOV_VF(xe)) gt->mmio.sriov_vf_gt = gt; } diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 3aed849a128b..70a36e777546 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -55,7 +55,6 @@ static void tiles_fini(void *arg) static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) { struct xe_tile *tile; - void __iomem *regs; u8 id; /* @@ -94,13 +93,8 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) } } - regs = xe->mmio.regs; - for_each_tile(tile, xe, id) { - tile->mmio.regs_size = SZ_4M; - tile->mmio.regs = regs; - tile->mmio.tile = tile; - regs += tile_mmio_size; - } + for_each_remote_tile(tile, xe, id) + xe_mmio_init(&tile->mmio, tile, xe->mmio.regs + id * tile_mmio_size, SZ_4M); } int xe_mmio_probe_tiles(struct xe_device *xe) @@ -140,13 +134,29 @@ int xe_mmio_probe_early(struct xe_device *xe) } /* Setup first
tile; other tiles (if present) will be setup later. */ - root_tile->mmio.regs_size = SZ_4M; - root_tile->mmio.regs = xe->mmio.regs; - root_tile->mmio.tile = root_tile; + xe_mmio_init(&root_tile->mmio, root_tile, xe->mmio.regs, SZ_4M); return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe); } +/** + * xe_mmio_init() - Initialize an MMIO instance + * @mmio: Pointer to the MMIO instance to initialize + * @tile: The tile to which the MMIO region belongs + * @ptr: Pointer to the start of the MMIO region + * @size: The size of the MMIO region in bytes + * + * This is a convenience function for minimal initialization of struct xe_mmio. + */ +void xe_mmio_init(struct xe_mmio *mmio, struct xe_tile *tile, void __iomem *ptr, u32 size) +{ + xe_tile_assert(tile, size <= XE_REG_ADDR_MAX); + + mmio->regs = ptr; + mmio->regs_size = size; + mmio->tile = tile; +} + static void mmio_flush_pending_writes(struct xe_mmio *mmio) { #define DUMMY_REG_OFFSET 0x130030 diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index b32e7ee4b23e..c151ba569003 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -14,6 +14,8 @@ struct xe_reg; int xe_mmio_probe_early(struct xe_device *xe); int xe_mmio_probe_tiles(struct xe_device *xe); +void xe_mmio_init(struct xe_mmio *mmio, struct xe_tile *tile, void __iomem *ptr, u32 size); + u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg); u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg); void xe_mmio_write32(struct xe_mmio *mmio, struct xe_reg reg, u32 val); -- 2.51.0