From 776e3b502b6e49b1a72507d1a01a9b49d67fd843 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 13 Feb 2025 11:28:57 -0800 Subject: [PATCH] drm/xe: Add callback support for driver remove MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit xe device probe uses devm cleanup in most places. However there are a few cases where this is not possible: when the driver interacts with component add/del. In that case, the resource group would be cleaned up while the entire device resources are in the process of cleanup. One example is the xe_gsc_proxy and display using that to interact with mei and audio. Add a callback-based remove so the exception doesn't make the probe use multiple error handling styles. v2: Change internal API to mimic the devm API. This will make it easier to migrate in future when devm can be used. Cc: Daniele Ceraolo Spurio Cc: Rodrigo Vivi Cc: Thomas Hellström Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20250213192909.996148-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 68 ++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_device.h | 3 ++ drivers/gpu/drm/xe/xe_device_types.h | 14 ++++++ drivers/gpu/drm/xe/xe_pci.c | 4 +- 4 files changed, 88 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index c641c802d4fb..961df7d5ba63 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -65,6 +65,12 @@ #include +struct xe_device_remove_action { + struct list_head node; + void (*action)(void *); + void *data; +}; + static int xe_file_open(struct drm_device *dev, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -746,6 +752,9 @@ int xe_device_probe(struct xe_device *xe) u8 last_gt; u8 id; + xe->probing = true; + INIT_LIST_HEAD(&xe->remove_action_list); + xe_pat_init_early(xe); err = xe_sriov_init(xe); @@ -886,6 +895,8 @@ int 
xe_device_probe(struct xe_device *xe) xe_vsec_init(xe); + xe->probing = false; + return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); err_fini_display: @@ -907,6 +918,61 @@ err: return err; } +/** + * xe_device_call_remove_actions - Call the remove actions + * @xe: xe device instance + * + * This is only to be used by xe_pci and xe_device to call the remove actions + * while removing the driver or handling probe failures. + */ +void xe_device_call_remove_actions(struct xe_device *xe) +{ + struct xe_device_remove_action *ra, *tmp; + + list_for_each_entry_safe(ra, tmp, &xe->remove_action_list, node) { + ra->action(ra->data); + list_del(&ra->node); + kfree(ra); + } + + xe->probing = false; +} + +/** + * xe_device_add_action_or_reset - Add an action to run on driver removal + * @xe: xe device instance + * @action: Function that should be called on device remove + * @data: Pointer to data passed to @action implementation + * + * This adds a custom action to the list of remove callbacks executed on device + * remove, before any dev or drm managed resources are removed. This is only + * needed if the action leads to component_del()/component_master_del() since + * that is not compatible with devres cleanup. + * + * Returns: 0 on success or a negative error code on failure, in which case + * @action is already called. 
+ */ +int xe_device_add_action_or_reset(struct xe_device *xe, + void (*action)(void *), void *data) +{ + struct xe_device_remove_action *ra; + + drm_WARN_ON(&xe->drm, !xe->probing); + + ra = kmalloc(sizeof(*ra), GFP_KERNEL); + if (!ra) { + action(data); + return -ENOMEM; + } + + INIT_LIST_HEAD(&ra->node); + ra->action = action; + ra->data = data; + list_add(&ra->node, &xe->remove_action_list); + + return 0; +} + static void xe_device_remove_display(struct xe_device *xe) { xe_display_unregister(xe); @@ -932,6 +998,8 @@ void xe_device_remove(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_remove(gt); + + xe_device_call_remove_actions(xe); } void xe_device_shutdown(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 0bc3bc8e6803..079dad32a6f5 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -45,6 +45,9 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, const struct pci_device_id *ent); int xe_device_probe_early(struct xe_device *xe); int xe_device_probe(struct xe_device *xe); +int xe_device_add_action_or_reset(struct xe_device *xe, + void (*action)(void *), void *data); +void xe_device_call_remove_actions(struct xe_device *xe); void xe_device_remove(struct xe_device *xe); void xe_device_shutdown(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 28d10a1d7b64..4cf08c408b95 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -428,6 +428,20 @@ struct xe_device { /** @tiles: device tiles */ struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE]; + /** + * @remove_action_list: list of actions to execute on device remove. + * Use xe_device_add_action_or_reset() for that. Actions can only be added + * during probe and are executed during the call from PCI subsystem to + * remove the driver from the device. 
+ */ + struct list_head remove_action_list; + + /** + * @probing: cover the section in which @remove_action_list can be used + * to post cleaning actions + */ + bool probing; + /** * @mem_access: keep track of memory access in the device, possibly * triggering additional actions when they occur. diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 663bfc29cddc..f8417f4d8ce6 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -900,8 +900,10 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return err; err = xe_device_probe(xe); - if (err) + if (err) { + xe_device_call_remove_actions(xe); return err; + } err = xe_pm_init(xe); if (err) -- 2.50.1