www.infradead.org Git - users/hch/misc.git/commitdiff
drm/xe/guc: Add devm release action to safely tear down CT
author Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Mon, 1 Sep 2025 07:25:41 +0000 (12:55 +0530)
committer Michal Wajdeczko <michal.wajdeczko@intel.com>
Tue, 2 Sep 2025 06:21:58 +0000 (08:21 +0200)
When a buffer object (BO) is allocated with the XE_BO_FLAG_GGTT_INVALIDATE
flag, the driver initiates TLB invalidation requests via the CTB mechanism
while releasing the BO. However, a premature release of the CTB BO can lead
to system crashes, as observed in:

Oops: Oops: 0000 [#1] SMP NOPTI
RIP: 0010:h2g_write+0x2f3/0x7c0 [xe]
Call Trace:
 guc_ct_send_locked+0x8b/0x670 [xe]
 xe_guc_ct_send_locked+0x19/0x60 [xe]
 send_tlb_invalidation+0xb4/0x460 [xe]
 xe_gt_tlb_invalidation_ggtt+0x15e/0x2e0 [xe]
 ggtt_invalidate_gt_tlb.part.0+0x16/0x90 [xe]
 ggtt_node_remove+0x110/0x140 [xe]
 xe_ggtt_node_remove+0x40/0xa0 [xe]
 xe_ggtt_remove_bo+0x87/0x250 [xe]

Introduce a devm-managed release action during xe_guc_ct_init() and
xe_guc_ct_init_post_hwconfig() to ensure proper CTB disablement before
resource deallocation, preventing the use-after-free scenario.

Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Summers Stuart <stuart.summers@intel.com>
Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://lore.kernel.org/r/20250901072541.31461-1-satyanarayana.k.v.p@intel.com
drivers/gpu/drm/xe/xe_guc.c
drivers/gpu/drm/xe/xe_guc_ct.c
drivers/gpu/drm/xe/xe_guc_ct.h

index 37d06c51180cbd38aa01856bbf7fde847ee08828..b3a6408a5760bda7d6ee1ae438924436f6c6055e 100644 (file)
@@ -709,10 +709,6 @@ static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc)
        if (ret)
                return ret;
 
-       ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->ct.bo);
-       if (ret)
-               return ret;
-
        return 0;
 }
 
@@ -847,6 +843,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
        if (ret)
                return ret;
 
+       ret = xe_guc_ct_init_post_hwconfig(&guc->ct);
+       if (ret)
+               return ret;
+
        guc_init_params_post_hwconfig(guc);
 
        ret = xe_guc_submit_init(guc, ~0);
index 848065a25c441db4277e6df9d9c87ba0c117ed7c..e431ff73227cc34d1dfc179f238b4cfb100d4e81 100644 (file)
@@ -39,6 +39,8 @@ static void receive_g2h(struct xe_guc_ct *ct);
 static void g2h_worker_func(struct work_struct *w);
 static void safe_mode_worker_func(struct work_struct *w);
 static void ct_exit_safe_mode(struct xe_guc_ct *ct);
+static void guc_ct_change_state(struct xe_guc_ct *ct,
+                               enum xe_guc_ct_state state);
 
 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
 enum {
@@ -252,6 +254,13 @@ int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct)
 }
 ALLOW_ERROR_INJECTION(xe_guc_ct_init_noalloc, ERRNO); /* See xe_pci_probe() */
 
+static void guc_action_disable_ct(void *arg)
+{
+       struct xe_guc_ct *ct = arg;
+
+       guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED);
+}
+
 int xe_guc_ct_init(struct xe_guc_ct *ct)
 {
        struct xe_device *xe = ct_to_xe(ct);
@@ -268,10 +277,40 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
                return PTR_ERR(bo);
 
        ct->bo = bo;
-       return 0;
+
+       return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct);
 }
 ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */
 
+/**
+ * xe_guc_ct_init_post_hwconfig - Reinitialize the GuC CTB in VRAM
+ * @ct: the &xe_guc_ct
+ *
+ * Allocate a new BO in VRAM and free the previous BO that was allocated
+ * in system memory (SMEM). Applicable only for DGFX products.
+ *
+ * Return: 0 on success, or a negative errno on failure.
+ */
+int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct)
+{
+       struct xe_device *xe = ct_to_xe(ct);
+       struct xe_gt *gt = ct_to_gt(ct);
+       struct xe_tile *tile = gt_to_tile(gt);
+       int ret;
+
+       xe_assert(xe, !xe_guc_ct_enabled(ct));
+
+       if (!IS_DGFX(xe))
+               return 0;
+
+       ret = xe_managed_bo_reinit_in_vram(xe, tile, &ct->bo);
+       if (ret)
+               return ret;
+
+       devm_release_action(xe->drm.dev, guc_action_disable_ct, ct);
+       return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct);
+}
+
 #define desc_read(xe_, guc_ctb__, field_)                      \
        xe_map_rd_field(xe_, &guc_ctb__->desc, 0,               \
                        struct guc_ct_buffer_desc, field_)
index 18d4225e65024cc942311d4c773252dd05f311de..cf41210ab30ae7975724e6d64b38d8ec596d9d78 100644 (file)
@@ -13,6 +13,7 @@ struct xe_device;
 
 int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct);
 int xe_guc_ct_init(struct xe_guc_ct *ct);
+int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct);
 int xe_guc_ct_enable(struct xe_guc_ct *ct);
 void xe_guc_ct_disable(struct xe_guc_ct *ct);
 void xe_guc_ct_stop(struct xe_guc_ct *ct);