www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
drm/xe/vf: Fix VM crash during VF driver release
author: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Tue, 29 Jul 2025 12:07:20 +0000 (17:37 +0530)
committer: Matthew Brost <matthew.brost@intel.com>
Wed, 30 Jul 2025 05:05:14 +0000 (22:05 -0700)
The VF CCS save/restore series (patchwork #149108) has a dependency
on the migration framework. A recent migration update in commit
d65ff1ec8535 ("drm/xe: Split xe_migrate allocation from initialization")
caused a VM crash during XE driver release for iGPU devices.

Oops: general protection fault, probably for non-canonical address
0x6b6b6b6b6b6b6b83: 0000 [#1] SMP NOPTI
RIP: 0010:xe_lrc_ring_head+0x12/0xb0 [xe]
Call Trace:
 xe_sriov_vf_ccs_fini+0x1e/0x40 [xe]
 devm_action_release+0x12/0x30
 release_nodes+0x3a/0x120
 devres_release_all+0x96/0xd0
 device_unbind_cleanup+0x12/0x80
 device_release_driver_internal+0x23a/0x280
 device_release_driver+0x12/0x20
 pci_stop_bus_device+0x69/0x90
 pci_stop_and_remove_bus_device+0x12/0x30
 pci_iov_remove_virtfn+0xbd/0x130
 sriov_disable+0x42/0x100
 pci_disable_sriov+0x34/0x50
 xe_pci_sriov_configure+0xf71/0x1020 [xe]

Update the VF CCS migration initialization sequence to align with the new
migration framework changes, resolving the release-time crash.

Fixes: f3009272ff2e ("drm/xe/vf: Create contexts for CCS read write")
Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Piotr Piórkowski <piotr.piorkowski@intel.com>
Reviewed-by: Piotr Piórkowski <piotr.piorkowski@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://lore.kernel.org/r/20250729120720.13990-1-satyanarayana.k.v.p@intel.com
drivers/gpu/drm/xe/xe_gt.c
drivers/gpu/drm/xe/xe_migrate.c
drivers/gpu/drm/xe/xe_migrate.h
drivers/gpu/drm/xe/xe_sriov_vf_ccs.c

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index c8eda36546d343c531502bfed7da9e801e5a7123..5a79c6e3208b4384605efe328124a082bb350d60 100644 (file)
@@ -564,11 +564,9 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt)
        if (xe_gt_is_main_type(gt)) {
                struct xe_tile *tile = gt_to_tile(gt);
 
-               tile->migrate = xe_migrate_init(tile);
-               if (IS_ERR(tile->migrate)) {
-                       err = PTR_ERR(tile->migrate);
+               err = xe_migrate_init(tile->migrate);
+               if (err)
                        goto err_force_wake;
-               }
        }
 
        err = xe_uc_load_hw(&gt->uc);
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 90065d7d29ff8863071d08ef3e7dd5a8bfad7322..3a276e2348a2d5788f6d16934f1f45a67bfdbe7a 100644 (file)
@@ -396,15 +396,15 @@ struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile)
 
 /**
  * xe_migrate_init() - Initialize a migrate context
- * @tile: Back-pointer to the tile we're initializing for.
+ * @m: The migration context
  *
- * Return: Pointer to a migrate context on success. Error pointer on error.
+ * Return: 0 if successful, negative error code on failure
  */
-struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
+int xe_migrate_init(struct xe_migrate *m)
 {
-       struct xe_device *xe = tile_to_xe(tile);
+       struct xe_tile *tile = m->tile;
        struct xe_gt *primary_gt = tile->primary_gt;
-       struct xe_migrate *m = tile->migrate;
+       struct xe_device *xe = tile_to_xe(tile);
        struct xe_vm *vm;
        int err;
 
@@ -412,15 +412,13 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
        vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION |
                          XE_VM_FLAG_SET_TILE_ID(tile));
        if (IS_ERR(vm))
-               return ERR_CAST(vm);
+               return PTR_ERR(vm);
 
        xe_vm_lock(vm, false);
        err = xe_migrate_prepare_vm(tile, m, vm);
        xe_vm_unlock(vm);
-       if (err) {
-               xe_vm_close_and_put(vm);
-               return ERR_PTR(err);
-       }
+       if (err)
+               goto err_out;
 
        if (xe->info.has_usm) {
                struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
@@ -429,8 +427,10 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
                                                           false);
                u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt);
 
-               if (!hwe || !logical_mask)
-                       return ERR_PTR(-EINVAL);
+               if (!hwe || !logical_mask) {
+                       err = -EINVAL;
+                       goto err_out;
+               }
 
                /*
                 * XXX: Currently only reserving 1 (likely slow) BCS instance on
@@ -449,8 +449,8 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
                                                  EXEC_QUEUE_FLAG_MIGRATE, 0);
        }
        if (IS_ERR(m->q)) {
-               xe_vm_close_and_put(vm);
-               return ERR_CAST(m->q);
+               err = PTR_ERR(m->q);
+               goto err_out;
        }
 
        mutex_init(&m->job_mutex);
@@ -460,7 +460,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 
        err = devm_add_action_or_reset(xe->drm.dev, xe_migrate_fini, m);
        if (err)
-               return ERR_PTR(err);
+               return err;
 
        if (IS_DGFX(xe)) {
                if (xe_migrate_needs_ccs_emit(xe))
@@ -475,7 +475,12 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
                        (unsigned long long)m->min_chunk_size);
        }
 
-       return m;
+       return err;
+
+err_out:
+       xe_vm_close_and_put(vm);
+       return err;
+
 }
 
 static u64 max_mem_transfer_per_pass(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index 3758f961548465a804935c8158012e76e79669bc..e81ea6b27fb5de56cf845e1e7dda226c0a7f9241 100644 (file)
@@ -105,7 +105,7 @@ struct xe_migrate_pt_update {
 };
 
 struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile);
-struct xe_migrate *xe_migrate_init(struct xe_tile *tile);
+int xe_migrate_init(struct xe_migrate *m);
 
 struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
                                     unsigned long npages,
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
index af43e04179aaed864b851c44dff2607aa8f46e2f..bf9fa1238462a439017bc42b967267afbf825fb4 100644 (file)
@@ -270,11 +270,16 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
                ctx = &tile->sriov.vf.ccs[ctx_id];
                ctx->ctx_id = ctx_id;
 
-               migrate = xe_migrate_init(tile);
+               migrate = xe_migrate_alloc(tile);
                if (IS_ERR(migrate)) {
                        err = PTR_ERR(migrate);
                        goto err_ret;
                }
+
+               err = xe_migrate_init(migrate);
+               if (err)
+                       goto err_ret;
+
                ctx->migrate = migrate;
 
                err = alloc_bb_pool(tile, ctx);