From 7e3f4a352337f1ce09c1795b58be23dfab29e610 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 16 Apr 2025 16:09:17 +0100 Subject: [PATCH] drm/xe: handle pinned memory in PM notifier MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Userspace is still alive and kicking at this point so actually moving pinned stuff here is tricky. However, we can instead pre-allocate the backup storage upfront from the notifier, such that we scoop up as much as we can, and then leave the final .suspend() to do the actual copy (or allocate anything that we missed). That way the bulk of our allocations will hopefully be done outside the more restrictive .suspend(). We do need to be extra careful though, since the pinned handling can now race with PM notifier, like something becoming unpinned after we prepare it from the notifier. v2 (Thomas): - Fix kernel doc and drop the pin as soon as we are done with the restore, instead of deferring to later. Suggested-by: Thomas Hellström Signed-off-by: Matthew Auld Reviewed-by: Thomas Hellström Link: https://lore.kernel.org/r/20250416150913.434369-8-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_bo.c | 123 +++++++++++++++++++++++++++---- drivers/gpu/drm/xe/xe_bo.h | 2 + drivers/gpu/drm/xe/xe_bo_evict.c | 51 ++++++++++++- drivers/gpu/drm/xe/xe_bo_evict.h | 2 + drivers/gpu/drm/xe/xe_pm.c | 17 ++++- 5 files changed, 176 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 79adaee5a0e9..61420f863ac5 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1084,6 +1084,80 @@ out_unref: return lret; } +/** + * xe_bo_notifier_prepare_pinned() - Prepare a pinned VRAM object to be backed + * up in system memory. + * @bo: The buffer object to prepare. + * + * On successful completion, the object backup pages are allocated. Expectation + * is that this is called from the PM notifier, prior to suspend/hibernation. + * + * Return: 0 on success. Negative error code on failure. + */ +int xe_bo_notifier_prepare_pinned(struct xe_bo *bo) +{ + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); + struct xe_bo *backup; + int ret = 0; + + xe_bo_lock(bo, false); + + xe_assert(xe, !bo->backup_obj); + + /* + * Since this is called from the PM notifier we might have raced with + * someone unpinning this after we dropped the pinned list lock and + * grabbing the above bo lock. + */ + if (!xe_bo_is_pinned(bo)) + goto out_unlock_bo; + + if (!xe_bo_is_vram(bo)) + goto out_unlock_bo; + + if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) + goto out_unlock_bo; + + backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); + if (IS_ERR(backup)) { + ret = PTR_ERR(backup); + goto out_unlock_bo; + } + + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ + ttm_bo_pin(&backup->ttm); + bo->backup_obj = backup; + +out_unlock_bo: + xe_bo_unlock(bo); + return ret; +} + +/** + * xe_bo_notifier_unprepare_pinned() - Undo the previous prepare operation. + * @bo: The buffer object to undo the prepare for. + * + * Always returns 0. The backup object is removed, if still present. Expectation + * it that this called from the PM notifier when undoing the prepare step. + * + * Return: Always returns 0. + */ +int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo) +{ + xe_bo_lock(bo, false); + if (bo->backup_obj) { + ttm_bo_unpin(&bo->backup_obj->ttm); + xe_bo_put(bo->backup_obj); + bo->backup_obj = NULL; + } + xe_bo_unlock(bo); + + return 0; +} + /** * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory * @bo: The buffer object to move. @@ -1098,7 +1172,8 @@ out_unref: int xe_bo_evict_pinned(struct xe_bo *bo) { struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); - struct xe_bo *backup; + struct xe_bo *backup = bo->backup_obj; + bool backup_created = false; bool unmap = false; int ret = 0; @@ -1120,15 +1195,18 @@ int xe_bo_evict_pinned(struct xe_bo *bo) if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) goto out_unlock_bo; - backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, - DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | - XE_BO_FLAG_PINNED); - if (IS_ERR(backup)) { - ret = PTR_ERR(backup); - goto out_unlock_bo; + if (!backup) { + backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, + DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); + if (IS_ERR(backup)) { + ret = PTR_ERR(backup); + goto out_unlock_bo; + } + backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ + backup_created = true; } - backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */ if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) { struct xe_migrate *migrate; @@ -1175,11 +1253,12 @@ int xe_bo_evict_pinned(struct xe_bo *bo) bo->size); } - bo->backup_obj = backup; + if (!bo->backup_obj) + bo->backup_obj = backup; out_backup: xe_bo_vunmap(backup); - if (ret) + if (ret && backup_created) xe_bo_put(backup); out_unlock_bo: if (unmap) @@ -1215,9 +1294,11 @@ int xe_bo_restore_pinned(struct xe_bo *bo) xe_bo_lock(bo, false); - ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx); - if (ret) - goto out_backup; + if (!xe_bo_is_pinned(backup)) { + ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx); + if (ret) + goto out_unlock_bo; + } if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) { struct xe_migrate *migrate; @@ -1257,7 +1338,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo) if (iosys_map_is_null(&bo->vmap)) { ret = xe_bo_vmap(bo); if (ret) - goto out_unlock_bo; + goto out_backup; unmap = true; } @@ -1269,8 +1350,11 @@ int xe_bo_restore_pinned(struct xe_bo *bo) out_backup: xe_bo_vunmap(backup); - if (!bo->backup_obj) + if (!bo->backup_obj) { + if (xe_bo_is_pinned(backup)) + ttm_bo_unpin(&backup->ttm); xe_bo_put(backup); + } out_unlock_bo: if (unmap) xe_bo_vunmap(bo); @@ -2304,6 +2388,13 @@ void xe_bo_unpin(struct xe_bo *bo) xe_assert(xe, !list_empty(&bo->pinned_link)); list_del_init(&bo->pinned_link); spin_unlock(&xe->pinned.lock); + + if (bo->backup_obj) { + if (xe_bo_is_pinned(bo->backup_obj)) + ttm_bo_unpin(&bo->backup_obj->ttm); + xe_bo_put(bo->backup_obj); + bo->backup_obj = NULL; + } } ttm_bo_unpin(&bo->ttm); if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm)) diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 0a19b50045b2..8bc449c78cc7 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -277,6 +277,8 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type); int xe_bo_evict(struct xe_bo *bo, bool force_alloc); int xe_bo_evict_pinned(struct xe_bo *bo); +int xe_bo_notifier_prepare_pinned(struct xe_bo *bo); +int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo); int xe_bo_restore_pinned(struct xe_bo *bo); int xe_bo_dma_unmap_pinned(struct xe_bo *bo); diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 748360fd2439..ed3746d32b27 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -34,7 +34,13 @@ static int xe_bo_apply_to_pinned(struct xe_device *xe, ret = pinned_fn(bo); if (ret && pinned_list != new_list) { spin_lock(&xe->pinned.lock); - list_move(&bo->pinned_link, pinned_list); + /* + * We might no longer be pinned, since PM notifier can + * call this. If the pinned link is now empty, keep it + * that way. + */ + if (!list_empty(&bo->pinned_link)) + list_move(&bo->pinned_link, pinned_list); spin_unlock(&xe->pinned.lock); } xe_bo_put(bo); @@ -46,6 +52,49 @@ static int xe_bo_apply_to_pinned(struct xe_device *xe, return ret; } +/** + * xe_bo_notifier_prepare_all_pinned() - Pre-allocate the backing pages for all + * pinned VRAM objects which need to be saved. + * @xe: xe device + * + * Should be called from PM notifier when preparing for s3/s4. + * + * Return: 0 on success, negative error code on error. + */ +int xe_bo_notifier_prepare_all_pinned(struct xe_device *xe) +{ + int ret; + + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present, + &xe->pinned.early.kernel_bo_present, + xe_bo_notifier_prepare_pinned); + if (!ret) + ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, + &xe->pinned.late.kernel_bo_present, + xe_bo_notifier_prepare_pinned); + + return ret; +} + +/** + * xe_bo_notifier_unprepare_all_pinned() - Remove the backing pages for all + * pinned VRAM objects which have been restored. + * @xe: xe device + * + * Should be called from PM notifier after exiting s3/s4 (either on success or + * failure). + */ +void xe_bo_notifier_unprepare_all_pinned(struct xe_device *xe) +{ + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.early.kernel_bo_present, + &xe->pinned.early.kernel_bo_present, + xe_bo_notifier_unprepare_pinned); + + (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present, + &xe->pinned.late.kernel_bo_present, + xe_bo_notifier_unprepare_pinned); +} + /** * xe_bo_evict_all_user - evict all non-pinned user BOs from VRAM * @xe: xe device diff --git a/drivers/gpu/drm/xe/xe_bo_evict.h b/drivers/gpu/drm/xe/xe_bo_evict.h index e7f048634b32..e8385cb7f5e9 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.h +++ b/drivers/gpu/drm/xe/xe_bo_evict.h @@ -10,6 +10,8 @@ struct xe_device; int xe_bo_evict_all(struct xe_device *xe); int xe_bo_evict_all_user(struct xe_device *xe); +int xe_bo_notifier_prepare_all_pinned(struct xe_device *xe); +void xe_bo_notifier_unprepare_all_pinned(struct xe_device *xe); int xe_bo_restore_early(struct xe_device *xe); int xe_bo_restore_late(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index d8a411d3ee96..38514cef817e 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -297,9 +297,22 @@ static int xe_pm_notifier_callback(struct notifier_block *nb, case PM_SUSPEND_PREPARE: xe_pm_runtime_get(xe); err = xe_bo_evict_all_user(xe); - xe_pm_runtime_put(xe); - if (err) + if (err) { drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err); + xe_pm_runtime_put(xe); + break; + } + + err = xe_bo_notifier_prepare_all_pinned(xe); + if (err) { + drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err); + xe_pm_runtime_put(xe); + } + break; + case PM_POST_HIBERNATION: + case PM_POST_SUSPEND: + xe_bo_notifier_unprepare_all_pinned(xe); + xe_pm_runtime_put(xe); break; } -- 2.50.1