From 86f69c26113c3b35967f29db052d40a10317cac0 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 3 Apr 2025 11:24:42 +0100
Subject: [PATCH] drm/xe: use backup object for pinned save/restore
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Currently we move pinned objects, relying on the fact that the lpfn/fpfn
will force the placement to occupy the same pages when restoring. However,
this then limits all such pinned objects to be contig underneath. In
addition, moving pinned objects in the first place is likely a little
fragile. Rather than moving such objects, copy the page contents to a
secondary system memory object; that way the VRAM pages never move and
remain pinned. This also opens the door for eventually having non-contig
pinned objects that can also be saved/restored using the blitter.

v2:
 - Make sure to drop the fence ref.
 - Handle NULL bo->migrate.
v3:
 - Ensure we add the copy fence to the BOs, otherwise backup_obj can be
   freed before the pipelined copy finishes.
v4:
 - Rebase on the newly added apply-to-pinned infra.

Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1182
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Satyanarayana K V P
Cc: Thomas Hellström
Cc: Matthew Brost
Reviewed-by: Satyanarayana K V P
Link: https://lore.kernel.org/r/20250403102440.266113-10-matthew.auld@intel.com
---
 drivers/gpu/drm/xe/xe_bo.c       | 315 ++++++++++++++++---------------
 drivers/gpu/drm/xe/xe_bo_evict.c |   2 -
 drivers/gpu/drm/xe/xe_bo_types.h |   2 +
 3 files changed, 167 insertions(+), 152 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 3c7c2353d3c8..1c6b2718fb11 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -916,79 +916,44 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
                 xe_pm_runtime_get_noresume(xe);
         }
 
-        if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
-                /*
-                 * Kernel memory that is pinned should only be moved on suspend
-                 * / resume, some of the pinned memory is required for the
-                 * device to resume / use the GPU to move other evicted memory
-                 * (user memory) around. This likely could be optimized a bit
-                 * further where we find the minimum set of pinned memory
-                 * required for resume but for simplity doing a memcpy for all
-                 * pinned memory.
-                 */
-                ret = xe_bo_vmap(bo);
-                if (!ret) {
-                        ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
-
-                        /* Create a new VMAP once kernel BO back in VRAM */
-                        if (!ret && resource_is_vram(new_mem)) {
-                                struct xe_vram_region *vram = res_to_mem_region(new_mem);
-                                void __iomem *new_addr = vram->mapping +
-                                        (new_mem->start << PAGE_SHIFT);
+        if (move_lacks_source) {
+                u32 flags = 0;
 
-                                if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
-                                        ret = -EINVAL;
-                                        xe_pm_runtime_put(xe);
-                                        goto out;
-                                }
+                if (mem_type_is_vram(new_mem->mem_type))
+                        flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
+                else if (handle_system_ccs)
+                        flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
 
-                                xe_assert(xe, new_mem->start ==
-                                          bo->placements->fpfn);
-
-                                iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
-                        }
-                }
+                fence = xe_migrate_clear(migrate, bo, new_mem, flags);
         } else {
-                if (move_lacks_source) {
-                        u32 flags = 0;
-
-                        if (mem_type_is_vram(new_mem->mem_type))
-                                flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
-                        else if (handle_system_ccs)
-                                flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
-
-                        fence = xe_migrate_clear(migrate, bo, new_mem, flags);
-                }
-                else
-                        fence = xe_migrate_copy(migrate, bo, bo, old_mem,
-                                                new_mem, handle_system_ccs);
-                if (IS_ERR(fence)) {
-                        ret = PTR_ERR(fence);
-                        xe_pm_runtime_put(xe);
-                        goto out;
-                }
-                if (!move_lacks_source) {
-                        ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
-                                                        true, new_mem);
-                        if (ret) {
-                                dma_fence_wait(fence, false);
-                                ttm_bo_move_null(ttm_bo, new_mem);
-                                ret = 0;
-                        }
-                } else {
-                        /*
-                         * ttm_bo_move_accel_cleanup() may blow up if
-                         * bo->resource == NULL, so just attach the
-                         * fence and set the new resource.
-                         */
-                        dma_resv_add_fence(ttm_bo->base.resv, fence,
-                                           DMA_RESV_USAGE_KERNEL);
+                fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem,
+                                        handle_system_ccs);
+        }
+        if (IS_ERR(fence)) {
+                ret = PTR_ERR(fence);
+                xe_pm_runtime_put(xe);
+                goto out;
+        }
+        if (!move_lacks_source) {
+                ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
+                                                new_mem);
+                if (ret) {
+                        dma_fence_wait(fence, false);
                         ttm_bo_move_null(ttm_bo, new_mem);
+                        ret = 0;
                 }
-
-                dma_fence_put(fence);
+        } else {
+                /*
+                 * ttm_bo_move_accel_cleanup() may blow up if
+                 * bo->resource == NULL, so just attach the
+                 * fence and set the new resource.
+                 */
+                dma_resv_add_fence(ttm_bo->base.resv, fence,
+                                   DMA_RESV_USAGE_KERNEL);
+                ttm_bo_move_null(ttm_bo, new_mem);
         }
 
+        dma_fence_put(fence);
         xe_pm_runtime_put(xe);
 
 out:
@@ -1125,59 +1090,90 @@ out_unref:
  */
 int xe_bo_evict_pinned(struct xe_bo *bo)
 {
-        struct ttm_place place = {
-                .mem_type = XE_PL_TT,
-        };
-        struct ttm_placement placement = {
-                .placement = &place,
-                .num_placement = 1,
-        };
-        struct ttm_operation_ctx ctx = {
-                .interruptible = false,
-                .gfp_retry_mayfail = true,
-        };
-        struct ttm_resource *new_mem;
-        int ret;
+        struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
+        struct xe_bo *backup;
+        bool unmap = false;
+        int ret = 0;
 
-        xe_bo_assert_held(bo);
+        xe_bo_lock(bo, false);
 
-        if (WARN_ON(!bo->ttm.resource))
-                return -EINVAL;
+        if (WARN_ON(!bo->ttm.resource)) {
+                ret = -EINVAL;
+                goto out_unlock_bo;
+        }
 
-        if (WARN_ON(!xe_bo_is_pinned(bo)))
-                return -EINVAL;
+        if (WARN_ON(!xe_bo_is_pinned(bo))) {
+                ret = -EINVAL;
+                goto out_unlock_bo;
+        }
 
         if (!xe_bo_is_vram(bo))
-                return 0;
+                goto out_unlock_bo;
 
-        ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
-        if (ret)
-                return ret;
+        backup = xe_bo_create_locked(xe, NULL, NULL, bo->size, ttm_bo_type_kernel,
+                                     XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
+                                     XE_BO_FLAG_PINNED);
+        if (IS_ERR(backup)) {
+                ret = PTR_ERR(backup);
+                goto out_unlock_bo;
+        }
+
+        if (xe_bo_is_user(bo)) {
+                struct xe_migrate *migrate;
+                struct dma_fence *fence;
+
+                if (bo->tile)
+                        migrate = bo->tile->migrate;
+                else
+                        migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
+
+                ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+                if (ret)
+                        goto out_backup;
+
+                ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
+                if (ret)
+                        goto out_backup;
 
-        if (!bo->ttm.ttm) {
-                bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
-                if (!bo->ttm.ttm) {
-                        ret = -ENOMEM;
-                        goto err_res_free;
+                fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
+                                        backup->ttm.resource, false);
+                if (IS_ERR(fence)) {
+                        ret = PTR_ERR(fence);
+                        goto out_backup;
                 }
-        }
 
-        ret = ttm_bo_populate(&bo->ttm, &ctx);
-        if (ret)
-                goto err_res_free;
+                dma_resv_add_fence(bo->ttm.base.resv, fence,
+                                   DMA_RESV_USAGE_KERNEL);
+                dma_resv_add_fence(backup->ttm.base.resv, fence,
+                                   DMA_RESV_USAGE_KERNEL);
+                dma_fence_put(fence);
+        } else {
+                ret = xe_bo_vmap(backup);
+                if (ret)
+                        goto out_backup;
 
-        ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
-        if (ret)
-                goto err_res_free;
+                if (iosys_map_is_null(&bo->vmap)) {
+                        ret = xe_bo_vmap(bo);
+                        if (ret)
+                                goto out_backup;
+                        unmap = true;
+                }
 
-        ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
-        if (ret)
-                goto err_res_free;
+                xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
+                                   bo->size);
+        }
 
-        return 0;
+        bo->backup_obj = backup;
 
-err_res_free:
-        ttm_resource_free(&bo->ttm, &new_mem);
+out_backup:
+        xe_bo_vunmap(backup);
+        xe_bo_unlock(backup);
+        if (ret)
+                xe_bo_put(backup);
+out_unlock_bo:
+        if (unmap)
+                xe_bo_vunmap(bo);
+        xe_bo_unlock(bo);
         return ret;
 }
 
@@ -1198,47 +1194,82 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
 {
         struct ttm_operation_ctx ctx = {
                 .interruptible = false,
                 .gfp_retry_mayfail = false,
         };
-        struct ttm_resource *new_mem;
-        struct ttm_place *place = &bo->placements[0];
+        struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
+        struct xe_bo *backup = bo->backup_obj;
+        bool unmap = false;
         int ret;
 
-        xe_bo_assert_held(bo);
+        if (!backup)
+                return 0;
 
-        if (WARN_ON(!bo->ttm.resource))
-                return -EINVAL;
+        xe_bo_lock(backup, false);
 
-        if (WARN_ON(!xe_bo_is_pinned(bo)))
-                return -EINVAL;
+        ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
+        if (ret)
+                goto out_backup;
 
-        if (WARN_ON(xe_bo_is_vram(bo)))
-                return -EINVAL;
+        if (WARN_ON(!dma_resv_trylock(bo->ttm.base.resv))) {
+                ret = -EBUSY;
+                goto out_backup;
+        }
 
-        if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo)))
-                return -EINVAL;
+        if (xe_bo_is_user(bo)) {
+                struct xe_migrate *migrate;
+                struct dma_fence *fence;
 
-        if (!mem_type_is_vram(place->mem_type))
-                return 0;
+                if (bo->tile)
+                        migrate = bo->tile->migrate;
+                else
+                        migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
 
-        ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
-        if (ret)
-                return ret;
+                ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
+                if (ret)
+                        goto out_unlock_bo;
 
-        ret = ttm_bo_populate(&bo->ttm, &ctx);
-        if (ret)
-                goto err_res_free;
+                ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
+                if (ret)
+                        goto out_unlock_bo;
 
-        ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
-        if (ret)
-                goto err_res_free;
+                fence = xe_migrate_copy(migrate, backup, bo,
+                                        backup->ttm.resource, bo->ttm.resource,
+                                        false);
+                if (IS_ERR(fence)) {
+                        ret = PTR_ERR(fence);
+                        goto out_unlock_bo;
+                }
 
-        ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
-        if (ret)
-                goto err_res_free;
+                dma_resv_add_fence(bo->ttm.base.resv, fence,
+                                   DMA_RESV_USAGE_KERNEL);
+                dma_resv_add_fence(backup->ttm.base.resv, fence,
+                                   DMA_RESV_USAGE_KERNEL);
+                dma_fence_put(fence);
+        } else {
+                ret = xe_bo_vmap(backup);
+                if (ret)
+                        goto out_unlock_bo;
 
-        return 0;
+                if (iosys_map_is_null(&bo->vmap)) {
+                        ret = xe_bo_vmap(bo);
+                        if (ret)
+                                goto out_unlock_bo;
+                        unmap = true;
+                }
+
+                xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
+                                 bo->size);
+        }
 
-err_res_free:
-        ttm_resource_free(&bo->ttm, &new_mem);
+        bo->backup_obj = NULL;
+
+out_unlock_bo:
+        if (unmap)
+                xe_bo_vunmap(bo);
+        xe_bo_unlock(bo);
+out_backup:
+        xe_bo_vunmap(backup);
+        xe_bo_unlock(backup);
+        if (!bo->backup_obj)
+                xe_bo_put(backup);
         return ret;
 }
 
@@ -2195,22 +2226,6 @@ int xe_bo_pin(struct xe_bo *bo)
         if (err)
                 return err;
 
-        /*
-         * For pinned objects in on DGFX, which are also in vram, we expect
-         * these to be in contiguous VRAM memory. Required eviction / restore
-         * during suspend / resume (force restore to same physical address).
-         */
-        if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
-            bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
-                if (mem_type_is_vram(place->mem_type)) {
-                        xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
-
-                        place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
-                                       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
-                        place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
-                }
-        }
-
         if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
                 spin_lock(&xe->pinned.lock);
                 list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index a1f0661e7b0c..f83444f7f34d 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -31,14 +31,12 @@ static int xe_bo_apply_to_pinned(struct xe_device *xe,
                 list_move_tail(&bo->pinned_link, &still_in_list);
                 spin_unlock(&xe->pinned.lock);
 
-                xe_bo_lock(bo, false);
                 ret = pinned_fn(bo);
                 if (ret && pinned_list != new_list) {
                         spin_lock(&xe->pinned.lock);
                         list_move(&bo->pinned_link, pinned_list);
                         spin_unlock(&xe->pinned.lock);
                 }
-                xe_bo_unlock(bo);
                 xe_bo_put(bo);
                 spin_lock(&xe->pinned.lock);
         }
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 15a92e3d4898..81396181aaea 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -28,6 +28,8 @@ struct xe_vm;
 struct xe_bo {
         /** @ttm: TTM base buffer object */
         struct ttm_buffer_object ttm;
+        /** @backup_obj: The backup object when pinned and suspended (vram only) */
+        struct xe_bo *backup_obj;
         /** @size: Size of this buffer object */
         size_t size;
         /** @flags: flags for this buffer object */
-- 
2.50.1
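
For readers who only want the shape of the change, the new suspend-time path boils
down to the sketch below. It is distilled from the xe_bo_evict_pinned() hunk above
and is only an illustration, not the literal kernel code: locking, fence
bookkeeping, the NULL bo->tile fallback and the error unwinding are all elided,
and the helper name pinned_bo_save() is made up for this summary.

/* Illustrative summary only; see the real xe_bo_evict_pinned() in the diff above. */
static int pinned_bo_save(struct xe_device *xe, struct xe_bo *bo)
{
        struct xe_bo *backup;

        if (!xe_bo_is_vram(bo))
                return 0;       /* pages already in system memory, nothing to save */

        /* System memory shadow object; the pinned VRAM pages themselves never move. */
        backup = xe_bo_create_locked(xe, NULL, NULL, bo->size, ttm_bo_type_kernel,
                                     XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
                                     XE_BO_FLAG_PINNED);
        if (IS_ERR(backup))
                return PTR_ERR(backup);

        if (xe_bo_is_user(bo)) {
                /* User BOs: pipelined blitter copy VRAM -> backup via xe_migrate_copy(). */
                struct dma_fence *fence;

                fence = xe_migrate_copy(bo->tile->migrate, bo, backup,
                                        bo->ttm.resource, backup->ttm.resource,
                                        false);
                if (IS_ERR(fence))
                        return PTR_ERR(fence);
                dma_fence_put(fence);
        } else {
                /*
                 * Kernel BOs: CPU copy through vmaps, since pinned kernel memory is
                 * what is needed to bring the GPU back up on resume.
                 */
                xe_bo_vmap(backup);
                xe_bo_vmap(bo);
                xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0, bo->size);
        }

        /* xe_bo_restore_pinned() later copies the contents back and drops this ref. */
        bo->backup_obj = backup;
        return 0;
}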