From fb796c308767606bbd6c2e1bf4fa9446ec68ce51 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Tue, 27 Aug 2024 15:44:43 +0530 Subject: [PATCH 01/16] drm/amdgpu: add gfx eviction fence helpers This patch adds basic eviction fence framework for the gfx buffers. The idea is to: - One eviction fence is created per gfx process, at kms_open. - This fence is attached to all the gem buffers created by this process. - This fence is detached to all the gem buffers at postclose_kms. This framework will be further used for usermode queues. V2: Addressed review comments from Christian - keep fence_ctx and fence_seq directly in fpriv - evcition_fence should be dynamically allocated - do not save eviction fence instance in BO, there could be many such fences attached to one BO - use dma_resv_replace_fence() in detach V3: Addressed review comments from Christian - eviction fence create and destroy functions should be called only once from fpriv create/destroy - use dma_fence_put() in eviction_fence_destroy V4: Addressed review comments from Christian: - create a separate ev_fence_mgr structure - cleanup fence init part - do not add a domain for fence owner KGD V5: Addressed review comments from Christian: - drop the dma_fence_is_signaled check - use a local variable to access evf_mgr->ev_fence under the spin_lock() multiple places - remove the vm->is_compute_ctx check to attach gfx eviction fence, in gem_object_open V6: Addressed review comments from Christian: - drop the return value from eviction_fence_signal - reserve_fence should be the first thing inside the attach_eviction_fence function, also keep the resv_add_fence inside the lock - remove the unwanted ev_fence check inside detach function - fix wrong variable check in eviction_fence_init function - return the error value of eviction_fence_init to the caller, dont keep it void. - fail gem_object_open if attaching of eviction_fence fails - detach the eviction fence only when amdgpu_vm_is_bo_always_valid is not true. V7: Addressed review comments from Christian: - Do not add a uq_mgr ptr in ev_fence, rather add evf_mgr V8: Move eviction fence enabling into separate patch for CI Cc: Christian Koenig Cc: Alex Deucher Reviewed-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +- .../drm/amd/amdgpu/amdgpu_eviction_fence.c | 144 ++++++++++++++++++ .../drm/amd/amdgpu/amdgpu_eviction_fence.h | 63 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 5 + 6 files changed, 219 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 1cc307904089..0b0c8ec46516 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -66,7 +66,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \ - amdgpu_cper.o amdgpu_userq_fence.o + amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index be10fbe293e4..8c6c3c5451e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -114,6 +114,7 @@ #include "amdgpu_seq64.h" #include "amdgpu_reg_state.h" #include "amdgpu_userqueue.h" +#include "amdgpu_eviction_fence.h" #if defined(CONFIG_DRM_AMD_ISP) #include "amdgpu_isp.h" #endif @@ -490,7 +491,6 @@ struct amdgpu_flip_work { bool async; }; - /* * file private structure */ @@ -504,6 +504,10 @@ struct amdgpu_fpriv { struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; struct amdgpu_userq_mgr userq_mgr; + + /* Eviction fence infra */ + struct amdgpu_eviction_fence_mgr evf_mgr; + /** GPU partition selection */ uint32_t xcp_id; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c new file mode 100644 index 000000000000..056798e2b050 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include +#include "amdgpu.h" + +static const char * +amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence) +{ + return "amdgpu"; +} + +static const char * +amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_eviction_fence *ef; + + ef = container_of(f, struct amdgpu_eviction_fence, base); + return ef->timeline_name; +} + +static const struct dma_fence_ops amdgpu_eviction_fence_ops = { + .use_64bit_seqno = true, + .get_driver_name = amdgpu_eviction_fence_get_driver_name, + .get_timeline_name = amdgpu_eviction_fence_get_timeline_name, +}; + +void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + spin_lock(&evf_mgr->ev_fence_lock); + dma_fence_signal(&evf_mgr->ev_fence->base); + spin_unlock(&evf_mgr->ev_fence_lock); +} + +struct amdgpu_eviction_fence * +amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + + ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL); + if (!ev_fence) + return NULL; + + ev_fence->evf_mgr = evf_mgr; + get_task_comm(ev_fence->timeline_name, current); + spin_lock_init(&ev_fence->lock); + dma_fence_init(&ev_fence->base, &amdgpu_eviction_fence_ops, + &ev_fence->lock, evf_mgr->ev_fence_ctx, + atomic_inc_return(&evf_mgr->ev_fence_seq)); + return ev_fence; +} + +void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + spin_unlock(&evf_mgr->ev_fence_lock); + + if (!ev_fence) + return; + + /* Last unref of ev_fence */ + dma_fence_put(&evf_mgr->ev_fence->base); +} + +int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo) +{ + struct dma_fence *ef; + struct amdgpu_eviction_fence *ev_fence; + struct dma_resv *resv = bo->tbo.base.resv; + int ret; + + if (!resv) + return 0; + + ret = dma_resv_reserve_fences(resv, 1); + if (ret) { + DRM_DEBUG_DRIVER("Failed to resv fence space\n"); + return ret; + } + + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + if (ev_fence) { + ef = dma_fence_get(&ev_fence->base); + dma_resv_add_fence(resv, ef, DMA_RESV_USAGE_BOOKKEEP); + } + spin_unlock(&evf_mgr->ev_fence_lock); + return 0; +} + +void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo) +{ + struct dma_fence *stub = dma_fence_get_stub(); + + dma_resv_replace_fences(bo->tbo.base.resv, evf_mgr->ev_fence_ctx, + stub, DMA_RESV_USAGE_BOOKKEEP); + dma_fence_put(stub); +} + +int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + + /* This needs to be done one time per open */ + atomic_set(&evf_mgr->ev_fence_seq, 0); + evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1); + spin_lock_init(&evf_mgr->ev_fence_lock); + + ev_fence = amdgpu_eviction_fence_create(evf_mgr); + if (!ev_fence) { + DRM_ERROR("Failed to craete eviction fence\n"); + return -ENOMEM; + } + + spin_lock(&evf_mgr->ev_fence_lock); + evf_mgr->ev_fence = ev_fence; + spin_unlock(&evf_mgr->ev_fence_lock); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h new file mode 100644 index 000000000000..aba12a43f433 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef AMDGPU_EV_FENCE_H_ +#define AMDGPU_EV_FENCE_H_ + +struct amdgpu_eviction_fence { + struct dma_fence base; + spinlock_t lock; + char timeline_name[TASK_COMM_LEN]; + struct amdgpu_eviction_fence_mgr *evf_mgr; +}; + +struct amdgpu_eviction_fence_mgr { + u64 ev_fence_ctx; + atomic_t ev_fence_seq; + spinlock_t ev_fence_lock; + struct amdgpu_eviction_fence *ev_fence; +}; + +/* Eviction fence helper functions */ +struct amdgpu_eviction_fence * +amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr); + +void +amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr); + +int +amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo); + +void +amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo); + +int +amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr); + +void +amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 06171eab6e92..682b76cad359 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -293,6 +293,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, bo_va = amdgpu_vm_bo_add(adev, vm, abo); else ++bo_va->ref_count; + amdgpu_bo_unreserve(abo); /* Validate and add eviction fence to DMABuf imports with dynamic diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 2e07776dd408..700442584f97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1413,6 +1413,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) mutex_init(&fpriv->bo_list_lock); idr_init_base(&fpriv->bo_list_handles, 1); + r = amdgpu_eviction_fence_init(&fpriv->evf_mgr); + if (r) + goto error_vm; + amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev); r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, adev); @@ -1486,6 +1490,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_bo_unreserve(pd); } + amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); amdgpu_vm_fini(adev, &fpriv->vm); amdgpu_userq_mgr_fini(&fpriv->userq_mgr); -- 2.51.0 From 30e4d781385dda92fdee574b0a95094dfa143b52 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Mon, 3 Jun 2024 11:13:03 +0200 Subject: [PATCH 02/16] drm/amdgpu: add userqueue suspend/resume functions This patch adds userqueue suspend/resume functions at core MES V11 IP level. V2: use true/false for queue_active status (Christian) added Christian's R-B V3: reset/set queue status in mqd.create and mqd.destroy Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 33 +++++++++++++++++++ .../gpu/drm/amd/include/amdgpu_userqueue.h | 5 +++ 2 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index fe4efe5ba6ac..ee0a757fcfa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -331,6 +331,7 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_ctx; } + queue->queue_active = true; return 0; free_ctx: @@ -354,9 +355,41 @@ mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); kfree(queue->userq_prop); amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); + queue->queue_active = false; +} + +static int mes_v11_0_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + if (queue->queue_active) { + mes_v11_0_userq_unmap(uq_mgr, queue); + queue->queue_active = false; + } + + return 0; +} + +static int mes_v11_0_userq_resume(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) +{ + int ret; + + if (queue->queue_active) + return 0; + + ret = mes_v11_0_userq_map(uq_mgr, queue, queue->userq_prop); + if (ret) { + DRM_ERROR("Failed to resume queue\n"); + return ret; + } + + queue->queue_active = true; + return 0; } const struct amdgpu_userq_funcs userq_mes_v11_0_funcs = { .mqd_create = mes_v11_0_userq_mqd_create, .mqd_destroy = mes_v11_0_userq_mqd_destroy, + .suspend = mes_v11_0_userq_suspend, + .resume = mes_v11_0_userq_resume, }; diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index b942f3f5ea35..ede0178e0332 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -37,6 +37,7 @@ struct amdgpu_userq_obj { struct amdgpu_usermode_queue { int queue_type; + uint8_t queue_active; uint64_t doorbell_handle; uint64_t doorbell_index; uint64_t flags; @@ -57,6 +58,10 @@ struct amdgpu_userq_funcs { struct amdgpu_usermode_queue *queue); void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *uq); + int (*suspend)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); + int (*resume)(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue); }; /* Usermode queues for gfx */ -- 2.51.0 From b0328087c179f47ea6558c3b91b4487c5e10deda Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Wed, 20 Nov 2024 18:59:49 +0100 Subject: [PATCH 03/16] drm/amdgpu: suspend gfx userqueues This patch adds suspend support for gfx userqueues. It typically does the following: - adds an enable_signaling function for the eviction fence, so that it can trigger the userqueue suspend, - adds a delayed work to handle suspending of the eviction_fence - adds a suspend function to handle suspending of userqueues which suspends all the queues under this userq manager and signals the eviction fence, - adds a function to replace the old eviction fence with a new one and attach it to each of the objects, - adds reference of userq manager in the eviction fence container so that it can be used in the suspend function. V2: Addressed Christian's review comments: - schedule suspend work immediately V4: Addressed Christian's review comments: - wait for pending uq fences before starting suspend, added queue->last_fence for the same - accommodate ev_fence_mgr into existing code - some bug fixes and NULL checks V5: Addressed Christian's review comments (gitlab) - Wait for eviction fence to get signaled in destroy, don't signal it - Wait for eviction fence to get signaled in replace fence, don't signal it V6: Addressed Christian's review comments - Do not destroy the old eviction fence until we have it replaced - Change the sequence of fence replacement sub-tasks - reusing the ev_fence delayed work for userqueue suspend as well (Shashank). V7: Addressed Christian's review comments - give evf_mgr as argument (instead of fpriv) to replace_fence() - save ptr to evf_mgr in ev_fence (instead of uq_mgr) - modify suspend_all_queues logic to reflect error properly - remove the garbage drm_exec_lock section in wait_for_signal - grab the userqueue mutex before starting the wait for fence - remove the unrelated gobj check from signal_ioctl V8: Added race condition fixes Cc: Alex Deucher Cc: Christian Koenig Acked-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- .../drm/amd/amdgpu/amdgpu_eviction_fence.c | 127 ++++++++++++++++++ .../drm/amd/amdgpu/amdgpu_eviction_fence.h | 4 + .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 37 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 110 +++++++++++++++ .../gpu/drm/amd/include/amdgpu_userqueue.h | 9 ++ 5 files changed, 276 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c index 056798e2b050..189afb872775 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -22,8 +22,12 @@ * */ #include +#include #include "amdgpu.h" +#define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name) +#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr) + static const char * amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence) { @@ -39,10 +43,131 @@ amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f) return ef->timeline_name; } +int +amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct drm_exec *exec) +{ + struct amdgpu_eviction_fence *old_ef, *new_ef; + struct drm_gem_object *obj; + unsigned long index; + int ret; + + /* + * Steps to replace eviction fence: + * * lock all objects in exec (caller) + * * create a new eviction fence + * * update new eviction fence in evf_mgr + * * attach the new eviction fence to BOs + * * release the old fence + * * unlock the objects (caller) + */ + new_ef = amdgpu_eviction_fence_create(evf_mgr); + if (!new_ef) { + DRM_ERROR("Failed to create new eviction fence\n"); + return -ENOMEM; + } + + /* Update the eviction fence now */ + spin_lock(&evf_mgr->ev_fence_lock); + old_ef = evf_mgr->ev_fence; + evf_mgr->ev_fence = new_ef; + spin_unlock(&evf_mgr->ev_fence_lock); + + /* Attach the new fence */ + drm_exec_for_each_locked_object(exec, index, obj) { + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + + if (!bo) + continue; + ret = amdgpu_eviction_fence_attach(evf_mgr, bo); + if (ret) { + DRM_ERROR("Failed to attch new eviction fence\n"); + goto free_err; + } + } + + /* Free old fence */ + dma_fence_put(&old_ef->base); + return 0; + +free_err: + kfree(new_ef); + return ret; +} + +static void +amdgpu_eviction_fence_suspend_worker(struct work_struct *work) +{ + struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work); + struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr); + struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; + struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_bo_va *bo_va; + struct drm_exec exec; + bool userq_active = amdgpu_userqueue_active(uq_mgr); + int ret; + + + /* For userqueues, the fence replacement happens in resume path */ + if (userq_active) { + amdgpu_userqueue_suspend(uq_mgr); + return; + } + + /* Signal old eviction fence */ + amdgpu_eviction_fence_signal(evf_mgr); + + /* Prepare the objects to replace eviction fence */ + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(vm, &exec, 2); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unlock_drm; + + /* Lock the done list */ + list_for_each_entry(bo_va, &vm->done, base.vm_status) { + struct amdgpu_bo *bo = bo_va->base.bo; + + if (!bo) + continue; + + ret = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unlock_drm; + } + } + + /* Replace old eviction fence with new one */ + ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec); + if (ret) + DRM_ERROR("Failed to replace eviction fence\n"); + +unlock_drm: + drm_exec_fini(&exec); +} + +static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f) +{ + struct amdgpu_eviction_fence_mgr *evf_mgr; + struct amdgpu_eviction_fence *ev_fence; + + if (!f) + return true; + + ev_fence = to_ev_fence(f); + evf_mgr = ev_fence->evf_mgr; + + schedule_delayed_work(&evf_mgr->suspend_work, 0); + return true; +} + static const struct dma_fence_ops amdgpu_eviction_fence_ops = { .use_64bit_seqno = true, .get_driver_name = amdgpu_eviction_fence_get_driver_name, .get_timeline_name = amdgpu_eviction_fence_get_timeline_name, + .enable_signaling = amdgpu_eviction_fence_enable_signaling, }; void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr) @@ -140,5 +265,7 @@ int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr) spin_lock(&evf_mgr->ev_fence_lock); evf_mgr->ev_fence = ev_fence; spin_unlock(&evf_mgr->ev_fence_lock); + + INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h index aba12a43f433..12f168ec3ef0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h @@ -37,6 +37,7 @@ struct amdgpu_eviction_fence_mgr { atomic_t ev_fence_seq; spinlock_t ev_fence_lock; struct amdgpu_eviction_fence *ev_fence; + struct delayed_work suspend_work; }; /* Eviction fence helper functions */ @@ -60,4 +61,7 @@ amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr); void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr); +int +amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct drm_exec *exec); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 6157a540c929..877cb17a14e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -466,6 +466,16 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } } + /* Save the fence to wait for during suspend */ + mutex_lock(&userq_mgr->userq_mutex); + + /* Retrieve the user queue */ + queue = idr_find(&userq_mgr->userq_idr, args->queue_id); + if (!queue) { + r = -ENOENT; + mutex_unlock(&userq_mgr->userq_mutex); + } + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, (num_read_bo_handles + num_write_bo_handles)); @@ -473,30 +483,35 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) + if (r) { + mutex_unlock(&userq_mgr->userq_mutex); goto exec_fini; + } r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) + if (r) { + mutex_unlock(&userq_mgr->userq_mutex); goto exec_fini; - } - - /*Retrieve the user queue */ - queue = idr_find(&userq_mgr->userq_idr, args->queue_id); - if (!queue) { - r = -ENOENT; - goto exec_fini; + } } r = amdgpu_userq_fence_read_wptr(queue, &wptr); - if (r) + if (r) { + mutex_unlock(&userq_mgr->userq_mutex); goto exec_fini; + } /* Create a new fence */ r = amdgpu_userq_fence_create(queue, wptr, &fence); - if (r) + if (r) { + mutex_unlock(&userq_mgr->userq_mutex); goto exec_fini; + } + + dma_fence_put(queue->last_fence); + queue->last_fence = dma_fence_get(fence); + mutex_unlock(&userq_mgr->userq_mutex); for (i = 0; i < num_read_bo_handles; i++) { if (!gobj_read || !gobj_read[i]->resv) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index e946c6d1dd6b..43fff8869ac9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -60,6 +60,16 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr, { struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; + struct dma_fence *f = queue->last_fence; + int ret; + + if (f && !dma_fence_is_signaled(f)) { + ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); + if (ret <= 0) { + DRM_ERROR("Timed out waiting for fence f=%p\n", f); + return; + } + } uq_funcs->mqd_destroy(uq_mgr, queue); amdgpu_userq_fence_driver_free(queue); @@ -67,6 +77,22 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr, kfree(queue); } +int +amdgpu_userqueue_active(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id; + int ret = 0; + + mutex_lock(&uq_mgr->userq_mutex); + /* Resume all the queues for this process */ + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) + ret += queue->queue_active; + + mutex_unlock(&uq_mgr->userq_mutex); + return ret; +} + #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ static struct amdgpu_usermode_queue * amdgpu_userqueue_find(struct amdgpu_userq_mgr *uq_mgr, int qid) @@ -202,6 +228,7 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) amdgpu_bo_unpin(queue->db_obj.obj); amdgpu_bo_unref(&queue->db_obj.obj); amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id); + uq_mgr->num_userqs--; mutex_unlock(&uq_mgr->userq_mutex); return 0; } @@ -277,6 +304,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } args->out.queue_id = qid; + uq_mgr->num_userqs++; unlock: mutex_unlock(&uq_mgr->userq_mutex); @@ -317,11 +345,93 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, } #endif +static int +amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *userq_funcs; + struct amdgpu_usermode_queue *queue; + int queue_id; + int ret = 0; + + userq_funcs = adev->userq_funcs[AMDGPU_HW_IP_GFX]; + + /* Try to suspend all the queues in this process ctx */ + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) + ret += userq_funcs->suspend(uq_mgr, queue); + + if (ret) + DRM_ERROR("Couldn't suspend all the queues\n"); + return ret; +} + +static int +amdgpu_userqueue_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_usermode_queue *queue; + int queue_id, ret; + + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + struct dma_fence *f = queue->last_fence; + + if (!f || dma_fence_is_signaled(f)) + continue; + ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); + if (ret <= 0) { + DRM_ERROR("Timed out waiting for fence f=%p\n", f); + return -ETIMEDOUT; + } + } + + return 0; +} + +void +amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr) +{ + int ret; + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr; + + mutex_lock(&uq_mgr->userq_mutex); + + /* Wait for any pending userqueue fence to signal */ + ret = amdgpu_userqueue_wait_for_signal(uq_mgr); + if (ret) { + DRM_ERROR("Not suspending userqueue, timeout waiting for work\n"); + goto unlock; + } + + ret = amdgpu_userqueue_suspend_all(uq_mgr); + if (ret) { + DRM_ERROR("Failed to evict userqueue\n"); + goto unlock; + } + + /* Signal current eviction fence */ + amdgpu_eviction_fence_signal(evf_mgr); + + /* Cleanup old eviction fence entry */ + amdgpu_eviction_fence_destroy(evf_mgr); + +unlock: + mutex_unlock(&uq_mgr->userq_mutex); +} + int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev) { + struct amdgpu_fpriv *fpriv; + mutex_init(&userq_mgr->userq_mutex); idr_init_base(&userq_mgr->userq_idr, 1); userq_mgr->adev = adev; + userq_mgr->num_userqs = 0; + + fpriv = uq_mgr_to_fpriv(userq_mgr); + if (!fpriv->evf_mgr.ev_fence) { + DRM_ERROR("Eviction fence not initialized yet\n"); + return -EINVAL; + } return 0; } diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index ede0178e0332..7781ee59653b 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -27,6 +27,9 @@ #define AMDGPU_MAX_USERQ_COUNT 512 +#define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base) +#define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr) + struct amdgpu_mqd_prop; struct amdgpu_userq_obj { @@ -50,6 +53,7 @@ struct amdgpu_usermode_queue { struct amdgpu_userq_obj wptr_obj; struct xarray fence_drv_xa; struct amdgpu_userq_fence_driver *fence_drv; + struct dma_fence *last_fence; }; struct amdgpu_userq_funcs { @@ -69,6 +73,7 @@ struct amdgpu_userq_mgr { struct idr userq_idr; struct mutex userq_mutex; struct amdgpu_device *adev; + int num_userqs; }; int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); @@ -83,4 +88,8 @@ int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr, void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userq_obj *userq_obj); + +void amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr); + +int amdgpu_userqueue_active(struct amdgpu_userq_mgr *uq_mgr); #endif -- 2.51.0 From 44cfdf368fb72c03e4137709803d58288a22cb06 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Wed, 20 Nov 2024 19:02:26 +0100 Subject: [PATCH 04/16] drm/amdgpu: resume gfx userqueues This patch adds support for userqueue resume. What it typically does is this: - adds a new delayed work for resuming all the queues. - schedules this delayed work from the suspend work. - validates the BOs and replaces the eviction fence before resuming all the queues running under this instance of userq manager. V2: Addressed Christian's review comments: - declare local variables like ret at the bottom. - lock all the object first, then start attaching the new fence. - dont replace old eviction fence, just attach new eviction fence. - no error logs for drm_exec_lock failures - no need to reserve bos after drm_exec_locked - schedule the resume worker immediately (not after 100 ms) - check for NULL BO (Arvind) V5: Rebased wrt changes in suspend patch - moved amdgpu_userqueue_validate_vm_bo in this patch - initialized ret in resume_all V6: Rebase V7: Addressed review comments from Christian - Do not use list_for_each_safe() with vm->invalidated, its not correct way V8: Fixed the race condition between suspend/close/fence Cc: Alex Deucher Cc: Christian Koenig Acked-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- .../drm/amd/amdgpu/amdgpu_eviction_fence.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 183 +++++++++++++++++- .../gpu/drm/amd/include/amdgpu_userqueue.h | 3 +- 3 files changed, 181 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h index 12f168ec3ef0..787182bd1069 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h @@ -38,6 +38,7 @@ struct amdgpu_eviction_fence_mgr { spinlock_t ev_fence_lock; struct amdgpu_eviction_fence *ev_fence; struct delayed_work suspend_work; + uint8_t fd_closing; }; /* Eviction fence helper functions */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 43fff8869ac9..57e502d014a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -22,6 +22,7 @@ * */ +#include #include "amdgpu.h" #include "amdgpu_vm.h" #include "amdgpu_userqueue.h" @@ -216,6 +217,7 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; struct amdgpu_usermode_queue *queue; + cancel_delayed_work(&uq_mgr->resume_work); mutex_lock(&uq_mgr->userq_mutex); queue = amdgpu_userqueue_find(uq_mgr, queue_id); @@ -228,7 +230,6 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id) amdgpu_bo_unpin(queue->db_obj.obj); amdgpu_bo_unref(&queue->db_obj.obj); amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id); - uq_mgr->num_userqs--; mutex_unlock(&uq_mgr->userq_mutex); return 0; } @@ -304,10 +305,18 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } args->out.queue_id = qid; - uq_mgr->num_userqs++; unlock: mutex_unlock(&uq_mgr->userq_mutex); + if (!r) { + /* + * There could be a situation that we are creating a new queue while + * the other queues under this UQ_mgr are suspended. So if there is any + * resume work pending, wait for it to get done. + */ + flush_delayed_work(&uq_mgr->resume_work); + } + return r; } @@ -345,6 +354,161 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, } #endif +static int +amdgpu_userqueue_resume_all(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_device *adev = uq_mgr->adev; + const struct amdgpu_userq_funcs *userq_funcs; + struct amdgpu_usermode_queue *queue; + int queue_id; + int ret = 0; + + userq_funcs = adev->userq_funcs[AMDGPU_HW_IP_GFX]; + + /* Resume all the queues for this process */ + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) + ret = userq_funcs->resume(uq_mgr, queue); + + if (ret) + DRM_ERROR("Failed to resume all the queue\n"); + return ret; +} + +static int +amdgpu_userqueue_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) +{ + struct ttm_operation_ctx ctx = { false, false }; + int ret; + + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + DRM_ERROR("Fail to validate\n"); + + return ret; +} + +static int +amdgpu_userqueue_validate_bos(struct amdgpu_userq_mgr *uq_mgr) +{ + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); + struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_device *adev = uq_mgr->adev; + struct amdgpu_bo_va *bo_va; + struct ww_acquire_ctx *ticket; + struct drm_exec exec; + struct amdgpu_bo *bo; + struct dma_resv *resv; + bool clear, unlock; + int ret = 0; + + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + ret = amdgpu_vm_lock_pd(vm, &exec, 2); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) { + DRM_ERROR("Failed to lock PD\n"); + goto unlock_all; + } + + /* Lock the done list */ + list_for_each_entry(bo_va, &vm->done, base.vm_status) { + bo = bo_va->base.bo; + if (!bo) + continue; + + ret = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unlock_all; + } + } + + spin_lock(&vm->status_lock); + while (!list_empty(&vm->moved)) { + bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, + base.vm_status); + spin_unlock(&vm->status_lock); + + /* Per VM BOs never need to bo cleared in the page tables */ + ret = amdgpu_vm_bo_update(adev, bo_va, false); + if (ret) + goto unlock_all; + spin_lock(&vm->status_lock); + } + + ticket = &exec.ticket; + while (!list_empty(&vm->invalidated)) { + bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, + base.vm_status); + resv = bo_va->base.bo->tbo.base.resv; + spin_unlock(&vm->status_lock); + + bo = bo_va->base.bo; + ret = amdgpu_userqueue_validate_vm_bo(NULL, bo); + if (ret) { + DRM_ERROR("Failed to validate BO\n"); + goto unlock_all; + } + + /* Try to reserve the BO to avoid clearing its ptes */ + if (!adev->debug_vm && dma_resv_trylock(resv)) { + clear = false; + unlock = true; + /* The caller is already holding the reservation lock */ + } else if (ticket && dma_resv_locking_ctx(resv) == ticket) { + clear = false; + unlock = false; + /* Somebody else is using the BO right now */ + } else { + clear = true; + unlock = false; + } + + ret = amdgpu_vm_bo_update(adev, bo_va, clear); + + if (unlock) + dma_resv_unlock(resv); + if (ret) + goto unlock_all; + + spin_lock(&vm->status_lock); + } + spin_unlock(&vm->status_lock); + + ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec); + if (ret) + DRM_ERROR("Failed to replace eviction fence\n"); + +unlock_all: + drm_exec_fini(&exec); + return ret; +} + +static void amdgpu_userqueue_resume_worker(struct work_struct *work) +{ + struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work); + int ret; + + mutex_lock(&uq_mgr->userq_mutex); + + ret = amdgpu_userqueue_validate_bos(uq_mgr); + if (ret) { + DRM_ERROR("Failed to validate BOs to restore\n"); + goto unlock; + } + + ret = amdgpu_userqueue_resume_all(uq_mgr); + if (ret) { + DRM_ERROR("Failed to resume all queues\n"); + goto unlock; + } + +unlock: + mutex_unlock(&uq_mgr->userq_mutex); +} + static int amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr) { @@ -393,6 +557,7 @@ amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr) struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr; + mutex_lock(&uq_mgr->userq_mutex); /* Wait for any pending userqueue fence to signal */ @@ -411,8 +576,14 @@ amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr) /* Signal current eviction fence */ amdgpu_eviction_fence_signal(evf_mgr); - /* Cleanup old eviction fence entry */ - amdgpu_eviction_fence_destroy(evf_mgr); + if (evf_mgr->fd_closing) { + mutex_unlock(&uq_mgr->userq_mutex); + cancel_delayed_work(&uq_mgr->resume_work); + return; + } + + /* Schedule a resume work */ + schedule_delayed_work(&uq_mgr->resume_work, 0); unlock: mutex_unlock(&uq_mgr->userq_mutex); @@ -425,7 +596,6 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_devi mutex_init(&userq_mgr->userq_mutex); idr_init_base(&userq_mgr->userq_idr, 1); userq_mgr->adev = adev; - userq_mgr->num_userqs = 0; fpriv = uq_mgr_to_fpriv(userq_mgr); if (!fpriv->evf_mgr.ev_fence) { @@ -433,6 +603,7 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_devi return -EINVAL; } + INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userqueue_resume_worker); return 0; } @@ -441,6 +612,8 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) uint32_t queue_id; struct amdgpu_usermode_queue *queue; + cancel_delayed_work(&userq_mgr->resume_work); + idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) amdgpu_userqueue_cleanup(userq_mgr, queue, queue_id); diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index 7781ee59653b..2bf28f3454cb 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -29,6 +29,7 @@ #define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base) #define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr) +#define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name) struct amdgpu_mqd_prop; @@ -73,7 +74,7 @@ struct amdgpu_userq_mgr { struct idr userq_idr; struct mutex userq_mutex; struct amdgpu_device *adev; - int num_userqs; + struct delayed_work resume_work; }; int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); -- 2.51.0 From b8e6d3f68c3bd1ac54492e210ece87475e7f862b Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Wed, 20 Nov 2024 18:04:33 +0100 Subject: [PATCH 05/16] drm/amdgpu: handle eviction fence race MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The eviction process can get into a race condition between the eviction fence suspend work (which replaces the old fence with new) and kms_close (which destroys the fence and doesn't expect a new one). This patch: - adds a flag to indicate that fd is closing, so fence replacement is not required (evf_mgr->fd_closing) - adds a flush_work() during the ev_fence_destroy routine V2: Addressed review comments from Christian: - Do not use mutex to sync - Use flush_work and wait for suspend_work to be done V3: Fixed state machine for queue->active, which adds into race between suspend/resume and queue ops Cc: Alex Deucher Cc: Christian König Reviewed-by: Christian König Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 3 ++- drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 9 +++++---- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c index 189afb872775..c22767a75348 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -117,6 +117,10 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct *work) /* Signal old eviction fence */ amdgpu_eviction_fence_signal(evf_mgr); + /* Do not replace eviction fence is fd is getting closed */ + if (evf_mgr->fd_closing) + return; + /* Prepare the objects to replace eviction fence */ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { @@ -199,6 +203,9 @@ void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr) { struct amdgpu_eviction_fence *ev_fence; + /* Wait for any pending work to execute */ + flush_delayed_work(&evf_mgr->suspend_work); + spin_lock(&evf_mgr->ev_fence_lock); ev_fence = evf_mgr->ev_fence; spin_unlock(&evf_mgr->ev_fence_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 700442584f97..f8370da08038 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1490,10 +1490,12 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_bo_unreserve(pd); } + fpriv->evf_mgr.fd_closing = true; + amdgpu_userq_mgr_fini(&fpriv->userq_mgr); amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); + amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); amdgpu_vm_fini(adev, &fpriv->vm); - amdgpu_userq_mgr_fini(&fpriv->userq_mgr); if (pasid) amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 57e502d014a1..cba51bdf2e2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -614,9 +614,10 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) cancel_delayed_work(&userq_mgr->resume_work); + mutex_lock(&userq_mgr->userq_mutex); idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) amdgpu_userqueue_cleanup(userq_mgr, queue, queue_id); - idr_destroy(&userq_mgr->userq_idr); + mutex_unlock(&userq_mgr->userq_mutex); mutex_destroy(&userq_mgr->userq_mutex); } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index ee0a757fcfa3..b1b7bc47d39f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -139,6 +139,7 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, return r; } + queue->queue_active = true; DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index); return 0; } @@ -160,6 +161,7 @@ static void mes_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, amdgpu_mes_unlock(&adev->mes); if (r) DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r); + queue->queue_active = false; } static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, @@ -331,7 +333,6 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_ctx; } - queue->queue_active = true; return 0; free_ctx: @@ -350,12 +351,12 @@ static void mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { - mes_v11_0_userq_unmap(uq_mgr, queue); - amdgpu_bo_unref(&queue->wptr_obj.obj); + if (queue->queue_active) + mes_v11_0_userq_unmap(uq_mgr, queue); + amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); kfree(queue->userq_prop); amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); - queue->queue_active = false; } static int mes_v11_0_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, -- 2.51.0 From 825f82cf936aea7f2d25d47efd27f90ba90e4ee2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Oct 2024 13:58:23 -0400 Subject: [PATCH 06/16] drm/amdgpu: add some additional members to amdgpu_mqd_prop These are needed to make userqueue infrastructure generic. Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8c6c3c5451e9..ea5a1e6f01c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -830,6 +830,9 @@ struct amdgpu_mqd_prop { uint32_t hqd_queue_priority; bool allow_tunneling; bool hqd_active; + uint64_t shadow_addr; + uint64_t gds_bkup_addr; + uint64_t csa_addr; }; struct amdgpu_mqd { -- 2.51.0 From 7179439e34bbeef28e1b5083c365e7295b07f9bd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Oct 2024 14:14:34 -0400 Subject: [PATCH 07/16] drm/amdgpu/gfx11: update mqd init for UQ Set the addresses for the UQ metadata. V2: Fix lower address (Shashank) V3: Restore lower_32_bits() for MQD addresses (Alex) Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 933c4ad5eff9..b8f75e1ba72c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4127,6 +4127,14 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* active the queue */ mqd->cp_gfx_hqd_active = 1; + /* set gfx UQ items */ + mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); + mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); + mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr); + mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr); + mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); + mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + return 0; } -- 2.51.0 From f2234816a31d0ec85ab63899762ec962ab682704 Mon Sep 17 00:00:00 2001 From: Amaranath Somalapuram Date: Wed, 27 Nov 2024 17:06:45 +0100 Subject: [PATCH 08/16] drm/amdgpu: fix IGT CI regression with eviction fence This patch fixes one of the regressions in eviction fence code with IGT tests. Reviewed-by: Shashank Sharma Signed-off-by: Amaranath Somalapuram Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c index c22767a75348..f7fb1674278c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -136,6 +136,9 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct *work) if (!bo) continue; + if (vm != bo_va->base.vm) + continue; + ret = drm_exec_lock_obj(&exec, &bo->tbo.base); drm_exec_retry_on_contention(&exec); if (unlikely(ret)) -- 2.51.0 From ab328d9a7b614ba8c6f63096eae817dd6e7f72f1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Oct 2024 14:15:12 -0400 Subject: [PATCH 09/16] drm/amdgpu/gfx12: update mqd init for UQ Set the addresses for the UQ metadata. V2: Fix lower address mask (Shashank) V3: Use lower_32_bits() for MQD objects (Alex) Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 4f9a73bae332..8b409b4a1cb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -3027,6 +3027,12 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* active the queue */ mqd->cp_gfx_hqd_active = 1; + /* set gfx UQ items */ + mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); + mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); + mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); + mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + return 0; } -- 2.51.0 From d07a7fcb8d25724351b7f9c03739b814da343b72 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Oct 2024 14:15:31 -0400 Subject: [PATCH 10/16] drm/amdgpu/sdma6: update mqd init for UQ Set the addresses for the UQ metadata. V2: Fix lower address mask (Shashank) V3: Use lower_32_bits for MQD objects (Alex) Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index d4e1b2cd1f35..fe389379e890 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -892,6 +892,9 @@ static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd, m->sdmax_rlcx_rb_aql_cntl = regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT; m->sdmax_rlcx_dummy_reg = regSDMA0_QUEUE0_DUMMY_REG_DEFAULT; + m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr); + m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr); + return 0; } -- 2.51.0 From 21926b5db8c1d4e82b47bbd484b085a0e604bfd6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Oct 2024 14:15:51 -0400 Subject: [PATCH 11/16] drm/amdgpu/sdma7: update mqd init for UQ Set the addresses for the UQ metadata. V2: Fix lower offset mask (Shashank) V2: Use lower_32_bits for mqd objects(Alex) Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index b2706221df99..3a0f38fc003e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -935,6 +935,9 @@ static int sdma_v7_0_mqd_init(struct amdgpu_device *adev, void *mqd, m->sdmax_rlcx_rb_aql_cntl = 0x4000; //regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT; m->sdmax_rlcx_dummy_reg = 0xf; //regSDMA0_QUEUE0_DUMMY_REG_DEFAULT; + m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr); + m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr); + return 0; } -- 2.51.0 From b965c5d87108add7941528569d7acdfabcd86b55 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 21 Oct 2024 15:16:12 +0200 Subject: [PATCH 12/16] drm/amdgpu/uq: remove gfx11 specifics from UQ setup This can all be handled by in the IP specific mpd init code. V2: Removed setting of gds_va, which was removed during UAPI review (Shashank) Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c | 83 ++++++++----------- 1 file changed, 35 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c index b1b7bc47d39f..1ba6b91b03c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c @@ -23,8 +23,6 @@ */ #include "amdgpu.h" #include "amdgpu_gfx.h" -#include "v11_structs.h" -#include "mes_v11_0.h" #include "mes_v11_0_userqueue.h" #include "amdgpu_userq_fence.h" @@ -183,52 +181,6 @@ static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, return r; } - /* Shadow, GDS and CSA objects come directly from userspace */ - if (mqd_user->ip_type == AMDGPU_HW_IP_GFX) { - struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr; - struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11; - - if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) { - DRM_ERROR("Invalid GFX MQD\n"); - return -EINVAL; - } - - mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); - if (IS_ERR(mqd_gfx_v11)) { - DRM_ERROR("Failed to read user MQD\n"); - amdgpu_userqueue_destroy_object(uq_mgr, ctx); - return -ENOMEM; - } - - mqd->shadow_base_lo = mqd_gfx_v11->shadow_va & 0xFFFFFFFC; - mqd->shadow_base_hi = upper_32_bits(mqd_gfx_v11->shadow_va); - - mqd->gds_bkup_base_lo = 0; - mqd->gds_bkup_base_hi = 0; - - mqd->fw_work_area_base_lo = mqd_gfx_v11->csa_va & 0xFFFFFFFC; - mqd->fw_work_area_base_hi = upper_32_bits(mqd_gfx_v11->csa_va); - kfree(mqd_gfx_v11); - } else if (mqd_user->ip_type == AMDGPU_HW_IP_DMA) { - struct v11_sdma_mqd *mqd = queue->mqd.cpu_ptr; - struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11; - - if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) { - DRM_ERROR("Invalid SDMA MQD\n"); - return -EINVAL; - } - - mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); - if (IS_ERR(mqd_sdma_v11)) { - DRM_ERROR("Failed to read sdma user MQD\n"); - amdgpu_userqueue_destroy_object(uq_mgr, ctx); - return -ENOMEM; - } - - mqd->sdmax_rlcx_csa_addr_lo = mqd_sdma_v11->csa_va & 0xFFFFFFFC; - mqd->sdmax_rlcx_csa_addr_hi = upper_32_bits(mqd_sdma_v11->csa_va); - } - return 0; } @@ -300,6 +252,41 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; userq_props->hqd_active = false; kfree(compute_mqd); + } else if (queue->queue_type == AMDGPU_HW_IP_GFX) { + struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11; + + if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) { + DRM_ERROR("Invalid GFX MQD\n"); + return -EINVAL; + } + + mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(mqd_gfx_v11)) { + DRM_ERROR("Failed to read user MQD\n"); + amdgpu_userqueue_destroy_object(uq_mgr, ctx); + return -ENOMEM; + } + + userq_props->shadow_addr = mqd_gfx_v11->shadow_va; + userq_props->csa_addr = mqd_gfx_v11->csa_va; + kfree(mqd_gfx_v11); + } else if (queue->queue_type == AMDGPU_HW_IP_DMA) { + struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11; + + if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) { + DRM_ERROR("Invalid SDMA MQD\n"); + return -EINVAL; + } + + mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); + if (IS_ERR(mqd_sdma_v11)) { + DRM_ERROR("Failed to read sdma user MQD\n"); + amdgpu_userqueue_destroy_object(uq_mgr, ctx); + return -ENOMEM; + } + + userq_props->csa_addr = mqd_sdma_v11->csa_va; + kfree(mqd_sdma_v11); } queue->userq_prop = userq_props; -- 2.51.0 From 79819d9a0ac38bf83ac712e00be2d53104895b84 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 26 Nov 2024 15:45:19 +0100 Subject: [PATCH 13/16] drm/amdgpu/uq: make MES UQ setup generic Now that all of the IP specific code has been moved into the IP specific functions, we can make this code generic. V2: Fixed build errors and porting logics (Shashank) Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 10 +-- ...{mes_v11_0_userqueue.c => mes_userqueue.c} | 77 ++++++++++--------- ...{mes_v11_0_userqueue.h => mes_userqueue.h} | 6 +- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 4 +- 5 files changed, 51 insertions(+), 48 deletions(-) rename drivers/gpu/drm/amd/amdgpu/{mes_v11_0_userqueue.c => mes_userqueue.c} (84%) rename drivers/gpu/drm/amd/amdgpu/{mes_v11_0_userqueue.h => mes_userqueue.h} (91%) diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 0b0c8ec46516..513c4d64f554 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -177,7 +177,7 @@ amdgpu-y += \ mes_v12_0.o \ # add GFX userqueue support -amdgpu-$(CONFIG_DRM_AMDGPU_NAVI3X_USERQ) += mes_v11_0_userqueue.o +amdgpu-$(CONFIG_DRM_AMDGPU_NAVI3X_USERQ) += mes_userqueue.o # add UVD block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index b8f75e1ba72c..63b7c7bfcc4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -48,7 +48,7 @@ #include "gfx_v11_0_3.h" #include "nbio_v4_3.h" #include "mes_v11_0.h" -#include "mes_v11_0_userqueue.h" +#include "mes_userqueue.h" #include "amdgpu_userq_fence.h" #define GFX11_NUM_GFX_RINGS 1 @@ -1623,8 +1623,8 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; - adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs; + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; #endif break; case IP_VERSION(11, 0, 1): @@ -1640,8 +1640,8 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMD_USERQ_GFX - adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs; - adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs; + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; #endif break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c similarity index 84% rename from drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c rename to drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 1ba6b91b03c4..9c2fc8ae0d56 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -23,14 +23,15 @@ */ #include "amdgpu.h" #include "amdgpu_gfx.h" -#include "mes_v11_0_userqueue.h" +#include "mes_userqueue.h" #include "amdgpu_userq_fence.h" +#include "v11_structs.h" #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE static int -mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_bo *bo) +mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo) { int ret; @@ -58,7 +59,7 @@ err_reserve_bo_failed: } static int -mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, +mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue, uint64_t wptr) { @@ -86,7 +87,7 @@ mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, return -EINVAL; } - ret = mes_v11_0_map_gtt_bo_to_gart(wptr_obj->obj); + ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj); if (ret) { DRM_ERROR("Failed to map wptr bo to GART\n"); return ret; @@ -96,9 +97,9 @@ mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr, return 0; } -static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue, - struct amdgpu_mqd_prop *userq_props) +static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + struct amdgpu_mqd_prop *userq_props) { struct amdgpu_device *adev = uq_mgr->adev; struct amdgpu_userq_obj *ctx = &queue->fw_obj; @@ -142,8 +143,8 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr, return 0; } -static void mes_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) +static void mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue) { struct amdgpu_device *adev = uq_mgr->adev; struct mes_remove_queue_input queue_input; @@ -162,9 +163,9 @@ static void mes_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr, queue->queue_active = false; } -static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue, - struct drm_amdgpu_userq_in *mqd_user) +static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_usermode_queue *queue, + struct drm_amdgpu_userq_in *mqd_user) { struct amdgpu_userq_obj *ctx = &queue->fw_obj; int r, size; @@ -184,7 +185,7 @@ static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, return 0; } -static void mes_v11_0_userq_set_fence_space(struct amdgpu_usermode_queue *queue) +static void mes_userq_set_fence_space(struct amdgpu_usermode_queue *queue) { struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr; @@ -192,9 +193,9 @@ static void mes_v11_0_userq_set_fence_space(struct amdgpu_usermode_queue *queue) mqd->fenceaddress_hi = upper_32_bits(queue->fence_drv->gpu_addr); } -static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, - struct drm_amdgpu_userq_in *args_in, - struct amdgpu_usermode_queue *queue) +static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, + struct drm_amdgpu_userq_in *args_in, + struct amdgpu_usermode_queue *queue) { struct amdgpu_device *adev = uq_mgr->adev; struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type]; @@ -257,14 +258,15 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) { DRM_ERROR("Invalid GFX MQD\n"); - return -EINVAL; + r = -EINVAL; + goto free_mqd; } mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); if (IS_ERR(mqd_gfx_v11)) { DRM_ERROR("Failed to read user MQD\n"); - amdgpu_userqueue_destroy_object(uq_mgr, ctx); - return -ENOMEM; + r = -ENOMEM; + goto free_mqd; } userq_props->shadow_addr = mqd_gfx_v11->shadow_va; @@ -275,14 +277,15 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) { DRM_ERROR("Invalid SDMA MQD\n"); - return -EINVAL; + r = -EINVAL; + goto free_mqd; } mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size); if (IS_ERR(mqd_sdma_v11)) { DRM_ERROR("Failed to read sdma user MQD\n"); - amdgpu_userqueue_destroy_object(uq_mgr, ctx); - return -ENOMEM; + r = -ENOMEM; + goto free_mqd; } userq_props->csa_addr = mqd_sdma_v11->csa_va; @@ -298,23 +301,23 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, } /* Create BO for FW operations */ - r = mes_v11_0_userq_create_ctx_space(uq_mgr, queue, mqd_user); + r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user); if (r) { DRM_ERROR("Failed to allocate BO for userqueue (%d)", r); goto free_mqd; } - mes_v11_0_userq_set_fence_space(queue); + mes_userq_set_fence_space(queue); /* FW expects WPTR BOs to be mapped into GART */ - r = mes_v11_0_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); + r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); if (r) { DRM_ERROR("Failed to create WPTR mapping\n"); goto free_ctx; } /* Map userqueue into FW using MES */ - r = mes_v11_0_userq_map(uq_mgr, queue, userq_props); + r = mes_userq_map(uq_mgr, queue, userq_props); if (r) { DRM_ERROR("Failed to init MQD\n"); goto free_ctx; @@ -335,29 +338,29 @@ free_props: } static void -mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, +mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { if (queue->queue_active) - mes_v11_0_userq_unmap(uq_mgr, queue); + mes_userq_unmap(uq_mgr, queue); amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); kfree(queue->userq_prop); amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); } -static int mes_v11_0_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, +static int mes_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { if (queue->queue_active) { - mes_v11_0_userq_unmap(uq_mgr, queue); + mes_userq_unmap(uq_mgr, queue); queue->queue_active = false; } return 0; } -static int mes_v11_0_userq_resume(struct amdgpu_userq_mgr *uq_mgr, +static int mes_userq_resume(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue) { int ret; @@ -365,7 +368,7 @@ static int mes_v11_0_userq_resume(struct amdgpu_userq_mgr *uq_mgr, if (queue->queue_active) return 0; - ret = mes_v11_0_userq_map(uq_mgr, queue, queue->userq_prop); + ret = mes_userq_map(uq_mgr, queue, queue->userq_prop); if (ret) { DRM_ERROR("Failed to resume queue\n"); return ret; @@ -375,9 +378,9 @@ static int mes_v11_0_userq_resume(struct amdgpu_userq_mgr *uq_mgr, return 0; } -const struct amdgpu_userq_funcs userq_mes_v11_0_funcs = { - .mqd_create = mes_v11_0_userq_mqd_create, - .mqd_destroy = mes_v11_0_userq_mqd_destroy, - .suspend = mes_v11_0_userq_suspend, - .resume = mes_v11_0_userq_resume, +const struct amdgpu_userq_funcs userq_mes_funcs = { + .mqd_create = mes_userq_mqd_create, + .mqd_destroy = mes_userq_mqd_destroy, + .suspend = mes_userq_suspend, + .resume = mes_userq_resume, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h similarity index 91% rename from drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h rename to drivers/gpu/drm/amd/amdgpu/mes_userqueue.h index 2c102361ca82..d0a521312ad4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h @@ -22,9 +22,9 @@ * */ -#ifndef MES_V11_0_USERQ_H -#define MES_V11_0_USERQ_H +#ifndef MES_USERQ_H +#define MES_USERQ_H #include "amdgpu_userqueue.h" -extern const struct amdgpu_userq_funcs userq_mes_v11_0_funcs; +extern const struct amdgpu_userq_funcs userq_mes_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index fe389379e890..9bc3c7a35d18 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -43,7 +43,7 @@ #include "sdma_common.h" #include "sdma_v6_0.h" #include "v11_structs.h" -#include "mes_v11_0_userqueue.h" +#include "mes_userqueue.h" MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin"); @@ -1381,7 +1381,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_v11_0_funcs; + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; #endif r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) -- 2.51.0 From 988c9e704670a39ef2fd23f3eeb2d3b9c813dab1 Mon Sep 17 00:00:00 2001 From: Somalapuram Amaranath Date: Thu, 10 Oct 2024 20:08:06 +0200 Subject: [PATCH 14/16] drm/amdgpu: enable userqueue support for GFX12 This patch enables Usermode queue support across GFX, Compute and SDMA IPs on GFX12/SDMA7. It typically reuses Navi3X userqueue IP functions to create and destroy MQDs. v2: rebase on proposed changes (Alex) Cc: Alex Deucher Cc: Christian Koenig Cc: Arvind Yadav Reviewed-by: Alex Deucher Signed-off-by: Somalapuram Amaranath Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 8b409b4a1cb3..ee65f4057872 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -44,6 +44,7 @@ #include "gfx_v12_0.h" #include "nbif_v6_3_1.h" #include "mes_v12_0.h" +#include "mes_userqueue.h" #define GFX12_NUM_GFX_RINGS 1 #define GFX12_MEC_HPD_SIZE 2048 @@ -1417,6 +1418,10 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 2; adev->gfx.mec.num_queue_per_pipe = 4; +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; +#endif break; default: adev->gfx.me.num_me = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 3a0f38fc003e..3514089acd94 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -42,6 +42,7 @@ #include "sdma_common.h" #include "sdma_v7_0.h" #include "v12_structs.h" +#include "mes_userqueue.h" MODULE_FIRMWARE("amdgpu/sdma_7_0_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin"); @@ -1381,6 +1382,11 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) else DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ + adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; +#endif + + return r; } -- 2.51.0 From dd5a376cd234c3fff79667598a1e06300cf75026 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Tue, 26 Nov 2024 15:51:08 +0100 Subject: [PATCH 15/16] drm/amdgpu: enable userqueue secure sem for GFX 12 - Add a field in struct amdgpu_mqd_prop for userqueue secure sem fence address since now we have a generic file for mes_userqueue.c - Add secure sem fence address mqd support to gfx12 into their corresponding init functions. - Enable secure semaphore IRQ handling V2: Address review comment from Alex: Use fence_address instead of fenceaddress (Shashank) Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Alex Deucher Signed-off-by: Arunpravin Paneer Selvam Signed-off-by: Somalapuram Amaranath Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 27 +++++++++++----------- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 11 +-------- drivers/gpu/drm/amd/include/v11_structs.h | 4 ++-- drivers/gpu/drm/amd/include/v12_structs.h | 4 ++-- 6 files changed, 22 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ea5a1e6f01c3..3cdb5f8325aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -833,6 +833,7 @@ struct amdgpu_mqd_prop { uint64_t shadow_addr; uint64_t gds_bkup_addr; uint64_t csa_addr; + uint64_t fence_address; }; struct amdgpu_mqd { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 63b7c7bfcc4a..114284e65c7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4134,6 +4134,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr); mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index ee65f4057872..1030f2985c7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -45,6 +45,7 @@ #include "nbif_v6_3_1.h" #include "mes_v12_0.h" #include "mes_userqueue.h" +#include "amdgpu_userq_fence.h" #define GFX12_NUM_GFX_RINGS 1 #define GFX12_MEC_HPD_SIZE 2048 @@ -3037,6 +3038,8 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); + mqd->fence_address_lo = lower_32_bits(prop->fence_address); + mqd->fence_address_hi = upper_32_bits(prop->fence_address); return 0; } @@ -4819,25 +4822,23 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - int i; + u32 doorbell_offset = entry->src_data[0]; u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring; - uint32_t mes_queue_id = entry->src_data[0]; + int i; DRM_DEBUG("IH: CP EOP\n"); - if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { - struct amdgpu_mes_queue *queue; + if (adev->enable_mes && doorbell_offset) { + struct amdgpu_userq_fence_driver *fence_drv = NULL; + struct xarray *xa = &adev->userq_xa; + unsigned long flags; - mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; - - spin_lock(&adev->mes.queue_id_lock); - queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); - if (queue) { - DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); - amdgpu_fence_process(queue->ring); - } - spin_unlock(&adev->mes.queue_id_lock); + xa_lock_irqsave(xa, flags); + fence_drv = xa_load(xa, doorbell_offset); + if (fence_drv) + amdgpu_userq_fence_driver_process(fence_drv); + xa_unlock_irqrestore(xa, flags); } else { me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 9c2fc8ae0d56..1dde099382ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -185,14 +185,6 @@ static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr, return 0; } -static void mes_userq_set_fence_space(struct amdgpu_usermode_queue *queue) -{ - struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr; - - mqd->fenceaddress_lo = lower_32_bits(queue->fence_drv->gpu_addr); - mqd->fenceaddress_hi = upper_32_bits(queue->fence_drv->gpu_addr); -} - static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, struct drm_amdgpu_userq_in *args_in, struct amdgpu_usermode_queue *queue) @@ -231,6 +223,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, userq_props->mqd_gpu_addr = queue->mqd.gpu_addr; userq_props->use_doorbell = true; userq_props->doorbell_index = queue->doorbell_index; + userq_props->fence_address = queue->fence_drv->gpu_addr; if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) { struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd; @@ -307,8 +300,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, goto free_mqd; } - mes_userq_set_fence_space(queue); - /* FW expects WPTR BOs to be mapped into GART */ r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr); if (r) { diff --git a/drivers/gpu/drm/amd/include/v11_structs.h b/drivers/gpu/drm/amd/include/v11_structs.h index 797ce6a1e56e..f6d4dab849eb 100644 --- a/drivers/gpu/drm/amd/include/v11_structs.h +++ b/drivers/gpu/drm/amd/include/v11_structs.h @@ -535,8 +535,8 @@ struct v11_gfx_mqd { uint32_t reserved_507; // offset: 507 (0x1FB) uint32_t reserved_508; // offset: 508 (0x1FC) uint32_t reserved_509; // offset: 509 (0x1FD) - uint32_t fenceaddress_lo; // offset: 510 (0x1FE) - uint32_t fenceaddress_hi; // offset: 511 (0x1FF) + uint32_t fence_address_lo; // offset: 510 (0x1FE) + uint32_t fence_address_hi; // offset: 511 (0x1FF) }; struct v11_sdma_mqd { diff --git a/drivers/gpu/drm/amd/include/v12_structs.h b/drivers/gpu/drm/amd/include/v12_structs.h index 5eabab611b02..5787c8a51b7c 100644 --- a/drivers/gpu/drm/amd/include/v12_structs.h +++ b/drivers/gpu/drm/amd/include/v12_structs.h @@ -535,8 +535,8 @@ struct v12_gfx_mqd { uint32_t reserved_507; // offset: 507 (0x1FB) uint32_t reserved_508; // offset: 508 (0x1FC) uint32_t reserved_509; // offset: 509 (0x1FD) - uint32_t reserved_510; // offset: 510 (0x1FE) - uint32_t reserved_511; // offset: 511 (0x1FF) + uint32_t fence_address_lo; // offset: 510 (0x1FE) + uint32_t fence_address_hi; // offset: 511 (0x1FF) }; struct v12_sdma_mqd { -- 2.51.0 From a242a3e4b5be22ffd85fb768d8c96df79b018136 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Wed, 11 Dec 2024 12:09:00 +0100 Subject: [PATCH 16/16] drm/amdgpu: simplify eviction fence suspend/resume MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The basic idea in this redesign is to add an eviction fence only in UQ resume path. When userqueue is not present, keep ev_fence as NULL Main changes are: - do not create the eviction fence during evf_mgr_init, keeping evf_mgr->ev_fence=NULL until UQ get active. - do not replace the ev_fence in evf_resume path, but replace it only in uq_resume path, so remove all the unnecessary code from ev_fence_resume. - add a new helper function (amdgpu_userqueue_ensure_ev_fence) which will do the following: - flush any pending uq_resume work, so that it could create an eviction_fence - if there is no pending uq_resume_work, add a uq_resume work and wait for it to execute so that we always have a valid ev_fence - call this helper function from two places, to ensure we have a valid ev_fence: - when a new uq is created - when a new uq completion fence is created v2: Worked on review comments by Christian. v3: Addressed few more review comments by Christian. v4: Move mutex lock outside of the amdgpu_userqueue_suspend() function (Christian). v5: squash in build fix (Alex) Cc: Alex Deucher Reviewed-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- .../drm/amd/amdgpu/amdgpu_eviction_fence.c | 78 ++++--------------- .../drm/amd/amdgpu/amdgpu_eviction_fence.h | 3 +- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 24 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 68 +++++++++------- .../gpu/drm/amd/include/amdgpu_userqueue.h | 7 +- 5 files changed, 69 insertions(+), 111 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c index f7fb1674278c..8358dc6b68ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -87,7 +87,8 @@ amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, } /* Free old fence */ - dma_fence_put(&old_ef->base); + if (old_ef) + dma_fence_put(&old_ef->base); return 0; free_err: @@ -101,58 +102,17 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct *work) struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work); struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr); struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; - struct amdgpu_vm *vm = &fpriv->vm; - struct amdgpu_bo_va *bo_va; - struct drm_exec exec; - bool userq_active = amdgpu_userqueue_active(uq_mgr); - int ret; - - - /* For userqueues, the fence replacement happens in resume path */ - if (userq_active) { - amdgpu_userqueue_suspend(uq_mgr); - return; - } - - /* Signal old eviction fence */ - amdgpu_eviction_fence_signal(evf_mgr); - - /* Do not replace eviction fence is fd is getting closed */ - if (evf_mgr->fd_closing) - return; - - /* Prepare the objects to replace eviction fence */ - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); - drm_exec_until_all_locked(&exec) { - ret = amdgpu_vm_lock_pd(vm, &exec, 2); - drm_exec_retry_on_contention(&exec); - if (unlikely(ret)) - goto unlock_drm; - - /* Lock the done list */ - list_for_each_entry(bo_va, &vm->done, base.vm_status) { - struct amdgpu_bo *bo = bo_va->base.bo; - - if (!bo) - continue; - - if (vm != bo_va->base.vm) - continue; + struct amdgpu_eviction_fence *ev_fence; - ret = drm_exec_lock_obj(&exec, &bo->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(ret)) - goto unlock_drm; - } - } + mutex_lock(&uq_mgr->userq_mutex); + ev_fence = evf_mgr->ev_fence; + if (!ev_fence) + goto unlock; - /* Replace old eviction fence with new one */ - ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec); - if (ret) - DRM_ERROR("Failed to replace eviction fence\n"); + amdgpu_userqueue_suspend(uq_mgr, ev_fence); -unlock_drm: - drm_exec_fini(&exec); +unlock: + mutex_unlock(&uq_mgr->userq_mutex); } static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f) @@ -177,10 +137,11 @@ static const struct dma_fence_ops amdgpu_eviction_fence_ops = { .enable_signaling = amdgpu_eviction_fence_enable_signaling, }; -void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr) +void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_eviction_fence *ev_fence) { spin_lock(&evf_mgr->ev_fence_lock); - dma_fence_signal(&evf_mgr->ev_fence->base); + dma_fence_signal(&ev_fence->base); spin_unlock(&evf_mgr->ev_fence_lock); } @@ -244,6 +205,7 @@ int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, dma_resv_add_fence(resv, ef, DMA_RESV_USAGE_BOOKKEEP); } spin_unlock(&evf_mgr->ev_fence_lock); + return 0; } @@ -259,23 +221,11 @@ void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr) { - struct amdgpu_eviction_fence *ev_fence; - /* This needs to be done one time per open */ atomic_set(&evf_mgr->ev_fence_seq, 0); evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1); spin_lock_init(&evf_mgr->ev_fence_lock); - ev_fence = amdgpu_eviction_fence_create(evf_mgr); - if (!ev_fence) { - DRM_ERROR("Failed to craete eviction fence\n"); - return -ENOMEM; - } - - spin_lock(&evf_mgr->ev_fence_lock); - evf_mgr->ev_fence = ev_fence; - spin_unlock(&evf_mgr->ev_fence_lock); - INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h index 787182bd1069..fcd867b7147d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h @@ -60,7 +60,8 @@ int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr); void -amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr); +amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_eviction_fence *ev_fence); int amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 877cb17a14e9..20c36dc97c2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -466,14 +466,11 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, } } - /* Save the fence to wait for during suspend */ - mutex_lock(&userq_mgr->userq_mutex); - /* Retrieve the user queue */ queue = idr_find(&userq_mgr->userq_idr, args->queue_id); if (!queue) { r = -ENOENT; - mutex_unlock(&userq_mgr->userq_mutex); + goto put_gobj_write; } drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, @@ -483,31 +480,26 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) { - mutex_unlock(&userq_mgr->userq_mutex); + if (r) goto exec_fini; - } r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) { - mutex_unlock(&userq_mgr->userq_mutex); + if (r) goto exec_fini; - } } r = amdgpu_userq_fence_read_wptr(queue, &wptr); - if (r) { - mutex_unlock(&userq_mgr->userq_mutex); + if (r) goto exec_fini; - } /* Create a new fence */ r = amdgpu_userq_fence_create(queue, wptr, &fence); - if (r) { - mutex_unlock(&userq_mgr->userq_mutex); + if (r) goto exec_fini; - } + + /* We are here means UQ is active, make sure the eviction fence is valid */ + amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); dma_fence_put(queue->last_fence); queue->last_fence = dma_fence_get(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index cba51bdf2e2c..c11fcdd604fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -101,6 +101,31 @@ amdgpu_userqueue_find(struct amdgpu_userq_mgr *uq_mgr, int qid) return idr_find(&uq_mgr->userq_idr, qid); } +void +amdgpu_userqueue_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + +retry: + /* Flush any pending resume work to create ev_fence */ + flush_delayed_work(&uq_mgr->resume_work); + + mutex_lock(&uq_mgr->userq_mutex); + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + spin_unlock(&evf_mgr->ev_fence_lock); + if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) { + mutex_unlock(&uq_mgr->userq_mutex); + /* + * Looks like there was no pending resume work, + * add one now to create a valid eviction fence + */ + schedule_delayed_work(&uq_mgr->resume_work, 0); + goto retry; + } +} + int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userq_obj *userq_obj, int size) @@ -253,7 +278,14 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) return -EINVAL; } - mutex_lock(&uq_mgr->userq_mutex); + /* + * There could be a situation that we are creating a new queue while + * the other queues under this UQ_mgr are suspended. So if there is any + * resume work pending, wait for it to get done. + * + * This will also make sure we have a valid eviction fence ready to be used. + */ + amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); uq_funcs = adev->userq_funcs[args->in.ip_type]; if (!uq_funcs) { @@ -308,14 +340,6 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) unlock: mutex_unlock(&uq_mgr->userq_mutex); - if (!r) { - /* - * There could be a situation that we are creating a new queue while - * the other queues under this UQ_mgr are suspended. So if there is any - * resume work pending, wait for it to get done. - */ - flush_delayed_work(&uq_mgr->resume_work); - } return r; } @@ -551,58 +575,44 @@ amdgpu_userqueue_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) } void -amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr) +amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_eviction_fence *ev_fence) { int ret; struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr; - - mutex_lock(&uq_mgr->userq_mutex); - - /* Wait for any pending userqueue fence to signal */ + /* Wait for any pending userqueue fence work to finish */ ret = amdgpu_userqueue_wait_for_signal(uq_mgr); if (ret) { DRM_ERROR("Not suspending userqueue, timeout waiting for work\n"); - goto unlock; + return; } ret = amdgpu_userqueue_suspend_all(uq_mgr); if (ret) { DRM_ERROR("Failed to evict userqueue\n"); - goto unlock; + return; } /* Signal current eviction fence */ - amdgpu_eviction_fence_signal(evf_mgr); + amdgpu_eviction_fence_signal(evf_mgr, ev_fence); if (evf_mgr->fd_closing) { - mutex_unlock(&uq_mgr->userq_mutex); cancel_delayed_work(&uq_mgr->resume_work); return; } /* Schedule a resume work */ schedule_delayed_work(&uq_mgr->resume_work, 0); - -unlock: - mutex_unlock(&uq_mgr->userq_mutex); } int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev) { - struct amdgpu_fpriv *fpriv; - mutex_init(&userq_mgr->userq_mutex); idr_init_base(&userq_mgr->userq_idr, 1); userq_mgr->adev = adev; - fpriv = uq_mgr_to_fpriv(userq_mgr); - if (!fpriv->evf_mgr.ev_fence) { - DRM_ERROR("Eviction fence not initialized yet\n"); - return -EINVAL; - } - INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userqueue_resume_worker); return 0; } diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index 2bf28f3454cb..e7e8d79b689d 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -24,6 +24,7 @@ #ifndef AMDGPU_USERQUEUE_H_ #define AMDGPU_USERQUEUE_H_ +#include "amdgpu_eviction_fence.h" #define AMDGPU_MAX_USERQ_COUNT 512 @@ -90,7 +91,11 @@ int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr, void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_userq_obj *userq_obj); -void amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr); +void amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_eviction_fence *ev_fence); int amdgpu_userqueue_active(struct amdgpu_userq_mgr *uq_mgr); + +void amdgpu_userqueue_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr, + struct amdgpu_eviction_fence_mgr *evf_mgr); #endif -- 2.51.0