From c9e20cb005fdb6a727dc1a85d7192a35eeb11987 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 9 Dec 2024 23:02:28 +0530 Subject: [PATCH 01/16] drm/amdgpu: Fix NULL ptr dereference issue for non userq fences MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add the correct fences count variable [num_fences] in the fences array iteration to handle the userq / non-userq fences. v2:(Christian) - All fences in the array either come from some reservation object or drm_syncobj. If any of those are NULL then there is a bug somewhere else. Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 20c36dc97c2e..8a4d9495f9d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -817,7 +817,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data, fences[num_fences++] = fence; } - for (i = 0, cnt = 0; i < wait_info->num_fences; i++) { + for (i = 0, cnt = 0; i < num_fences; i++) { struct amdgpu_userq_fence_driver *fence_drv; struct amdgpu_userq_fence *userq_fence; u32 index; -- 2.51.0 From ed5fdc1fc282dbe7bdf4968fb9dafb4366114160 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 9 Dec 2024 23:04:34 +0530 Subject: [PATCH 02/16] drm/amdgpu: Fix the use-after-free issue in wait IOCTL MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The xarray pointer which has the userqueue xarray structure reference should be cleared when the userqueue gets destroyed. Otherwise, we may access the freed xa memory and see the below warnings. warning 1: BUG: KASAN: slab-use-after-free in _raw_spin_lock+0x7a/0xe0 [ +0.000044] Call Trace: [ +0.000017] [ +0.000016] dump_stack_lvl+0x6c/0x90 [ +0.000025] print_report+0xc4/0x5e0 [ +0.000025] ? srso_return_thunk+0x5/0x5f [ +0.000024] ? kasan_complete_mode_report_info+0x60/0x1d0 [ +0.000030] ? _raw_spin_lock+0x7a/0xe0 [ +0.000023] kasan_report+0xdf/0x120 [ +0.000023] ? _raw_spin_lock+0x7a/0xe0 [ +0.000025] kasan_check_range+0xf7/0x1b0 [ +0.000025] __kasan_check_write+0x14/0x20 [ +0.000024] _raw_spin_lock+0x7a/0xe0 [ +0.000023] ? __pfx__raw_spin_lock+0x10/0x10 [ +0.000024] ? amdgpu_userq_wait_ioctl+0xac0/0x1f30 [amdgpu] [ +0.000442] amdgpu_userq_wait_ioctl+0x18fc/0x1f30 [amdgpu] [ +0.000428] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu] [ +0.000424] ? __pfx_idr_alloc_u32+0x10/0x10 [ +0.000027] ? srso_return_thunk+0x5/0x5f [ +0.000024] ? __kasan_check_write+0x14/0x20 [ +0.000025] ? srso_return_thunk+0x5/0x5f [ +0.000024] ? idr_alloc+0x72/0xc0 [ +0.000023] ? srso_return_thunk+0x5/0x5f [ +0.000023] ? fput+0x1c/0x2f0 [ +0.000025] drm_ioctl_kernel+0x178/0x2f0 [drm] [ +0.000065] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu] [ +0.000425] ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm] [ +0.000064] ? srso_return_thunk+0x5/0x5f [ +0.000023] ? __kasan_check_write+0x14/0x20 [ +0.000025] drm_ioctl+0x513/0xd20 [drm] [ +0.000058] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu] [ +0.000428] ? __pfx_drm_ioctl+0x10/0x10 [drm] [ +0.000061] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ +0.000027] ? __count_memcg_events+0x11f/0x3a0 [ +0.000027] ? srso_return_thunk+0x5/0x5f [ +0.001040] ? srso_return_thunk+0x5/0x5f [ +0.000969] ? 
_raw_spin_unlock_irqrestore+0x27/0x50 [ +0.000966] amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu] [ +0.001352] __x64_sys_ioctl+0x135/0x1b0 [ +0.000966] x64_sys_call+0x1205/0x20d0 [ +0.000968] do_syscall_64+0x4d/0x120 [ +0.000960] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ +0.000962] RIP: 0033:0x7f42af11a94f warning 2: WARNING: at lib/xarray.c:1849 __xa_alloc+0x13a/0x150 [ 366.491409] RIP: 0010:__xa_alloc+0x13a/0x150 [ 366.491434] Call Trace: [ 366.491437] [ 366.491440] ? show_regs+0x6d/0x80 [ 366.491445] ? __warn+0x91/0x140 [ 366.491450] ? __xa_alloc+0x13a/0x150 [ 366.491453] ? report_bug+0x1c9/0x1e0 [ 366.491459] ? handle_bug+0x63/0xa0 [ 366.491463] ? exc_invalid_op+0x1d/0x80 [ 366.491467] ? asm_exc_invalid_op+0x1f/0x30 [ 366.491476] ? __xa_alloc+0x13a/0x150 [ 366.491484] amdgpu_userq_wait_ioctl+0xe0e/0xfe0 [amdgpu] [ 366.491743] ? idr_alloc_u32+0x97/0xd0 [ 366.491749] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu] [ 366.491912] drm_ioctl_kernel+0xae/0x100 [drm] [ 366.491942] drm_ioctl+0x2a1/0x500 [drm] [ 366.491961] ? __pfx_amdgpu_userq_wait_ioctl+0x10/0x10 [amdgpu] [ 366.492127] ? srso_return_thunk+0x5/0x5f [ 366.492132] ? srso_return_thunk+0x5/0x5f [ 366.492135] ? _raw_spin_unlock_irqrestore+0x2b/0x50 [ 366.492139] amdgpu_drm_ioctl+0x4f/0x90 [amdgpu] [ 366.492288] __x64_sys_ioctl+0x99/0xd0 [ 366.492295] x64_sys_call+0x1209/0x20d0 [ 366.492299] do_syscall_64+0x51/0x120 [ 366.492303] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 366.492418] RIP: 0033:0x7f86f3b1a94f Signed-off-by: Arunpravin Paneer Selvam Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index c11fcdd604fc..85baba323ba5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -73,6 +73,7 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr, } uq_funcs->mqd_destroy(uq_mgr, queue); + queue->fence_drv->fence_drv_xa_ptr = NULL; amdgpu_userq_fence_driver_free(queue); idr_remove(&uq_mgr->userq_idr, queue_id); kfree(queue); -- 2.51.0 From 91acb5d47b7cc9fe17a7d0034e5f5ac996633bae Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Mon, 9 Dec 2024 23:10:48 +0530 Subject: [PATCH 03/16] drm/amdgpu: Modify the MES process va end limit MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Modify the MES process va end limit to max pfn. 
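In other words, process_va_end is now handed to MES as a page frame number rather than a byte address. A rough sketch of the unit change, assuming a 48-bit GPU VA space and AMDGPU_GPU_PAGE_SHIFT == 12 (both values are illustrative assumptions, not taken from this series):

	/* illustration only: units of process_va_end before and after this patch */
	u64 max_pfn = 1ULL << (48 - 12);        /* assumed 48-bit VA space, 4K GPU pages */
	u64 old_end = (max_pfn - 1) << 12;      /* byte address:       0x0000fffffffff000 */
	u64 new_end = max_pfn - 1;              /* page frame number:  0x0000000fffffffff */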
Signed-off-by: Christian König Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 3 +-- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index acdca3110c24..503add938634 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -656,8 +656,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, adev->gmc.vram_start; queue_input.process_va_start = 0; - queue_input.process_va_end = - (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; + queue_input.process_va_end = adev->vm_manager.max_pfn - 1; queue_input.process_quantum = gang->process->process_quantum; queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr; queue_input.gang_quantum = gang->gang_quantum; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 1dde099382ea..9a6a5553bbc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -109,7 +109,7 @@ static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr, memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); queue_input.process_va_start = 0; - queue_input.process_va_end = (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; + queue_input.process_va_end = adev->vm_manager.max_pfn - 1; /* set process quantum to 10 ms and gang quantum to 1 ms as default */ queue_input.process_quantum = 100000; -- 2.51.0 From 02521454f0552e289e6d95a2b55c8395285b0e01 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Thu, 12 Dec 2024 19:36:16 +0530 Subject: [PATCH 04/16] drm/amdgpu: Apply sign extension to seq64 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Apply sign extension to the seq64 va address. Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index 898d215a8d99..2de1a844282e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -45,7 +45,11 @@ */ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) { - return AMDGPU_VA_RESERVED_SEQ64_START(adev); + u64 addr = AMDGPU_VA_RESERVED_SEQ64_START(adev); + + addr = amdgpu_gmc_sign_extend(addr); + + return addr; } /** @@ -88,7 +92,7 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, goto error; } - seq64_addr = amdgpu_seq64_get_va_base(adev); + seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK; r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE, AMDGPU_PTE_READABLE); if (r) { -- 2.51.0 From adba0929736a6a2d2780e8e6e4082e42e5ba025c Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 19 Dec 2024 19:43:54 +0530 Subject: [PATCH 05/16] drm/amdgpu: Fix Illegal opcode in command stream Error When an application closes, it triggers the drm_file_free function, which subsequently releases all allocated buffer objects. Concurrently, the resume_worker thread will attempt to map the usermode queue.
However, since the wptr buffer object has already been deallocated, this will result in an Illegal opcode error being raised in the command stream. Now replace drm_release() with a new function, amdgpu_drm_release(). This function sets a flag to prevent the scheduling of any new queue resume/map, stops all queues, and then calls drm_release(). V2: - Replace drm_release with amdgpu_drm_release (Christian). Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 148f859c3b98..5c55f6e01b41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2851,6 +2851,20 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) return ret; } +static int amdgpu_drm_release(struct inode *inode, struct file *filp) +{ + struct drm_file *file_priv = filp->private_data; + struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + + if (fpriv) { + fpriv->evf_mgr.fd_closing = true; + amdgpu_userq_mgr_fini(&fpriv->userq_mgr); + amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); + } + + return drm_release(inode, filp); +} + long amdgpu_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -2902,7 +2916,7 @@ static const struct file_operations amdgpu_driver_kms_fops = { .owner = THIS_MODULE, .open = drm_open, .flush = amdgpu_flush, - .release = drm_release, + .release = amdgpu_drm_release, .unlocked_ioctl = amdgpu_drm_ioctl, .mmap = drm_gem_mmap, .poll = drm_poll, -- 2.51.0 From 8639d2f5ca27ca533e782cc8f8de62ea002f1833 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Fri, 20 Dec 2024 13:44:23 +0100 Subject: [PATCH 06/16] drm/amdgpu: fix call to amdgpu_eviction_fence_detach MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit That needs to be done after grabbing the lock, not before. Signed-off-by: Christian König Acked-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 6850a6954a71..b9b80b0b60ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -351,9 +351,6 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_exec exec; long r; - if (!amdgpu_vm_is_bo_always_valid(vm, bo)) - amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo); - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1); @@ -367,6 +364,9 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, goto out_unlock; } + if (!amdgpu_vm_is_bo_always_valid(vm, bo)) + amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo); + bo_va = amdgpu_vm_bo_find(vm, bo); if (!bo_va || --bo_va->ref_count) goto out_unlock; -- 2.51.0 From 3e37fcb57bdf794804bec9538d20673a5bf4bdd8 Mon Sep 17 00:00:00 2001 From: Saleemkhan Jamadar Date: Fri, 3 Jan 2025 19:02:59 +0530 Subject: [PATCH 07/16] drm/amdgpu: map doorbell for the requested userq Introduce the db_info structure to populate the doorbell information that is required to be mapped.
Made the doorbell mapping function more generic by moving the parameters that vary based on IP and/or use case into the db_info structure. v2 - Fix space alignment and checkpatch warnings (Shashank) Signed-off-by: Saleemkhan Jamadar Reviewed-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 27 +++++++++++-------- .../gpu/drm/amd/include/amdgpu_userqueue.h | 12 +++++++++ 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 85baba323ba5..769154223e2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -189,18 +189,17 @@ void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr, amdgpu_bo_unref(&userq_obj->obj); } -static uint64_t +uint64_t amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue, - struct drm_file *filp, - uint32_t doorbell_offset) + struct amdgpu_db_info *db_info, + struct drm_file *filp) { uint64_t index; struct drm_gem_object *gobj; - struct amdgpu_userq_obj *db_obj = &queue->db_obj; - int r; + struct amdgpu_userq_obj *db_obj = db_info->db_obj; + int r, db_size; - gobj = drm_gem_object_lookup(filp, queue->doorbell_handle); + gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle); if (gobj == NULL) { DRM_ERROR("Can't find GEM object for doorbell\n"); return -EINVAL; @@ -222,8 +221,9 @@ amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, goto unpin_bo; } + db_size = sizeof(u64); index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj, - doorbell_offset, sizeof(u64)); + db_info->doorbell_offset, db_size); DRM_DEBUG_DRIVER("[Usermode queues] doorbell index=%lld\n", index); amdgpu_bo_unreserve(db_obj->obj); return index; @@ -268,6 +268,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) struct amdgpu_device *adev = uq_mgr->adev; const struct amdgpu_userq_funcs *uq_funcs; struct amdgpu_usermode_queue *queue; + struct amdgpu_db_info db_info; uint64_t index; int qid, r = 0; @@ -302,19 +303,23 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args) goto unlock; } queue->doorbell_handle = args->in.doorbell_handle; - queue->doorbell_index = args->in.doorbell_offset; queue->queue_type = args->in.ip_type; queue->vm = &fpriv->vm; + db_info.queue_type = queue->queue_type; + db_info.doorbell_handle = queue->doorbell_handle; + db_info.db_obj = &queue->db_obj; + db_info.doorbell_offset = args->in.doorbell_offset; + /* Convert relative doorbell offset into absolute doorbell index */ - index = amdgpu_userqueue_get_doorbell_index(uq_mgr, queue, filp, args->in.doorbell_offset); + index = amdgpu_userqueue_get_doorbell_index(uq_mgr, &db_info, filp); if (index == (uint64_t)-EINVAL) { DRM_ERROR("Failed to get doorbell for queue\n"); kfree(queue); goto unlock; } - queue->doorbell_index = index; + queue->doorbell_index = index; xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); r = amdgpu_userq_fence_driver_alloc(adev, queue); if (r) { diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h index e7e8d79b689d..0f358f77f2d9 100644 --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h @@ -78,6 +78,13 @@ struct amdgpu_userq_mgr { struct delayed_work resume_work; }; +struct amdgpu_db_info { + uint64_t doorbell_handle; + uint32_t
queue_type; + uint32_t doorbell_offset; + struct amdgpu_userq_obj *db_obj; +}; + int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev); @@ -98,4 +105,9 @@ int amdgpu_userqueue_active(struct amdgpu_userq_mgr *uq_mgr); void amdgpu_userqueue_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_eviction_fence_mgr *evf_mgr); + +uint64_t amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, + struct amdgpu_db_info *db_info, + struct drm_file *filp); + #endif -- 2.51.0 From 49cd3353dbea6bb5c5a9c3201852fc363a2f34ad Mon Sep 17 00:00:00 2001 From: Saleemkhan Jamadar Date: Mon, 6 Jan 2025 12:50:50 +0530 Subject: [PATCH 08/16] drm/amdgpu: add db size and offset range for VCN and VPE VCN and VPE have different offset ranges, so update the doorbell offset range for each accordingly. The doorbell size for VCN and VPE is 32 bits. v1: add gfx switch case and fix checkpatch warnings (Shashank) Signed-off-by: Saleemkhan Jamadar Reviewed-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 769154223e2d..2eac83ba8bdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -221,7 +221,29 @@ amdgpu_userqueue_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, goto unpin_bo; } - db_size = sizeof(u64); + switch (db_info->queue_type) { + case AMDGPU_HW_IP_GFX: + case AMDGPU_HW_IP_COMPUTE: + case AMDGPU_HW_IP_DMA: + db_size = sizeof(u64); + break; + + case AMDGPU_HW_IP_VCN_ENC: + db_size = sizeof(u32); + db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1; + break; + + case AMDGPU_HW_IP_VPE: + db_size = sizeof(u32); + db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VPE << 1; + break; + + default: + DRM_ERROR("[Usermode queues] IP %d not support\n", db_info->queue_type); + r = -EINVAL; + goto unpin_bo; + } + index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj, db_info->doorbell_offset, db_size); DRM_DEBUG_DRIVER("[Usermode queues] doorbell index=%lld\n", index); -- 2.51.0 From 239a310b4942a81f5e87a442bf1bcb759494f0c1 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 1 Jan 2025 14:22:29 +0530 Subject: [PATCH 09/16] drm/amdgpu: Fix out-of-bounds issue in user fence MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Fix an out-of-bounds issue in userq fence create when accessing the userq xa structure. Added a lock to protect against the race condition. v2:(Christian) - Allocate memory with GFP_ATOMIC. v3: - Moved to 2 xa approach. v4:(Christian) - Lock the xa_for_each blocks and memory allocation part as well to make sure that xa is not modified in between the 2 xa_for_each blocks. BUG: KASAN: slab-out-of-bounds in amdgpu_userq_fence_create+0x726/0x880 [amdgpu] [ +0.000006] Call Trace: [ +0.000005] [ +0.000005] dump_stack_lvl+0x6c/0x90 [ +0.000011] print_report+0xc4/0x5e0 [ +0.000009] ? srso_return_thunk+0x5/0x5f [ +0.000008] ? kasan_complete_mode_report_info+0x26/0x1d0 [ +0.000007] ? amdgpu_userq_fence_create+0x726/0x880 [amdgpu] [ +0.000405] kasan_report+0xdf/0x120 [ +0.000009] ?
amdgpu_userq_fence_create+0x726/0x880 [amdgpu] [ +0.000405] __asan_report_store8_noabort+0x17/0x20 [ +0.000007] amdgpu_userq_fence_create+0x726/0x880 [amdgpu] [ +0.000406] ? __pfx_amdgpu_userq_fence_create+0x10/0x10 [amdgpu] [ +0.000408] ? srso_return_thunk+0x5/0x5f [ +0.000008] ? ttm_resource_move_to_lru_tail+0x235/0x4f0 [ttm] [ +0.000013] ? srso_return_thunk+0x5/0x5f [ +0.000008] amdgpu_userq_signal_ioctl+0xd29/0x1c70 [amdgpu] [ +0.000412] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu] [ +0.000404] ? try_to_wake_up+0x165/0x1840 [ +0.000010] ? __pfx_futex_wake_mark+0x10/0x10 [ +0.000011] drm_ioctl_kernel+0x178/0x2f0 [drm] [ +0.000050] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu] [ +0.000404] ? __pfx_drm_ioctl_kernel+0x10/0x10 [drm] [ +0.000043] ? __kasan_check_read+0x11/0x20 [ +0.000007] ? srso_return_thunk+0x5/0x5f [ +0.000007] ? __kasan_check_write+0x14/0x20 [ +0.000008] drm_ioctl+0x513/0xd20 [drm] [ +0.000040] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu] [ +0.000407] ? __pfx_drm_ioctl+0x10/0x10 [drm] [ +0.000044] ? srso_return_thunk+0x5/0x5f [ +0.000007] ? _raw_spin_lock_irqsave+0x99/0x100 [ +0.000007] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ +0.000006] ? __rseq_handle_notify_resume+0x188/0xc30 [ +0.000008] ? srso_return_thunk+0x5/0x5f [ +0.000008] ? srso_return_thunk+0x5/0x5f [ +0.000006] ? _raw_spin_unlock_irqrestore+0x27/0x50 [ +0.000010] amdgpu_drm_ioctl+0xcd/0x1d0 [amdgpu] [ +0.000388] __x64_sys_ioctl+0x135/0x1b0 [ +0.000009] x64_sys_call+0x1205/0x20d0 [ +0.000007] do_syscall_64+0x4d/0x120 [ +0.000008] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ +0.000007] RIP: 0033:0x7f7c3d31a94f Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 8a4d9495f9d7..567a5ffa7765 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -229,23 +229,25 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, unsigned long index, count = 0; int i = 0; + xa_lock(&userq->fence_drv_xa); xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) count++; userq_fence->fence_drv_array = kvmalloc_array(count, sizeof(struct amdgpu_userq_fence_driver *), - GFP_KERNEL); + GFP_ATOMIC); if (userq_fence->fence_drv_array) { xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) { userq_fence->fence_drv_array[i] = stored_fence_drv; - xa_erase(&userq->fence_drv_xa, index); + __xa_erase(&userq->fence_drv_xa, index); i++; } } userq_fence->fence_drv_array_count = i; + xa_unlock(&userq->fence_drv_xa); } else { userq_fence->fence_drv_array = NULL; userq_fence->fence_drv_array_count = 0; -- 2.51.0 From fc4a85c6b2cc1b0f4e682180165a4e629711701f Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Mon, 10 Feb 2025 22:17:28 +0530 Subject: [PATCH 10/16] drm/amdgpu: Modify the seq64 VM cache policy MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The seq64 VM cache policy should be set to UC (Uncached) to match with userqueue fence address kernel mapped memory's cache settings. 
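For clarity, the functional change is only in the PTE flags requested for the seq64 VA range; a condensed before/after view of the mapping call (both variants taken from the hunk below):

	/* before: plain readable mapping with the default (cached) memory type */
	r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE, AMDGPU_PTE_READABLE);

	/* after: readable and explicitly uncached, matching the CPU-side kernel mapping */
	va_flags = amdgpu_gem_va_map_flags(adev, AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC);
	r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE, va_flags);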
Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index 2de1a844282e..3939761be31c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -67,9 +67,9 @@ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va) { + u64 seq64_addr, va_flags; struct amdgpu_bo *bo; struct drm_exec exec; - u64 seq64_addr; int r; bo = adev->seq64.sbo; @@ -93,8 +93,10 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, } seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK; + + va_flags = amdgpu_gem_va_map_flags(adev, AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC); r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE, - AMDGPU_PTE_READABLE); + va_flags); if (r) { DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r); amdgpu_vm_bo_del(adev, *bo_va); -- 2.51.0 From f15d4e92f7d35128d6416c473c2eab24188bd7e8 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 27 Jan 2025 18:22:01 +0530 Subject: [PATCH 11/16] drm/amdgpu: Fix display freeze lockup error MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit A deadlock situation has arisen between the userq signal ioctl and the eviction fence. In this scenario, the function amdgpu_userq_signal_ioctl() has acquired a reservation lock on the read/write buffer object (BO) through drm_exec. Subsequently, it calls amdgpu_userqueue_ensure_ev_fence(), which ends up waiting for the userq resume work. Meanwhile, the userq suspend worker has initiated the userq resume work (amdgpu_userqueue_resume_worker). This userq resume work attempts to validate the vm->done BO, leading to amdgpu_userqueue_validate_bos also attempting to take the reservation lock on the same write BO that is already locked by amdgpu_userq_signal_ioctl. As a result, the resume work becomes stalled, causing amdgpu_userqueue_ensure_ev_fence to remain in a waiting state. Call Trace: [ 242.836469] INFO: task gnome-shel:cs0:1288 blocked for more than 120 seconds. [ 242.836486] Tainted: G OE 6.12.0-rc2rebased-oct-24+ #4 [ 242.836491] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 242.836494] task:gnome-shel:cs0 state:D stack:0 pid:1288 tgid:1282 ppid:1180 flags:0x00000002 [ 242.836503] Call Trace: [ 242.836508] [ 242.836517] __schedule+0x3e0/0xb10 [ 242.836530] ? srso_return_thunk+0x5/0x5f [ 242.836541] schedule+0x31/0x120 [ 242.836546] schedule_timeout+0x150/0x160 [ 242.836551] ? srso_return_thunk+0x5/0x5f [ 242.836555] ? sysvec_call_function+0x69/0xd0 [ 242.836562] ? srso_return_thunk+0x5/0x5f [ 242.836567] ? preempt_count_add+0x7f/0xd0 [ 242.836577] __wait_for_common+0x91/0x180 [ 242.836582] ? __pfx_schedule_timeout+0x10/0x10 [ 242.836590] wait_for_completion+0x28/0x30 [ 242.836595] __flush_work+0x16c/0x290 [ 242.836602] ? __pfx_wq_barrier_func+0x10/0x10 [ 242.836611] flush_delayed_work+0x3a/0x60 [ 242.836621] amdgpu_userqueue_ensure_ev_fence+0x2d/0xb0 [amdgpu] [ 242.836966] amdgpu_userq_signal_ioctl+0x959/0xec0 [amdgpu] [ 242.837171] ?
__pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu] [ 242.837365] drm_ioctl_kernel+0xae/0x100 [drm] [ 242.837398] drm_ioctl+0x2a1/0x500 [drm] [ 242.837420] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu] [ 242.837622] ? srso_return_thunk+0x5/0x5f [ 242.837627] ? srso_return_thunk+0x5/0x5f [ 242.837630] ? _raw_spin_unlock_irqrestore+0x2b/0x50 [ 242.837635] amdgpu_drm_ioctl+0x4f/0x90 [amdgpu] [ 242.837811] __x64_sys_ioctl+0x99/0xd0 [ 242.837820] x64_sys_call+0x1209/0x20d0 [ 242.837825] do_syscall_64+0x51/0x120 [ 242.837830] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 242.837835] RIP: 0033:0x7f2f33f1a94f [ 242.837838] RSP: 002b:00007f2f24ffea30 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 242.837842] RAX: ffffffffffffffda RBX: 00007f2f24ffebd0 RCX: 00007f2f33f1a94f [ 242.837845] RDX: 00007f2f24ffebd0 RSI: 00000000c0306457 RDI: 000000000000000d [ 242.837847] RBP: 00007f2f24ffeab0 R08: 0000000000000000 R09: 0000000000000000 [ 242.837849] R10: 00007f2f24ffecd0 R11: 0000000000000246 R12: 00007f2f25000640 [ 242.837851] R13: 00000000c0306457 R14: 000000000000000d R15: 00007fff3b39c1e0 [ 242.837858] [ 242.837865] INFO: task Xwayland:cs0:1517 blocked for more than 120 seconds. [ 242.837869] Tainted: G OE 6.12.0-rc2rebased-oct-24+ #4 [ 242.837872] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 242.837874] task:Xwayland:cs0 state:D stack:0 pid:1517 tgid:1338 ppid:1282 flags:0x00004002 [ 242.837878] Call Trace: [ 242.837880] [ 242.837883] __schedule+0x3e0/0xb10 [ 242.837890] schedule+0x31/0x120 [ 242.837894] schedule_preempt_disabled+0x1c/0x30 [ 242.837897] __mutex_lock.constprop.0+0x386/0x6e0 [ 242.837902] ? srso_return_thunk+0x5/0x5f [ 242.837905] ? __timer_delete_sync+0x81/0xe0 [ 242.837911] __mutex_lock_slowpath+0x13/0x20 [ 242.837915] mutex_lock+0x3b/0x50 [ 242.837919] amdgpu_userqueue_ensure_ev_fence+0x35/0xb0 [amdgpu] [ 242.838138] amdgpu_userq_signal_ioctl+0x959/0xec0 [amdgpu] [ 242.838340] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu] [ 242.838531] drm_ioctl_kernel+0xae/0x100 [drm] [ 242.838559] drm_ioctl+0x2a1/0x500 [drm] [ 242.838580] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu] [ 242.838778] ? srso_return_thunk+0x5/0x5f [ 242.838783] ? srso_return_thunk+0x5/0x5f [ 242.838786] ? _raw_spin_unlock_irqrestore+0x2b/0x50 [ 242.838791] amdgpu_drm_ioctl+0x4f/0x90 [amdgpu] [ 242.838967] __x64_sys_ioctl+0x99/0xd0 [ 242.838972] x64_sys_call+0x1209/0x20d0 [ 242.838975] do_syscall_64+0x51/0x120 [ 242.838979] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 242.838982] RIP: 0033:0x7f9118b1a94f [ 242.838985] RSP: 002b:00007f910cdff760 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 242.838989] RAX: ffffffffffffffda RBX: 00007f910cdff910 RCX: 00007f9118b1a94f [ 242.838991] RDX: 00007f910cdff910 RSI: 00000000c0306457 RDI: 000000000000000c [ 242.838993] RBP: 00007f910cdff7e0 R08: 0000000000000000 R09: 0000000000000001 [ 242.838995] R10: 00007f910cdff9d4 R11: 0000000000000246 R12: 00007f910ce00640 [ 242.838997] R13: 00000000c0306457 R14: 000000000000000c R15: 00007fff9dd11d10 [ 242.839004] v2: Addressed review comments from Christian. v3/v4: Addressed review comments from Christian. - Move the drm_exec loop after userq fence create. - cleanup the newly created userq fence in case of error. v5 - Addressed review comments from Christian. - Create a new amdgpu_userq_fence_alloc() function for allocation. - Calling dma_fence_put for cleanup procedure. - make amdgpu_userq_fence_create() function static. - drm_exec_init is called after mutex_unlock.
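The net effect of v3-v5 is that the fence is allocated and created, and the eviction fence is ensured, before any BO reservations are taken, so amdgpu_userqueue_ensure_ev_fence() (which takes userq_mutex and may flush the resume worker) can no longer wait on work that needs the locks we hold. A condensed sketch of the resulting amdgpu_userq_signal_ioctl() ordering, with error handling trimmed (see the hunks below for the full version):

	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
	r = amdgpu_userq_fence_alloc(&userq_fence);

	/* takes userq_mutex and may flush the resume worker -- no BO locks held yet */
	amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);

	r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
	dma_fence_put(queue->last_fence);
	queue->last_fence = dma_fence_get(fence);
	mutex_unlock(&userq_mgr->userq_mutex);

	/* only now take the BO reservations; on failure the new fence is dropped */
	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, (num_read_bo_handles + num_write_bo_handles));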
Cc: Alex Deucher Cc: Christian König Cc: Shashank Sharma Reviewed-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 71 ++++++++++++------- .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 2 +- 3 files changed, 48 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 567a5ffa7765..a4953d668972 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -197,11 +197,18 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv) kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy); } -int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, - u64 seq, struct dma_fence **f) +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ +static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence) +{ + *userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC); + return *userq_fence ? 0 : -ENOMEM; +} + +static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, + struct amdgpu_userq_fence *userq_fence, + u64 seq, struct dma_fence **f) { struct amdgpu_userq_fence_driver *fence_drv; - struct amdgpu_userq_fence *userq_fence; struct dma_fence *fence; unsigned long flags; @@ -209,10 +216,6 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, if (!fence_drv) return -EINVAL; - userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC); - if (!userq_fence) - return -ENOMEM; - spin_lock_init(&userq_fence->lock); INIT_LIST_HEAD(&userq_fence->link); fence = &userq_fence->base; @@ -266,6 +269,7 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, return 0; } +#endif static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f) { @@ -383,6 +387,11 @@ map_error: return r; } +static void amdgpu_userq_fence_cleanup(struct dma_fence *fence) +{ + dma_fence_put(fence); +} + int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -392,6 +401,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, struct drm_gem_object **gobj_write = NULL; struct drm_gem_object **gobj_read = NULL; struct amdgpu_usermode_queue *queue; + struct amdgpu_userq_fence *userq_fence; struct drm_syncobj **syncobj = NULL; u32 *bo_handles_write, num_write_bo_handles; u32 *syncobj_handles, num_syncobj_handles; @@ -475,6 +485,29 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, goto put_gobj_write; } + r = amdgpu_userq_fence_read_wptr(queue, &wptr); + if (r) + goto put_gobj_write; + + r = amdgpu_userq_fence_alloc(&userq_fence); + if (r) + goto put_gobj_write; + + /* We are here means UQ is active, make sure the eviction fence is valid */ + amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); + + /* Create a new fence */ + r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence); + if (r) { + mutex_unlock(&userq_mgr->userq_mutex); + kmem_cache_free(amdgpu_userq_fence_slab, userq_fence); + goto put_gobj_write; + } + + dma_fence_put(queue->last_fence); + queue->last_fence = dma_fence_get(fence); + mutex_unlock(&userq_mgr->userq_mutex); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, (num_read_bo_handles + num_write_bo_handles)); @@ -482,31 +515,19 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, drm_exec_until_all_locked(&exec) { r = 
drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) + if (r) { + amdgpu_userq_fence_cleanup(fence); goto exec_fini; + } r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) + if (r) { + amdgpu_userq_fence_cleanup(fence); goto exec_fini; + } } - r = amdgpu_userq_fence_read_wptr(queue, &wptr); - if (r) - goto exec_fini; - - /* Create a new fence */ - r = amdgpu_userq_fence_create(queue, wptr, &fence); - if (r) - goto exec_fini; - - /* We are here means UQ is active, make sure the eviction fence is valid */ - amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); - - dma_fence_put(queue->last_fence); - queue->last_fence = dma_fence_get(fence); - mutex_unlock(&userq_mgr->userq_mutex); - for (i = 0; i < num_read_bo_handles; i++) { if (!gobj_read || !gobj_read[i]->resv) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index f1a90840ac1f..f0a91cc02880 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -61,8 +61,7 @@ struct amdgpu_userq_fence_driver { int amdgpu_userq_fence_slab_init(void); void amdgpu_userq_fence_slab_fini(void); -int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, - u64 seq, struct dma_fence **f); + void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 2eac83ba8bdf..f1d4e29772a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -455,7 +455,7 @@ amdgpu_userqueue_validate_bos(struct amdgpu_userq_mgr *uq_mgr) bool clear, unlock; int ret = 0; - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { ret = amdgpu_vm_lock_pd(vm, &exec, 2); drm_exec_retry_on_contention(&exec); -- 2.51.0 From 29adc5c2dd7a0e8dba9aac38a3116df38220dc4b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Feb 2025 16:08:02 -0500 Subject: [PATCH 12/16] drm/amdgpu/userq: fix hardcoded uq functions Use the IP type to look up the userq functions rather than hardcoding it. 
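For context, the table being indexed here is filled in per IP block at sw_init time -- the gfx_v11_0 hunk in patch 16 of this series registers the MES backend for both GFX and compute -- so indexing by queue->queue_type dispatches each queue to the backend that created it. A minimal sketch of the pattern (registration lines taken from that later patch):

	/* registration, from gfx_v11_0_sw_init() (patch 16) */
	adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
	adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;

	/* lookup, as done in the suspend/resume loops below */
	userq_funcs = adev->userq_funcs[queue->queue_type];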
Reviewed-by: Saleemkhan Jamadar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index f1d4e29772a5..0664e04828c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -415,11 +415,11 @@ amdgpu_userqueue_resume_all(struct amdgpu_userq_mgr *uq_mgr) int queue_id; int ret = 0; - userq_funcs = adev->userq_funcs[AMDGPU_HW_IP_GFX]; - /* Resume all the queues for this process */ - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + userq_funcs = adev->userq_funcs[queue->queue_type]; ret = userq_funcs->resume(uq_mgr, queue); + } if (ret) DRM_ERROR("Failed to resume all the queue\n"); @@ -570,11 +570,11 @@ amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr) int queue_id; int ret = 0; - userq_funcs = adev->userq_funcs[AMDGPU_HW_IP_GFX]; - /* Try to suspend all the queues in this process ctx */ - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + userq_funcs = adev->userq_funcs[queue->queue_type]; ret += userq_funcs->suspend(uq_mgr, queue); + } if (ret) DRM_ERROR("Couldn't suspend all the queues\n"); -- 2.51.0 From 2a060b3ae92ec1951a8a94bf64580ccb4fedf38c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Feb 2025 09:44:39 -0500 Subject: [PATCH 13/16] drm/amdgpu/userq: handle runtime pm Take a reference when we create a queue and drop it when we destroy the queue. We need to keep the device active while user queues are active. v2: squash in fix from Sunil v3: squash in fix from Prike Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 9a6a5553bbc3..b469b800119f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -26,6 +26,7 @@ #include "mes_userqueue.h" #include "amdgpu_userq_fence.h" #include "v11_structs.h" +#include #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE @@ -287,6 +288,12 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, queue->userq_prop = userq_props; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + dev_err(adev->dev, "pm_runtime_get_sync() failed for userqueue mqd create\n"); + goto deference_pm; + } + r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props); if (r) { DRM_ERROR("Failed to initialize MQD for userqueue\n"); @@ -321,6 +328,9 @@ free_ctx: free_mqd: amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); +deference_pm: + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); free_props: kfree(userq_props); @@ -330,14 +340,19 @@ free_props: static void mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) + struct amdgpu_usermode_queue *queue) { + struct amdgpu_device *adev = uq_mgr->adev; + if (queue->queue_active) mes_userq_unmap(uq_mgr, queue); amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); kfree(queue->userq_prop); amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); + + 
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); } static int mes_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, -- 2.51.0 From df85baa767ca39c0a28d56a5a02251844c122a09 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Feb 2025 14:14:35 -0500 Subject: [PATCH 14/16] drm/amdgpu: return an error in the userq IOCTL when DRM_AMDGPU_NAVI3X_USERQ=n I'd swear this was already fixed, but I guess the patch never landed. Add it now. Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 0664e04828c0..a78a3728f6b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -402,7 +402,7 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - return 0; + return -ENOTSUPP; } #endif -- 2.51.0 From c4f42c8d0b97c2f16a4fc90f77eb3c0315c4cf6b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Feb 2025 14:37:31 -0500 Subject: [PATCH 15/16] drm/amdgpu/Kconfig: fix wording of DRM_AMDGPU_NAVI3X_USERQ The feature is not navi3x specific at this point. Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 5bb44ea80164..b8f39377b24a 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -97,13 +97,13 @@ config DRM_AMDGPU_WERROR Only enable this if you are warning code for amdgpu.ko. config DRM_AMDGPU_NAVI3X_USERQ - bool "Enable Navi 3x gfx usermode queues" + bool "Enable amdgpu usermode queues" depends on DRM_AMDGPU depends on BROKEN default n help Choose this option to enable GFX usermode queue support for GFX/SDMA/Compute - workload submission. This feature is experimental and supported on Navi 3X only. + workload submission. This feature is experimental and supported on GFX11+. source "drivers/gpu/drm/amd/acp/Kconfig" source "drivers/gpu/drm/amd/display/Kconfig" -- 2.51.0 From f36e4876c8e1ea61c48f6048bfcfd540c7abef2f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Feb 2025 14:45:37 -0500 Subject: [PATCH 16/16] drm/amdgpu/gfx11: fix config guard s/CONFIG_DRM_AMD_USERQ_GFX/CONFIG_DRM_AMDGPU_NAVI3X_USERQ/ Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index d3b4018580c6..80af0b92d6b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1639,7 +1639,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; -#ifdef CONFIG_DRM_AMD_USERQ_GFX +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; #endif -- 2.51.0