From fc4a85c6b2cc1b0f4e682180165a4e629711701f Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam
Date: Mon, 10 Feb 2025 22:17:28 +0530
Subject: [PATCH 01/16] drm/amdgpu: Modify the seq64 VM cache policy
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

The seq64 VM cache policy should be set to UC (uncached) to match the
cache settings of the kernel-mapped memory used for the userqueue fence
address.

Signed-off-by: Arunpravin Paneer Selvam
Reviewed-by: Christian König
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
index 2de1a844282e..3939761be31c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
@@ -67,9 +67,9 @@ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev)
 int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		     struct amdgpu_bo_va **bo_va)
 {
+	u64 seq64_addr, va_flags;
 	struct amdgpu_bo *bo;
 	struct drm_exec exec;
-	u64 seq64_addr;
 	int r;
 
 	bo = adev->seq64.sbo;
@@ -93,8 +93,10 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	}
 
 	seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK;
+
+	va_flags = amdgpu_gem_va_map_flags(adev, AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC);
 	r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE,
-			     AMDGPU_PTE_READABLE);
+			     va_flags);
 	if (r) {
 		DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r);
 		amdgpu_vm_bo_del(adev, *bo_va);
-- 
2.51.0
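[The functional change above reduces to translating the requested GEM VA
flags, including the uncached memory type, into PTE flags once, and then
using those flags for the GPU VM mapping, so the GPU page-table
attributes agree with the uncached kernel CPU mapping of the same
memory. A condensed view of the resulting mapping call, with the
surrounding declarations and error handling assumed from the diff:

	seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK;

	/* AMDGPU_VM_MTYPE_UC makes the GPU-side mapping uncached,
	 * matching the CPU-side kernel mapping noted in the commit
	 * message. */
	va_flags = amdgpu_gem_va_map_flags(adev, AMDGPU_VM_PAGE_READABLE |
						 AMDGPU_VM_MTYPE_UC);
	r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0,
			     AMDGPU_VA_RESERVED_SEQ64_SIZE, va_flags);]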
From f15d4e92f7d35128d6416c473c2eab24188bd7e8 Mon Sep 17 00:00:00 2001
From: Arvind Yadav
Date: Mon, 27 Jan 2025 18:22:01 +0530
Subject: [PATCH 02/16] drm/amdgpu: Fix display freeze lockup error
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

A deadlock situation has arisen between the userq signal ioctl and the
eviction fence. In this scenario, the function amdgpu_userq_signal_ioctl()
has acquired a reservation lock on the read/write buffer object (BO)
through drm_exec. Subsequently, it calls amdgpu_userqueue_ensure_ev_fence(),
which waits for the userq resume work to finish.

Meanwhile, the userq suspend worker has initiated the userq resume work
(amdgpu_userqueue_resume_worker). This resume work attempts to validate
the vm->done BO, which leads to amdgpu_userqueue_validate_bos also
trying to take the reservation lock on the same write BO that is
already held by amdgpu_userq_signal_ioctl. As a result, the resume work
becomes stalled, causing amdgpu_userqueue_ensure_ev_fence to remain in
a waiting state.

Call Trace:
[  242.836469] INFO: task gnome-shel:cs0:1288 blocked for more than 120 seconds.
[  242.836486] Tainted: G OE 6.12.0-rc2rebased-oct-24+ #4
[  242.836491] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  242.836494] task:gnome-shel:cs0 state:D stack:0 pid:1288 tgid:1282 ppid:1180 flags:0x00000002
[  242.836503] Call Trace:
[  242.836508] <TASK>
[  242.836517] __schedule+0x3e0/0xb10
[  242.836530] ? srso_return_thunk+0x5/0x5f
[  242.836541] schedule+0x31/0x120
[  242.836546] schedule_timeout+0x150/0x160
[  242.836551] ? srso_return_thunk+0x5/0x5f
[  242.836555] ? sysvec_call_function+0x69/0xd0
[  242.836562] ? srso_return_thunk+0x5/0x5f
[  242.836567] ? preempt_count_add+0x7f/0xd0
[  242.836577] __wait_for_common+0x91/0x180
[  242.836582] ? __pfx_schedule_timeout+0x10/0x10
[  242.836590] wait_for_completion+0x28/0x30
[  242.836595] __flush_work+0x16c/0x290
[  242.836602] ? __pfx_wq_barrier_func+0x10/0x10
[  242.836611] flush_delayed_work+0x3a/0x60
[  242.836621] amdgpu_userqueue_ensure_ev_fence+0x2d/0xb0 [amdgpu]
[  242.836966] amdgpu_userq_signal_ioctl+0x959/0xec0 [amdgpu]
[  242.837171] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
[  242.837365] drm_ioctl_kernel+0xae/0x100 [drm]
[  242.837398] drm_ioctl+0x2a1/0x500 [drm]
[  242.837420] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
[  242.837622] ? srso_return_thunk+0x5/0x5f
[  242.837627] ? srso_return_thunk+0x5/0x5f
[  242.837630] ? _raw_spin_unlock_irqrestore+0x2b/0x50
[  242.837635] amdgpu_drm_ioctl+0x4f/0x90 [amdgpu]
[  242.837811] __x64_sys_ioctl+0x99/0xd0
[  242.837820] x64_sys_call+0x1209/0x20d0
[  242.837825] do_syscall_64+0x51/0x120
[  242.837830] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[  242.837835] RIP: 0033:0x7f2f33f1a94f
[  242.837838] RSP: 002b:00007f2f24ffea30 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  242.837842] RAX: ffffffffffffffda RBX: 00007f2f24ffebd0 RCX: 00007f2f33f1a94f
[  242.837845] RDX: 00007f2f24ffebd0 RSI: 00000000c0306457 RDI: 000000000000000d
[  242.837847] RBP: 00007f2f24ffeab0 R08: 0000000000000000 R09: 0000000000000000
[  242.837849] R10: 00007f2f24ffecd0 R11: 0000000000000246 R12: 00007f2f25000640
[  242.837851] R13: 00000000c0306457 R14: 000000000000000d R15: 00007fff3b39c1e0
[  242.837858] </TASK>
[  242.837865] INFO: task Xwayland:cs0:1517 blocked for more than 120 seconds.
[  242.837869] Tainted: G OE 6.12.0-rc2rebased-oct-24+ #4
[  242.837872] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  242.837874] task:Xwayland:cs0 state:D stack:0 pid:1517 tgid:1338 ppid:1282 flags:0x00004002
[  242.837878] Call Trace:
[  242.837880] <TASK>
[  242.837883] __schedule+0x3e0/0xb10
[  242.837890] schedule+0x31/0x120
[  242.837894] schedule_preempt_disabled+0x1c/0x30
[  242.837897] __mutex_lock.constprop.0+0x386/0x6e0
[  242.837902] ? srso_return_thunk+0x5/0x5f
[  242.837905] ? __timer_delete_sync+0x81/0xe0
[  242.837911] __mutex_lock_slowpath+0x13/0x20
[  242.837915] mutex_lock+0x3b/0x50
[  242.837919] amdgpu_userqueue_ensure_ev_fence+0x35/0xb0 [amdgpu]
[  242.838138] amdgpu_userq_signal_ioctl+0x959/0xec0 [amdgpu]
[  242.838340] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
[  242.838531] drm_ioctl_kernel+0xae/0x100 [drm]
[  242.838559] drm_ioctl+0x2a1/0x500 [drm]
[  242.838580] ? __pfx_amdgpu_userq_signal_ioctl+0x10/0x10 [amdgpu]
[  242.838778] ? srso_return_thunk+0x5/0x5f
[  242.838783] ? srso_return_thunk+0x5/0x5f
[  242.838786] ? _raw_spin_unlock_irqrestore+0x2b/0x50
[  242.838791] amdgpu_drm_ioctl+0x4f/0x90 [amdgpu]
[  242.838967] __x64_sys_ioctl+0x99/0xd0
[  242.838972] x64_sys_call+0x1209/0x20d0
[  242.838975] do_syscall_64+0x51/0x120
[  242.838979] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[  242.838982] RIP: 0033:0x7f9118b1a94f
[  242.838985] RSP: 002b:00007f910cdff760 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  242.838989] RAX: ffffffffffffffda RBX: 00007f910cdff910 RCX: 00007f9118b1a94f
[  242.838991] RDX: 00007f910cdff910 RSI: 00000000c0306457 RDI: 000000000000000c
[  242.838993] RBP: 00007f910cdff7e0 R08: 0000000000000000 R09: 0000000000000001
[  242.838995] R10: 00007f910cdff9d4 R11: 0000000000000246 R12: 00007f910ce00640
[  242.838997] R13: 00000000c0306457 R14: 000000000000000c R15: 00007fff9dd11d10
[  242.839004] </TASK>

v2: Addressed review comments from Christian.
v3/v4: Addressed review comments from Christian.
    - Move the drm_exec loop after userq fence create.
    - Clean up the newly created userq fence in case of error.
v5: Addressed review comments from Christian.
    - Create a new amdgpu_userq_fence_alloc() function for allocation.
    - Call dma_fence_put for the cleanup procedure.
    - Make amdgpu_userq_fence_create() static.
    - drm_exec_init is called after mutex_unlock.
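[In short, the fix reorders the signal path so that the wait on the
resume worker happens before any BO reservation is taken, breaking the
circular dependency described above. A condensed sketch of the
reordered flow, derived from the diff below (declarations and most
error handling omitted):

	/* The fence is allocated and created, and queue->last_fence
	 * published, while holding only userq_mutex (taken by
	 * amdgpu_userqueue_ensure_ev_fence()) -- no BO reservations yet. */
	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
	r = amdgpu_userq_fence_alloc(&userq_fence);
	amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
	r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
	dma_fence_put(queue->last_fence);
	queue->last_fence = dma_fence_get(fence);
	mutex_unlock(&userq_mgr->userq_mutex);

	/* Only now are the read/write BOs reserved; if reservation fails,
	 * the freshly created fence is dropped again via
	 * amdgpu_userq_fence_cleanup(), a dma_fence_put() wrapper. */
	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
		      num_read_bo_handles + num_write_bo_handles);]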
Cc: Alex Deucher
Cc: Christian König
Cc: Shashank Sharma
Reviewed-by: Christian König
Signed-off-by: Arvind Yadav
Signed-off-by: Alex Deucher
---
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 71 ++++++++++++-------
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |  2 +-
 3 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 567a5ffa7765..a4953d668972 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -197,11 +197,18 @@ void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
 	kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
 }
 
-int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
-			      u64 seq, struct dma_fence **f)
+#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
+static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
+{
+	*userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
+	return *userq_fence ? 0 : -ENOMEM;
+}
+
+static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
+				     struct amdgpu_userq_fence *userq_fence,
+				     u64 seq, struct dma_fence **f)
 {
 	struct amdgpu_userq_fence_driver *fence_drv;
-	struct amdgpu_userq_fence *userq_fence;
 	struct dma_fence *fence;
 	unsigned long flags;
 
@@ -209,10 +216,6 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
 	if (!fence_drv)
 		return -EINVAL;
 
-	userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
-	if (!userq_fence)
-		return -ENOMEM;
-
 	spin_lock_init(&userq_fence->lock);
 	INIT_LIST_HEAD(&userq_fence->link);
 	fence = &userq_fence->base;
@@ -266,6 +269,7 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
 	return 0;
 }
+#endif
 
 static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
 {
@@ -383,6 +387,11 @@ map_error:
 	return r;
 }
 
+static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
+{
+	dma_fence_put(fence);
+}
+
 int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *filp)
 {
@@ -392,6 +401,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
 	struct drm_gem_object **gobj_write = NULL;
 	struct drm_gem_object **gobj_read = NULL;
 	struct amdgpu_usermode_queue *queue;
+	struct amdgpu_userq_fence *userq_fence;
 	struct drm_syncobj **syncobj = NULL;
 	u32 *bo_handles_write, num_write_bo_handles;
 	u32 *syncobj_handles, num_syncobj_handles;
@@ -475,6 +485,29 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
 		goto put_gobj_write;
 	}
 
+	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
+	if (r)
+		goto put_gobj_write;
+
+	r = amdgpu_userq_fence_alloc(&userq_fence);
+	if (r)
+		goto put_gobj_write;
+
+	/* We are here means UQ is active, make sure the eviction fence is valid */
+	amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
+
+	/* Create a new fence */
+	r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
+	if (r) {
+		mutex_unlock(&userq_mgr->userq_mutex);
+		kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
+		goto 
put_gobj_write; + } + + dma_fence_put(queue->last_fence); + queue->last_fence = dma_fence_get(fence); + mutex_unlock(&userq_mgr->userq_mutex); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, (num_read_bo_handles + num_write_bo_handles)); @@ -482,31 +515,19 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data, drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) + if (r) { + amdgpu_userq_fence_cleanup(fence); goto exec_fini; + } r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1); drm_exec_retry_on_contention(&exec); - if (r) + if (r) { + amdgpu_userq_fence_cleanup(fence); goto exec_fini; + } } - r = amdgpu_userq_fence_read_wptr(queue, &wptr); - if (r) - goto exec_fini; - - /* Create a new fence */ - r = amdgpu_userq_fence_create(queue, wptr, &fence); - if (r) - goto exec_fini; - - /* We are here means UQ is active, make sure the eviction fence is valid */ - amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); - - dma_fence_put(queue->last_fence); - queue->last_fence = dma_fence_get(fence); - mutex_unlock(&userq_mgr->userq_mutex); - for (i = 0; i < num_read_bo_handles; i++) { if (!gobj_read || !gobj_read[i]->resv) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h index f1a90840ac1f..f0a91cc02880 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h @@ -61,8 +61,7 @@ struct amdgpu_userq_fence_driver { int amdgpu_userq_fence_slab_init(void); void amdgpu_userq_fence_slab_fini(void); -int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq, - u64 seq, struct dma_fence **f); + void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv); void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv); int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 2eac83ba8bdf..f1d4e29772a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -455,7 +455,7 @@ amdgpu_userqueue_validate_bos(struct amdgpu_userq_mgr *uq_mgr) bool clear, unlock; int ret = 0; - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { ret = amdgpu_vm_lock_pd(vm, &exec, 2); drm_exec_retry_on_contention(&exec); -- 2.51.0 From 29adc5c2dd7a0e8dba9aac38a3116df38220dc4b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Feb 2025 16:08:02 -0500 Subject: [PATCH 03/16] drm/amdgpu/userq: fix hardcoded uq functions Use the IP type to look up the userq functions rather than hardcoding it. 
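[This matters once a single process owns queues of more than one IP
type: with the per-queue lookup, each queue is suspended or resumed
through the function table registered for its own engine rather than
always the GFX table. Condensed from the resume hunk below (the suspend
loop is changed the same way):

	/* queue->queue_type is the queue's HW IP (GFX, compute, SDMA) */
	idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
		userq_funcs = adev->userq_funcs[queue->queue_type];
		ret = userq_funcs->resume(uq_mgr, queue);
	}]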
Reviewed-by: Saleemkhan Jamadar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index f1d4e29772a5..0664e04828c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -415,11 +415,11 @@ amdgpu_userqueue_resume_all(struct amdgpu_userq_mgr *uq_mgr) int queue_id; int ret = 0; - userq_funcs = adev->userq_funcs[AMDGPU_HW_IP_GFX]; - /* Resume all the queues for this process */ - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + userq_funcs = adev->userq_funcs[queue->queue_type]; ret = userq_funcs->resume(uq_mgr, queue); + } if (ret) DRM_ERROR("Failed to resume all the queue\n"); @@ -570,11 +570,11 @@ amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr) int queue_id; int ret = 0; - userq_funcs = adev->userq_funcs[AMDGPU_HW_IP_GFX]; - /* Try to suspend all the queues in this process ctx */ - idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) + idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { + userq_funcs = adev->userq_funcs[queue->queue_type]; ret += userq_funcs->suspend(uq_mgr, queue); + } if (ret) DRM_ERROR("Couldn't suspend all the queues\n"); -- 2.51.0 From 2a060b3ae92ec1951a8a94bf64580ccb4fedf38c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Feb 2025 09:44:39 -0500 Subject: [PATCH 04/16] drm/amdgpu/userq: handle runtime pm Take a reference when we create a queue and drop it when we destroy the queue. We need to keep the device active while user queues are active. v2: squash in fix from Sunil v3: squash in fix from Prike Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c index 9a6a5553bbc3..b469b800119f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c @@ -26,6 +26,7 @@ #include "mes_userqueue.h" #include "amdgpu_userq_fence.h" #include "v11_structs.h" +#include #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE @@ -287,6 +288,12 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr, queue->userq_prop = userq_props; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + dev_err(adev->dev, "pm_runtime_get_sync() failed for userqueue mqd create\n"); + goto deference_pm; + } + r = mqd_hw_default->init_mqd(adev, (void *)queue->mqd.cpu_ptr, userq_props); if (r) { DRM_ERROR("Failed to initialize MQD for userqueue\n"); @@ -321,6 +328,9 @@ free_ctx: free_mqd: amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); +deference_pm: + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); free_props: kfree(userq_props); @@ -330,14 +340,19 @@ free_props: static void mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, - struct amdgpu_usermode_queue *queue) + struct amdgpu_usermode_queue *queue) { + struct amdgpu_device *adev = uq_mgr->adev; + if (queue->queue_active) mes_userq_unmap(uq_mgr, queue); amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj); kfree(queue->userq_prop); amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd); + + 
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); } static int mes_userq_suspend(struct amdgpu_userq_mgr *uq_mgr, -- 2.51.0 From df85baa767ca39c0a28d56a5a02251844c122a09 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Feb 2025 14:14:35 -0500 Subject: [PATCH 05/16] drm/amdgpu: return an error in the userq IOCTL when DRM_AMDGPU_NAVI3X_USERQ=n I'd swear this was already fixed, but I guess the patch never landed. Add it now. Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index 0664e04828c0..a78a3728f6b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -402,7 +402,7 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - return 0; + return -ENOTSUPP; } #endif -- 2.51.0 From c4f42c8d0b97c2f16a4fc90f77eb3c0315c4cf6b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Feb 2025 14:37:31 -0500 Subject: [PATCH 06/16] drm/amdgpu/Kconfig: fix wording of DRM_AMDGPU_NAVI3X_USERQ The feature is not navi3x specific at this point. Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 5bb44ea80164..b8f39377b24a 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -97,13 +97,13 @@ config DRM_AMDGPU_WERROR Only enable this if you are warning code for amdgpu.ko. config DRM_AMDGPU_NAVI3X_USERQ - bool "Enable Navi 3x gfx usermode queues" + bool "Enable amdgpu usermode queues" depends on DRM_AMDGPU depends on BROKEN default n help Choose this option to enable GFX usermode queue support for GFX/SDMA/Compute - workload submission. This feature is experimental and supported on Navi 3X only. + workload submission. This feature is experimental and supported on GFX11+. 
source "drivers/gpu/drm/amd/acp/Kconfig" source "drivers/gpu/drm/amd/display/Kconfig" -- 2.51.0 From f36e4876c8e1ea61c48f6048bfcfd540c7abef2f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Feb 2025 14:45:37 -0500 Subject: [PATCH 07/16] drm/amdgpu/gfx11: fix config guard s/CONFIG_DRM_AMD_USERQ_GFX/CONFIG_DRM_AMDGPU_NAVI3X_USERQ/ Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index d3b4018580c6..80af0b92d6b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1639,7 +1639,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_mec = 1; adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; -#ifdef CONFIG_DRM_AMD_USERQ_GFX +#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; #endif -- 2.51.0 From 5ca4095960a8a8b7f24f02af6e5ce7b284e04559 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 28 Feb 2025 14:50:11 -0500 Subject: [PATCH 08/16] drm/amdgpu: add userq firmware version checks Currently disabled until the firmwares are officially released. Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 14 ++++++++++---- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 7 +++++-- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 4 +++- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 4 +++- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 80af0b92d6b1..f98c9c8253f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1623,8 +1623,11 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; - adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + /* add firmware version checks here */ + if (0) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } #endif break; case IP_VERSION(11, 0, 1): @@ -1640,8 +1643,11 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; - adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + /* add firmware version checks here */ + if (0) { + adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; + adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + } #endif break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 0e95c1cfeca6..58f17d95b2f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1420,8 +1420,11 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) adev->gfx.mec.num_pipe_per_mec = 2; adev->gfx.mec.num_queue_per_pipe = 4; #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ - adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; - adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; + /* add firmware version checks here */ + if (0) { + 
adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+		adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+	}
 #endif
 		break;
 	default:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 9bc3c7a35d18..3aa4fec4d9e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1381,7 +1381,9 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
 		DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
 
 #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
-	adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+	/* add firmware version checks here */
+	if (0)
+		adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
 #endif
 	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 3514089acd94..8e3aa4536e5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -1383,7 +1383,9 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
 		DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
 
 #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
-	adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+	/* add firmware version checks here */
+	if (0)
+		adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
 #endif
-- 
2.51.0

From 665de8c94792d095c9dbd0ef16b0532f546aa8f9 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Wed, 19 Feb 2025 16:46:52 -0500
Subject: [PATCH 09/16] drm/amdgpu/userq: remove BROKEN from config

This can be enabled now. We have the firmware checks in place.

Reviewed-by: Prike Liang
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index b8f39377b24a..7b95221d2f3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -99,7 +99,6 @@ config DRM_AMDGPU_WERROR
 config DRM_AMDGPU_NAVI3X_USERQ
 	bool "Enable amdgpu usermode queues"
 	depends on DRM_AMDGPU
-	depends on BROKEN
 	default n
 	help
 	  Choose this option to enable GFX usermode queue support for GFX/SDMA/Compute
-- 
2.51.0

From ecdb0b32e518a69a724d3cb7a6e5a4d2be2db8a1 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Fri, 28 Feb 2025 14:55:57 -0500
Subject: [PATCH 10/16] drm/amdgpu/userq: move the header to amdgpu directory

To align with other headers.

Reviewed-by: Prike Liang
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/{include => amdgpu}/amdgpu_userqueue.h | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename drivers/gpu/drm/amd/{include => amdgpu}/amdgpu_userqueue.h (100%)

diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h
similarity index 100%
rename from drivers/gpu/drm/amd/include/amdgpu_userqueue.h
rename to drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.h
-- 
2.51.0

From ad6c120f6888033b1eb198a7a15ed4c6355edf6d Mon Sep 17 00:00:00 2001
From: Arvind Yadav
Date: Tue, 18 Feb 2025 18:56:25 +0530
Subject: [PATCH 11/16] drm/amdgpu: fix the memleak caused by fence not
 released
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

A taint issue is encountered during the unloading of gpu_sched due to a
fence not being released/put. In this context, amdgpu_vm_clear_freed is
responsible for creating a job to update the page table (PT).
It allocates kmem_cache for drm_sched_fence and returns the finished
fence associated with job->base.s_fence. In the case of a usermode
queue, this finished fence is added to the timeline sync object through
amdgpu_gem_update_bo_mapping, which is used by user space to ensure
completion of the PT update.

[  508.900587] =============================================================================
[  508.900605] BUG drm_sched_fence (Tainted: G N): Objects remaining in drm_sched_fence on __kmem_cache_shutdown()
[  508.900617] -----------------------------------------------------------------------------
[  508.900627] Slab 0xffffe0cc04548780 objects=32 used=2 fp=0xffff8ea81521f000 flags=0x17ffffc0000240(workingset|head|node=0|zone=2|lastcpupid=0x1fffff)
[  508.900645] CPU: 3 UID: 0 PID: 2337 Comm: rmmod Tainted: G N 6.12.0+ #1
[  508.900651] Tainted: [N]=TEST
[  508.900653] Hardware name: Gigabyte Technology Co., Ltd. X570 AORUS ELITE/X570 AORUS ELITE, BIOS F34 06/10/2021
[  508.900656] Call Trace:
[  508.900659] <TASK>
[  508.900665] dump_stack_lvl+0x70/0x90
[  508.900674] dump_stack+0x14/0x20
[  508.900678] slab_err+0xcb/0x110
[  508.900687] ? srso_return_thunk+0x5/0x5f
[  508.900692] ? try_to_grab_pending+0xd3/0x1d0
[  508.900697] ? srso_return_thunk+0x5/0x5f
[  508.900701] ? mutex_lock+0x17/0x50
[  508.900708] __kmem_cache_shutdown+0x144/0x2d0
[  508.900713] ? flush_rcu_work+0x50/0x60
[  508.900719] kmem_cache_destroy+0x46/0x1f0
[  508.900728] drm_sched_fence_slab_fini+0x19/0x970 [gpu_sched]
[  508.900736] __do_sys_delete_module.constprop.0+0x184/0x320
[  508.900744] ? srso_return_thunk+0x5/0x5f
[  508.900747] ? debug_smp_processor_id+0x1b/0x30
[  508.900754] __x64_sys_delete_module+0x16/0x20
[  508.900758] x64_sys_call+0xdf/0x20d0
[  508.900763] do_syscall_64+0x51/0x120
[  508.900769] entry_SYSCALL_64_after_hwframe+0x76/0x7e

v2: Call dma_fence_put in amdgpu_gem_va_update_vm.
v3: Addressed review comments from Christian.
    - Calling amdgpu_gem_update_timeline_node before the switch.
    - Putting a dma_fence in case of error or !timeline_syncobj.
v4: Addressed review comments from Christian.
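[The fix reduces to a reference-ownership rule in amdgpu_gem_va_ioctl():
the fence returned by amdgpu_gem_va_update_vm() must either be handed
to the timeline syncobj or dropped, never silently forgotten. A
condensed sketch of the updated path, with the surrounding ioctl
context assumed from the diff below:

	fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
					args->operation);
	if (timeline_syncobj)
		/* The reference is consumed by the timeline chain. */
		amdgpu_gem_update_bo_mapping(filp, bo_va, args->operation,
					     args->vm_timeline_point, fence,
					     timeline_syncobj,
					     timeline_chain);
	else
		/* No timeline consumer: put the fence to avoid the leak. */
		dma_fence_put(fence);]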
Cc: Alex Deucher Cc: Christian König Cc: Shashank Sharma Cc: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Le Ma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 28 ++++++++++++++----------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b9b80b0b60ca..f03fc3cf4d50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -934,6 +934,14 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, bo_va = NULL; } + r = amdgpu_gem_update_timeline_node(filp, + args->vm_timeline_syncobj_out, + args->vm_timeline_point, + &timeline_syncobj, + &timeline_chain); + if (r) + goto error; + switch (args->operation) { case AMDGPU_VA_OP_MAP: va_flags = amdgpu_gem_va_map_flags(adev, args->flags); @@ -960,22 +968,18 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, break; } if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) { - - r = amdgpu_gem_update_timeline_node(filp, - args->vm_timeline_syncobj_out, - args->vm_timeline_point, - &timeline_syncobj, - &timeline_chain); - fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, args->operation); - if (!r) + if (timeline_syncobj) amdgpu_gem_update_bo_mapping(filp, bo_va, - args->operation, - args->vm_timeline_point, - fence, timeline_syncobj, - timeline_chain); + args->operation, + args->vm_timeline_point, + fence, timeline_syncobj, + timeline_chain); + else + dma_fence_put(fence); + } error: -- 2.51.0 From 158bfbc72c5d7675039df540b120ccdd37bca5f0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 16:31:57 -0500 Subject: [PATCH 12/16] drm/amdgpu: validate user queue parameters Make sure these are set properly to ensure compatibility if we ever update the IOCTL interface. Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c index a78a3728f6b8..a02614cbda36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c @@ -380,12 +380,26 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data, switch (args->in.op) { case AMDGPU_USERQ_OP_CREATE: + if (args->in._pad) + return -EINVAL; r = amdgpu_userqueue_create(filp, args); if (r) DRM_ERROR("Failed to create usermode queue\n"); break; case AMDGPU_USERQ_OP_FREE: + if (args->in.ip_type || + args->in.doorbell_handle || + args->in.doorbell_offset || + args->in._pad || + args->in.queue_va || + args->in.queue_size || + args->in.rptr_va || + args->in.wptr_va || + args->in.wptr_va || + args->in.mqd || + args->in.mqd_size) + return -EINVAL; r = amdgpu_userqueue_destroy(filp, args->in.queue_id); if (r) DRM_ERROR("Failed to destroy usermode queue\n"); -- 2.51.0 From cb17fff3a254c3beb02101c68b64066797ec9028 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 15:39:02 -0500 Subject: [PATCH 13/16] drm/amdgpu/mes: remove unused functions Leftover from the MES self tests that were removed previously. 
Reviewed-by: Mukul Joshi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 800 ------------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 41 -- 2 files changed, 841 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 503add938634..1d355b0fd4b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -39,42 +39,6 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) PAGE_SIZE); } -static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, - int ip_type, uint64_t *doorbell_index) -{ - unsigned int offset, found; - struct amdgpu_mes *mes = &adev->mes; - - if (ip_type == AMDGPU_RING_TYPE_SDMA) - offset = adev->doorbell_index.sdma_engine[0]; - else - offset = 0; - - found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset); - if (found >= mes->num_mes_dbs) { - DRM_WARN("No doorbell available\n"); - return -ENOSPC; - } - - set_bit(found, mes->doorbell_bitmap); - - /* Get the absolute doorbell index on BAR */ - *doorbell_index = mes->db_start_dw_offset + found * 2; - return 0; -} - -static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev, - uint32_t doorbell_index) -{ - unsigned int old, rel_index; - struct amdgpu_mes *mes = &adev->mes; - - /* Find the relative index of the doorbell in this object */ - rel_index = (doorbell_index - mes->db_start_dw_offset) / 2; - old = test_and_clear_bit(rel_index, mes->doorbell_bitmap); - WARN_ON(!old); -} - static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) { int i; @@ -237,244 +201,6 @@ void amdgpu_mes_fini(struct amdgpu_device *adev) mutex_destroy(&adev->mes.mutex_hidden); } -static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q) -{ - amdgpu_bo_free_kernel(&q->mqd_obj, - &q->mqd_gpu_addr, - &q->mqd_cpu_ptr); -} - -int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, - struct amdgpu_vm *vm) -{ - struct amdgpu_mes_process *process; - int r; - - /* allocate the mes process buffer */ - process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL); - if (!process) { - DRM_ERROR("no more memory to create mes process\n"); - return -ENOMEM; - } - - /* allocate the process context bo and map it */ - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &process->proc_ctx_bo, - &process->proc_ctx_gpu_addr, - &process->proc_ctx_cpu_ptr); - if (r) { - DRM_ERROR("failed to allocate process context bo\n"); - goto clean_up_memory; - } - memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. 
- */ - amdgpu_mes_lock(&adev->mes); - - /* add the mes process to idr list */ - r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1, - GFP_KERNEL); - if (r < 0) { - DRM_ERROR("failed to lock pasid=%d\n", pasid); - goto clean_up_ctx; - } - - INIT_LIST_HEAD(&process->gang_list); - process->vm = vm; - process->pasid = pasid; - process->process_quantum = adev->mes.default_process_quantum; - process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo); - - amdgpu_mes_unlock(&adev->mes); - return 0; - -clean_up_ctx: - amdgpu_mes_unlock(&adev->mes); - amdgpu_bo_free_kernel(&process->proc_ctx_bo, - &process->proc_ctx_gpu_addr, - &process->proc_ctx_cpu_ptr); -clean_up_memory: - kfree(process); - return r; -} - -void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid) -{ - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang, *tmp1; - struct amdgpu_mes_queue *queue, *tmp2; - struct mes_remove_queue_input queue_input; - unsigned long flags; - int r; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - process = idr_find(&adev->mes.pasid_idr, pasid); - if (!process) { - DRM_WARN("pasid %d doesn't exist\n", pasid); - amdgpu_mes_unlock(&adev->mes); - return; - } - - /* Remove all queues from hardware */ - list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { - list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - idr_remove(&adev->mes.queue_id_idr, queue->queue_id); - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - - queue_input.doorbell_offset = queue->doorbell_off; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - - r = adev->mes.funcs->remove_hw_queue(&adev->mes, - &queue_input); - if (r) - DRM_WARN("failed to remove hardware queue\n"); - } - - idr_remove(&adev->mes.gang_id_idr, gang->gang_id); - } - - idr_remove(&adev->mes.pasid_idr, pasid); - amdgpu_mes_unlock(&adev->mes); - - /* free all memory allocated by the process */ - list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { - /* free all queues in the gang */ - list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { - amdgpu_mes_queue_free_mqd(queue); - list_del(&queue->list); - kfree(queue); - } - amdgpu_bo_free_kernel(&gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); - list_del(&gang->list); - kfree(gang); - - } - amdgpu_bo_free_kernel(&process->proc_ctx_bo, - &process->proc_ctx_gpu_addr, - &process->proc_ctx_cpu_ptr); - kfree(process); -} - -int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, - struct amdgpu_mes_gang_properties *gprops, - int *gang_id) -{ - struct amdgpu_mes_process *process; - struct amdgpu_mes_gang *gang; - int r; - - /* allocate the mes gang buffer */ - gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL); - if (!gang) { - return -ENOMEM; - } - - /* allocate the gang context bo and map it to cpu space */ - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); - if (r) { - DRM_ERROR("failed to allocate process context bo\n"); - goto clean_up_mem; - } - memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. 
- */ - amdgpu_mes_lock(&adev->mes); - - process = idr_find(&adev->mes.pasid_idr, pasid); - if (!process) { - DRM_ERROR("pasid %d doesn't exist\n", pasid); - r = -EINVAL; - goto clean_up_ctx; - } - - /* add the mes gang to idr list */ - r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0, - GFP_KERNEL); - if (r < 0) { - DRM_ERROR("failed to allocate idr for gang\n"); - goto clean_up_ctx; - } - - gang->gang_id = r; - *gang_id = r; - - INIT_LIST_HEAD(&gang->queue_list); - gang->process = process; - gang->priority = gprops->priority; - gang->gang_quantum = gprops->gang_quantum ? - gprops->gang_quantum : adev->mes.default_gang_quantum; - gang->global_priority_level = gprops->global_priority_level; - gang->inprocess_gang_priority = gprops->inprocess_gang_priority; - list_add_tail(&gang->list, &process->gang_list); - - amdgpu_mes_unlock(&adev->mes); - return 0; - -clean_up_ctx: - amdgpu_mes_unlock(&adev->mes); - amdgpu_bo_free_kernel(&gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); -clean_up_mem: - kfree(gang); - return r; -} - -int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id) -{ - struct amdgpu_mes_gang *gang; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - amdgpu_mes_unlock(&adev->mes); - return -EINVAL; - } - - if (!list_empty(&gang->queue_list)) { - DRM_ERROR("queue list is not empty\n"); - amdgpu_mes_unlock(&adev->mes); - return -EBUSY; - } - - idr_remove(&adev->mes.gang_id_idr, gang->gang_id); - list_del(&gang->list); - amdgpu_mes_unlock(&adev->mes); - - amdgpu_bo_free_kernel(&gang->gang_ctx_bo, - &gang->gang_ctx_gpu_addr, - &gang->gang_ctx_cpu_ptr); - - kfree(gang); - - return 0; -} - int amdgpu_mes_suspend(struct amdgpu_device *adev) { struct mes_suspend_gang_input input; @@ -523,241 +249,6 @@ int amdgpu_mes_resume(struct amdgpu_device *adev) return r; } -static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, - struct amdgpu_mes_queue *q, - struct amdgpu_mes_queue_properties *p) -{ - struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; - u32 mqd_size = mqd_mgr->mqd_size; - int r; - - r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - &q->mqd_obj, - &q->mqd_gpu_addr, &q->mqd_cpu_ptr); - if (r) { - dev_warn(adev->dev, "failed to create queue mqd bo (%d)", r); - return r; - } - memset(q->mqd_cpu_ptr, 0, mqd_size); - - r = amdgpu_bo_reserve(q->mqd_obj, false); - if (unlikely(r != 0)) - goto clean_up; - - return 0; - -clean_up: - amdgpu_bo_free_kernel(&q->mqd_obj, - &q->mqd_gpu_addr, - &q->mqd_cpu_ptr); - return r; -} - -static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, - struct amdgpu_mes_queue *q, - struct amdgpu_mes_queue_properties *p) -{ - struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; - struct amdgpu_mqd_prop mqd_prop = {0}; - - mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr; - mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr; - mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr; - mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr; - mqd_prop.queue_size = p->queue_size; - mqd_prop.use_doorbell = true; - mqd_prop.doorbell_index = p->doorbell_off; - mqd_prop.eop_gpu_addr = p->eop_gpu_addr; - mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority; - mqd_prop.hqd_queue_priority = p->hqd_queue_priority; - mqd_prop.hqd_active = false; - - if (p->queue_type == AMDGPU_RING_TYPE_GFX || - p->queue_type == 
AMDGPU_RING_TYPE_COMPUTE) { - mutex_lock(&adev->srbm_mutex); - amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0); - } - - mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); - - if (p->queue_type == AMDGPU_RING_TYPE_GFX || - p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { - amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - } - - amdgpu_bo_unreserve(q->mqd_obj); -} - -int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, - struct amdgpu_mes_queue_properties *qprops, - int *queue_id) -{ - struct amdgpu_mes_queue *queue; - struct amdgpu_mes_gang *gang; - struct mes_add_queue_input queue_input; - unsigned long flags; - int r; - - memset(&queue_input, 0, sizeof(struct mes_add_queue_input)); - - /* allocate the mes queue buffer */ - queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL); - if (!queue) { - DRM_ERROR("Failed to allocate memory for queue\n"); - return -ENOMEM; - } - - /* Allocate the queue mqd */ - r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops); - if (r) - goto clean_up_memory; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - r = -EINVAL; - goto clean_up_mqd; - } - - /* add the mes gang to idr list */ - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0, - GFP_ATOMIC); - if (r < 0) { - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - goto clean_up_mqd; - } - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - *queue_id = queue->queue_id = r; - - /* allocate a doorbell index for the queue */ - r = amdgpu_mes_kernel_doorbell_get(adev, - qprops->queue_type, - &qprops->doorbell_off); - if (r) - goto clean_up_queue_id; - - /* initialize the queue mqd */ - amdgpu_mes_queue_init_mqd(adev, queue, qprops); - - /* add hw queue to mes */ - queue_input.process_id = gang->process->pasid; - - queue_input.page_table_base_addr = - adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr - - adev->gmc.vram_start; - - queue_input.process_va_start = 0; - queue_input.process_va_end = adev->vm_manager.max_pfn - 1; - queue_input.process_quantum = gang->process->process_quantum; - queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr; - queue_input.gang_quantum = gang->gang_quantum; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - queue_input.inprocess_gang_priority = gang->inprocess_gang_priority; - queue_input.gang_global_priority_level = gang->global_priority_level; - queue_input.doorbell_offset = qprops->doorbell_off; - queue_input.mqd_addr = queue->mqd_gpu_addr; - queue_input.wptr_addr = qprops->wptr_gpu_addr; - queue_input.wptr_mc_addr = qprops->wptr_mc_addr; - queue_input.queue_type = qprops->queue_type; - queue_input.paging = qprops->paging; - queue_input.is_kfd_process = 0; - - r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); - if (r) { - DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n", - qprops->doorbell_off); - goto clean_up_doorbell; - } - - DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, " - "queue type=%d, doorbell=0x%llx\n", - gang->process->pasid, gang_id, qprops->queue_type, - qprops->doorbell_off); - - queue->ring = qprops->ring; - queue->doorbell_off = qprops->doorbell_off; - queue->wptr_gpu_addr = qprops->wptr_gpu_addr; - queue->queue_type = qprops->queue_type; - 
queue->paging = qprops->paging; - queue->gang = gang; - queue->ring->mqd_ptr = queue->mqd_cpu_ptr; - list_add_tail(&queue->list, &gang->queue_list); - - amdgpu_mes_unlock(&adev->mes); - return 0; - -clean_up_doorbell: - amdgpu_mes_kernel_doorbell_free(adev, qprops->doorbell_off); -clean_up_queue_id: - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - idr_remove(&adev->mes.queue_id_idr, queue->queue_id); - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); -clean_up_mqd: - amdgpu_mes_unlock(&adev->mes); - amdgpu_mes_queue_free_mqd(queue); -clean_up_memory: - kfree(queue); - return r; -} - -int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) -{ - unsigned long flags; - struct amdgpu_mes_queue *queue; - struct amdgpu_mes_gang *gang; - struct mes_remove_queue_input queue_input; - int r; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. - */ - amdgpu_mes_lock(&adev->mes); - - /* remove the mes gang from idr list */ - spin_lock_irqsave(&adev->mes.queue_id_lock, flags); - - queue = idr_find(&adev->mes.queue_id_idr, queue_id); - if (!queue) { - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - amdgpu_mes_unlock(&adev->mes); - DRM_ERROR("queue id %d doesn't exist\n", queue_id); - return -EINVAL; - } - - idr_remove(&adev->mes.queue_id_idr, queue_id); - spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); - - DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n", - queue->doorbell_off); - - gang = queue->gang; - queue_input.doorbell_offset = queue->doorbell_off; - queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; - - r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); - if (r) - DRM_ERROR("failed to remove hardware queue, queue id = %d\n", - queue_id); - - list_del(&queue->list); - amdgpu_mes_kernel_doorbell_free(adev, queue->doorbell_off); - amdgpu_mes_unlock(&adev->mes); - - amdgpu_mes_queue_free_mqd(queue); - kfree(queue); - return 0; -} - int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id) { unsigned long flags; @@ -1071,25 +562,6 @@ int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, return r; } -static void -amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_mes_queue_properties *props) -{ - props->queue_type = ring->funcs->type; - props->hqd_base_gpu_addr = ring->gpu_addr; - props->rptr_gpu_addr = ring->rptr_gpu_addr; - props->wptr_gpu_addr = ring->wptr_gpu_addr; - props->wptr_mc_addr = - ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs; - props->queue_size = ring->ring_size; - props->eop_gpu_addr = ring->eop_gpu_addr; - props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; - props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; - props->paging = false; - props->ring = ring; -} - #define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng) \ do { \ if (id_offs < AMDGPU_MES_CTX_MAX_OFFS) \ @@ -1126,284 +598,12 @@ int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs) return -EINVAL; } -int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, - int queue_type, int idx, - struct amdgpu_mes_ctx_data *ctx_data, - struct amdgpu_ring **out) -{ - struct amdgpu_ring *ring; - struct amdgpu_mes_gang *gang; - struct amdgpu_mes_queue_properties qprops = {0}; - int r, queue_id, pasid; - - /* - * Avoid taking any other locks under MES lock to avoid circular - * lock dependencies. 
- */ - amdgpu_mes_lock(&adev->mes); - gang = idr_find(&adev->mes.gang_id_idr, gang_id); - if (!gang) { - DRM_ERROR("gang id %d doesn't exist\n", gang_id); - amdgpu_mes_unlock(&adev->mes); - return -EINVAL; - } - pasid = gang->process->pasid; - - ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL); - if (!ring) { - amdgpu_mes_unlock(&adev->mes); - return -ENOMEM; - } - - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->is_mes_queue = true; - ring->mes_ctx = ctx_data; - ring->idx = idx; - ring->no_scheduler = true; - - if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { - int offset = offsetof(struct amdgpu_mes_ctx_meta_data, - compute[ring->idx].mec_hpd); - ring->eop_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } - - switch (queue_type) { - case AMDGPU_RING_TYPE_GFX: - ring->funcs = adev->gfx.gfx_ring[0].funcs; - ring->me = adev->gfx.gfx_ring[0].me; - ring->pipe = adev->gfx.gfx_ring[0].pipe; - break; - case AMDGPU_RING_TYPE_COMPUTE: - ring->funcs = adev->gfx.compute_ring[0].funcs; - ring->me = adev->gfx.compute_ring[0].me; - ring->pipe = adev->gfx.compute_ring[0].pipe; - break; - case AMDGPU_RING_TYPE_SDMA: - ring->funcs = adev->sdma.instance[0].ring.funcs; - break; - default: - BUG(); - } - - r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, - AMDGPU_RING_PRIO_DEFAULT, NULL); - if (r) { - amdgpu_mes_unlock(&adev->mes); - goto clean_up_memory; - } - - amdgpu_mes_ring_to_queue_props(adev, ring, &qprops); - - dma_fence_wait(gang->process->vm->last_update, false); - dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false); - amdgpu_mes_unlock(&adev->mes); - - r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id); - if (r) - goto clean_up_ring; - - ring->hw_queue_id = queue_id; - ring->doorbell_index = qprops.doorbell_off; - - if (queue_type == AMDGPU_RING_TYPE_GFX) - sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id); - else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) - sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id, - queue_id); - else if (queue_type == AMDGPU_RING_TYPE_SDMA) - sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id, - queue_id); - else - BUG(); - - *out = ring; - return 0; - -clean_up_ring: - amdgpu_ring_fini(ring); -clean_up_memory: - kfree(ring); - return r; -} - -void amdgpu_mes_remove_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring) - return; - - amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id); - del_timer_sync(&ring->fence_drv.fallback_timer); - amdgpu_ring_fini(ring); - kfree(ring); -} - uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, enum amdgpu_mes_priority_level prio) { return adev->mes.aggregated_doorbells[prio]; } -int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data) -{ - int r; - - r = amdgpu_bo_create_kernel(adev, - sizeof(struct amdgpu_mes_ctx_meta_data), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &ctx_data->meta_data_obj, - &ctx_data->meta_data_mc_addr, - &ctx_data->meta_data_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create CTX bo failed\n", r); - return r; - } - - if (!ctx_data->meta_data_obj) - return -ENOMEM; - - memset(ctx_data->meta_data_ptr, 0, - sizeof(struct amdgpu_mes_ctx_meta_data)); - - return 0; -} - -void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data) -{ - if (ctx_data->meta_data_obj) - amdgpu_bo_free_kernel(&ctx_data->meta_data_obj, - &ctx_data->meta_data_mc_addr, - &ctx_data->meta_data_ptr); -} - -int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, - 
struct amdgpu_vm *vm, - struct amdgpu_mes_ctx_data *ctx_data) -{ - struct amdgpu_bo_va *bo_va; - struct amdgpu_sync sync; - struct drm_exec exec; - int r; - - amdgpu_sync_create(&sync); - - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - r = drm_exec_lock_obj(&exec, - &ctx_data->meta_data_obj->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto error_fini_exec; - - r = amdgpu_vm_lock_pd(vm, &exec, 0); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto error_fini_exec; - } - - bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj); - if (!bo_va) { - DRM_ERROR("failed to create bo_va for meta data BO\n"); - r = -ENOMEM; - goto error_fini_exec; - } - - r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0, - sizeof(struct amdgpu_mes_ctx_meta_data), - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); - - if (r) { - DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r); - goto error_del_bo_va; - } - - r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) { - DRM_ERROR("failed to do vm_bo_update on meta data\n"); - goto error_del_bo_va; - } - amdgpu_sync_fence(&sync, bo_va->last_pt_update, GFP_KERNEL); - - r = amdgpu_vm_update_pdes(adev, vm, false); - if (r) { - DRM_ERROR("failed to update pdes on meta data\n"); - goto error_del_bo_va; - } - amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL); - - amdgpu_sync_wait(&sync, false); - drm_exec_fini(&exec); - - amdgpu_sync_free(&sync); - ctx_data->meta_data_va = bo_va; - return 0; - -error_del_bo_va: - amdgpu_vm_bo_del(adev, bo_va); - -error_fini_exec: - drm_exec_fini(&exec); - amdgpu_sync_free(&sync); - return r; -} - -int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data) -{ - struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va; - struct amdgpu_bo *bo = ctx_data->meta_data_obj; - struct amdgpu_vm *vm = bo_va->base.vm; - struct dma_fence *fence; - struct drm_exec exec; - long r; - - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - r = drm_exec_lock_obj(&exec, - &ctx_data->meta_data_obj->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto out_unlock; - - r = amdgpu_vm_lock_pd(vm, &exec, 0); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto out_unlock; - } - - amdgpu_vm_bo_del(adev, bo_va); - if (!amdgpu_vm_ready(vm)) - goto out_unlock; - - r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, - &fence); - if (r) - goto out_unlock; - if (fence) { - amdgpu_bo_fence(bo, fence, true); - fence = NULL; - } - - r = amdgpu_vm_clear_freed(adev, vm, &fence); - if (r || !fence) - goto out_unlock; - - dma_fence_wait(fence, false); - amdgpu_bo_fence(bo, fence, true); - dma_fence_put(fence); - -out_unlock: - if (unlikely(r < 0)) - dev_err(adev->dev, "failed to clear page tables (%ld)\n", r); - drm_exec_fini(&exec); - - return r; -} - int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) { const struct mes_firmware_header_v1_0 *mes_hdr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 78362a838212..be3390d26301 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -149,19 +149,6 @@ struct amdgpu_mes { }; -struct amdgpu_mes_process { - int pasid; - struct amdgpu_vm *vm; - uint64_t pd_gpu_addr; - struct amdgpu_bo *proc_ctx_bo; - uint64_t proc_ctx_gpu_addr; - void *proc_ctx_cpu_ptr; - uint64_t process_quantum; - struct list_head 
gang_list; - uint32_t doorbell_index; - struct mutex doorbell_lock; -}; - struct amdgpu_mes_gang { int gang_id; int priority; @@ -404,22 +391,9 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe); int amdgpu_mes_init(struct amdgpu_device *adev); void amdgpu_mes_fini(struct amdgpu_device *adev); -int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, - struct amdgpu_vm *vm); -void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid); - -int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, - struct amdgpu_mes_gang_properties *gprops, - int *gang_id); -int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id); - int amdgpu_mes_suspend(struct amdgpu_device *adev); int amdgpu_mes_resume(struct amdgpu_device *adev); -int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, - struct amdgpu_mes_queue_properties *qprops, - int *queue_id); -int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id); int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id); int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type, int me_id, int pipe_id, int queue_id, int vmid); @@ -451,25 +425,10 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, bool trap_en); int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, uint64_t process_context_addr); -int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, - int queue_type, int idx, - struct amdgpu_mes_ctx_data *ctx_data, - struct amdgpu_ring **out); -void amdgpu_mes_remove_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring); uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, enum amdgpu_mes_priority_level prio); -int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data); -void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data); -int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_mes_ctx_data *ctx_data); -int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, - struct amdgpu_mes_ctx_data *ctx_data); - int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev); /* -- 2.51.0 From 4220d2c7c41b8ea3fd154dc5678b05575653cba0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Mar 2025 13:47:33 -0400 Subject: [PATCH 14/16] drm/amdgpu: remove is_mes_queue flag MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This was leftover from MES bring up when we had MES user queues in the kernel. It's no longer used so remove it. 
Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 112 ++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 14 -- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 22 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 84 +++--------- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 143 ++++--------------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 67 ++------- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 - drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 4 - drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 4 - drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 166 +++++++---------------- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 83 ++++-------- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 83 ++++-------- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 164 +++++++--------------- 15 files changed, 259 insertions(+), 697 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2ea98ec60220..802743efa3b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -163,12 +163,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, init_shadow = false; } - if (!ring->sched.ready && !ring->is_mes_queue) { + if (!ring->sched.ready) { dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name); return -EINVAL; } - if (vm && !job->vmid && !ring->is_mes_queue) { + if (vm && !job->vmid) { dev_err(adev->dev, "VM IB without ID\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 59acdbfe28d8..426834806fbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -187,14 +187,10 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) } #define amdgpu_ring_get_gpu_addr(ring, offset) \ - (ring->is_mes_queue ? \ - (ring->mes_ctx->meta_data_gpu_addr + offset) : \ - (ring->adev->wb.gpu_addr + offset * 4)) + (ring->adev->wb.gpu_addr + offset * 4) #define amdgpu_ring_get_cpu_addr(ring, offset) \ - (ring->is_mes_queue ? \ - (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \ - (&ring->adev->wb.wb[offset])) + (&ring->adev->wb.wb[offset]) /** * amdgpu_ring_init - init driver ring struct. 
@@ -243,57 +239,42 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->sched_score = sched_score; ring->vmid_wait = dma_fence_get_stub(); - if (!ring->is_mes_queue) { - ring->idx = adev->num_rings++; - adev->rings[ring->idx] = ring; - } + ring->idx = adev->num_rings++; + adev->rings[ring->idx] = ring; r = amdgpu_fence_driver_init_ring(ring); if (r) return r; } - if (ring->is_mes_queue) { - ring->rptr_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_RPTR_OFFS); - ring->wptr_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_WPTR_OFFS); - ring->fence_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_FENCE_OFFS); - ring->trail_fence_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_TRAIL_FENCE_OFFS); - ring->cond_exe_offs = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_COND_EXE_OFFS); - } else { - r = amdgpu_device_wb_get(adev, &ring->rptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->rptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->wptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->wptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->fence_offs); - if (r) { - dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->fence_offs); + if (r) { + dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs); - if (r) { - dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs); + if (r) { + dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); - if (r) { - dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); + if (r) { + dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); + return r; } ring->fence_gpu_addr = @@ -353,18 +334,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->cached_rptr = 0; /* Allocate ring buffer */ - if (ring->is_mes_queue) { - int offset = 0; - - BUG_ON(ring->ring_size > PAGE_SIZE*4); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_RING_OFFS); - ring->gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ring->ring = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - amdgpu_ring_clear_ring(ring); - - } else if (ring->ring_obj == NULL) { + if (ring->ring_obj == NULL) { r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &ring->ring_obj, @@ -401,32 +371,26 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) { /* Not to finish a ring which is not initialized */ - if (!(ring->adev) || - (!ring->is_mes_queue && !(ring->adev->rings[ring->idx]))) + if (!(ring->adev) || !(ring->adev->rings[ring->idx])) return; ring->sched.ready = false; - if (!ring->is_mes_queue) { - amdgpu_device_wb_free(ring->adev, ring->rptr_offs); - amdgpu_device_wb_free(ring->adev, ring->wptr_offs); + amdgpu_device_wb_free(ring->adev, ring->rptr_offs); + 
amdgpu_device_wb_free(ring->adev, ring->wptr_offs); - amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs); - amdgpu_device_wb_free(ring->adev, ring->fence_offs); + amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs); + amdgpu_device_wb_free(ring->adev, ring->fence_offs); - amdgpu_bo_free_kernel(&ring->ring_obj, - &ring->gpu_addr, - (void **)&ring->ring); - } else { - kfree(ring->fence_drv.fences); - } + amdgpu_bo_free_kernel(&ring->ring_obj, + &ring->gpu_addr, + (void **)&ring->ring); dma_fence_put(ring->vmid_wait); ring->vmid_wait = NULL; ring->me = 0; - if (!ring->is_mes_queue) - ring->adev->rings[ring->idx] = NULL; + ring->adev->rings[ring->idx] = NULL; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index bb2b66385223..615c3d5c5a8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -301,11 +301,6 @@ struct amdgpu_ring { unsigned num_hw_submission; atomic_t *sched_score; - /* used for mes */ - bool is_mes_queue; - uint32_t hw_queue_id; - struct amdgpu_mes_ctx_data *mes_ctx; - bool is_sw_ring; unsigned int entry_index; /* store the cached rptr to restore after reset */ @@ -435,15 +430,6 @@ static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring, ring->ring[offset] = cur - offset; } -#define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \ - (ring->is_mes_queue && ring->mes_ctx ? \ - (ring->mes_ctx->meta_data_gpu_addr + offset) : 0) - -#define amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset) \ - (ring->is_mes_queue && ring->mes_ctx ? \ - (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \ - NULL) - int amdgpu_ring_test_helper(struct amdgpu_ring *ring); void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 807da4595893..c3c6f03190c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -76,22 +76,14 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp) return 0; - if (ring->is_mes_queue) { - uint32_t offset = 0; + r = amdgpu_sdma_get_index_from_ring(ring, &index); - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - sdma[ring->idx].sdma_meta_data); - csa_mc_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - r = amdgpu_sdma_get_index_from_ring(ring, &index); - - if (r || index > 31) - csa_mc_addr = 0; - else - csa_mc_addr = amdgpu_csa_vaddr(adev) + - AMDGPU_CSA_SDMA_OFFSET + - index * AMDGPU_CSA_SDMA_SIZE; - } + if (r || index > 31) + csa_mc_addr = 0; + else + csa_mc_addr = amdgpu_csa_vaddr(adev) + + AMDGPU_CSA_SDMA_OFFSET + + index * AMDGPU_CSA_SDMA_SIZE; return csa_mc_addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ce52b4d75e94..aa65d64fb15c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -817,7 +817,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid); - if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && + if (ring->funcs->emit_gds_switch && gds_switch_needed) { amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, job->gds_size, job->gws_base, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 
f98c9c8253f8..23f4d097fa8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -612,33 +612,18 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t padding, offset; - - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - padding = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); - *cpu_ptr = cpu_to_le32(0xCAFEDEAD); - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) - return r; + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - cpu_ptr = &adev->wb.wb[index]; + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err1; - } + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err1; } ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); @@ -665,12 +650,10 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - if (!ring->is_mes_queue) - amdgpu_ib_free(&ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -5757,10 +5740,6 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); } - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x400000; - amdgpu_ring_write(ring, header); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -5780,10 +5759,6 @@ static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x40000000; - /* Currently, there is a high possibility to get wave ID mismatch * between ME and GDS, leading to a hw deadlock, because ME generates * different wave IDs than the GDS expects. This situation happens @@ -5841,8 +5816,7 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); - amdgpu_ring_write(ring, ring->is_mes_queue ? 
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); + amdgpu_ring_write(ring, 0); } static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -5870,10 +5844,7 @@ static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - if (ring->is_mes_queue) - gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); - else - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { @@ -6102,28 +6073,13 @@ static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v10_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - offset = offsetof(struct v10_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + offset = offsetof(struct v10_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 58f17d95b2f5..1c7022103c63 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -506,33 +506,18 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t padding, offset; - - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - padding = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding); - *cpu_ptr = cpu_to_le32(0xCAFEDEAD); - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) - return r; + r = amdgpu_device_wb_get(adev, &index); + if (r) + return r; - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); - cpu_ptr = &adev->wb.wb[index]; + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); - goto err1; - } + r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); + goto err1; } ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); @@ -559,12 
+544,10 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - if (!ring->is_mes_queue) - amdgpu_ib_free(&ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -4252,45 +4235,17 @@ static u64 gfx_v12_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v12_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always being used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - WREG32_SOC15(GC, 0, regCP_RB0_WPTR, - lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, - upper_32_bits(ring->wptr)); - } + WREG32_SOC15(GC, 0, regCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); } } @@ -4315,42 +4270,14 @@ static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - 
BUG(); /* only DOORBELL method supported on gfx12 now */ - } + BUG(); /* only DOORBELL method supported on gfx12 now */ } } @@ -4397,10 +4324,6 @@ static void gfx_v12_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x400000; - amdgpu_ring_write(ring, header); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -4420,10 +4343,6 @@ static void gfx_v12_0_ring_emit_ib_compute(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - if (ring->is_mes_queue) - /* inherit vmid from mqd */ - control |= 0x40000000; - amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ amdgpu_ring_write(ring, @@ -4463,8 +4382,7 @@ static void gfx_v12_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, lower_32_bits(seq)); amdgpu_ring_write(ring, upper_32_bits(seq)); - amdgpu_ring_write(ring, ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); + amdgpu_ring_write(ring, 0); } static void gfx_v12_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) @@ -4492,10 +4410,7 @@ static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v12_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - if (ring->is_mes_queue) - gfx_v12_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); - else - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); /* compute doesn't have PFP */ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index c6125abddbb9..e62c77b3934a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5466,16 +5466,8 @@ static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, payload_size = sizeof(struct v9_ce_ib_state); - if (ring->is_mes_queue) { - payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); - } else { - payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; - } + payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); @@ -5498,16 +5490,8 @@ static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, payload_size = sizeof(struct v9_de_ib_state); - if (ring->is_mes_queue) { - payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); - } else { - payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; - } + payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status = IB_COMPLETION_STATUS_PREEMPTED; @@ -5697,19 
+5681,9 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) cnt = (sizeof(ce_payload) >> 2) + 4 - 2; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - } else { - offset = offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - } + offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | @@ -5795,28 +5769,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bo void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v9_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - offset = offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); if (usegds) { de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 95d894a231fc..8ae4c031162b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -428,10 +428,6 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index ad099f136f84..5c91d4445418 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -393,10 +393,6 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index 05c026d0b0d9..6e15cff6d548 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -413,10 +413,6 @@ static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid struct amdgpu_device *adev = ring->adev; uint32_t reg; - /* MES fw manages IH_VMID_x_LUT updating */ - if (ring->is_mes_queue) - return; - if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 0dce59f4f6e2..e1348b6d9c6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -369,67 +369,36 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - AMDGPU_MES_PRIORITY_LEVEL_NORMAL); - + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - *wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, - ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "mmSDMA%i_GFX_RB_WPTR == 0x%08x " - "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, - ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, - ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, 
sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } @@ -575,11 +544,9 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -1046,33 +1013,22 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1085,10 +1041,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -1100,8 +1053,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1124,38 +1076,24 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = 
amdgpu_ib_get(adev, NULL, 256, - AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, + AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | @@ -1183,10 +1121,7 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1197,8 +1132,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 2b39a03ff0c1..964f12afac9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -394,11 +394,9 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if ((flags & AMDGPU_FENCE_FLAG_INT)) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -903,33 +901,22 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -942,10 +929,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -957,8 +941,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -981,37 +964,23 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - 
ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | @@ -1039,10 +1008,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1053,8 +1019,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 3aa4fec4d9e4..00dd7bfff01a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -377,11 +377,9 @@ static void sdma_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? 
- (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -921,33 +919,22 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -960,10 +947,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -975,8 +959,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -999,37 +982,23 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | @@ -1057,10 +1026,7 @@ static int sdma_v6_0_ring_test_ib(struct 
amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1071,8 +1037,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 8e3aa4536e5a..99744728f5cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -205,66 +205,39 @@ static uint64_t sdma_v7_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v7_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - *wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "regSDMA%i_GFX_RB_WPTR == 0x%08x " - "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, - ring->me, - regSDMA0_QUEUE0_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, - ring->me, - regSDMA0_QUEUE0_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "regSDMA%i_GFX_RB_WPTR == 0x%08x " + "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, + ring->me, + 
regSDMA0_QUEUE0_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, + ring->me, + regSDMA0_QUEUE0_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } @@ -408,11 +381,9 @@ static void sdma_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } if (flags & AMDGPU_FENCE_FLAG_INT) { - uint32_t ctx = ring->is_mes_queue ? - (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0; /* generate an interrupt */ amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP)); - amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } } @@ -965,33 +936,22 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) int r; u32 tmp; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); - return r; - } - - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; } + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); + r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1004,10 +964,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -1019,8 +976,7 @@ static int sdma_v7_0_ring_test_ring(struct amdgpu_ring *ring) if (i >= adev->usec_timeout) r = -ETIMEDOUT; - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1043,37 +999,23 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; u32 tmp = 0; u64 gpu_addr; - volatile uint32_t *cpu_ptr = NULL; tmp = 0xCAFEDEAD; memset(&ib, 0, sizeof(ib)); - if (ring->is_mes_queue) { - uint32_t offset = 0; - offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS); - ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = amdgpu_mes_ctx_get_offs(ring, - AMDGPU_MES_CTX_PADDING_OFFS); - gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - *cpu_ptr = tmp; - } else { - r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); - return r; - } + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } - gpu_addr = adev->wb.gpu_addr + (index * 4); - adev->wb.wb[index] = cpu_to_le32(tmp); + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(adev, NULL, 256, 
AMDGPU_IB_POOL_DIRECT, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err0; - } + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; } ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | @@ -1101,10 +1043,7 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto err1; } - if (ring->is_mes_queue) - tmp = le32_to_cpu(*cpu_ptr); - else - tmp = le32_to_cpu(adev->wb.wb[index]); + tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) r = 0; @@ -1115,8 +1054,7 @@ err1: amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: - if (!ring->is_mes_queue) - amdgpu_device_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } -- 2.51.0 From 9e2bbba1d516b52c2ebc9875144f544025f9d5ef Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 12:31:46 -0500 Subject: [PATCH 15/16] drm/amdgpu/mes: centralize gfx_hqd mask management Move it to amdgpu_mes to align with the compute and sdma hqd masks. No functional change. v2: rebase on new changes v3: misc optimizations Reviewed-by: Prike Liang Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 21 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 16 +++------------- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 15 +++------------ 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 1d355b0fd4b6..016af4e9c35f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -108,6 +108,27 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.vmid_mask_mmhub = 0xffffff00; adev->mes.vmid_mask_gfxhub = 0xffffff00; + for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) { + if (i >= adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me) + break; + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= + IP_VERSION(12, 0, 0)) + /* + * GFX V12 has only one GFX pipe, but 8 queues in it. + * GFX pipe 0 queue 0 is being used by Kernel queue. + * Set GFX pipe 0 queue 1-7 for MES scheduling + * mask = 1111 1110b + */ + adev->mes.gfx_hqd_mask[i] = 0xFE; + else + /* + * GFX pipe 0 queue 0 is being used by Kernel queue. + * Set GFX pipe 0 queue 1 for MES scheduling + * mask = 10b + */ + adev->mes.gfx_hqd_mask[i] = 0x2; + } + for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { if (i >= (adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec)) break; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index ccc19a40f03d..06b51867c9aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -669,18 +669,6 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, offsetof(union MESAPI__MISC, api_status)); } -static void mes_v11_0_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt) -{ - /* - * GFX pipe 0 queue 0 is being used by Kernel queue. - * Set GFX pipe 0 queue 1 for MES scheduling - * mask = 10b - * GFX pipe 1 can't be used for MES due to HW limitation. 
- */ - pkt->gfx_hqd_mask[0] = 0x2; - pkt->gfx_hqd_mask[1] = 0; -} - static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) { int i; @@ -705,7 +693,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.compute_hqd_mask[i] = mes->compute_hqd_mask[i]; - mes_v11_0_set_gfx_hqd_mask(&mes_set_hw_res_pkt); + for (i = 0; i < MAX_GFX_PIPES; i++) + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; for (i = 0; i < MAX_SDMA_PIPES; i++) mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 801928555eff..8892858cfd9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -694,17 +694,6 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } -static void mes_v12_0_set_gfx_hqd_mask(union MESAPI_SET_HW_RESOURCES *pkt) -{ - /* - * GFX V12 has only one GFX pipe, but 8 queues in it. - * GFX pipe 0 queue 0 is being used by Kernel queue. - * Set GFX pipe 0 queue 1-7 for MES scheduling - * mask = 1111 1110b - */ - pkt->gfx_hqd_mask[0] = 0xFE; -} - static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) { int i; @@ -727,7 +716,9 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_pkt.compute_hqd_mask[i] = mes->compute_hqd_mask[i]; - mes_v12_0_set_gfx_hqd_mask(&mes_set_hw_res_pkt); + for (i = 0; i < MAX_GFX_PIPES; i++) + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; for (i = 0; i < MAX_SDMA_PIPES; i++) mes_set_hw_res_pkt.sdma_hqd_mask[i] = -- 2.51.0 From b6f190e6236af268e504d8ba62ab89b8ba5a1674 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Mar 2025 10:18:58 -0400 Subject: [PATCH 16/16] drm/amdgpu/mes: warn on unexpected pipe numbers Warn if the number of pipes exceeds what the MES supports. Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 016af4e9c35f..c52071841226 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -90,7 +90,7 @@ static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) int amdgpu_mes_init(struct amdgpu_device *adev) { - int i, r; + int i, r, num_pipes; adev->mes.adev = adev; @@ -108,8 +108,13 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.vmid_mask_mmhub = 0xffffff00; adev->mes.vmid_mask_gfxhub = 0xffffff00; + num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me; + if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES) + dev_warn(adev->dev, "more gfx pipes than supported by MES! (%d vs %d)\n", + num_pipes, AMDGPU_MES_MAX_GFX_PIPES); + for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) { - if (i >= adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me) + if (i >= num_pipes) break; if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) @@ -129,14 +134,24 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.gfx_hqd_mask[i] = 0x2; } + num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec; + if (num_pipes > AMDGPU_MES_MAX_COMPUTE_PIPES) + dev_warn(adev->dev, "more compute pipes than supported by MES! 
(%d vs %d)\n", + num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES); + for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { - if (i >= (adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec)) + if (i >= num_pipes) break; adev->mes.compute_hqd_mask[i] = 0xc; } + num_pipes = adev->sdma.num_instances; + if (num_pipes > AMDGPU_MES_MAX_SDMA_PIPES) + dev_warn(adev->dev, "more SDMA pipes than supported by MES! (%d vs %d)\n", + num_pipes, AMDGPU_MES_MAX_SDMA_PIPES); + for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { - if (i >= adev->sdma.num_instances) + if (i >= num_pipes) break; adev->mes.sdma_hqd_mask[i] = 0xfc; } -- 2.51.0
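The last two patches converge on a single idiom for the per-pipe mask
arrays: size the array by the MES maximum, warn when the hardware reports
more pipes than MES can schedule, and fill only the supported prefix. A
standalone sketch of that idiom; the names here (MES_MAX_PIPES,
init_hqd_masks) are hypothetical stand-ins, not driver symbols:

#include <stdio.h>

#define MES_MAX_PIPES 4	/* stand-in for AMDGPU_MES_MAX_*_PIPES */

static unsigned int hqd_mask[MES_MAX_PIPES];

static void init_hqd_masks(int num_pipes, unsigned int mask)
{
	int i;

	/* warn, but keep going: only the first MES_MAX_PIPES entries exist */
	if (num_pipes > MES_MAX_PIPES)
		fprintf(stderr, "more pipes than supported by MES! (%d vs %d)\n",
			num_pipes, MES_MAX_PIPES);

	for (i = 0; i < MES_MAX_PIPES; i++) {
		if (i >= num_pipes)
			break;
		hqd_mask[i] = mask;
	}
}

int main(void)
{
	int i;

	init_hqd_masks(6, 0xc);	/* warns, then fills all 4 supported slots */

	for (i = 0; i < MES_MAX_PIPES; i++)
		printf("pipe %d: hqd_mask = 0x%x\n", i, hqd_mask[i]);

	return 0;
}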