From 7845438718411b0e6e354f77a10a7b8b51b01852 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 10 Feb 2025 16:16:01 -0500 Subject: [PATCH 01/16] drm/amdgpu/mes: Add cleaner shader fence address handling in MES for GFX12 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This commit introduces enhancements to the handling of the cleaner shader fence in the AMDGPU MES driver: - The MES (Microcode Execution Scheduler) now sends a PM4 packet to the KIQ (Kernel Interface Queue) to request the cleaner shader, ensuring that requests are handled in a controlled manner and avoiding the race conditions. - The CP (Compute Processor) firmware has been updated to use a private bus for accessing specific registers, avoiding unnecessary operations that could lead to issues in VF (Virtual Function) mode. - The cleaner shader fence memory address is now set correctly in the `mes_set_hw_res_pkt` structure, allowing for proper synchronization of the cleaner shader execution. Cc: Christian König Cc: Srinivasan Shanmugam Suggested-by: Shaoyun Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 901e924e69ad..4b8b4e8b9c40 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -678,6 +678,9 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) { + unsigned int alloc_size = AMDGPU_GPU_PAGE_SIZE; + int ret = 0; + struct amdgpu_device *adev = mes->adev; union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; memset(&mes_set_hw_res_1_pkt, 0, sizeof(mes_set_hw_res_1_pkt)); @@ -687,6 +690,19 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 0xa; + ret = amdgpu_bo_create_kernel(adev, alloc_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &mes->resource_1, + &mes->resource_1_gpu_addr, + &mes->resource_1_addr); + if (ret) { + dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", ret); + return ret; + } + + mes_set_hw_res_1_pkt.cleaner_shader_fence_mc_addr = + mes->resource_1_gpu_addr; + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); @@ -1761,6 +1777,12 @@ failure: static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block) { + struct amdgpu_device *adev = ip_block->adev; + + if (adev->enable_uni_mes) + amdgpu_bo_free_kernel(&adev->mes.resource_1, + &adev->mes.resource_1_gpu_addr, + &adev->mes.resource_1_addr); return 0; } -- 2.51.0 From 59af05d6a391575c5fe48e8ae693ff9cc5c682c9 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Tue, 11 Feb 2025 16:39:52 +0800 Subject: [PATCH 02/16] drm/amdgpu: Enable ACA by default for psp v13_0_12 Enable ACA by default for psp v13_0_12. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 44d13a60588d..3c3312bbfee8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3760,8 +3760,9 @@ init_ras_enabled_flag: adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 : adev->ras_hw_enabled & amdgpu_ras_mask; - /* aca is disabled by default */ - adev->aca.is_enabled = false; + /* aca is disabled by default except for psp v13_0_12 */ + adev->aca.is_enabled = + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12)); /* bad page feature is not applicable to specific app platform */ if (adev->gmc.is_app_apu && -- 2.51.0 From b0bebbe4ea2a25937d341fa1f2ab2cd8ce339cad Mon Sep 17 00:00:00 2001 From: Saleemkhan Jamadar Date: Tue, 3 Dec 2024 17:27:44 +0530 Subject: [PATCH 03/16] drm/amdgpu/umsch: remove vpe test from umsch MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit current test is more intrusive for user queue test Signed-off-by: Saleemkhan Jamadar Suggested-by: Christian Koenig Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 459 +------------------ 1 file changed, 1 insertion(+), 458 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index dde15c6a96e1..2cfa2447d13e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -32,463 +32,6 @@ #include "amdgpu_umsch_mm.h" #include "umsch_mm_v4_0.h" -struct umsch_mm_test_ctx_data { - uint8_t process_csa[PAGE_SIZE]; - uint8_t vpe_ctx_csa[PAGE_SIZE]; - uint8_t vcn_ctx_csa[PAGE_SIZE]; -}; - -struct umsch_mm_test_mqd_data { - uint8_t vpe_mqd[PAGE_SIZE]; - uint8_t vcn_mqd[PAGE_SIZE]; -}; - -struct umsch_mm_test_ring_data { - uint8_t vpe_ring[PAGE_SIZE]; - uint8_t vpe_ib[PAGE_SIZE]; - uint8_t vcn_ring[PAGE_SIZE]; - uint8_t vcn_ib[PAGE_SIZE]; -}; - -struct umsch_mm_test_queue_info { - uint64_t mqd_addr; - uint64_t csa_addr; - uint32_t doorbell_offset_0; - uint32_t doorbell_offset_1; - enum UMSCH_SWIP_ENGINE_TYPE engine; -}; - -struct umsch_mm_test { - struct amdgpu_bo *ctx_data_obj; - uint64_t ctx_data_gpu_addr; - uint32_t *ctx_data_cpu_addr; - - struct amdgpu_bo *mqd_data_obj; - uint64_t mqd_data_gpu_addr; - uint32_t *mqd_data_cpu_addr; - - struct amdgpu_bo *ring_data_obj; - uint64_t ring_data_gpu_addr; - uint32_t *ring_data_cpu_addr; - - - struct amdgpu_vm *vm; - struct amdgpu_bo_va *bo_va; - uint32_t pasid; - uint32_t vm_cntx_cntl; - uint32_t num_queues; -}; - -static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va, - uint64_t addr, uint32_t size) -{ - struct amdgpu_sync sync; - struct drm_exec exec; - int r; - - amdgpu_sync_create(&sync); - - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - r = drm_exec_lock_obj(&exec, &bo->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto error_fini_exec; - - r = amdgpu_vm_lock_pd(vm, &exec, 0); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto error_fini_exec; - } - - *bo_va = amdgpu_vm_bo_add(adev, vm, bo); - if (!*bo_va) { - r = -ENOMEM; - goto error_fini_exec; - } - - r = amdgpu_vm_bo_map(adev, *bo_va, addr, 0, size, - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); - - if (r) - goto error_del_bo_va; - - - r = amdgpu_vm_bo_update(adev, *bo_va, false); - if (r) - goto error_del_bo_va; - - amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update); - - r = amdgpu_vm_update_pdes(adev, vm, false); - if (r) - goto error_del_bo_va; - - amdgpu_sync_fence(&sync, vm->last_update); - - amdgpu_sync_wait(&sync, false); - drm_exec_fini(&exec); - - amdgpu_sync_free(&sync); - - return 0; - -error_del_bo_va: - amdgpu_vm_bo_del(adev, *bo_va); - amdgpu_sync_free(&sync); - -error_fini_exec: - drm_exec_fini(&exec); - amdgpu_sync_free(&sync); - return r; -} - -static int unmap_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va, - uint64_t addr) -{ - struct drm_exec exec; - long r; - - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - r = drm_exec_lock_obj(&exec, &bo->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto out_unlock; - - r = amdgpu_vm_lock_pd(vm, &exec, 0); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto out_unlock; - } - - - r = amdgpu_vm_bo_unmap(adev, bo_va, addr); - if (r) - goto out_unlock; - - amdgpu_vm_bo_del(adev, bo_va); - -out_unlock: - drm_exec_fini(&exec); - - return r; -} - -static void setup_vpe_queue(struct amdgpu_device *adev, - struct umsch_mm_test *test, - struct umsch_mm_test_queue_info *qinfo) -{ - struct MQD_INFO *mqd = (struct MQD_INFO *)test->mqd_data_cpu_addr; - uint64_t ring_gpu_addr = test->ring_data_gpu_addr; - - mqd->rb_base_lo = (ring_gpu_addr >> 8); - mqd->rb_base_hi = (ring_gpu_addr >> 40); - mqd->rb_size = PAGE_SIZE / 4; - mqd->wptr_val = 0; - mqd->rptr_val = 0; - mqd->unmapped = 1; - - if (adev->vpe.collaborate_mode) - memcpy(++mqd, test->mqd_data_cpu_addr, sizeof(struct MQD_INFO)); - - qinfo->mqd_addr = test->mqd_data_gpu_addr; - qinfo->csa_addr = test->ctx_data_gpu_addr + - offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa); - qinfo->doorbell_offset_0 = 0; - qinfo->doorbell_offset_1 = 0; -} - -static void setup_vcn_queue(struct amdgpu_device *adev, - struct umsch_mm_test *test, - struct umsch_mm_test_queue_info *qinfo) -{ -} - -static int add_test_queue(struct amdgpu_device *adev, - struct umsch_mm_test *test, - struct umsch_mm_test_queue_info *qinfo) -{ - struct umsch_mm_add_queue_input queue_input = {}; - int r; - - queue_input.process_id = test->pasid; - queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(test->vm->root.bo); - - queue_input.process_va_start = 0; - queue_input.process_va_end = (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; - - queue_input.process_quantum = 100000; /* 10ms */ - queue_input.process_csa_addr = test->ctx_data_gpu_addr + - offsetof(struct umsch_mm_test_ctx_data, process_csa); - - queue_input.context_quantum = 10000; /* 1ms */ - queue_input.context_csa_addr = qinfo->csa_addr; - - queue_input.inprocess_context_priority = CONTEXT_PRIORITY_LEVEL_NORMAL; - queue_input.context_global_priority_level = CONTEXT_PRIORITY_LEVEL_NORMAL; - queue_input.doorbell_offset_0 = qinfo->doorbell_offset_0; - queue_input.doorbell_offset_1 = qinfo->doorbell_offset_1; - - queue_input.engine_type = qinfo->engine; - queue_input.mqd_addr = qinfo->mqd_addr; - queue_input.vm_context_cntl = test->vm_cntx_cntl; - - amdgpu_umsch_mm_lock(&adev->umsch_mm); - r = adev->umsch_mm.funcs->add_queue(&adev->umsch_mm, &queue_input); - amdgpu_umsch_mm_unlock(&adev->umsch_mm); - if (r) - return r; - - return 0; -} - -static int remove_test_queue(struct amdgpu_device *adev, - struct umsch_mm_test *test, - struct umsch_mm_test_queue_info *qinfo) -{ - struct umsch_mm_remove_queue_input queue_input = {}; - int r; - - queue_input.doorbell_offset_0 = qinfo->doorbell_offset_0; - queue_input.doorbell_offset_1 = qinfo->doorbell_offset_1; - queue_input.context_csa_addr = qinfo->csa_addr; - - amdgpu_umsch_mm_lock(&adev->umsch_mm); - r = adev->umsch_mm.funcs->remove_queue(&adev->umsch_mm, &queue_input); - amdgpu_umsch_mm_unlock(&adev->umsch_mm); - if (r) - return r; - - return 0; -} - -static int submit_vpe_queue(struct amdgpu_device *adev, struct umsch_mm_test *test) -{ - struct MQD_INFO *mqd = (struct MQD_INFO *)test->mqd_data_cpu_addr; - uint32_t *ring = test->ring_data_cpu_addr + - offsetof(struct umsch_mm_test_ring_data, vpe_ring) / 4; - uint32_t *ib = test->ring_data_cpu_addr + - offsetof(struct umsch_mm_test_ring_data, vpe_ib) / 4; - uint64_t ib_gpu_addr = test->ring_data_gpu_addr + - offsetof(struct umsch_mm_test_ring_data, vpe_ib); - uint32_t *fence = ib + 2048 / 4; - uint64_t fence_gpu_addr = ib_gpu_addr + 2048; - const uint32_t test_pattern = 0xdeadbeef; - int i; - - ib[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0); - ib[1] = lower_32_bits(fence_gpu_addr); - ib[2] = upper_32_bits(fence_gpu_addr); - ib[3] = test_pattern; - - ring[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_INDIRECT, 0); - ring[1] = (ib_gpu_addr & 0xffffffe0); - ring[2] = upper_32_bits(ib_gpu_addr); - ring[3] = 4; - ring[4] = 0; - ring[5] = 0; - - mqd->wptr_val = (6 << 2); - if (adev->vpe.collaborate_mode) - (++mqd)->wptr_val = (6 << 2); - - WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); - - for (i = 0; i < adev->usec_timeout; i++) { - if (*fence == test_pattern) - return 0; - udelay(1); - } - - dev_err(adev->dev, "vpe queue submission timeout\n"); - - return -ETIMEDOUT; -} - -static int submit_vcn_queue(struct amdgpu_device *adev, struct umsch_mm_test *test) -{ - return 0; -} - -static int setup_umsch_mm_test(struct amdgpu_device *adev, - struct umsch_mm_test *test) -{ - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; - int r; - - test->vm_cntx_cntl = hub->vm_cntx_cntl; - - test->vm = kzalloc(sizeof(*test->vm), GFP_KERNEL); - if (!test->vm) { - r = -ENOMEM; - return r; - } - - r = amdgpu_vm_init(adev, test->vm, -1); - if (r) - goto error_free_vm; - - r = amdgpu_pasid_alloc(16); - if (r < 0) - goto error_fini_vm; - test->pasid = r; - - r = amdgpu_bo_create_kernel(adev, sizeof(struct umsch_mm_test_ctx_data), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &test->ctx_data_obj, - &test->ctx_data_gpu_addr, - (void **)&test->ctx_data_cpu_addr); - if (r) - goto error_free_pasid; - - memset(test->ctx_data_cpu_addr, 0, sizeof(struct umsch_mm_test_ctx_data)); - - r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &test->mqd_data_obj, - &test->mqd_data_gpu_addr, - (void **)&test->mqd_data_cpu_addr); - if (r) - goto error_free_ctx_data_obj; - - memset(test->mqd_data_cpu_addr, 0, PAGE_SIZE); - - r = amdgpu_bo_create_kernel(adev, sizeof(struct umsch_mm_test_ring_data), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &test->ring_data_obj, - NULL, - (void **)&test->ring_data_cpu_addr); - if (r) - goto error_free_mqd_data_obj; - - memset(test->ring_data_cpu_addr, 0, sizeof(struct umsch_mm_test_ring_data)); - - test->ring_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM; - r = map_ring_data(adev, test->vm, test->ring_data_obj, &test->bo_va, - test->ring_data_gpu_addr, sizeof(struct umsch_mm_test_ring_data)); - if (r) - goto error_free_ring_data_obj; - - return 0; - -error_free_ring_data_obj: - amdgpu_bo_free_kernel(&test->ring_data_obj, NULL, - (void **)&test->ring_data_cpu_addr); -error_free_mqd_data_obj: - amdgpu_bo_free_kernel(&test->mqd_data_obj, &test->mqd_data_gpu_addr, - (void **)&test->mqd_data_cpu_addr); -error_free_ctx_data_obj: - amdgpu_bo_free_kernel(&test->ctx_data_obj, &test->ctx_data_gpu_addr, - (void **)&test->ctx_data_cpu_addr); -error_free_pasid: - amdgpu_pasid_free(test->pasid); -error_fini_vm: - amdgpu_vm_fini(adev, test->vm); -error_free_vm: - kfree(test->vm); - - return r; -} - -static void cleanup_umsch_mm_test(struct amdgpu_device *adev, - struct umsch_mm_test *test) -{ - unmap_ring_data(adev, test->vm, test->ring_data_obj, - test->bo_va, test->ring_data_gpu_addr); - amdgpu_bo_free_kernel(&test->mqd_data_obj, &test->mqd_data_gpu_addr, - (void **)&test->mqd_data_cpu_addr); - amdgpu_bo_free_kernel(&test->ring_data_obj, NULL, - (void **)&test->ring_data_cpu_addr); - amdgpu_bo_free_kernel(&test->ctx_data_obj, &test->ctx_data_gpu_addr, - (void **)&test->ctx_data_cpu_addr); - amdgpu_pasid_free(test->pasid); - amdgpu_vm_fini(adev, test->vm); - kfree(test->vm); -} - -static int setup_test_queues(struct amdgpu_device *adev, - struct umsch_mm_test *test, - struct umsch_mm_test_queue_info *qinfo) -{ - int i, r; - - for (i = 0; i < test->num_queues; i++) { - if (qinfo[i].engine == UMSCH_SWIP_ENGINE_TYPE_VPE) - setup_vpe_queue(adev, test, &qinfo[i]); - else - setup_vcn_queue(adev, test, &qinfo[i]); - - r = add_test_queue(adev, test, &qinfo[i]); - if (r) - return r; - } - - return 0; -} - -static int submit_test_queues(struct amdgpu_device *adev, - struct umsch_mm_test *test, - struct umsch_mm_test_queue_info *qinfo) -{ - int i, r; - - for (i = 0; i < test->num_queues; i++) { - if (qinfo[i].engine == UMSCH_SWIP_ENGINE_TYPE_VPE) - r = submit_vpe_queue(adev, test); - else - r = submit_vcn_queue(adev, test); - if (r) - return r; - } - - return 0; -} - -static void cleanup_test_queues(struct amdgpu_device *adev, - struct umsch_mm_test *test, - struct umsch_mm_test_queue_info *qinfo) -{ - int i; - - for (i = 0; i < test->num_queues; i++) - remove_test_queue(adev, test, &qinfo[i]); -} - -static int umsch_mm_test(struct amdgpu_device *adev) -{ - struct umsch_mm_test_queue_info qinfo[] = { - { .engine = UMSCH_SWIP_ENGINE_TYPE_VPE }, - }; - struct umsch_mm_test test = { .num_queues = ARRAY_SIZE(qinfo) }; - int r; - - r = setup_umsch_mm_test(adev, &test); - if (r) - return r; - - r = setup_test_queues(adev, &test, qinfo); - if (r) - goto cleanup; - - r = submit_test_queues(adev, &test, qinfo); - if (r) - goto cleanup; - - cleanup_test_queues(adev, &test, qinfo); - cleanup_umsch_mm_test(adev, &test); - - return 0; - -cleanup: - cleanup_test_queues(adev, &test, qinfo); - cleanup_umsch_mm_test(adev, &test); - return r; -} - int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws) { struct amdgpu_ring *ring = &umsch->ring; @@ -792,7 +335,7 @@ static int umsch_mm_late_init(struct amdgpu_ip_block *ip_block) if (amdgpu_in_reset(adev) || adev->in_s0ix || adev->in_suspend) return 0; - return umsch_mm_test(adev); + return 0; } static int umsch_mm_sw_init(struct amdgpu_ip_block *ip_block) -- 2.51.0 From 8c6631234557515a7567c6251505a98e9793c8a6 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 5 Feb 2025 15:46:42 +0800 Subject: [PATCH 04/16] drm/amdgpu: refine smu send msg debug log format remove unnecessary line breaks. [ 51.280860] amdgpu 0000:24:00.0: amdgpu: smu send message: GetEnabledSmuFeaturesHigh(13) param: 0x00000000, resp: 0x00000001, readval: 0x00003763 Fixes: 0cd2bc06de72 ("drm/amd/pm: enable amdgpu smu send message log") Signed-off-by: Yang Wang Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 9f55207ea9bc..d834d134ad2b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -459,8 +459,7 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu, } if (read_arg) { smu_cmn_read_arg(smu, read_arg); - dev_dbg(adev->dev, "smu send message: %s(%d) param: 0x%08x, resp: 0x%08x,\ - readval: 0x%08x\n", + dev_dbg(adev->dev, "smu send message: %s(%d) param: 0x%08x, resp: 0x%08x, readval: 0x%08x\n", smu_get_message_name(smu, msg), index, param, reg, *read_arg); } else { dev_dbg(adev->dev, "smu send message: %s(%d) param: 0x%08x, resp: 0x%08x\n", -- 2.51.0 From abce7b4fc7c6d6a567872626e00c9b840be63b22 Mon Sep 17 00:00:00 2001 From: Sathishkumar S Date: Fri, 24 Jan 2025 20:29:08 +0530 Subject: [PATCH 05/16] drm/amdgpu: Per-instance init func for JPEG4_0_3 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add helper functions to handle per-instance and per-core initialization and deinitialization in JPEG4_0_3. Signed-off-by: Sathishkumar S Acked-by: Christian König Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 190 ++++++++++++----------- 1 file changed, 98 insertions(+), 92 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 2a97302a22d3..574083c7378e 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -525,6 +525,75 @@ static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data); } +static void jpeg_v4_0_3_start_inst(struct amdgpu_device *adev, int inst) +{ + int jpeg_inst = GET_INST(JPEG, inst); + + WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG, + 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(JPEG, jpeg_inst, regUVD_PGFSM_STATUS, + UVD_PGFSM_STATUS__UVDJ_PWR_ON << + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); + + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), + 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* JPEG disable CGC */ + jpeg_v4_0_3_disable_clock_gating(adev, inst); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); +} + +static void jpeg_v4_0_3_start_jrbc(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + int jpeg_inst = GET_INST(JPEG, ring->me); + int reg_offset = jpeg_v4_0_3_core_reg_offset(ring->pipe); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe, + ~(JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe)); + + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_CNTL, + reg_offset, + (0x00000001L | 0x00000002L)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW, + reg_offset, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + reg_offset, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_RPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_WPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_CNTL, + reg_offset, 0x00000002L); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_SIZE, + reg_offset, ring->ring_size / 4); + ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR, + reg_offset); +} + /** * jpeg_v4_0_3_start - start JPEG block * @@ -535,84 +604,42 @@ static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst static int jpeg_v4_0_3_start(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - int i, j, jpeg_inst; + int i, j; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - jpeg_inst = GET_INST(JPEG, i); - - WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG, - 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); - SOC15_WAIT_ON_RREG( - JPEG, jpeg_inst, regUVD_PGFSM_STATUS, - UVD_PGFSM_STATUS__UVDJ_PWR_ON - << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); - - /* disable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, - regUVD_JPEG_POWER_STATUS), - 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); - - /* JPEG disable CGC */ - jpeg_v4_0_3_disable_clock_gating(adev, i); - - /* MJPEG global tiling registers */ - WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - - /* enable JMI channel */ - WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); - + jpeg_v4_0_3_start_inst(adev, i); for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { - int reg_offset = jpeg_v4_0_3_core_reg_offset(j); - ring = &adev->jpeg.inst[i].ring_dec[j]; - - /* enable System Interrupt for JRBC */ - WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, - regJPEG_SYS_INT_EN), - JPEG_SYS_INT_EN__DJRBC0_MASK << j, - ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j)); - - WREG32_SOC15_OFFSET(JPEG, jpeg_inst, - regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, - reg_offset, 0); - WREG32_SOC15_OFFSET(JPEG, jpeg_inst, - regUVD_JRBC0_UVD_JRBC_RB_CNTL, - reg_offset, - (0x00000001L | 0x00000002L)); - WREG32_SOC15_OFFSET( - JPEG, jpeg_inst, - regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW, - reg_offset, lower_32_bits(ring->gpu_addr)); - WREG32_SOC15_OFFSET( - JPEG, jpeg_inst, - regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH, - reg_offset, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15_OFFSET(JPEG, jpeg_inst, - regUVD_JRBC0_UVD_JRBC_RB_RPTR, - reg_offset, 0); - WREG32_SOC15_OFFSET(JPEG, jpeg_inst, - regUVD_JRBC0_UVD_JRBC_RB_WPTR, - reg_offset, 0); - WREG32_SOC15_OFFSET(JPEG, jpeg_inst, - regUVD_JRBC0_UVD_JRBC_RB_CNTL, - reg_offset, 0x00000002L); - WREG32_SOC15_OFFSET(JPEG, jpeg_inst, - regUVD_JRBC0_UVD_JRBC_RB_SIZE, - reg_offset, ring->ring_size / 4); - ring->wptr = RREG32_SOC15_OFFSET( - JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR, - reg_offset); + jpeg_v4_0_3_start_jrbc(ring); } } return 0; } +static void jpeg_v4_0_3_stop_inst(struct amdgpu_device *adev, int inst) +{ + int jpeg_inst = GET_INST(JPEG, inst); + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v4_0_3_enable_clock_gating(adev, inst); + + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG, + 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(JPEG, jpeg_inst, regUVD_PGFSM_STATUS, + UVD_PGFSM_STATUS__UVDJ_PWR_OFF << + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); +} + /** * jpeg_v4_0_3_stop - stop JPEG block * @@ -622,31 +649,10 @@ static int jpeg_v4_0_3_start(struct amdgpu_device *adev) */ static int jpeg_v4_0_3_stop(struct amdgpu_device *adev) { - int i, jpeg_inst; + int i; - for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - jpeg_inst = GET_INST(JPEG, i); - /* reset JMI */ - WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), - UVD_JMI_CNTL__SOFT_RESET_MASK, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); - - jpeg_v4_0_3_enable_clock_gating(adev, i); - - /* enable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, - regUVD_JPEG_POWER_STATUS), - UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, - ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); - - WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG, - 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); - SOC15_WAIT_ON_RREG( - JPEG, jpeg_inst, regUVD_PGFSM_STATUS, - UVD_PGFSM_STATUS__UVDJ_PWR_OFF - << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); - } + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) + jpeg_v4_0_3_stop_inst(adev, i); return 0; } -- 2.51.0 From 74894ffc7d0cb6d99368b8c47386f4cc6c213a05 Mon Sep 17 00:00:00 2001 From: Sathishkumar S Date: Wed, 29 Jan 2025 19:03:27 +0530 Subject: [PATCH 06/16] drm/amdgpu: Add ring reset callback for JPEG4_0_0 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add ring reset function callback for JPEG4_0_0 to recover from job timeouts without a full gpu reset. Signed-off-by: Sathishkumar S Acked-by: Christian König Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index b0666090f52c..292d4a234ea6 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -143,14 +143,10 @@ static int jpeg_v4_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - /* TODO: Add queue reset mask when FW fully supports it */ - adev->jpeg.supported_reset = - amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); - if (r) - return r; - return 0; + return r; } /** @@ -724,6 +720,16 @@ static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + if (amdgpu_sriov_vf(ring->adev)) + return -EINVAL; + + jpeg_v4_0_stop(ring->adev); + jpeg_v4_0_start(ring->adev); + return amdgpu_ring_test_helper(ring); +} + static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { .name = "jpeg_v4_0", .early_init = jpeg_v4_0_early_init, @@ -769,6 +775,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v4_0_ring_reset, }; static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) -- 2.51.0 From 03399d0bff2534e499878473bd0edc5dd8f99bbd Mon Sep 17 00:00:00 2001 From: Sathishkumar S Date: Wed, 29 Jan 2025 19:11:35 +0530 Subject: [PATCH 07/16] drm/amdgpu: Add ring reset callback for JPEG3_0_0 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add ring reset function callback for JPEG3_0_0 to recover from job timeouts without a full gpu reset. Signed-off-by: Sathishkumar S Acked-by: Christian König Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index a4acb7cb7ea0..9faa9c6809df 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -132,7 +132,10 @@ static int jpeg_v3_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - return 0; + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); + + return r; } /** @@ -151,6 +154,8 @@ static int jpeg_v3_0_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; + amdgpu_jpeg_sysfs_reset_mask_fini(adev); + r = amdgpu_jpeg_sw_fini(adev); return r; @@ -550,6 +555,13 @@ static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + jpeg_v3_0_stop(ring->adev); + jpeg_v3_0_start(ring->adev); + return amdgpu_ring_test_helper(ring); +} + static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { .name = "jpeg_v3_0", .early_init = jpeg_v3_0_early_init, @@ -595,6 +607,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v3_0_ring_reset, }; static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev) -- 2.51.0 From cb493aee4d40e84a60e2e4eca55c745b0835ac30 Mon Sep 17 00:00:00 2001 From: Sathishkumar S Date: Tue, 28 Jan 2025 09:17:08 +0530 Subject: [PATCH 08/16] drm/amdgpu: Per-instance init func for JPEG2_5_0 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add helper functions to handle per-instance initialization and deinitialization in JPEG2_5_0. Signed-off-by: Sathishkumar S Acked-by: Christian König Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 102 +++++++++++++------------ 1 file changed, 55 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index b19724928ce4..0490b672d8d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -330,6 +330,44 @@ static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst) WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data); } +static void jpeg_v2_5_start_inst(struct amdgpu_device *adev, int i) +{ + struct amdgpu_ring *ring = adev->jpeg.inst[i].ring_dec; + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* JPEG disable CGC */ + jpeg_v2_5_disable_clock_gating(adev, i); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR); +} + /** * jpeg_v2_5_start - start JPEG block * @@ -339,52 +377,33 @@ static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst) */ static int jpeg_v2_5_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; int i; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (adev->jpeg.harvest_config & (1 << i)) continue; + jpeg_v2_5_start_inst(adev, i); - ring = adev->jpeg.inst[i].ring_dec; - /* disable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0, - ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); - - /* JPEG disable CGC */ - jpeg_v2_5_disable_clock_gating(adev, i); - - /* MJPEG global tiling registers */ - WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - - /* enable JMI channel */ - WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); - - /* enable System Interrupt for JRBC */ - WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN), - JPEG_SYS_INT_EN__DJRBC_MASK, - ~JPEG_SYS_INT_EN__DJRBC_MASK); - - WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); - WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, - upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0); - WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0); - WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L); - WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); - ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR); } return 0; } +static void jpeg_v2_5_stop_inst(struct amdgpu_device *adev, int i) +{ + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v2_5_enable_clock_gating(adev, i); + + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); +} + /** * jpeg_v2_5_stop - stop JPEG block * @@ -399,18 +418,7 @@ static int jpeg_v2_5_stop(struct amdgpu_device *adev) for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (adev->jpeg.harvest_config & (1 << i)) continue; - - /* reset JMI */ - WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), - UVD_JMI_CNTL__SOFT_RESET_MASK, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); - - jpeg_v2_5_enable_clock_gating(adev, i); - - /* enable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), - UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, - ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + jpeg_v2_5_stop_inst(adev, i); } return 0; -- 2.51.0 From 09e24a0b5243adf6e977288beb34497910a00cfe Mon Sep 17 00:00:00 2001 From: Sathishkumar S Date: Wed, 29 Jan 2025 19:18:11 +0530 Subject: [PATCH 09/16] drm/amdgpu: Add ring reset callback for JPEG2_5_0 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add ring reset function callback for JPEG2_5_0 to recover from job timeouts without a full gpu reset. Signed-off-by: Sathishkumar S Acked-by: Christian König Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 0490b672d8d3..0a2c1dee2430 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -167,7 +167,10 @@ static int jpeg_v2_5_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - return 0; + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); + + return r; } /** @@ -186,6 +189,8 @@ static int jpeg_v2_5_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; + amdgpu_jpeg_sysfs_reset_mask_fini(adev); + r = amdgpu_jpeg_sw_fini(adev); return r; @@ -638,6 +643,13 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev, return 0; } +static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + jpeg_v2_5_stop_inst(ring->adev, ring->me); + jpeg_v2_5_start_inst(ring->adev, ring->me); + return amdgpu_ring_test_helper(ring); +} + static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = { .name = "jpeg_v2_5", .early_init = jpeg_v2_5_early_init, @@ -700,6 +712,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v2_5_ring_reset, }; static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = { @@ -730,6 +743,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = { .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v2_5_ring_reset, }; static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) -- 2.51.0 From 500c04d2a70876c9fd49070606d948e96d32760e Mon Sep 17 00:00:00 2001 From: Sathishkumar S Date: Wed, 29 Jan 2025 19:34:06 +0530 Subject: [PATCH 10/16] drm/amdgpu: Add ring reset callback for JPEG2_0_0 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add ring reset function callback for JPEG2_0_0 to recover from job timeouts without a full gpu reset. Signed-off-by: Sathishkumar S Acked-by: Christian König Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 8c61081746c6..75843a0e3bfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -118,7 +118,10 @@ static int jpeg_v2_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - return 0; + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); + + return r; } /** @@ -137,6 +140,8 @@ static int jpeg_v2_0_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; + amdgpu_jpeg_sysfs_reset_mask_fini(adev); + r = amdgpu_jpeg_sw_fini(adev); return r; @@ -759,6 +764,13 @@ static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + jpeg_v2_0_stop(ring->adev); + jpeg_v2_0_start(ring->adev); + return amdgpu_ring_test_helper(ring); +} + static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { .name = "jpeg_v2_0", .early_init = jpeg_v2_0_early_init, @@ -804,6 +816,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v2_0_ring_reset, }; static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) -- 2.51.0 From 80513e389765c8f9543b26d8fa4bbdf0e59ff8bc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 13 Feb 2025 13:37:01 -0500 Subject: [PATCH 11/16] drm/amdgpu/gfx: only call mes for enforce isolation if supported This should not be called on chips without MES so check if MES is enabled and if the cleaner shader is supported. Fixes: 8521e3c5f058 ("drm/amd/amdgpu: limit single process inside MES") Reviewed-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher Cc: Shaoyun Liu Cc: Srinivasan Shanmugam --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 27f5318c3a26..b9bd6654f317 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1670,11 +1670,13 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, if (adev->enforce_isolation[i] && !partition_values[i]) { /* Going from enabled to disabled */ amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); - amdgpu_mes_set_enforce_isolation(adev, i, false); + if (adev->enable_mes && adev->gfx.enable_cleaner_shader) + amdgpu_mes_set_enforce_isolation(adev, i, false); } else if (!adev->enforce_isolation[i] && partition_values[i]) { /* Going from disabled to enabled */ amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); - amdgpu_mes_set_enforce_isolation(adev, i, true); + if (adev->enable_mes && adev->gfx.enable_cleaner_shader) + amdgpu_mes_set_enforce_isolation(adev, i, true); } adev->enforce_isolation[i] = partition_values[i]; } -- 2.51.0 From fe652becdbfccf265f4cea0eb379418d08c6596a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Feb 2025 16:20:15 -0500 Subject: [PATCH 12/16] drm/amdgpu/umsch: declare umsch firmware Needed to be properly picked up for the initrd, etc. Fixes: 3488c79beafa ("drm/amdgpu: add initial support for UMSCH") Reviewed-by: Saleemkhan Jamadar Signed-off-by: Alex Deucher Cc: Lang Yu --- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index 2cfa2447d13e..78319988b054 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -32,6 +32,8 @@ #include "amdgpu_umsch_mm.h" #include "umsch_mm_v4_0.h" +MODULE_FIRMWARE("amdgpu/umsch_mm_4_0_0.bin"); + int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws) { struct amdgpu_ring *ring = &umsch->ring; -- 2.51.0 From 5183e69090f07585fa6c7ef71d4301bc16a15c76 Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Wed, 12 Feb 2025 14:26:00 -0500 Subject: [PATCH 13/16] drm/amdgpu: Remove extra checks for CPX As far as the number of XCCs, the number of compute partitions, and the number of memory partitions qualify, CPX is valid. Signed-off-by: Amber Lin Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index e157d6d857b6..2753f282e42d 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -559,8 +559,11 @@ static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr, adev->gmc.num_mem_partitions == 4) && (num_xccs_per_xcp >= 2); case AMDGPU_CPX_PARTITION_MODE: + /* (num_xcc > 1) because 1 XCC is considered SPX, not CPX. + * (num_xcc % adev->gmc.num_mem_partitions) == 0 because + * num_compute_partitions can't be less than num_mem_partitions + */ return ((num_xcc > 1) && - (adev->gmc.num_mem_partitions == 1 || adev->gmc.num_mem_partitions == 4) && (num_xcc % adev->gmc.num_mem_partitions) == 0); default: return false; -- 2.51.0 From c917e39cbdcd9fff421184db6cc461cc58d52c17 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Feb 2025 16:31:43 -0500 Subject: [PATCH 14/16] drm/amdgpu/umsch: fix ucode check Return an error if the IP version doesn't match otherwise we end up passing a NULL string to amdgpu_ucode_request. We should never hit this in practice today since we only enable the umsch code on the supported IP versions, but add a check to be safe. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202502130406.iWQ0eBug-lkp@intel.com/ Fixes: 020620424b27 ("drm/amd: Use a constant format string for amdgpu_ucode_request") Reviewed-by: Saleemkhan Jamadar Signed-off-by: Alex Deucher Cc: Arnd Bergmann Cc: Lang Yu --- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index 78319988b054..a7f2648245ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -129,7 +129,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) fw_name = "amdgpu/umsch_mm_4_0_0.bin"; break; default: - break; + return -EINVAL; } r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, AMDGPU_UCODE_REQUIRED, -- 2.51.0 From 56763be4009f3be178e534f9b3c10594abec5b6e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Feb 2025 16:43:13 -0500 Subject: [PATCH 15/16] drm/amdgpu/umsch: tidy up the ucode name string handling Make the constant parts of the name part of the string we pass to amdgpu_ucode_request(). Only the version number varies from IP to IP. Reviewed-by: Saleemkhan Jamadar Signed-off-by: Alex Deucher Cc: Lang Yu --- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index a7f2648245ec..cd707d70a0bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -126,14 +126,14 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): case IP_VERSION(4, 0, 6): - fw_name = "amdgpu/umsch_mm_4_0_0.bin"; + fw_name = "4_0_0"; break; default: return -EINVAL; } r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, AMDGPU_UCODE_REQUIRED, - "%s", fw_name); + "amdgpu/umsch_mm_%s.bin", fw_name); if (r) { release_firmware(adev->umsch_mm.fw); adev->umsch_mm.fw = NULL; -- 2.51.0 From 0487f50310cfeec1bb4480a67294fc7081c5ed22 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 10 Feb 2025 17:45:14 -0500 Subject: [PATCH 16/16] drm/amdgpu/vcn5.0.1: use correct dpm helper The VCN and UVD helpers were split in commit ff69bba05f08 ("drm/amd/pm: add inst to dpm_set_powergating_by_smu") However, this happened in parallel to the vcn 5.0.1 development so it was missed there. Fixes: 346492f30ce3 ("drm/amdgpu: Add VCN_5_0_1 support") Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: Sonny Jiang Cc: Boyuan Zhang --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 8b463c977d08..8b0b3739a537 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -575,8 +575,10 @@ static int vcn_v5_0_1_start(struct amdgpu_device *adev) uint32_t tmp; int i, j, k, r, vcn_inst; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, true, i); + } for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; @@ -816,8 +818,10 @@ static int vcn_v5_0_1_stop(struct amdgpu_device *adev) WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0); } - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, false, i); + } return 0; } -- 2.51.0