From 0880f58f9609f0200483a49429af0f050d281703 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 25 Oct 2024 15:56:39 +0100 Subject: [PATCH 01/16] drm/amd/pm: Vangogh: Fix kernel memory out of bounds write KASAN reports that the GPU metrics table allocated in vangogh_tables_init() is not large enough for the memset done in smu_cmn_init_soft_gpu_metrics(). Condensed report follows: [ 33.861314] BUG: KASAN: slab-out-of-bounds in smu_cmn_init_soft_gpu_metrics+0x73/0x200 [amdgpu] [ 33.861799] Write of size 168 at addr ffff888129f59500 by task mangoapp/1067 ... [ 33.861808] CPU: 6 UID: 1000 PID: 1067 Comm: mangoapp Tainted: G W 6.12.0-rc4 #356 1a56f59a8b5182eeaf67eb7cb8b13594dd23b544 [ 33.861816] Tainted: [W]=WARN [ 33.861818] Hardware name: Valve Galileo/Galileo, BIOS F7G0107 12/01/2023 [ 33.861822] Call Trace: [ 33.861826] [ 33.861829] dump_stack_lvl+0x66/0x90 [ 33.861838] print_report+0xce/0x620 [ 33.861853] kasan_report+0xda/0x110 [ 33.862794] kasan_check_range+0xfd/0x1a0 [ 33.862799] __asan_memset+0x23/0x40 [ 33.862803] smu_cmn_init_soft_gpu_metrics+0x73/0x200 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779] [ 33.863306] vangogh_get_gpu_metrics_v2_4+0x123/0xad0 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779] [ 33.864257] vangogh_common_get_gpu_metrics+0xb0c/0xbc0 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779] [ 33.865682] amdgpu_dpm_get_gpu_metrics+0xcc/0x110 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779] [ 33.866160] amdgpu_get_gpu_metrics+0x154/0x2d0 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779] [ 33.867135] dev_attr_show+0x43/0xc0 [ 33.867147] sysfs_kf_seq_show+0x1f1/0x3b0 [ 33.867155] seq_read_iter+0x3f8/0x1140 [ 33.867173] vfs_read+0x76c/0xc50 [ 33.867198] ksys_read+0xfb/0x1d0 [ 33.867214] do_syscall_64+0x90/0x160 ... 
[ 33.867353] Allocated by task 378 on cpu 7 at 22.794876s: [ 33.867358] kasan_save_stack+0x33/0x50 [ 33.867364] kasan_save_track+0x17/0x60 [ 33.867367] __kasan_kmalloc+0x87/0x90 [ 33.867371] vangogh_init_smc_tables+0x3f9/0x840 [amdgpu] [ 33.867835] smu_sw_init+0xa32/0x1850 [amdgpu] [ 33.868299] amdgpu_device_init+0x467b/0x8d90 [amdgpu] [ 33.868733] amdgpu_driver_load_kms+0x19/0xf0 [amdgpu] [ 33.869167] amdgpu_pci_probe+0x2d6/0xcd0 [amdgpu] [ 33.869608] local_pci_probe+0xda/0x180 [ 33.869614] pci_device_probe+0x43f/0x6b0 Empirically we can confirm that the former allocates 152 bytes for the table, while the latter memsets a 168-byte block. The root cause appears to be that when the GPU metrics tables for v2_4 parts were added, enlarging the table to fit was not considered. The fix in this patch is rather "brute force" and perhaps later should be done in a smarter way, by extracting and consolidating the part version to size logic to a common helper, instead of brute forcing the largest possible allocation. Nevertheless, for now this works and fixes the out of bounds write. v2: * Drop impossible v3_0 case. 
(Mario) Signed-off-by: Tvrtko Ursulin Fixes: 41cec40bc9ba ("drm/amd/pm: Vangogh: Add new gpu_metrics_v2_4 to acquire gpu_metrics") Cc: Mario Limonciello Cc: Evan Quan Cc: Wenyou Yang Cc: Alex Deucher Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20241025145639.19124-1-tursulin@igalia.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index a333ab827f48..6c43724c01dd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -242,7 +242,9 @@ static int vangogh_tables_init(struct smu_context *smu) goto err0_out; smu_table->metrics_time = 0; - smu_table->gpu_metrics_table_size = max(sizeof(struct gpu_metrics_v2_3), sizeof(struct gpu_metrics_v2_2)); + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2); + smu_table->gpu_metrics_table_size = max(smu_table->gpu_metrics_table_size, sizeof(struct gpu_metrics_v2_3)); + smu_table->gpu_metrics_table_size = max(smu_table->gpu_metrics_table_size, sizeof(struct gpu_metrics_v2_4)); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) goto err1_out; -- 2.51.0 From 58a8c756fc4ca243fb5c070e1b9e0970f00757d9 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Fri, 27 Sep 2024 16:05:21 +0800 Subject: [PATCH 02/16] drm/amdgpu: correct the S3 abort check condition In the normal S3 entry, the TOS cycle counter is not reset while the BIOS executes the _S3 method, so it cannot be used to determine whether the _S3 method was actually executed. However, the PM core, when performing the S3 suspend, will set the PM_SUSPEND_FLAG_FW_RESUME bit if all the devices suspend successfully. 
Therefore, drivers can check the pm_suspend_global_flags bit(1) to detect the S3 suspend abort event. Fixes: 6704dbf71928 ("drm/amdgpu: update suspend status for aborting from deeper suspend") Signed-off-by: Prike Liang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 93e44e7ee3fa..af739b13b6b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -578,16 +578,13 @@ soc15_asic_reset_method(struct amdgpu_device *adev) static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) { - u32 sol_reg; - - sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - /* Will reset for the following suspend abort cases. - * 1) Only reset limit on APU side, dGPU hasn't checked yet. - * 2) S3 suspend abort and TOS already launched. + * 1) Only reset on APU side, dGPU hasn't checked yet. + * 2) S3 suspend aborted in the normal S3 suspend or + * performing pm core test. */ if (adev->flags & AMD_IS_APU && adev->in_s3 && - sol_reg) { + !pm_resume_via_firmware()) { adev->suspend_complete = false; return true; } else { @@ -603,11 +600,17 @@ static int soc15_asic_reset(struct amdgpu_device *adev) * successfully. So now, temporarily enable it for the * S3 suspend abort case. 
*/ - if (((adev->apu_flags & AMD_APU_IS_RAVEN) || - (adev->apu_flags & AMD_APU_IS_RAVEN2)) && - !soc15_need_reset_on_resume(adev)) + + if ((adev->apu_flags & AMD_APU_IS_PICASSO || + !(adev->apu_flags & AMD_APU_IS_RAVEN)) && + soc15_need_reset_on_resume(adev)) + goto asic_reset; + + if ((adev->apu_flags & AMD_APU_IS_RAVEN) || + (adev->apu_flags & AMD_APU_IS_RAVEN2)) return 0; +asic_reset: switch (soc15_asic_reset_method(adev)) { case AMD_RESET_METHOD_PCI: dev_info(adev->dev, "PCI reset\n"); -- 2.51.0 From d5e3d8a2a6cb8b8c8678e60ae8067c18ffbc2da2 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Mon, 14 Oct 2024 15:25:35 +0800 Subject: [PATCH 03/16] drm/amdgpu: clean up the suspend_complete To check the status of S3 suspend completion, use the PM core pm_suspend_global_flags bit(1) to detect S3 abort events. Therefore, clean up the AMDGPU driver's private flag suspend_complete. Signed-off-by: Prike Liang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 -- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++----- drivers/gpu/drm/amd/amdgpu/soc21.c | 5 +++-- 5 files changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3af5acff8518..607998d8b1d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1111,8 +1111,6 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; - /* indicate amdgpu suspension status */ - bool suspend_complete; enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 46f756513948..ff3ac30744dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2500,7 +2500,6 @@ static int amdgpu_pmops_suspend(struct device *dev) struct 
drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - adev->suspend_complete = false; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) @@ -2515,7 +2514,6 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - adev->suspend_complete = true; if (amdgpu_acpi_should_gpu_reset(adev)) return amdgpu_asic_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 66947850d7e4..e9248a855ba7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3288,8 +3288,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) * confirmed that the APU gfx10/gfx11 needn't such update. */ if (adev->flags & AMD_IS_APU && - adev->in_s3 && !adev->suspend_complete) { - DRM_INFO(" Will skip the CSB packet resubmit\n"); + adev->in_s3 && !pm_resume_via_firmware()) { + DRM_INFO("Will skip the CSB packet resubmit\n"); return 0; } r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index af739b13b6b5..533b4b2b432d 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -584,13 +584,10 @@ static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) * performing pm core test. 
*/ if (adev->flags & AMD_IS_APU && adev->in_s3 && - !pm_resume_via_firmware()) { - adev->suspend_complete = false; + !pm_resume_via_firmware()) return true; - } else { - adev->suspend_complete = true; + else return false; - } } static int soc15_asic_reset(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 1c07ebdc0d1f..93fbb3354720 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -897,9 +897,10 @@ static bool soc21_need_reset_on_resume(struct amdgpu_device *adev) /* Will reset for the following suspend abort cases. * 1) Only reset dGPU side. * 2) S3 suspend got aborted and TOS is active. + * As for dGPU suspend abort cases the SOL value + * will be kept as zero at this resume point. */ - if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && - !adev->suspend_complete) { + if (!(adev->flags & AMD_IS_APU) && adev->in_s3) { sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); msleep(100); sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); -- 2.51.0 From ea9d8863daa93f2bfd39ce820254a788b1fe0c1f Mon Sep 17 00:00:00 2001 From: Le Ma Date: Fri, 25 Oct 2024 17:43:57 +0800 Subject: [PATCH 04/16] drm/amdgpu: add generic func to check if ta fw is applicable A separate XGMI TA is required for specific APUs, and the driver needs to properly parse a TA binary that has the aux XGMI TA packed in. 
v2: make the check function more generic (Lijo) Signed-off-by: Le Ma Reviewed-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 34 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 1 + 2 files changed, 35 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index abd5e980c9c7..ae24df65a3df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3563,6 +3563,36 @@ out: return err; } +static bool is_ta_fw_applicable(struct psp_context *psp, + const struct psp_fw_bin_desc *desc) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t fw_version; + + switch (desc->fw_type) { + case TA_FW_TYPE_PSP_XGMI: + case TA_FW_TYPE_PSP_XGMI_AUX: + /* for now, AUX TA only exists on 13.0.6 ta bin, + * from v20.00.0x.14 + */ + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == + IP_VERSION(13, 0, 6)) { + fw_version = le32_to_cpu(desc->fw_version); + + if (adev->flags & AMD_IS_APU && + (fw_version & 0xff) >= 0x14) + return desc->fw_type == TA_FW_TYPE_PSP_XGMI_AUX; + else + return desc->fw_type == TA_FW_TYPE_PSP_XGMI; + } + break; + default: + break; + } + + return true; +} + static int parse_ta_bin_descriptor(struct psp_context *psp, const struct psp_fw_bin_desc *desc, const struct ta_firmware_header_v2_0 *ta_hdr) @@ -3572,6 +3602,9 @@ static int parse_ta_bin_descriptor(struct psp_context *psp, if (!psp || !desc || !ta_hdr) return -EINVAL; + if (!is_ta_fw_applicable(psp, desc)) + return 0; + ucode_start_addr = (uint8_t *)ta_hdr + le32_to_cpu(desc->offset_bytes) + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); @@ -3584,6 +3617,7 @@ static int parse_ta_bin_descriptor(struct psp_context *psp, psp->asd_context.bin_desc.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_XGMI: + case TA_FW_TYPE_PSP_XGMI_AUX: psp->xgmi_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version); 
psp->xgmi_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes); psp->xgmi_context.context.bin_desc.start_addr = ucode_start_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4e23419b92d4..4150ec0aa10d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -163,6 +163,7 @@ enum ta_fw_type { TA_FW_TYPE_PSP_DTM, TA_FW_TYPE_PSP_RAP, TA_FW_TYPE_PSP_SECUREDISPLAY, + TA_FW_TYPE_PSP_XGMI_AUX, TA_FW_TYPE_MAX_INDEX, }; -- 2.51.0 From 7daa0f6b2859201a851f4553bea755cec14acb41 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 25 Oct 2024 13:56:03 +0800 Subject: [PATCH 05/16] drm/amdgpu: optimize ACA log print - skip to print CE ACA log. - optimize ACA log print for MCA. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 18ee60378727..3ca03b5e0f91 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -348,6 +348,24 @@ static bool amdgpu_mca_bank_should_update(struct amdgpu_device *adev, enum amdgp return ret; } +static bool amdgpu_mca_bank_should_dump(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, + struct mca_bank_entry *entry) +{ + bool ret; + + switch (type) { + case AMDGPU_MCA_ERROR_TYPE_CE: + ret = amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]); + break; + case AMDGPU_MCA_ERROR_TYPE_UE: + default: + ret = true; + break; + } + + return ret; +} + static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set, struct ras_query_context *qctx) { @@ -373,7 +391,8 @@ static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mc 
amdgpu_mca_bank_set_add_entry(mca_set, &entry); - amdgpu_mca_smu_mca_bank_dump(adev, i, &entry, qctx); + if (amdgpu_mca_bank_should_dump(adev, type, &entry)) + amdgpu_mca_smu_mca_bank_dump(adev, i, &entry, qctx); } return 0; -- 2.51.0 From cb67ff6272eceb5fcb2fe3b74f0293fa0706841a Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 22 Oct 2024 12:30:50 -0400 Subject: [PATCH 06/16] drm/amdkfd: flag per-queue reset support for gfx9 Flag KFD support for per-queue reset on GFX9 devices. Signed-off-by: Jonathan Kim Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 ++ include/uapi/linux/kfd_sysfs.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3871591c9aec..9476e30d6baa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1998,6 +1998,8 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2)) dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; + + dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED; } else { dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h index 5e8d28617efa..859b8e91d4d3 100644 --- a/include/uapi/linux/kfd_sysfs.h +++ b/include/uapi/linux/kfd_sysfs.h @@ -60,7 +60,8 @@ #define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000 #define HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED 0x20000000 #define HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED 0x40000000 -#define HSA_CAP_RESERVED 0x800f8000 +#define HSA_CAP_PER_QUEUE_RESET_SUPPORTED 0x80000000 +#define HSA_CAP_RESERVED 0x000f8000 /* debug_prop bits in node properties */ #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_MASK 0x0000000f -- 2.51.0 From 
ecfe9b237687a55d596fff0650ccc8cc455edd3f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Oct 2024 09:13:21 -0400 Subject: [PATCH 07/16] drm/amdgpu/smu13: fix profile reporting The following 3 commits landed in parallel: commit d7d2688bf4ea ("drm/amd/pm: update workload mask after the setting") commit 7a1613e47e65 ("drm/amdgpu/smu13: always apply the powersave optimization") commit 7c210ca5a2d7 ("drm/amdgpu: handle default profile on on devices without fullscreen 3D") While everything is set correctly, this caused the profile to be reported incorrectly because both the powersave and fullscreen3d bits were set in the mask and when the driver prints the profile, it looks for the first bit set. Fixes: d7d2688bf4ea ("drm/amd/pm: update workload mask after the setting") Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 3e2277abc754..8d25cc1f218f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2473,7 +2473,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, DpmActivityMonitorCoeffInt_t *activity_monitor = &(activity_monitor_external.DpmActivityMonitorCoeffInt); int workload_type, ret = 0; - u32 workload_mask; + u32 workload_mask, selected_workload_mask; smu->power_profile_mode = input[size]; @@ -2540,7 +2540,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, if (workload_type < 0) return -EINVAL; - workload_mask = 1 << workload_type; + selected_workload_mask = workload_mask = 1 << workload_type; /* Add optimizations for SMU13.0.0/10. 
Reuse the power saving profile */ if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && @@ -2560,7 +2560,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, workload_mask, NULL); if (!ret) - smu->workload_mask = workload_mask; + smu->workload_mask = selected_workload_mask; return ret; } -- 2.51.0 From 8fe7cf58ff0e46769b86b3890d657c8996b86bc6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 14 Oct 2024 10:51:10 -0400 Subject: [PATCH 08/16] drm/amdkfd: add an interface to query whether KFD is active Add an interface to query whether KFD has any active queues. v2: fix build issues Acked-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 9 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 7 ++++++ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 25 ++++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index b545940e512b..24343c312480 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -890,6 +890,15 @@ int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id) return kgd2kfd_start_sched(adev->kfd.dev, node_id); } +/* check if there are KFD queues active */ +bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id) +{ + if (!adev->kfd.init_complete) + return false; + + return kgd2kfd_compute_active(adev->kfd.dev, node_id); +} + /* Config CGTT_SQ_CLK_CTRL */ int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id, bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 7e0a22072536..4b80ad860639 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -268,6 +268,7 @@ int 
amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id); int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id); int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id, bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable); +bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id); /* Read user wptr from a specified user address space with page fault @@ -431,6 +432,7 @@ int kgd2kfd_check_and_lock_kfd(void); void kgd2kfd_unlock_kfd(void); int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); +bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id); #else static inline int kgd2kfd_init(void) { @@ -511,5 +513,10 @@ static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) { return 0; } + +static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) +{ + return false; +} #endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index fad1c8f2bc83..348925254bff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1392,6 +1392,13 @@ void kfd_dec_compute_active(struct kfd_node *node) WARN_ONCE(count < 0, "Compute profile ref. 
count error"); } +static bool kfd_compute_active(struct kfd_node *node) +{ + if (atomic_read(&node->kfd->compute_profile)) + return true; + return false; +} + void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) { /* @@ -1485,6 +1492,24 @@ int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) return node->dqm->ops.halt(node->dqm); } +bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) +{ + struct kfd_node *node; + + if (!kfd->init_complete) + return false; + + if (node_id >= kfd->num_nodes) { + dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", + node_id, kfd->num_nodes - 1); + return false; + } + + node = kfd->nodes[node_id]; + + return kfd_compute_active(node); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS -- 2.51.0 From 370e8fdbb09a4c60d355abd622a9be85428cf0b1 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 4 Oct 2024 23:56:42 +0100 Subject: [PATCH 09/16] drm/amd/display: Remove unused regamma functions calculate_user_regamma_coeff() and calculate_user_regamma_ramp() were added in 2018 in commit 55a01d4023ce ("drm/amd/display: Add user_regamma to color module") but never used. Remove them and their helpers. Signed-off-by: Dr. 
David Alan Gilbert Signed-off-by: Alex Deucher --- .../amd/display/modules/color/color_gamma.c | 307 ------------------ .../amd/display/modules/color/color_gamma.h | 11 - 2 files changed, 318 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 3699e633801d..a71df052cf25 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1399,71 +1399,6 @@ static void scale_gamma_dx(struct pwl_float_data *pwl_rgb, pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b); } -/* todo: all these scale_gamma functions are inherently the same but - * take different structures as params or different format for ramp - * values. We could probably implement it in a more generic fashion - */ -static void scale_user_regamma_ramp(struct pwl_float_data *pwl_rgb, - const struct regamma_ramp *ramp, - struct dividers dividers) -{ - unsigned short max_driver = 0xFFFF; - unsigned short max_os = 0xFF00; - unsigned short scaler = max_os; - uint32_t i; - struct pwl_float_data *rgb = pwl_rgb; - struct pwl_float_data *rgb_last = rgb + GAMMA_RGB_256_ENTRIES - 1; - - i = 0; - do { - if (ramp->gamma[i] > max_os || - ramp->gamma[i + 256] > max_os || - ramp->gamma[i + 512] > max_os) { - scaler = max_driver; - break; - } - i++; - } while (i != GAMMA_RGB_256_ENTRIES); - - i = 0; - do { - rgb->r = dc_fixpt_from_fraction( - ramp->gamma[i], scaler); - rgb->g = dc_fixpt_from_fraction( - ramp->gamma[i + 256], scaler); - rgb->b = dc_fixpt_from_fraction( - ramp->gamma[i + 512], scaler); - - ++rgb; - ++i; - } while (i != GAMMA_RGB_256_ENTRIES); - - rgb->r = dc_fixpt_mul(rgb_last->r, - dividers.divider1); - rgb->g = dc_fixpt_mul(rgb_last->g, - dividers.divider1); - rgb->b = dc_fixpt_mul(rgb_last->b, - dividers.divider1); - - ++rgb; - - rgb->r = dc_fixpt_mul(rgb_last->r, - dividers.divider2); - rgb->g = dc_fixpt_mul(rgb_last->g, - dividers.divider2); - rgb->b = 
dc_fixpt_mul(rgb_last->b, - dividers.divider2); - - ++rgb; - - rgb->r = dc_fixpt_mul(rgb_last->r, - dividers.divider3); - rgb->g = dc_fixpt_mul(rgb_last->g, - dividers.divider3); - rgb->b = dc_fixpt_mul(rgb_last->b, - dividers.divider3); -} - /* * RS3+ color transform DDI - 1D LUT adjustment is composed with regamma here * Input is evenly distributed in the output color space as specified in @@ -1663,106 +1598,6 @@ static bool calculate_interpolated_hardware_curve( return true; } -/* The "old" interpolation uses a complicated scheme to build an array of - * coefficients while also using an array of 0-255 normalized to 0-1 - * Then there's another loop using both of the above + new scaled user ramp - * and we concatenate them. It also searches for points of interpolation and - * uses enums for positions. - * - * This function uses a different approach: - * user ramp is always applied on X with 0/255, 1/255, 2/255, ..., 255/255 - * To find index for hwX , we notice the following: - * i/255 <= hwX < (i+1)/255 <=> i <= 255*hwX < i+1 - * See apply_lut_1d which is the same principle, but on 4K entry 1D LUT - * - * Once the index is known, combined Y is simply: - * user_ramp(index) + (hwX-index/255)*(user_ramp(index+1) - user_ramp(index) - * - * We should switch to this method in all cases, it's simpler and faster - * ToDo one day - for now this only applies to ADL regamma to avoid regression - * for regular use cases (sRGB and PQ) - */ -static void interpolate_user_regamma(uint32_t hw_points_num, - struct pwl_float_data *rgb_user, - bool apply_degamma, - struct dc_transfer_func_distributed_points *tf_pts) -{ - uint32_t i; - uint32_t color = 0; - int32_t index; - int32_t index_next; - struct fixed31_32 *tf_point; - struct fixed31_32 hw_x; - struct fixed31_32 norm_factor = - dc_fixpt_from_int(255); - struct fixed31_32 norm_x; - struct fixed31_32 index_f; - struct fixed31_32 lut1; - struct fixed31_32 lut2; - struct fixed31_32 delta_lut; - struct fixed31_32 delta_index; - 
const struct fixed31_32 one = dc_fixpt_from_int(1); - - i = 0; - /* fixed_pt library has problems handling too small values */ - while (i != 32) { - tf_pts->red[i] = dc_fixpt_zero; - tf_pts->green[i] = dc_fixpt_zero; - tf_pts->blue[i] = dc_fixpt_zero; - ++i; - } - while (i <= hw_points_num + 1) { - for (color = 0; color < 3; color++) { - if (color == 0) - tf_point = &tf_pts->red[i]; - else if (color == 1) - tf_point = &tf_pts->green[i]; - else - tf_point = &tf_pts->blue[i]; - - if (apply_degamma) { - if (color == 0) - hw_x = coordinates_x[i].regamma_y_red; - else if (color == 1) - hw_x = coordinates_x[i].regamma_y_green; - else - hw_x = coordinates_x[i].regamma_y_blue; - } else - hw_x = coordinates_x[i].x; - - if (dc_fixpt_le(one, hw_x)) - hw_x = one; - - norm_x = dc_fixpt_mul(norm_factor, hw_x); - index = dc_fixpt_floor(norm_x); - if (index < 0 || index > 255) - continue; - - index_f = dc_fixpt_from_int(index); - index_next = (index == 255) ? index : index + 1; - - if (color == 0) { - lut1 = rgb_user[index].r; - lut2 = rgb_user[index_next].r; - } else if (color == 1) { - lut1 = rgb_user[index].g; - lut2 = rgb_user[index_next].g; - } else { - lut1 = rgb_user[index].b; - lut2 = rgb_user[index_next].b; - } - - // we have everything now, so interpolate - delta_lut = dc_fixpt_sub(lut2, lut1); - delta_index = dc_fixpt_sub(norm_x, index_f); - - *tf_point = dc_fixpt_add(lut1, - dc_fixpt_mul(delta_index, delta_lut)); - } - ++i; - } -} - static void build_new_custom_resulted_curve( uint32_t hw_points_num, struct dc_transfer_func_distributed_points *tf_pts) @@ -1784,29 +1619,6 @@ static void build_new_custom_resulted_curve( } } -static void apply_degamma_for_user_regamma(struct pwl_float_data_ex *rgb_regamma, - uint32_t hw_points_num, struct calculate_buffer *cal_buffer) -{ - uint32_t i; - - struct gamma_coefficients coeff; - struct pwl_float_data_ex *rgb = rgb_regamma; - const struct hw_x_point *coord_x = coordinates_x; - - build_coefficients(&coeff, 
TRANSFER_FUNCTION_SRGB); - - i = 0; - while (i != hw_points_num + 1) { - rgb->r = translate_from_linear_space_ex( - coord_x->x, &coeff, 0, cal_buffer); - rgb->g = rgb->r; - rgb->b = rgb->r; - ++coord_x; - ++rgb; - ++i; - } -} - static bool map_regamma_hw_to_x_user( const struct dc_gamma *ramp, struct pixel_gamma_point *coeff128, @@ -1855,125 +1667,6 @@ static bool map_regamma_hw_to_x_user( #define _EXTRA_POINTS 3 -bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf, - const struct regamma_lut *regamma, - struct calculate_buffer *cal_buffer, - const struct dc_gamma *ramp) -{ - struct gamma_coefficients coeff; - const struct hw_x_point *coord_x = coordinates_x; - uint32_t i = 0; - - do { - coeff.a0[i] = dc_fixpt_from_fraction( - regamma->coeff.A0[i], 10000000); - coeff.a1[i] = dc_fixpt_from_fraction( - regamma->coeff.A1[i], 1000); - coeff.a2[i] = dc_fixpt_from_fraction( - regamma->coeff.A2[i], 1000); - coeff.a3[i] = dc_fixpt_from_fraction( - regamma->coeff.A3[i], 1000); - coeff.user_gamma[i] = dc_fixpt_from_fraction( - regamma->coeff.gamma[i], 1000); - - ++i; - } while (i != 3); - - i = 0; - /* fixed_pt library has problems handling too small values */ - while (i != 32) { - output_tf->tf_pts.red[i] = dc_fixpt_zero; - output_tf->tf_pts.green[i] = dc_fixpt_zero; - output_tf->tf_pts.blue[i] = dc_fixpt_zero; - ++coord_x; - ++i; - } - while (i != MAX_HW_POINTS + 1) { - output_tf->tf_pts.red[i] = translate_from_linear_space_ex( - coord_x->x, &coeff, 0, cal_buffer); - output_tf->tf_pts.green[i] = translate_from_linear_space_ex( - coord_x->x, &coeff, 1, cal_buffer); - output_tf->tf_pts.blue[i] = translate_from_linear_space_ex( - coord_x->x, &coeff, 2, cal_buffer); - ++coord_x; - ++i; - } - - if (ramp && ramp->type == GAMMA_CS_TFM_1D) - apply_lut_1d(ramp, MAX_HW_POINTS, &output_tf->tf_pts); - - // this function just clamps output to 0-1 - build_new_custom_resulted_curve(MAX_HW_POINTS, &output_tf->tf_pts); - output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; - - 
return true; -} - -bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf, - const struct regamma_lut *regamma, - struct calculate_buffer *cal_buffer, - const struct dc_gamma *ramp) -{ - struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts; - struct dividers dividers; - - struct pwl_float_data *rgb_user = NULL; - struct pwl_float_data_ex *rgb_regamma = NULL; - bool ret = false; - - if (regamma == NULL) - return false; - - output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; - - rgb_user = kcalloc(GAMMA_RGB_256_ENTRIES + _EXTRA_POINTS, - sizeof(*rgb_user), - GFP_KERNEL); - if (!rgb_user) - goto rgb_user_alloc_fail; - - rgb_regamma = kcalloc(MAX_HW_POINTS + _EXTRA_POINTS, - sizeof(*rgb_regamma), - GFP_KERNEL); - if (!rgb_regamma) - goto rgb_regamma_alloc_fail; - - dividers.divider1 = dc_fixpt_from_fraction(3, 2); - dividers.divider2 = dc_fixpt_from_int(2); - dividers.divider3 = dc_fixpt_from_fraction(5, 2); - - scale_user_regamma_ramp(rgb_user, &regamma->ramp, dividers); - - if (regamma->flags.bits.applyDegamma == 1) { - apply_degamma_for_user_regamma(rgb_regamma, MAX_HW_POINTS, cal_buffer); - copy_rgb_regamma_to_coordinates_x(coordinates_x, - MAX_HW_POINTS, rgb_regamma); - } - - interpolate_user_regamma(MAX_HW_POINTS, rgb_user, - regamma->flags.bits.applyDegamma, tf_pts); - - // no custom HDR curves! 
- tf_pts->end_exponent = 0; - tf_pts->x_point_at_y1_red = 1; - tf_pts->x_point_at_y1_green = 1; - tf_pts->x_point_at_y1_blue = 1; - - if (ramp && ramp->type == GAMMA_CS_TFM_1D) - apply_lut_1d(ramp, MAX_HW_POINTS, &output_tf->tf_pts); - - // this function just clamps output to 0-1 - build_new_custom_resulted_curve(MAX_HW_POINTS, tf_pts); - - ret = true; - - kfree(rgb_regamma); -rgb_regamma_alloc_fail: - kfree(rgb_user); -rgb_user_alloc_fail: - return ret; -} - bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps, struct dc_transfer_func *input_tf, const struct dc_gamma *ramp, bool map_user_ramp) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h index ee5c466613de..97e55278940e 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h @@ -115,15 +115,4 @@ bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps, struct dc_transfer_func *output_tf, const struct dc_gamma *ramp, bool mapUserRamp); -bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf, - const struct regamma_lut *regamma, - struct calculate_buffer *cal_buffer, - const struct dc_gamma *ramp); - -bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf, - const struct regamma_lut *regamma, - struct calculate_buffer *cal_buffer, - const struct dc_gamma *ramp); - - #endif /* COLOR_MOD_COLOR_GAMMA_H_ */ -- 2.51.0 From 8b89acc0b2baecfe331f5336e7ff1fcc5a44b062 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 9 Oct 2024 01:33:34 +0100 Subject: [PATCH 10/16] drm/amd/display: Remove unused cm3_helper_translate_curve_to_degamma_hw_format cm3_helper_translate_curve_to_degamma_hw_format() has been unused since it was added in 2020's commit 03f54d7d3448 ("drm/amd/display: Add DCN3 DPP"). Remove it. Signed-off-by: Dr. 
David Alan Gilbert Signed-off-by: Alex Deucher --- .../amd/display/dc/dcn30/dcn30_cm_common.c | 151 ------------------ .../display/dc/dwb/dcn30/dcn30_cm_common.h | 4 - 2 files changed, 155 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c index 1e1038fb04e8..0690c346f2c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c @@ -280,157 +280,6 @@ bool cm3_helper_translate_curve_to_hw_format( return true; } -#define NUM_DEGAMMA_REGIONS 12 - - -bool cm3_helper_translate_curve_to_degamma_hw_format( - const struct dc_transfer_func *output_tf, - struct pwl_params *lut_params) -{ - struct curve_points3 *corner_points; - struct pwl_result_data *rgb_resulted; - struct pwl_result_data *rgb; - struct pwl_result_data *rgb_plus_1; - - int32_t region_start, region_end; - int32_t i; - uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points; - - if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS) - return false; - - corner_points = lut_params->corner_points; - rgb_resulted = lut_params->rgb_resulted; - hw_points = 0; - - memset(lut_params, 0, sizeof(struct pwl_params)); - memset(seg_distr, 0, sizeof(seg_distr)); - - region_start = -NUM_DEGAMMA_REGIONS; - region_end = 0; - - - for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++) - seg_distr[i] = -1; - /* 12 segments - * segments are from 2^-12 to 0 - */ - for (i = 0; i < NUM_DEGAMMA_REGIONS ; i++) - seg_distr[i] = 4; - - for (k = 0; k < MAX_REGIONS_NUMBER; k++) { - if (seg_distr[k] != -1) - hw_points += (1 << seg_distr[k]); - } - - j = 0; - for (k = 0; k < (region_end - region_start); k++) { - increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]); - start_index = (region_start + k + MAX_LOW_POINT) * - NUMBER_SW_SEGMENTS; - for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS; - i += increment) { - if 
(j == hw_points - 1) - break; - if (i >= TRANSFER_FUNC_POINTS) - return false; - rgb_resulted[j].red = output_tf->tf_pts.red[i]; - rgb_resulted[j].green = output_tf->tf_pts.green[i]; - rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; - j++; - } - } - - /* last point */ - start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS; - rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index]; - rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; - rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; - - corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2), - dc_fixpt_from_int(region_start)); - corner_points[0].green.x = corner_points[0].red.x; - corner_points[0].blue.x = corner_points[0].red.x; - corner_points[1].red.x = dc_fixpt_pow(dc_fixpt_from_int(2), - dc_fixpt_from_int(region_end)); - corner_points[1].green.x = corner_points[1].red.x; - corner_points[1].blue.x = corner_points[1].red.x; - - corner_points[0].red.y = rgb_resulted[0].red; - corner_points[0].green.y = rgb_resulted[0].green; - corner_points[0].blue.y = rgb_resulted[0].blue; - - /* see comment above, m_arrPoints[1].y should be the Y value for the - * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1) - */ - corner_points[1].red.y = rgb_resulted[hw_points - 1].red; - corner_points[1].green.y = rgb_resulted[hw_points - 1].green; - corner_points[1].blue.y = rgb_resulted[hw_points - 1].blue; - corner_points[1].red.slope = dc_fixpt_zero; - corner_points[1].green.slope = dc_fixpt_zero; - corner_points[1].blue.slope = dc_fixpt_zero; - - if (output_tf->tf == TRANSFER_FUNCTION_PQ) { - /* for PQ, we want to have a straight line from last HW X point, - * and the slope to be such that we hit 1.0 at 10000 nits. 
- */ - const struct fixed31_32 end_value = - dc_fixpt_from_int(125); - - corner_points[1].red.slope = dc_fixpt_div( - dc_fixpt_sub(dc_fixpt_one, corner_points[1].red.y), - dc_fixpt_sub(end_value, corner_points[1].red.x)); - corner_points[1].green.slope = dc_fixpt_div( - dc_fixpt_sub(dc_fixpt_one, corner_points[1].green.y), - dc_fixpt_sub(end_value, corner_points[1].green.x)); - corner_points[1].blue.slope = dc_fixpt_div( - dc_fixpt_sub(dc_fixpt_one, corner_points[1].blue.y), - dc_fixpt_sub(end_value, corner_points[1].blue.x)); - } - - lut_params->hw_points_num = hw_points; - - k = 0; - for (i = 1; i < MAX_REGIONS_NUMBER; i++) { - if (seg_distr[k] != -1) { - lut_params->arr_curve_points[k].segments_num = - seg_distr[k]; - lut_params->arr_curve_points[i].offset = - lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]); - } - k++; - } - - if (seg_distr[k] != -1) - lut_params->arr_curve_points[k].segments_num = seg_distr[k]; - - rgb = rgb_resulted; - rgb_plus_1 = rgb_resulted + 1; - - i = 1; - while (i != hw_points + 1) { - if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) - rgb_plus_1->red = rgb->red; - if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) - rgb_plus_1->green = rgb->green; - if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) - rgb_plus_1->blue = rgb->blue; - - rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red); - rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); - rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue); - - ++rgb_plus_1; - ++rgb; - ++i; - } - cm3_helper_convert_to_custom_float(rgb_resulted, - lut_params->corner_points, - hw_points, false); - - return true; -} - bool cm3_helper_convert_to_custom_float( struct pwl_result_data *rgb_resulted, struct curve_points3 *corner_points, diff --git a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h index bd98b327a6c7..b86347c9b038 100644 --- a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h +++ 
b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h @@ -63,10 +63,6 @@ bool cm3_helper_translate_curve_to_hw_format( const struct dc_transfer_func *output_tf, struct pwl_params *lut_params, bool fixpoint); -bool cm3_helper_translate_curve_to_degamma_hw_format( - const struct dc_transfer_func *output_tf, - struct pwl_params *lut_params); - bool cm3_helper_convert_to_custom_float( struct pwl_result_data *rgb_resulted, struct curve_points3 *corner_points, -- 2.51.0 From 5fd95dab6094ba0b851767fc460c2806eaafe8bd Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 10 Oct 2024 21:51:54 +0100 Subject: [PATCH 11/16] drm/amd/display: Remove last parts of timing_trace Commit c2c2ce1e9623 ("drm/amd/display: Optimize passive update planes.") removed the last caller of context_timing_trace. Remove it. With that gone, no one is now looking at the 'timing_trace' flag, remove it and all the places that set it. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_debug.c | 42 ------------------- drivers/gpu/drm/amd/display/dc/dc.h | 1 - .../dc/resource/dcn10/dcn10_resource.c | 2 - .../dc/resource/dcn20/dcn20_resource.c | 1 - .../dc/resource/dcn201/dcn201_resource.c | 1 - .../dc/resource/dcn21/dcn21_resource.c | 1 - .../dc/resource/dcn30/dcn30_resource.c | 1 - .../dc/resource/dcn301/dcn301_resource.c | 1 - .../dc/resource/dcn302/dcn302_resource.c | 1 - .../dc/resource/dcn303/dcn303_resource.c | 1 - .../dc/resource/dcn31/dcn31_resource.c | 1 - .../dc/resource/dcn314/dcn314_resource.c | 1 - .../dc/resource/dcn315/dcn315_resource.c | 1 - .../dc/resource/dcn316/dcn316_resource.c | 1 - .../dc/resource/dcn32/dcn32_resource.c | 1 - .../dc/resource/dcn321/dcn321_resource.c | 1 - .../dc/resource/dcn35/dcn35_resource.c | 1 - .../dc/resource/dcn351/dcn351_resource.c | 1 - .../dc/resource/dcn401/dcn401_resource.c | 1 - .../amd/display/include/logger_interface.h | 4 -- 20 files changed, 65 deletions(-) diff --git 
a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c index 801cdbc8117d..0bb25c537243 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c @@ -46,11 +46,6 @@ DC_LOG_IF_TRACE(__VA_ARGS__); \ } while (0) -#define TIMING_TRACE(...) do {\ - if (dc->debug.timing_trace) \ - DC_LOG_SYNC(__VA_ARGS__); \ -} while (0) - #define CLOCK_TRACE(...) do {\ if (dc->debug.clock_trace) \ DC_LOG_BANDWIDTH_CALCS(__VA_ARGS__); \ @@ -306,43 +301,6 @@ void post_surface_trace(struct dc *dc) } -void context_timing_trace( - struct dc *dc, - struct resource_context *res_ctx) -{ - int i; - int h_pos[MAX_PIPES] = {0}, v_pos[MAX_PIPES] = {0}; - struct crtc_position position; - unsigned int underlay_idx = dc->res_pool->underlay_pipe_index; - DC_LOGGER_INIT(dc->ctx->logger); - - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; - /* get_position() returns CRTC vertical/horizontal counter - * hence not applicable for underlay pipe - */ - if (pipe_ctx->stream == NULL || pipe_ctx->pipe_idx == underlay_idx) - continue; - - pipe_ctx->stream_res.tg->funcs->get_position(pipe_ctx->stream_res.tg, &position); - h_pos[i] = position.horizontal_count; - v_pos[i] = position.vertical_count; - } - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; - - if (pipe_ctx->stream == NULL || pipe_ctx->pipe_idx == underlay_idx) - continue; - - TIMING_TRACE("OTG_%d H_tot:%d V_tot:%d H_pos:%d V_pos:%d\n", - pipe_ctx->stream_res.tg->inst, - pipe_ctx->stream->timing.h_total, - pipe_ctx->stream->timing.v_total, - h_pos[i], v_pos[i]); - } -} - void context_clock_trace( struct dc *dc, struct dc_state *context) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 72adbab589f5..76130bbf2fe4 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ 
-868,7 +868,6 @@ struct dc_debug_options { bool sanity_checks; bool max_disp_clk; bool surface_trace; - bool timing_trace; bool clock_trace; bool validation_trace; bool bandwidth_calcs_trace; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c index 05d6d41ef9d3..4f1bd71b9ad9 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c @@ -533,7 +533,6 @@ static const struct dc_debug_options debug_defaults_drv = { .sanity_checks = true, .disable_dmcu = false, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, /* raven smu dones't allow 0 disp clk, @@ -563,7 +562,6 @@ static const struct dc_debug_options debug_defaults_drv = { static const struct dc_debug_options debug_defaults_diags = { .disable_dmcu = false, .force_abm_enable = false, - .timing_trace = true, .clock_trace = true, .disable_stutter = true, .disable_pplib_clock_request = true, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 288189913e1e..189d0c85872e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -706,7 +706,6 @@ static const struct resource_caps res_cap_nv14 = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = false, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index 15180ad71513..d3d67d366523 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c +++ 
b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -600,7 +600,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, .pipe_split_policy = MPC_SPLIT_DYNAMIC, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 14b28841657d..021ba8ac5c8c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -610,7 +610,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = false, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, .min_disp_clk_khz = 100000, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index baa4e2647dad..cd31e4f16c14 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -711,7 +711,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, //No DMCU on DCN30 .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, .pipe_split_policy = MPC_SPLIT_DYNAMIC, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c index d8a7c2cf05de..a9816affd312 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c @@ -682,7 +682,6 @@ static const struct dc_plane_cap plane_cap 
= { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_dpp_power_gate = false, .disable_hubp_power_gate = false, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c index 40c20b04635a..02af8b8f4d27 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c @@ -81,7 +81,6 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, .pipe_split_policy = MPC_SPLIT_DYNAMIC, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c index daf1b65fd088..7002a8dd358a 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c @@ -82,7 +82,6 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, .pipe_split_policy = MPC_SPLIT_AVOID, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 36bb26182e11..f71a5b8286b2 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -858,7 +858,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_DYNAMIC, 
diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index 58a5fbcf22bf..8aa10da68432 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -876,7 +876,6 @@ static const struct dc_debug_options debug_defaults_drv = { .replay_skip_crtc_disabled = true, .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_dpp_power_gate = false, .disable_hubp_power_gate = false, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index 3acad708c31b..6c3295259a81 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -858,7 +858,6 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = true, /*hw not support it*/ .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_DYNAMIC, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c index ce56f5d162c0..6edaaadcb173 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c @@ -853,7 +853,6 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = true, /*hw not support it*/ .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_DYNAMIC, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 
aaaa888d112d..01d1a11d5545 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -689,7 +689,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_AVOID, // Due to CRB, no need to MPC split anymore diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 35acc13cb5a9..5cb74fd9cb7d 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -686,7 +686,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_AVOID, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 795f2c71c70f..6cc2960b6104 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -712,7 +712,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_AVOID, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 0b8dc2eff596..d87e2641cda1 100644 --- 
a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -692,7 +692,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_AVOID, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 306b4117e219..db93bac247c0 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -685,7 +685,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_AVOID, diff --git a/drivers/gpu/drm/amd/display/include/logger_interface.h b/drivers/gpu/drm/amd/display/include/logger_interface.h index 02c23b04d34b..058f882d5bdd 100644 --- a/drivers/gpu/drm/amd/display/include/logger_interface.h +++ b/drivers/gpu/drm/amd/display/include/logger_interface.h @@ -52,10 +52,6 @@ void update_surface_trace( void post_surface_trace(struct dc *dc); -void context_timing_trace( - struct dc *dc, - struct resource_context *res_ctx); - void context_clock_trace( struct dc *dc, struct dc_state *context); -- 2.51.0 From efe6a8774375ddcbdd46fb920be55cc2d0120836 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 14 Oct 2024 11:58:34 -0400 Subject: [PATCH 12/16] drm/amdgpu: fix fairness in enforce isolation handling Make sure KFD gets a turn when serializing access to the GC IP. Currently non-KFD jobs can starve KFD if they submit often enough. 
This patch prevents that by stalling non-KFD if its time period has elapsed. v2: fix units v3: check enablement properly Acked-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 53 ++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 + 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 607998d8b1d5..7645e498faa4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -118,7 +118,7 @@ #define MAX_GPU_INSTANCE 64 -#define GFX_SLICE_PERIOD msecs_to_jiffies(250) +#define GFX_SLICE_PERIOD_MS 250 struct amdgpu_gpu_instance { struct amdgpu_device *adev; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index e96984c53e72..b8cc4b146bdc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1752,7 +1752,7 @@ static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, if (adev->gfx.kfd_sch_req_count[idx] == 0 && adev->gfx.kfd_sch_inactive[idx]) { schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, - GFX_SLICE_PERIOD); + msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx])); } } else { if (adev->gfx.kfd_sch_req_count[idx] == 0) { @@ -1807,8 +1807,9 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]); } if (fences) { + /* we've already had our timeslice, so let's wrap this up */ schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, - GFX_SLICE_PERIOD); + msecs_to_jiffies(1)); } else { /* Tell KFD to resume the runqueue */ if (adev->kfd.init_complete) { @@ -1821,6 +1822,51 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) mutex_unlock(&adev->enforce_isolation_mutex); } +static void 
+amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, + u32 idx) +{ + unsigned long cjiffies; + bool wait = false; + + mutex_lock(&adev->enforce_isolation_mutex); + if (adev->enforce_isolation[idx]) { + /* set the initial values if nothing is set */ + if (!adev->gfx.enforce_isolation_jiffies[idx]) { + adev->gfx.enforce_isolation_jiffies[idx] = jiffies; + adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS; + } + /* Make sure KFD gets a chance to run */ + if (amdgpu_amdkfd_compute_active(adev, idx)) { + cjiffies = jiffies; + if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) { + cjiffies -= adev->gfx.enforce_isolation_jiffies[idx]; + if ((jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS)) { + /* if our time is up, let KGD work drain before scheduling more */ + wait = true; + /* reset the timer period */ + adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS; + } else { + /* set the timer period to what's left in our time slice */ + adev->gfx.enforce_isolation_time[idx] = + GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies); + } + } else { + /* if jiffies wrap around we will just wait a little longer */ + adev->gfx.enforce_isolation_jiffies[idx] = jiffies; + } + } else { + /* if there is no KFD work, then set the full slice period */ + adev->gfx.enforce_isolation_jiffies[idx] = jiffies; + adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS; + } + } + mutex_unlock(&adev->enforce_isolation_mutex); + + if (wait) + msleep(GFX_SLICE_PERIOD_MS); +} + void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -1837,6 +1883,9 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) if (idx >= MAX_XCP) return; + /* Don't submit more work until KFD has had some time */ + amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx); + mutex_lock(&adev->enforce_isolation_mutex); if (adev->enforce_isolation[idx]) { if (adev->kfd.init_complete) diff 
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index f710178a21bc..af9dbd760fee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -472,6 +472,8 @@ struct amdgpu_gfx { struct mutex kfd_sch_mutex; u64 kfd_sch_req_count[MAX_XCP]; bool kfd_sch_inactive[MAX_XCP]; + unsigned long enforce_isolation_jiffies[MAX_XCP]; + unsigned long enforce_isolation_time[MAX_XCP]; }; struct amdgpu_gfx_ras_reg_entry { -- 2.51.0 From 35984fd4a093ccb9e0bb82db4cac5c1bf2df7d93 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Oct 2024 14:13:58 -0400 Subject: [PATCH 13/16] drm/amdgpu: add ring reset messages Add messages to make it clear when a per ring reset happens. This is helpful for debugging and aligns with other reset methods. v2: add ring name in success/fail messages (Lijo) Reviewed-by: Lijo Lazar Reviewed-by: Kent Russell (v1) Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 102742f1faa2..cbae2fc7b94e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -137,6 +137,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) /* attempt a per ring reset */ if (amdgpu_gpu_recovery && ring->funcs->reset) { + dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name); /* stop the scheduler, but don't mess with the * bad job yet because if ring reset fails * we'll fall back to full GPU reset. 
@@ -150,8 +151,10 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) amdgpu_fence_driver_force_completion(ring); if (amdgpu_ring_sched_ready(ring)) drm_sched_start(&ring->sched); + dev_err(adev->dev, "Ring %s reset success\n", ring->sched.name); goto exit; } + dev_err(adev->dev, "Ring %s reset failure\n", ring->sched.name); } if (amdgpu_device_should_recover_gpu(ring->adev)) { -- 2.51.0 From a1144da794adedb9447437c57d69add56494309d Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Wed, 30 Oct 2024 04:27:58 +0800 Subject: [PATCH 14/16] drm/amdgpu: Fix the memory allocation issue in amdgpu_discovery_get_nps_info() Fix two issues with memory allocation in amdgpu_discovery_get_nps_info() for mem_ranges: - Add a check for allocation failure to avoid dereferencing a null pointer. - As suggested by Christophe, use kvcalloc() for memory allocation, which checks for multiplication overflow. Additionally, assign the output parameters nps_type and range_cnt after the kvcalloc() call to prevent modifying the output parameters in case of an error return. 
Fixes: b194d21b9bcc ("drm/amdgpu: Use NPS ranges from discovery table") Suggested-by: Christophe JAILLET Reviewed-by: Lijo Lazar Signed-off-by: Li Huafei Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 73f4d56c5de4..1040204ac8b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1795,11 +1795,13 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev, switch (le16_to_cpu(nps_info->v1.header.version_major)) { case 1: + mem_ranges = kvcalloc(nps_info->v1.count, + sizeof(*mem_ranges), + GFP_KERNEL); + if (!mem_ranges) + return -ENOMEM; *nps_type = nps_info->v1.nps_type; *range_cnt = nps_info->v1.count; - mem_ranges = kvzalloc( - *range_cnt * sizeof(struct amdgpu_gmc_memrange), - GFP_KERNEL); for (i = 0; i < *range_cnt; i++) { mem_ranges[i].base_address = nps_info->v1.instance_info[i].base_address; -- 2.51.0 From f2863650384b32f1a511e338f102b819044ca930 Mon Sep 17 00:00:00 2001 From: Yunxiang Li Date: Thu, 24 Oct 2024 10:23:38 +0100 Subject: [PATCH 15/16] drm/amdgpu: make drm-memory-* report resident memory MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The old behavior reports the resident memory usage for this key and the documentation says so as well. However this was accidentally changed to include buffers that were evicted. 
Fixes: 04bdba46542c ("drm/amdgpu: Use drm_print_memory_stats helper from fdinfo") Signed-off-by: Yunxiang Li Reviewed-by: Tvrtko Ursulin Acked-by: Christian König Signed-off-by: Tvrtko Ursulin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 7 ++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 - 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index 00a4ab082459..8281dd45faaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_vm.h" @@ -95,11 +96,11 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) /* Legacy amdgpu keys, alias to drm-resident-memory-: */ drm_printf(p, "drm-memory-vram:\t%llu KiB\n", - stats[TTM_PL_VRAM].total/1024UL); + stats[TTM_PL_VRAM].drm.resident/1024UL); drm_printf(p, "drm-memory-gtt: \t%llu KiB\n", - stats[TTM_PL_TT].total/1024UL); + stats[TTM_PL_TT].drm.resident/1024UL); drm_printf(p, "drm-memory-cpu: \t%llu KiB\n", - stats[TTM_PL_SYSTEM].total/1024UL); + stats[TTM_PL_SYSTEM].drm.resident/1024UL); /* Amdgpu specific memory accounting keys: */ drm_printf(p, "amd-memory-visible-vram:\t%llu KiB\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 1e6a044e3143..d41686a8e5ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1224,7 +1224,6 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, /* DRM stats common fields: */ - stats[type].total += size; if (drm_gem_object_is_shared_for_memory_stats(obj)) stats[type].drm.shared += size; else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 7260349917ef..a5653f474f85 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -142,7 +142,6 @@ struct amdgpu_bo_vm { struct amdgpu_mem_stats { struct drm_memory_stats drm; - uint64_t total; uint64_t visible; uint64_t evicted; uint64_t evicted_visible; -- 2.51.0 From fdee0872a29fe86e8450ab00838b9c0533388733 Mon Sep 17 00:00:00 2001 From: Yunxiang Li Date: Thu, 24 Oct 2024 10:23:39 +0100 Subject: [PATCH 16/16] drm/amdgpu: stop tracking visible memory stats MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Since on modern systems all of vram can be made visible anyway, to simplify the new implementation, drop tracking how much memory is visible for now. If this is really needed we can add it back on top of the new implementation, or just report all the BOs as visible. Signed-off-by: Yunxiang Li Reviewed-by: Christian König Signed-off-by: Tvrtko Ursulin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 6 ------ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 12 ++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 10 ---------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 11 ++++++++++- 4 files changed, 12 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index 8281dd45faaa..7a9573958d87 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -103,16 +103,10 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) stats[TTM_PL_SYSTEM].drm.resident/1024UL); /* Amdgpu specific memory accounting keys: */ - drm_printf(p, "amd-memory-visible-vram:\t%llu KiB\n", - stats[TTM_PL_VRAM].visible/1024UL); drm_printf(p, "amd-evicted-vram:\t%llu KiB\n", stats[TTM_PL_VRAM].evicted/1024UL); - drm_printf(p, "amd-evicted-visible-vram:\t%llu KiB\n", - stats[TTM_PL_VRAM].evicted_visible/1024UL); drm_printf(p, "amd-requested-vram:\t%llu KiB\n", 
stats[TTM_PL_VRAM].requested/1024UL); - drm_printf(p, "amd-requested-visible-vram:\t%llu KiB\n", - stats[TTM_PL_VRAM].requested_visible/1024UL); drm_printf(p, "amd-requested-gtt:\t%llu KiB\n", stats[TTM_PL_TT].requested/1024UL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index d41686a8e5ec..0d3fb6b4212e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -40,6 +40,7 @@ #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" #include "amdgpu_vram_mgr.h" +#include "amdgpu_vm.h" /** * DOC: amdgpu_object @@ -1236,23 +1237,14 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, stats[type].drm.active += size; else if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) stats[type].drm.purgeable += size; - - if (type == TTM_PL_VRAM && amdgpu_res_cpu_visible(adev, res)) - stats[type].visible += size; } /* amdgpu specific stats: */ if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) { stats[TTM_PL_VRAM].requested += size; - if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) - stats[TTM_PL_VRAM].requested_visible += size; - - if (type != TTM_PL_VRAM) { + if (type != TTM_PL_VRAM) stats[TTM_PL_VRAM].evicted += size; - if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) - stats[TTM_PL_VRAM].evicted_visible += size; - } } else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) { stats[TTM_PL_TT].requested += size; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index a5653f474f85..be6769852ece 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -139,16 +139,6 @@ struct amdgpu_bo_vm { struct amdgpu_vm_bo_base entries[]; }; -struct amdgpu_mem_stats { - struct drm_memory_stats drm; - - uint64_t visible; - uint64_t evicted; - uint64_t evicted_visible; - uint64_t requested; - uint64_t requested_visible; -}; - static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct 
ttm_buffer_object *tbo) { return container_of(tbo, struct amdgpu_bo, tbo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index c5b41e3ed14f..5d119ac26c4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -42,7 +42,6 @@ struct amdgpu_bo_va; struct amdgpu_job; struct amdgpu_bo_list_entry; struct amdgpu_bo_vm; -struct amdgpu_mem_stats; /* * GPUVM handling @@ -322,6 +321,16 @@ struct amdgpu_vm_fault_info { unsigned int vmhub; }; +struct amdgpu_mem_stats { + struct drm_memory_stats drm; + + /* buffers that requested this placement */ + uint64_t requested; + /* buffers that requested this placement + * but are currently evicted */ + uint64_t evicted; +}; + struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; -- 2.51.0