From b02d6fd855633a3f34548612eb36a5bf5e89a4d0 Mon Sep 17 00:00:00 2001 From: Yifan Zha Date: Fri, 17 Jan 2025 18:56:53 +0800 Subject: [PATCH 01/16] drm/amd/pm: Update smu_v13_0_0 SRIOV VF flag in msg mapping table [Why] Under SRIOV VF, the driver sends a VF-unsupported SMU message, causing a failure. [How] Update the smu_v13_0_0 message mapping table based on PMFW. Signed-off-by: Yifan Zha Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 0551a3311217..985355bf78b2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -126,7 +126,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(DisableSmuFeaturesHigh, PPSMC_MSG_DisableSmuFeaturesHigh, 1), MSG_MAP(GetEnabledSmuFeaturesLow, PPSMC_MSG_GetRunningSmuFeaturesLow, 1), MSG_MAP(GetEnabledSmuFeaturesHigh, PPSMC_MSG_GetRunningSmuFeaturesHigh, 1), - MSG_MAP(SetWorkloadMask, PPSMC_MSG_SetWorkloadMask, 1), + MSG_MAP(SetWorkloadMask, PPSMC_MSG_SetWorkloadMask, 0), MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 0), MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1), MSG_MAP(SetDriverDramAddrLow, PPSMC_MSG_SetDriverDramAddrLow, 1), @@ -140,7 +140,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(ExitBaco, PPSMC_MSG_ExitBaco, 0), MSG_MAP(SetSoftMinByFreq, PPSMC_MSG_SetSoftMinByFreq, 1), MSG_MAP(SetSoftMaxByFreq, PPSMC_MSG_SetSoftMaxByFreq, 1), - MSG_MAP(SetHardMinByFreq, PPSMC_MSG_SetHardMinByFreq, 1), + MSG_MAP(SetHardMinByFreq, PPSMC_MSG_SetHardMinByFreq, 0), MSG_MAP(SetHardMaxByFreq, PPSMC_MSG_SetHardMaxByFreq, 0), MSG_MAP(GetMinDpmFreq, PPSMC_MSG_GetMinDpmFreq, 1), MSG_MAP(GetMaxDpmFreq, PPSMC_MSG_GetMaxDpmFreq, 1), @@ -149,7 +149,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 0), MSG_MAP(PowerUpJpeg, PPSMC_MSG_PowerUpJpeg, 0), MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg, 0), - MSG_MAP(GetDcModeMaxDpmFreq, PPSMC_MSG_GetDcModeMaxDpmFreq, 1), + MSG_MAP(GetDcModeMaxDpmFreq, PPSMC_MSG_GetDcModeMaxDpmFreq, 0), MSG_MAP(OverridePcieParameters, PPSMC_MSG_OverridePcieParameters, 0), MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0), MSG_MAP(DramLogSetDramAddrLow, PPSMC_MSG_DramLogSetDramAddrLow, 0), -- 2.51.0 From b2d97a134c02ee14fcdc03b544ccb10449e853c3 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Tue, 21 Jan 2025 19:35:05 +0800 Subject: [PATCH 02/16] drm/amd/pm: Update metrics tbl struct for smu_v_13.0.6 Update the metrics table struct names for smu_v13_0_6 and keep them versioned (V0/V1) Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- .../pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h | 6 +- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 56 +++++++++---------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index 274b3e1cc4fb..ff9f7d4e11df 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -129,6 +129,7 @@ typedef enum { #define SMU_METRICS_TABLE_VERSION 0xF +// Unified metrics table for smu_v13_0_6 typedef
struct __attribute__((packed, aligned(4))) { uint32_t AccumulationCounter; @@ -241,8 +242,9 @@ typedef struct __attribute__((packed, aligned(4))) { //Total App Clock Counter uint64_t GfxclkBelowHostLimitAcc[8]; -} MetricsTableX_t; +} MetricsTableV0_t; +// Metrics table for smu_v13_0_6 APUs typedef struct __attribute__((packed, aligned(4))) { uint32_t AccumulationCounter; @@ -333,7 +335,7 @@ typedef struct __attribute__((packed, aligned(4))) { // VCN/JPEG ACTIVITY uint32_t VcnBusy[4]; uint32_t JpegBusy[32]; -} MetricsTableA_t; +} MetricsTableV1_t; #define SMU_VF_METRICS_TABLE_VERSION 0x5 diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index fa11e30bff24..dbfab5d503b5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -273,7 +273,7 @@ struct PPTable_t { #define SMUQ10_FRAC(x) ((x) & 0x3ff) #define SMUQ10_ROUND(x) ((SMUQ10_TO_UINT(x)) + ((SMUQ10_FRAC(x)) >= 0x200)) #define GET_METRIC_FIELD(field, flag) ((flag) ?\ - (metrics_a->field) : (metrics_x->field)) + (metrics_v1->field) : (metrics_v0->field)) struct smu_v13_0_6_dpm_map { enum smu_clk_type clk_type; @@ -514,7 +514,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, - max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), + max(sizeof(MetricsTableV0_t), sizeof(MetricsTableV1_t)), PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); @@ -522,8 +522,8 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); - smu_table->metrics_table = kzalloc(max(sizeof(MetricsTableX_t), - sizeof(MetricsTableA_t)), GFP_KERNEL); + smu_table->metrics_table = kzalloc(max(sizeof(MetricsTableV0_t), + sizeof(MetricsTableV1_t)), GFP_KERNEL); if (!smu_table->metrics_table) return -ENOMEM; smu_table->metrics_time = 0; @@ -753,8 +753,8 @@ static ssize_t smu_v13_0_6_get_pm_metrics(struct smu_context *smu, static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; - MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table; - MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; + MetricsTableV0_t *metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table; + MetricsTableV1_t *metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table; struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; bool flag = !smu_v13_0_6_cap_supported(smu, SMU_CAP(UNI_METRICS)); @@ -1128,8 +1128,8 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, uint32_t *value) { struct smu_table_context *smu_table = &smu->smu_table; - MetricsTableX_t *metrics_x = (MetricsTableX_t *)smu_table->metrics_table; - MetricsTableA_t *metrics_a = (MetricsTableA_t *)smu_table->metrics_table; + MetricsTableV0_t *metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table; + MetricsTableV1_t *metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table; bool flag = !smu_v13_0_6_cap_supported(smu, SMU_CAP(UNI_METRICS)); struct amdgpu_device *adev = smu->adev; int ret = 0; @@ -2479,21 +2479,21 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table bool flag = !smu_v13_0_6_cap_supported(smu, SMU_CAP(UNI_METRICS)); int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; - MetricsTableX_t *metrics_x; - MetricsTableA_t
*metrics_a; + MetricsTableV0_t *metrics_v0; + MetricsTableV1_t *metrics_v1; struct amdgpu_xcp *xcp; u16 link_width_level; u32 inst_mask; bool per_inst; - metrics_x = kzalloc(max(sizeof(MetricsTableX_t), sizeof(MetricsTableA_t)), GFP_KERNEL); - ret = smu_v13_0_6_get_metrics_table(smu, metrics_x, true); + metrics_v0 = kzalloc(max(sizeof(MetricsTableV0_t), sizeof(MetricsTableV1_t)), GFP_KERNEL); + ret = smu_v13_0_6_get_metrics_table(smu, metrics_v0, true); if (ret) { - kfree(metrics_x); + kfree(metrics_v0); return ret; } - metrics_a = (MetricsTableA_t *)metrics_x; + metrics_v1 = (MetricsTableV1_t *)metrics_v0; smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 7); @@ -2559,9 +2559,9 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table * for pf from registers */ if (smu_v13_0_6_cap_supported(smu, SMU_CAP(PCIE_METRICS))) { - gpu_metrics->pcie_link_width = metrics_x->PCIeLinkWidth; + gpu_metrics->pcie_link_width = metrics_v0->PCIeLinkWidth; gpu_metrics->pcie_link_speed = - pcie_gen_to_speed(metrics_x->PCIeLinkSpeed); + pcie_gen_to_speed(metrics_v0->PCIeLinkSpeed); } else if (!amdgpu_sriov_vf(adev)) { link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu); if (link_width_level > MAX_LINK_WIDTH) @@ -2574,22 +2574,22 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table } gpu_metrics->pcie_bandwidth_acc = - SMUQ10_ROUND(metrics_x->PcieBandwidthAcc[0]); + SMUQ10_ROUND(metrics_v0->PcieBandwidthAcc[0]); gpu_metrics->pcie_bandwidth_inst = - SMUQ10_ROUND(metrics_x->PcieBandwidth[0]); + SMUQ10_ROUND(metrics_v0->PcieBandwidth[0]); gpu_metrics->pcie_l0_to_recov_count_acc = - metrics_x->PCIeL0ToRecoveryCountAcc; + metrics_v0->PCIeL0ToRecoveryCountAcc; gpu_metrics->pcie_replay_count_acc = - metrics_x->PCIenReplayAAcc; + metrics_v0->PCIenReplayAAcc; gpu_metrics->pcie_replay_rover_count_acc = - metrics_x->PCIenReplayARolloverCountAcc; + metrics_v0->PCIenReplayARolloverCountAcc; gpu_metrics->pcie_nak_sent_count_acc = - metrics_x->PCIeNAKSentCountAcc; + metrics_v0->PCIeNAKSentCountAcc; gpu_metrics->pcie_nak_rcvd_count_acc = - metrics_x->PCIeNAKReceivedCountAcc; + metrics_v0->PCIeNAKReceivedCountAcc; if (smu_v13_0_6_cap_supported(smu, SMU_CAP(OTHER_END_METRICS))) gpu_metrics->pcie_lc_perf_other_end_recovery = - metrics_x->PCIeOtherEndRecoveryAcc; + metrics_v0->PCIeOtherEndRecoveryAcc; } @@ -2639,14 +2639,14 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table for_each_inst(k, inst_mask) { inst = GET_INST(GC, k); gpu_metrics->xcp_stats[i].gfx_busy_inst[idx] = - SMUQ10_ROUND(metrics_x->GfxBusy[inst]); + SMUQ10_ROUND(metrics_v0->GfxBusy[inst]); gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] = - SMUQ10_ROUND(metrics_x->GfxBusyAcc[inst]); + SMUQ10_ROUND(metrics_v0->GfxBusyAcc[inst]); if (smu_v13_0_6_cap_supported( smu, SMU_CAP(HST_LIMIT_METRICS))) gpu_metrics->xcp_stats[i].gfx_below_host_limit_acc[idx] = - SMUQ10_ROUND(metrics_x->GfxclkBelowHostLimitAcc + SMUQ10_ROUND(metrics_v0->GfxclkBelowHostLimitAcc [inst]); idx++; } @@ -2659,7 +2659,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp, flag); *table = (void *)gpu_metrics; - kfree(metrics_x); + kfree(metrics_v0); return sizeof(*gpu_metrics); } -- 2.51.0 From 00117e3eb11441f398ef01c16d349bbfea4bcc52 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Tue, 21 Jan 2025 20:00:18 +0800 Subject: [PATCH 03/16] drm/amd/pm: Add metrics table header for smu_v13_0_12 Add metrics table 
header for smu_v13_0_12 as metrics version V2 Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- .../pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h | 109 ++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h index ff9f7d4e11df..f8ed45857878 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h @@ -337,6 +337,115 @@ typedef struct __attribute__((packed, aligned(4))) { uint32_t JpegBusy[32]; } MetricsTableV1_t; +// Metrics table for smu_v13_0_12 +typedef struct __attribute__((packed, aligned(4))) { + uint64_t AccumulationCounter; + + //TEMPERATURE + uint32_t MaxSocketTemperature; + uint32_t MaxVrTemperature; + uint32_t MaxHbmTemperature; + uint64_t MaxSocketTemperatureAcc; + uint64_t MaxVrTemperatureAcc; + uint64_t MaxHbmTemperatureAcc; + + //POWER + uint32_t SocketPowerLimit; + uint32_t MaxSocketPowerLimit; + uint32_t SocketPower; + + //ENERGY + uint64_t Timestamp; + uint64_t SocketEnergyAcc; + uint64_t CcdEnergyAcc; + uint64_t XcdEnergyAcc; + uint64_t AidEnergyAcc; + uint64_t HbmEnergyAcc; + + //FREQUENCY + uint32_t GfxclkFrequencyLimit; + uint32_t FclkFrequency; + uint32_t UclkFrequency; + uint32_t SocclkFrequency[4]; + uint32_t VclkFrequency[4]; + uint32_t DclkFrequency[4]; + uint32_t LclkFrequency[4]; + uint64_t GfxclkFrequencyAcc[8]; + + //FREQUENCY RANGE + uint32_t MaxGfxclkFrequency; + uint32_t MinGfxclkFrequency; + uint32_t FclkFrequencyTable[4]; + uint32_t UclkFrequencyTable[4]; + uint32_t SocclkFrequencyTable[4]; + uint32_t VclkFrequencyTable[4]; + uint32_t DclkFrequencyTable[4]; + uint32_t LclkFrequencyTable[4]; + uint32_t MaxLclkDpmRange; + uint32_t MinLclkDpmRange; + + //XGMI + uint32_t XgmiWidth; + uint32_t XgmiBitrate; + uint64_t XgmiReadBandwidthAcc[8]; + uint64_t XgmiWriteBandwidthAcc[8]; + + //ACTIVITY + uint32_t SocketGfxBusy; + uint32_t DramBandwidthUtilization; + uint64_t SocketC0ResidencyAcc; + uint64_t SocketGfxBusyAcc; + uint64_t DramBandwidthAcc; + uint32_t MaxDramBandwidth; + uint64_t DramBandwidthUtilizationAcc; + uint64_t PcieBandwidthAcc[4]; + + //THROTTLERS + uint32_t ProchotResidencyAcc; + uint32_t PptResidencyAcc; + uint32_t SocketThmResidencyAcc; + uint32_t VrThmResidencyAcc; + uint32_t HbmThmResidencyAcc; + uint32_t GfxLockXCDMak; + + // New Items at end to maintain driver compatibility + uint32_t GfxclkFrequency[8]; + + //PSNs + uint64_t PublicSerialNumber_AID[4]; + uint64_t PublicSerialNumber_XCD[8]; + + //XGMI Data transfer size + uint64_t XgmiReadDataSizeAcc[8];//in KByte + uint64_t XgmiWriteDataSizeAcc[8];//in KByte + + //PCIE BW Data and error count + uint32_t PcieBandwidth[4]; + uint32_t PCIeL0ToRecoveryCountAcc; // The Pcie counter itself is accumulated + uint32_t PCIenReplayAAcc; // The Pcie counter itself is accumulated + uint32_t PCIenReplayARolloverCountAcc; // The Pcie counter itself is accumulated + uint32_t PCIeNAKSentCountAcc; // The Pcie counter itself is accumulated + uint32_t PCIeNAKReceivedCountAcc; // The Pcie counter itself is accumulated + + // VCN/JPEG ACTIVITY + uint32_t VcnBusy[4]; + uint32_t JpegBusy[32]; + + // PCIE LINK Speed and width + uint32_t PCIeLinkSpeed; + uint32_t PCIeLinkWidth; + + // PER XCD ACTIVITY + uint32_t GfxBusy[8]; + uint64_t GfxBusyAcc[8]; + + //PCIE BW Data and error count + uint32_t PCIeOtherEndRecoveryAcc; // The Pcie counter itself is accumulated + + //Total App Clock
Counter + uint64_t GfxclkBelowHostLimitAcc[8]; +} MetricsTableV2_t; + #define SMU_VF_METRICS_TABLE_VERSION 0x5 typedef struct __attribute__((packed, aligned(4))) { -- 2.51.0 From ca7a75183b997420cf447f638962b8c92ba24676 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Tue, 21 Jan 2025 20:34:53 +0800 Subject: [PATCH 04/16] drm/amd/pm: Add SMUv13.0.12 PPT interface Add SMUv13.0.12 PPT interface to fetch dpm features Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- .../pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h | 138 ++++++++++++++++++ drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 + drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile | 2 +- .../drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c | 101 +++++++++++++ .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 6 +- 5 files changed, 247 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h create mode 100644 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h new file mode 100644 index 000000000000..4a1256d29d62 --- /dev/null +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h @@ -0,0 +1,138 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef SMU_13_0_12_PMFW_H +#define SMU_13_0_12_PMFW_H + +#define NUM_VCLK_DPM_LEVELS 4 +#define NUM_DCLK_DPM_LEVELS 4 +#define NUM_SOCCLK_DPM_LEVELS 4 +#define NUM_LCLK_DPM_LEVELS 4 +#define NUM_UCLK_DPM_LEVELS 4 +#define NUM_FCLK_DPM_LEVELS 4 +#define NUM_XGMI_DPM_LEVELS 2 +#define NUM_CXL_BITRATES 4 +#define NUM_PCIE_BITRATES 4 +#define NUM_XGMI_BITRATES 4 +#define NUM_XGMI_WIDTHS 3 +#define NUM_TDP_GROUPS 4 +#define NUM_SOC_P2S_TABLES 6 +#define NUM_GFX_P2S_TABLES 8 +#define NUM_PSM_DIDT_THRESHOLDS 3 + +typedef enum { +/*0*/ FEATURE_DATA_CALCULATION = 0, +/*1*/ FEATURE_DPM_FCLK = 1, +/*2*/ FEATURE_DPM_GFXCLK = 2, +/*3*/ FEATURE_DPM_LCLK = 3, +/*4*/ FEATURE_DPM_SOCCLK = 4, +/*5*/ FEATURE_DPM_UCLK = 5, +/*6*/ FEATURE_DPM_VCN = 6, +/*7*/ FEATURE_DPM_XGMI = 7, +/*8*/ FEATURE_DS_FCLK = 8, +/*9*/ FEATURE_DS_GFXCLK = 9, +/*10*/ FEATURE_DS_LCLK = 10, +/*11*/ FEATURE_DS_MP0CLK = 11, +/*12*/ FEATURE_DS_MP1CLK = 12, +/*13*/ FEATURE_DS_MPIOCLK = 13, +/*14*/ FEATURE_DS_SOCCLK = 14, +/*15*/ FEATURE_DS_VCN = 15, +/*16*/ FEATURE_APCC_DFLL = 16, +/*17*/ FEATURE_APCC_PLUS = 17, +/*18*/ FEATURE_PPT = 18, +/*19*/ FEATURE_TDC = 19, +/*20*/ FEATURE_THERMAL = 20, +/*21*/ FEATURE_SOC_PCC = 21, +/*22*/ FEATURE_PROCHOT = 22, +/*23*/ FEATURE_FDD_AID_HBM = 23, +/*24*/ FEATURE_FDD_AID_SOC = 24, +/*25*/ FEATURE_FDD_XCD_EDC = 25, +/*26*/ FEATURE_FDD_XCD_XVMIN = 26, +/*27*/ FEATURE_FW_CTF = 27, +/*28*/ FEATURE_SMU_CG = 28, +/*29*/ FEATURE_PSI7 = 29, +/*30*/ FEATURE_XGMI_PER_LINK_PWR_DOWN = 30, +/*31*/ FEATURE_SOC_DC_RTC = 31, +/*32*/ FEATURE_GFX_DC_RTC = 32, +/*33*/ FEATURE_DVM_MIN_PSM = 33, +/*34*/ FEATURE_PRC = 34, +/*35*/ FEATURE_PSM_SQ_THROTTLER = 35, +/*36*/ FEATURE_PIT = 36, +/*37*/ FEATURE_DVO = 37, +/*38*/ FEATURE_XVMINORPSM_CLKSTOP_DS = 38, + +/*39*/ NUM_FEATURES = 39 +} FEATURE_LIST_e; + +//enum for MPIO PCIe gen speed msgs +typedef enum { + PCIE_LINK_SPEED_INDEX_TABLE_GEN1, + PCIE_LINK_SPEED_INDEX_TABLE_GEN2, + PCIE_LINK_SPEED_INDEX_TABLE_GEN3, + PCIE_LINK_SPEED_INDEX_TABLE_GEN4, + PCIE_LINK_SPEED_INDEX_TABLE_GEN4_ESM, + PCIE_LINK_SPEED_INDEX_TABLE_GEN5, + PCIE_LINK_SPEED_INDEX_TABLE_COUNT +} PCIE_LINK_SPEED_INDEX_TABLE_e; + +typedef enum { + GFX_GUARDBAND_OFFSET_0, + GFX_GUARDBAND_OFFSET_1, + GFX_GUARDBAND_OFFSET_2, + GFX_GUARDBAND_OFFSET_3, + GFX_GUARDBAND_OFFSET_4, + GFX_GUARDBAND_OFFSET_5, + GFX_GUARDBAND_OFFSET_6, + GFX_GUARDBAND_OFFSET_7, + GFX_GUARDBAND_OFFSET_COUNT +} GFX_GUARDBAND_OFFSET_e; + +typedef enum { + GFX_DVM_MARGINHI_0, + GFX_DVM_MARGINHI_1, + GFX_DVM_MARGINHI_2, + GFX_DVM_MARGINHI_3, + GFX_DVM_MARGINHI_4, + GFX_DVM_MARGINHI_5, + GFX_DVM_MARGINHI_6, + GFX_DVM_MARGINHI_7, + GFX_DVM_MARGINLO_0, + GFX_DVM_MARGINLO_1, + GFX_DVM_MARGINLO_2, + GFX_DVM_MARGINLO_3, + GFX_DVM_MARGINLO_4, + GFX_DVM_MARGINLO_5, + GFX_DVM_MARGINLO_6, + GFX_DVM_MARGINLO_7, + GFX_DVM_MARGIN_COUNT +} GFX_DVM_MARGIN_e; + +#define SMU_VF_METRICS_TABLE_VERSION 0x3 + +typedef struct __attribute__((packed, aligned(4))) { + uint32_t AccumulationCounter; + uint32_t InstGfxclk_TargFreq; + uint64_t AccGfxclk_TargFreq; + uint64_t AccGfxRsmuDpm_Busy; +} VfMetricsTable_t; + +#endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 8d4a96e23326..31166974746f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -306,5 +306,7 @@ int smu_v13_0_get_boot_freq_by_index(struct smu_context *smu, uint32_t *value); void smu_v13_0_interrupt_work(struct smu_context *smu); +bool smu_v13_0_12_is_dpm_running(struct 
smu_context *smu); +extern const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[]; #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile b/drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile index 7f3493b6c53c..51f1fa9789ab 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/Makefile @@ -24,7 +24,7 @@ # It provides the smu management services for the driver. SMU13_MGR = smu_v13_0.o aldebaran_ppt.o yellow_carp_ppt.o smu_v13_0_0_ppt.o smu_v13_0_4_ppt.o \ - smu_v13_0_5_ppt.o smu_v13_0_7_ppt.o smu_v13_0_6_ppt.o + smu_v13_0_5_ppt.o smu_v13_0_7_ppt.o smu_v13_0_6_ppt.o smu_v13_0_12_ppt.o AMD_SWSMU_SMU13MGR = $(addprefix $(AMD_SWSMU_PATH)/smu13/,$(SMU13_MGR)) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c new file mode 100644 index 000000000000..86852e738837 --- /dev/null +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c @@ -0,0 +1,101 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#define SWSMU_CODE_LAYER_L2 + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_smu.h" +#include "smu_v13_0_12_pmfw.h" +#include "smu_v13_0_6_ppt.h" +#include "smu_v13_0.h" +#include "amdgpu_xgmi.h" +#include <linux/pci.h> +#include "smu_cmn.h" + +#undef MP1_Public +#undef smnMP1_FIRMWARE_FLAGS + +/* + * DO NOT use these for err/warn/info/debug messages. + * Use dev_err, dev_warn, dev_info and dev_dbg instead. + * They are more MGPU friendly.
+ */ +#undef pr_err +#undef pr_warn +#undef pr_info +#undef pr_debug + +#define SMU_13_0_12_FEA_MAP(smu_feature, smu_13_0_12_feature) \ + [smu_feature] = { 1, (smu_13_0_12_feature) } + +#define FEATURE_MASK(feature) (1ULL << feature) +#define SMC_DPM_FEATURE \ + (FEATURE_MASK(FEATURE_DATA_CALCULATION) | \ + FEATURE_MASK(FEATURE_DPM_GFXCLK) | FEATURE_MASK(FEATURE_DPM_FCLK)) + +const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[SMU_FEATURE_COUNT] = { + SMU_13_0_12_FEA_MAP(SMU_FEATURE_DATA_CALCULATIONS_BIT, FEATURE_DATA_CALCULATION), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_DPM_GFXCLK_BIT, FEATURE_DPM_GFXCLK), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_DPM_FCLK_BIT, FEATURE_DPM_FCLK), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_DS_GFXCLK_BIT, FEATURE_DS_GFXCLK), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_DS_SOCCLK_BIT, FEATURE_DS_SOCCLK), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_DS_LCLK_BIT, FEATURE_DS_LCLK), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_DS_FCLK_BIT, FEATURE_DS_FCLK), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_PPT_BIT, FEATURE_PPT), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_TDC_BIT, FEATURE_TDC), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_APCC_DFLL_BIT, FEATURE_APCC_DFLL), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_MP1_CG_BIT, FEATURE_SMU_CG), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_FW_CTF_BIT, FEATURE_FW_CTF), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_THERMAL_BIT, FEATURE_THERMAL), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_SOC_PCC_BIT, FEATURE_SOC_PCC), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_XGMI_PER_LINK_PWR_DWN_BIT, FEATURE_XGMI_PER_LINK_PWR_DOWN), +}; + +static int smu_v13_0_12_get_enabled_mask(struct smu_context *smu, + uint64_t *feature_mask) +{ + int ret; + + ret = smu_cmn_get_enabled_mask(smu, feature_mask); + + if (ret == -EIO) { + *feature_mask = 0; + ret = 0; + } + + return ret; +} + +bool smu_v13_0_12_is_dpm_running(struct smu_context *smu) +{ + int ret; + uint64_t feature_enabled; + + ret = smu_v13_0_12_get_enabled_mask(smu, &feature_enabled); + + if (ret) + return false; + + return !!(feature_enabled & SMC_DPM_FEATURE); +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index dbfab5d503b5..d4a159bb5a65 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2193,6 +2193,9 @@ static bool smu_v13_0_6_is_dpm_running(struct smu_context *smu) int ret; uint64_t feature_enabled; + if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) + return smu_v13_0_12_is_dpm_running(smu); + ret = smu_v13_0_6_get_enabled_mask(smu, &feature_enabled); if (ret) @@ -3564,7 +3567,8 @@ void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) smu->ppt_funcs = &smu_v13_0_6_ppt_funcs; smu->message_map = smu_v13_0_6_message_map; smu->clock_map = smu_v13_0_6_clk_map; - smu->feature_map = smu_v13_0_6_feature_mask_map; + smu->feature_map = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) ? 
+ smu_v13_0_12_feature_mask_map : smu_v13_0_6_feature_mask_map; smu->table_map = smu_v13_0_6_table_map; smu->smc_driver_if_version = SMU13_0_6_DRIVER_IF_VERSION; smu->smc_fw_caps |= SMU_FW_CAP_RAS_PRI; -- 2.51.0 From 7485c30809edc56d407e09f72373317b158088a8 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Tue, 21 Jan 2025 22:23:21 +0800 Subject: [PATCH 05/16] drm/amd/pm: Add metrics support for smuv13.0.12 Add metrics table support for smuv13.0.12 to fetch data from metrics version v2 v2: Update get metric field and get metric size macro (Lijo) Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 186 ++++++++++-------- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h | 8 + 2 files changed, 113 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index d4a159bb5a65..72c18b1635c0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -105,7 +105,6 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_14.bin"); enum smu_v13_0_6_caps { SMU_CAP(DPM), - SMU_CAP(UNI_METRICS), SMU_CAP(DPM_POLICY), SMU_CAP(OTHER_END_METRICS), SMU_CAP(SET_UCLK_MAX), @@ -272,8 +271,13 @@ struct PPTable_t { #define SMUQ10_TO_UINT(x) ((x) >> 10) #define SMUQ10_FRAC(x) ((x) & 0x3ff) #define SMUQ10_ROUND(x) ((SMUQ10_TO_UINT(x)) + ((SMUQ10_FRAC(x)) >= 0x200)) -#define GET_METRIC_FIELD(field, flag) ((flag) ?\ - (metrics_v1->field) : (metrics_v0->field)) +#define GET_GPU_METRIC_FIELD(field, version) ((version == METRICS_VERSION_V0) ?\ + (metrics_v0->field) : (metrics_v2->field)) +#define GET_METRIC_FIELD(field, version) ((version == METRICS_VERSION_V1) ?\ + (metrics_v1->field) : GET_GPU_METRIC_FIELD(field, version)) +#define METRICS_TABLE_SIZE (max3(sizeof(MetricsTableV0_t),\ + sizeof(MetricsTableV1_t),\ + sizeof(MetricsTableV2_t))) struct smu_v13_0_6_dpm_map { enum smu_clk_type clk_type; @@ -282,6 +286,18 @@ struct smu_v13_0_6_dpm_map { uint32_t *freq_table; }; +static inline int smu_v13_0_6_get_metrics_version(struct smu_context *smu) +{ + if ((smu->adev->flags & AMD_IS_APU) && + smu->smc_fw_version <= 0x4556900) + return METRICS_VERSION_V1; + else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == + IP_VERSION(13, 0, 12)) + return METRICS_VERSION_V2; + + return METRICS_VERSION_V0; +} + static inline void smu_v13_0_6_cap_set(struct smu_context *smu, enum smu_v13_0_6_caps cap) { @@ -309,7 +325,6 @@ static inline bool smu_v13_0_6_cap_supported(struct smu_context *smu, static void smu_v13_0_14_init_caps(struct smu_context *smu) { enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM), - SMU_CAP(UNI_METRICS), SMU_CAP(SET_UCLK_MAX), SMU_CAP(DPM_POLICY), SMU_CAP(PCIE_METRICS), @@ -335,12 +350,14 @@ static void smu_v13_0_14_init_caps(struct smu_context *smu) static void smu_v13_0_12_init_caps(struct smu_context *smu) { enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM), - SMU_CAP(UNI_METRICS), SMU_CAP(PCIE_METRICS), SMU_CAP(CTF_LIMIT), SMU_CAP(MCA_DEBUG_MODE), SMU_CAP(RMA_MSG), - SMU_CAP(ACA_SYND) }; + SMU_CAP(ACA_SYND), + SMU_CAP(OTHER_END_METRICS), + SMU_CAP(HST_LIMIT_METRICS), + SMU_CAP(PER_INST_METRICS) }; uint32_t fw_ver = smu->smc_fw_version; for (int i = 0; i < ARRAY_SIZE(default_cap_list); i++) @@ -356,7 +373,6 @@ static void smu_v13_0_12_init_caps(struct smu_context *smu) static void smu_v13_0_6_init_caps(struct smu_context *smu) { enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM), - 
SMU_CAP(UNI_METRICS), SMU_CAP(SET_UCLK_MAX), SMU_CAP(DPM_POLICY), SMU_CAP(PCIE_METRICS), @@ -382,8 +398,6 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu) smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG)); smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND)); - if (fw_ver <= 0x4556900) - smu_v13_0_6_cap_clear(smu, SMU_CAP(UNI_METRICS)); if (fw_ver >= 0x04556F00) smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS)); if (fw_ver >= 0x04556A00) @@ -514,7 +528,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, - max(sizeof(MetricsTableV0_t), sizeof(MetricsTableV1_t)), + METRICS_TABLE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); @@ -522,8 +536,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); - smu_table->metrics_table = kzalloc(max(sizeof(MetricsTableV0_t), - sizeof(MetricsTableV1_t)), GFP_KERNEL); + smu_table->metrics_table = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL); if (!smu_table->metrics_table) return -ENOMEM; smu_table->metrics_time = 0; @@ -755,9 +768,10 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) struct smu_table_context *smu_table = &smu->smu_table; MetricsTableV0_t *metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table; MetricsTableV1_t *metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table; + MetricsTableV2_t *metrics_v2 = (MetricsTableV2_t *)smu_table->metrics_table; struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; - bool flag = !smu_v13_0_6_cap_supported(smu, SMU_CAP(UNI_METRICS)); + int version = smu_v13_0_6_get_metrics_version(smu); int ret, i, retry = 100; uint32_t table_version; @@ -769,7 +783,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) return ret; /* Ensure that metrics have been updated */ - if (GET_METRIC_FIELD(AccumulationCounter, flag)) + if (GET_METRIC_FIELD(AccumulationCounter, version)) break; usleep_range(1000, 1100); @@ -786,29 +800,30 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu) table_version; pptable->MaxSocketPowerLimit = - SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit, flag)); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit, version)); pptable->MaxGfxclkFrequency = - SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency, flag)); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency, version)); pptable->MinGfxclkFrequency = - SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency, flag)); + SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency, version)); for (i = 0; i < 4; ++i) { pptable->FclkFrequencyTable[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequencyTable, flag)[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequencyTable, version)[i]); pptable->UclkFrequencyTable[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequencyTable, flag)[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequencyTable, version)[i]); pptable->SocclkFrequencyTable[i] = SMUQ10_ROUND( - GET_METRIC_FIELD(SocclkFrequencyTable, flag)[i]); + GET_METRIC_FIELD(SocclkFrequencyTable, version)[i]); pptable->VclkFrequencyTable[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequencyTable, flag)[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequencyTable, version)[i]); pptable->DclkFrequencyTable[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequencyTable, flag)[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequencyTable, version)[i]); pptable->LclkFrequencyTable[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(LclkFrequencyTable, flag)[i]); + 
SMUQ10_ROUND(GET_METRIC_FIELD(LclkFrequencyTable, version)[i]); } /* use AID0 serial number by default */ - pptable->PublicSerialNumber_AID = GET_METRIC_FIELD(PublicSerialNumber_AID, flag)[0]; + pptable->PublicSerialNumber_AID = + GET_METRIC_FIELD(PublicSerialNumber_AID, version)[0]; pptable->Init = true; } @@ -1130,7 +1145,8 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, struct smu_table_context *smu_table = &smu->smu_table; MetricsTableV0_t *metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table; MetricsTableV1_t *metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table; - bool flag = !smu_v13_0_6_cap_supported(smu, SMU_CAP(UNI_METRICS)); + MetricsTableV2_t *metrics_v2 = (MetricsTableV2_t *)smu_table->metrics_table; + int version = smu_v13_0_6_get_metrics_version(smu); struct amdgpu_device *adev = smu->adev; int ret = 0; int xcc_id; @@ -1145,50 +1161,50 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu, case METRICS_AVERAGE_GFXCLK: if (smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM))) { xcc_id = GET_INST(GC, 0); - *value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, flag)[xcc_id]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, version)[xcc_id]); } else { *value = 0; } break; case METRICS_CURR_SOCCLK: case METRICS_AVERAGE_SOCCLK: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency, flag)[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency, version)[0]); break; case METRICS_CURR_UCLK: case METRICS_AVERAGE_UCLK: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, flag)); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, version)); break; case METRICS_CURR_VCLK: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency, flag)[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency, version)[0]); break; case METRICS_CURR_DCLK: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency, flag)[0]); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency, version)[0]); break; case METRICS_CURR_FCLK: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequency, flag)); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequency, version)); break; case METRICS_AVERAGE_GFXACTIVITY: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy, flag)); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy, version)); break; case METRICS_AVERAGE_MEMACTIVITY: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, flag)); + *value = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, version)); break; case METRICS_CURR_SOCKETPOWER: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, flag)) << 8; + *value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, version)) << 8; break; case METRICS_TEMPERATURE_HOTSPOT: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, flag)) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, version)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; case METRICS_TEMPERATURE_MEM: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature, flag)) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature, version)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; /* This is the max of all VRs and not just SOC VR. * No need to define another data type for the same. 
*/ case METRICS_TEMPERATURE_VRSOC: - *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature, flag)) * + *value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature, version)) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; break; default: @@ -2479,17 +2495,18 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table struct smu_table_context *smu_table = &smu->smu_table; struct gpu_metrics_v1_7 *gpu_metrics = (struct gpu_metrics_v1_7 *)smu_table->gpu_metrics_table; - bool flag = !smu_v13_0_6_cap_supported(smu, SMU_CAP(UNI_METRICS)); + int version = smu_v13_0_6_get_metrics_version(smu); int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; MetricsTableV0_t *metrics_v0; MetricsTableV1_t *metrics_v1; + MetricsTableV2_t *metrics_v2; struct amdgpu_xcp *xcp; u16 link_width_level; u32 inst_mask; bool per_inst; - metrics_v0 = kzalloc(max(sizeof(MetricsTableV0_t), sizeof(MetricsTableV1_t)), GFP_KERNEL); + metrics_v0 = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL); ret = smu_v13_0_6_get_metrics_table(smu, metrics_v0, true); if (ret) { kfree(metrics_v0); @@ -2497,64 +2514,69 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table } metrics_v1 = (MetricsTableV1_t *)metrics_v0; + metrics_v2 = (MetricsTableV2_t *)metrics_v0; smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 7); gpu_metrics->temperature_hotspot = - SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, flag)); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, version)); /* Individual HBM stack temperature is not reported */ gpu_metrics->temperature_mem = - SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature, flag)); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature, version)); /* Reports max temperature of all voltage rails */ gpu_metrics->temperature_vrsoc = - SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature, flag)); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature, version)); gpu_metrics->average_gfx_activity = - SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy, flag)); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy, version)); gpu_metrics->average_umc_activity = - SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, flag)); + SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, version)); gpu_metrics->mem_max_bandwidth = - SMUQ10_ROUND(GET_METRIC_FIELD(MaxDramBandwidth, flag)); + SMUQ10_ROUND(GET_METRIC_FIELD(MaxDramBandwidth, version)); gpu_metrics->curr_socket_power = - SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, flag)); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, version)); /* Energy counter reported in 15.259uJ (2^-16) units */ - gpu_metrics->energy_accumulator = GET_METRIC_FIELD(SocketEnergyAcc, flag); + gpu_metrics->energy_accumulator = GET_METRIC_FIELD(SocketEnergyAcc, version); for (i = 0; i < MAX_GFX_CLKS; i++) { xcc_id = GET_INST(GC, i); if (xcc_id >= 0) gpu_metrics->current_gfxclk[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, flag)[xcc_id]); + SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, version)[xcc_id]); if (i < MAX_CLKS) { gpu_metrics->current_socclk[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency, flag)[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency, version)[i]); inst = GET_INST(VCN, i); if (inst >= 0) { gpu_metrics->current_vclk0[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency, flag)[inst]); + SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency, + version)[inst]); gpu_metrics->current_dclk0[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency, flag)[inst]); + SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency, + version)[inst]); } } } - 
gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, flag)); + gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, version)); /* Total accumulated cycle counter */ - gpu_metrics->accumulation_counter = GET_METRIC_FIELD(AccumulationCounter, flag); + gpu_metrics->accumulation_counter = GET_METRIC_FIELD(AccumulationCounter, version); /* Accumulated throttler residencies */ - gpu_metrics->prochot_residency_acc = GET_METRIC_FIELD(ProchotResidencyAcc, flag); - gpu_metrics->ppt_residency_acc = GET_METRIC_FIELD(PptResidencyAcc, flag); - gpu_metrics->socket_thm_residency_acc = GET_METRIC_FIELD(SocketThmResidencyAcc, flag); - gpu_metrics->vr_thm_residency_acc = GET_METRIC_FIELD(VrThmResidencyAcc, flag); - gpu_metrics->hbm_thm_residency_acc = GET_METRIC_FIELD(HbmThmResidencyAcc, flag); + gpu_metrics->prochot_residency_acc = GET_METRIC_FIELD(ProchotResidencyAcc, version); + gpu_metrics->ppt_residency_acc = GET_METRIC_FIELD(PptResidencyAcc, version); + gpu_metrics->socket_thm_residency_acc = GET_METRIC_FIELD(SocketThmResidencyAcc, version); + gpu_metrics->vr_thm_residency_acc = GET_METRIC_FIELD(VrThmResidencyAcc, version); + gpu_metrics->hbm_thm_residency_acc = + GET_METRIC_FIELD(HbmThmResidencyAcc, version); /* Clock Lock Status. Each bit corresponds to each GFXCLK instance */ - gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak, flag) >> GET_INST(GC, 0); + gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak, + version) >> GET_INST(GC, 0); if (!(adev->flags & AMD_IS_APU)) { /*Check smu version, PCIE link speed and width will be reported from pmfw metric @@ -2562,9 +2584,9 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table * for pf from registers */ if (smu_v13_0_6_cap_supported(smu, SMU_CAP(PCIE_METRICS))) { - gpu_metrics->pcie_link_width = metrics_v0->PCIeLinkWidth; + gpu_metrics->pcie_link_width = GET_GPU_METRIC_FIELD(PCIeLinkWidth, version); gpu_metrics->pcie_link_speed = - pcie_gen_to_speed(metrics_v0->PCIeLinkSpeed); + pcie_gen_to_speed(GET_GPU_METRIC_FIELD(PCIeLinkSpeed, version)); } else if (!amdgpu_sriov_vf(adev)) { link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu); if (link_width_level > MAX_LINK_WIDTH) @@ -2577,37 +2599,37 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table } gpu_metrics->pcie_bandwidth_acc = - SMUQ10_ROUND(metrics_v0->PcieBandwidthAcc[0]); + SMUQ10_ROUND(GET_GPU_METRIC_FIELD(PcieBandwidthAcc, version)[0]); gpu_metrics->pcie_bandwidth_inst = - SMUQ10_ROUND(metrics_v0->PcieBandwidth[0]); + SMUQ10_ROUND(GET_GPU_METRIC_FIELD(PcieBandwidth, version)[0]); gpu_metrics->pcie_l0_to_recov_count_acc = - metrics_v0->PCIeL0ToRecoveryCountAcc; + GET_GPU_METRIC_FIELD(PCIeL0ToRecoveryCountAcc, version); gpu_metrics->pcie_replay_count_acc = - metrics_v0->PCIenReplayAAcc; + GET_GPU_METRIC_FIELD(PCIenReplayAAcc, version); gpu_metrics->pcie_replay_rover_count_acc = - metrics_v0->PCIenReplayARolloverCountAcc; + GET_GPU_METRIC_FIELD(PCIenReplayARolloverCountAcc, version); gpu_metrics->pcie_nak_sent_count_acc = - metrics_v0->PCIeNAKSentCountAcc; + GET_GPU_METRIC_FIELD(PCIeNAKSentCountAcc, version); gpu_metrics->pcie_nak_rcvd_count_acc = - metrics_v0->PCIeNAKReceivedCountAcc; + GET_GPU_METRIC_FIELD(PCIeNAKReceivedCountAcc, version); if (smu_v13_0_6_cap_supported(smu, SMU_CAP(OTHER_END_METRICS))) gpu_metrics->pcie_lc_perf_other_end_recovery = - metrics_v0->PCIeOtherEndRecoveryAcc; + GET_GPU_METRIC_FIELD(PCIeOtherEndRecoveryAcc, version); } 
gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); gpu_metrics->gfx_activity_acc = - SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusyAcc, flag)); + SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusyAcc, version)); gpu_metrics->mem_activity_acc = - SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilizationAcc, flag)); + SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilizationAcc, version)); for (i = 0; i < NUM_XGMI_LINKS; i++) { gpu_metrics->xgmi_read_data_acc[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc, flag)[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc, version)[i]); gpu_metrics->xgmi_write_data_acc[i] = - SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc, flag)[i]); + SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc, version)[i]); ret = amdgpu_get_xgmi_link_status(adev, i); if (ret >= 0) gpu_metrics->xgmi_link_status[i] = ret; @@ -2627,11 +2649,11 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { gpu_metrics->xcp_stats[i].jpeg_busy [(idx * adev->jpeg.num_jpeg_rings) + j] = - SMUQ10_ROUND(GET_METRIC_FIELD(JpegBusy, flag) + SMUQ10_ROUND(GET_METRIC_FIELD(JpegBusy, version) [(inst * adev->jpeg.num_jpeg_rings) + j]); } gpu_metrics->xcp_stats[i].vcn_busy[idx] = - SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy, flag)[inst]); + SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy, version)[inst]); idx++; } @@ -2642,24 +2664,26 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table for_each_inst(k, inst_mask) { inst = GET_INST(GC, k); gpu_metrics->xcp_stats[i].gfx_busy_inst[idx] = - SMUQ10_ROUND(metrics_v0->GfxBusy[inst]); + SMUQ10_ROUND(GET_GPU_METRIC_FIELD(GfxBusy, version)[inst]); gpu_metrics->xcp_stats[i].gfx_busy_acc[idx] = - SMUQ10_ROUND(metrics_v0->GfxBusyAcc[inst]); + SMUQ10_ROUND(GET_GPU_METRIC_FIELD(GfxBusyAcc, + version)[inst]); if (smu_v13_0_6_cap_supported( smu, SMU_CAP(HST_LIMIT_METRICS))) gpu_metrics->xcp_stats[i].gfx_below_host_limit_acc[idx] = - SMUQ10_ROUND(metrics_v0->GfxclkBelowHostLimitAcc + SMUQ10_ROUND(GET_GPU_METRIC_FIELD + (GfxclkBelowHostLimitAcc, version) [inst]); idx++; } } } - gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth, flag)); - gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate, flag)); + gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth, version)); + gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate, version)); - gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp, flag); + gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp, version); *table = (void *)gpu_metrics; kfree(metrics_v0); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h index f0fa42a645c0..717fe669882e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h @@ -27,6 +27,14 @@ #define SMU_13_0_6_UMD_PSTATE_SOCCLK_LEVEL 0x4 #define SMU_13_0_6_UMD_PSTATE_MCLK_LEVEL 0x2 +typedef enum { +/*0*/ METRICS_VERSION_V0 = 0, +/*1*/ METRICS_VERSION_V1 = 1, +/*2*/ METRICS_VERSION_V2 = 2, + +/*3*/ NUM_METRICS = 3 +} METRICS_LIST_e; + extern void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu); #endif -- 2.51.0 From b9755229ea8ff889289e03bd405bcaf2e3e1edf0 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Tue, 21 Jan 2025 22:40:28 +0800 Subject: [PATCH 06/16] drm/amd/pm: Skip showing MCLK_OD level Skip showing MCLK_OD level if setting UCLK MAX is not supported 
Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 72c18b1635c0..cf313da88607 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -1368,6 +1368,9 @@ static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, break; case SMU_OD_MCLK: + if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SET_UCLK_MAX))) + return 0; + size += sysfs_emit_at(buf, size, "%s:\n", "OD_MCLK"); size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", pstate_table->uclk_pstate.curr.min, -- 2.51.0 From c003b5ccaf625c4e8077a0e7a8a1d9e6e403d603 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Fri, 17 Jan 2025 17:08:33 +0800 Subject: [PATCH 07/16] drm/amd/pm: Update pm attr for gc_9_5_0 Update power management & clk attributes for gc_v_9_5_0 Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index e8ae7681bf0a..0aca0803514e 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2006,9 +2006,10 @@ static int pp_od_clk_voltage_attr_update(struct amdgpu_device *adev, struct amdg return 0; } - /* Enable pp_od_clk_voltage node for gc 9.4.3 SRIOV/BM support */ + /* Enable pp_od_clk_voltage node for gc 9.4.3, 9.4.4, 9.5.0 SRIOV/BM support */ if (gc_ver == IP_VERSION(9, 4, 3) || - gc_ver == IP_VERSION(9, 4, 4)) { + gc_ver == IP_VERSION(9, 4, 4) || + gc_ver == IP_VERSION(9, 5, 0)) { if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) *states = ATTR_STATE_UNSUPPORTED; return 0; @@ -2087,7 +2088,8 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd gc_ver == IP_VERSION(11, 0, 2) || gc_ver == IP_VERSION(11, 0, 3) || gc_ver == IP_VERSION(9, 4, 3) || - gc_ver == IP_VERSION(9, 4, 4))) + gc_ver == IP_VERSION(9, 4, 4) || + gc_ver == IP_VERSION(9, 5, 0))) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_dpm_vclk1)) { if (!((gc_ver == IP_VERSION(10, 3, 1) || @@ -2109,7 +2111,8 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd gc_ver == IP_VERSION(11, 0, 2) || gc_ver == IP_VERSION(11, 0, 3) || gc_ver == IP_VERSION(9, 4, 3) || - gc_ver == IP_VERSION(9, 4, 4))) + gc_ver == IP_VERSION(9, 4, 4) || + gc_ver == IP_VERSION(9, 5, 0))) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_dpm_dclk1)) { if (!((gc_ver == IP_VERSION(10, 3, 1) || @@ -2120,7 +2123,8 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd } else if (DEVICE_ATTR_IS(pp_dpm_pcie)) { if (gc_ver == IP_VERSION(9, 4, 2) || gc_ver == IP_VERSION(9, 4, 3) || - gc_ver == IP_VERSION(9, 4, 4)) + gc_ver == IP_VERSION(9, 4, 4) || + gc_ver == IP_VERSION(9, 5, 0)) *states = ATTR_STATE_UNSUPPORTED; } @@ -2416,6 +2420,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): case IP_VERSION(10, 3, 0): case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): @@ -3530,7 +3535,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, /* Skip crit 
temp on APU */ if ((((adev->flags & AMD_IS_APU) && (adev->family >= AMDGPU_FAMILY_CZ)) || - (gc_ver == IP_VERSION(9, 4, 3) || gc_ver == IP_VERSION(9, 4, 4))) && + (gc_ver == IP_VERSION(9, 4, 3) || gc_ver == IP_VERSION(9, 4, 4) || + gc_ver == IP_VERSION(9, 5, 0))) && (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr || attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr)) return 0; @@ -3605,7 +3611,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, if ((adev->family == AMDGPU_FAMILY_SI || /* not implemented yet */ adev->family == AMDGPU_FAMILY_KV || /* not implemented yet */ (gc_ver == IP_VERSION(9, 4, 3) || - gc_ver == IP_VERSION(9, 4, 4))) && + gc_ver == IP_VERSION(9, 4, 4) || + gc_ver == IP_VERSION(9, 5, 0))) && (attr == &sensor_dev_attr_in0_input.dev_attr.attr || attr == &sensor_dev_attr_in0_label.dev_attr.attr)) return 0; @@ -3613,7 +3620,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, /* only APUs other than gc 9,4,3 have vddnb */ if ((!(adev->flags & AMD_IS_APU) || (gc_ver == IP_VERSION(9, 4, 3) || - gc_ver == IP_VERSION(9, 4, 4))) && + gc_ver == IP_VERSION(9, 4, 4) || + gc_ver == IP_VERSION(9, 5, 0))) && (attr == &sensor_dev_attr_in1_input.dev_attr.attr || attr == &sensor_dev_attr_in1_label.dev_attr.attr)) return 0; @@ -3636,7 +3644,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, /* hotspot temperature for gc 9,4,3*/ if (gc_ver == IP_VERSION(9, 4, 3) || - gc_ver == IP_VERSION(9, 4, 4)) { + gc_ver == IP_VERSION(9, 4, 4) || + gc_ver == IP_VERSION(9, 5, 0)) { if (attr == &sensor_dev_attr_temp1_input.dev_attr.attr || attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr || attr == &sensor_dev_attr_temp1_label.dev_attr.attr) -- 2.51.0 From 16b85a0942c0b0f1611bcaa42cc98f020e34b1cf Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 22 Jan 2025 19:34:33 +0800 Subject: [PATCH 08/16] drm/amdgpu: Update usage for bad page threshold The driver's behavior varies based on the configuration of the amdgpu_bad_page_threshold setting. Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 40 +++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 41 +++++++++++-------- 4 files changed, 45 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e789f6790a1c..f52f674477eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -964,7 +964,7 @@ module_param_named_unsafe(reset_method, amdgpu_reset_method, int, 0644); * result in the GPU entering bad status when the number of total * faulty pages by ECC exceeds the threshold value.
*/ -MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = driver sets threshold)"); +MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = threshold determined by a formula, 0 < threshold < max records, user-defined threshold)"); module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444); MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index f0924aa3f4e4..90394f89aba6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3080,31 +3080,29 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); /* - * Justification of value bad_page_cnt_threshold in ras structure - * - * Generally, 0 <= amdgpu_bad_page_threshold <= max record length - * in eeprom or amdgpu_bad_page_threshold == -2, introduce two - * scenarios accordingly. - * - * Bad page retirement enablement: - * - If amdgpu_bad_page_threshold = -2, - * bad_page_cnt_threshold = typical value by formula. - * - * - When the value from user is 0 < amdgpu_bad_page_threshold < - * max record length in eeprom, use it directly. - * - * Bad page retirement disablement: - * - If amdgpu_bad_page_threshold = 0, bad page retirement - * functionality is disabled, and bad_page_cnt_threshold will - * take no effect. + * amdgpu_bad_page_threshold is used to configure + * the threshold for the number of bad pages. + * -1: Threshold is set to default value + * Driver will issue a warning message when threshold is reached + * and continue runtime services. + * 0: Disable bad page retirement + * Driver will not retire bad pages, + * which is intended for debugging purposes. + * -2: Threshold is determined by a formula + * that assumes 1 bad page per 100M of local memory. + * Driver will continue runtime services when threshold is reached. + * 0 < threshold < max number of bad page records in EEPROM, + * A user-defined threshold is set + * Driver will halt runtime services when this custom threshold is reached.
*/ - - if (amdgpu_bad_page_threshold < 0) { + if (amdgpu_bad_page_threshold == -2) { u64 val = adev->gmc.mc_vram_size; do_div(val, RAS_BAD_PAGE_COVER); con->bad_page_cnt_threshold = min(lower_32_bits(val), max_count); + } else if (amdgpu_bad_page_threshold == -1) { + con->bad_page_cnt_threshold = ((con->reserved_pages_in_bytes) >> 21) << 4; } else { con->bad_page_cnt_threshold = min_t(int, max_count, amdgpu_bad_page_threshold); @@ -3848,8 +3846,10 @@ static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev) case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 12): + con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT; + break; case IP_VERSION(13, 0, 14): - con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE; + con->reserved_pages_in_bytes = (AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT << 1); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 82db986c36a0..cc4586581dba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -65,7 +65,7 @@ struct amdgpu_iv_entry; /* Reserve 8 physical dram row for possible retirement. * In worst cases, it will lose 8 * 2MB memory in vram domain */ -#define AMDGPU_RAS_RESERVED_VRAM_SIZE (16ULL << 20) +#define AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT (16ULL << 20) /* The high three bits indicates socketid */ #define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 52c16bfeccaa..723c655bb4d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -558,16 +558,17 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev) return false; if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) { - if (amdgpu_bad_page_threshold == -1) { + if (con->eeprom_control.ras_num_bad_pages > con->bad_page_cnt_threshold) dev_warn(adev->dev, "RAS records:%d exceed threshold:%d", - con->eeprom_control.ras_num_bad_pages, con->bad_page_cnt_threshold); + con->eeprom_control.ras_num_bad_pages, con->bad_page_cnt_threshold); + if ((amdgpu_bad_page_threshold == -1) || + (amdgpu_bad_page_threshold == -2)) { dev_warn(adev->dev, - "But GPU can be operated due to bad_page_threshold = -1.\n"); + "Please consult AMD Service Action Guide (SAG) for appropriate service procedures.\n"); return false; } else { - dev_warn(adev->dev, "This GPU is in BAD status."); - dev_warn(adev->dev, "Please retire it or set a larger " - "threshold value when reloading driver.\n"); + dev_warn(adev->dev, + "Please consider adjusting the customized threshold.\n"); return true; } } @@ -758,7 +759,8 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) control->tbl_rai.health_percent = 0; } - if (amdgpu_bad_page_threshold != -1) + if ((amdgpu_bad_page_threshold != -1) && + (amdgpu_bad_page_threshold != -2)) ras->is_rma = true; /* ignore the -ENOTSUPP return value */ @@ -1428,8 +1430,9 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) res = __verify_ras_table_checksum(control); if (res) - DRM_ERROR("RAS table incorrect checksum or error:%d\n", - res); + dev_err(adev->dev, + "RAS table incorrect checksum or error:%d\n", + res); /* Warn if we are at 90% of the threshold or above */ @@ -1447,8 +1450,9 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) res = 
__verify_ras_table_checksum(control); if (res) { - dev_err(adev->dev, "RAS Table incorrect checksum or error:%d\n", - res); + dev_err(adev->dev, + "RAS Table incorrect checksum or error:%d\n", + res); return -EINVAL; } if (ras->bad_page_cnt_threshold > control->ras_num_bad_pages) { @@ -1466,17 +1470,18 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) res = amdgpu_ras_eeprom_correct_header_tag(control, RAS_TABLE_HDR_VAL); } else { - dev_err(adev->dev, "RAS records:%d exceed threshold:%d", + dev_warn(adev->dev, + "RAS records:%d exceed threshold:%d\n", control->ras_num_bad_pages, ras->bad_page_cnt_threshold); - if (amdgpu_bad_page_threshold == -1) { - dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -1."); + if ((amdgpu_bad_page_threshold == -1) || + (amdgpu_bad_page_threshold == -2)) { res = 0; + dev_warn(adev->dev, + "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n"); } else { ras->is_rma = true; - dev_err(adev->dev, - "RAS records:%d exceed threshold:%d, " - "GPU will not be initialized. Replace this GPU or increase the threshold", - control->ras_num_bad_pages, ras->bad_page_cnt_threshold); + dev_warn(adev->dev, + "User-defined threshold is set, runtime services will be halted when the threshold is reached\n"); } } } else { -- 2.51.0 From 04893397766a2b2f1bc7fe5c6414e4c0846ed171 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 20 Jan 2025 22:00:22 -0500 Subject: [PATCH 09/16] drm/amdgpu: Skip err_count sysfs creation on VF unsupported RAS blocks VFs are not able to query error counts for all RAS blocks. Rather than returning an error for queries on these blocks, skip the sysfs creation altogether. Signed-off-by: Victor Skvortsov Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 17 ++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 ++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 90394f89aba6..44d13a60588d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1864,6 +1864,9 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, if (!obj || obj->attr_inuse) return -EINVAL; + if (amdgpu_sriov_vf(adev) && !amdgpu_virt_ras_telemetry_block_en(adev, head->block)) + return 0; + get_obj(obj); snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 0af469ec6fcc..2056efaf157d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1246,7 +1246,8 @@ amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block bloc case AMDGPU_RAS_BLOCK__MPIO: return RAS_TELEMETRY_GPU_BLOCK_MPIO; default: - dev_err(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n", block); + DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n", + block); return RAS_TELEMETRY_GPU_BLOCK_COUNT; } } @@ -1331,3 +1332,17 @@ int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev) return 0; } + +bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev, + enum amdgpu_ras_block block) +{ + enum amd_sriov_ras_telemetry_gpu_block sriov_block; + + sriov_block = amdgpu_ras_block_to_sriov(adev, block); + + if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT || 
!amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block)) + return false; + + return true; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 5381b8d596e6..270a032e2d70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -406,4 +406,6 @@ bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev); int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block, struct ras_err_data *err_data); int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev); +bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev, + enum amdgpu_ras_block block); #endif -- 2.51.0 From 25961bad9212476983c570438366e1f5e9a9cf21 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 21 Jan 2025 12:32:07 +0530 Subject: [PATCH 10/16] drm/amdgpu/gfx10: Add cleaner shader for GFX10.1.10 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This commit adds the cleaner shader microcode for GFX10.1.10 GPUs. The cleaner shader is a piece of GPU code that is used to clear or initialize certain GPU resources, such as Local Data Share (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs). Clearing these resources is important for ensuring data isolation between different workloads running on the GPU. Without the cleaner shader, residual data from a previous workload could potentially be accessed by a subsequent workload, leading to data leaks and incorrect computation results. The cleaner shader microcode is represented as an array of 32-bit words (`gfx_10_1_10_cleaner_shader_hex`). This array is the binary representation of the cleaner shader code, which is written in a low-level GPU instruction set. When the cleaner shader feature is enabled, the AMDGPU driver loads this array into a specific location in the GPU memory. The GPU then reads this memory location to fetch and execute the cleaner shader instructions. The cleaner shader is executed automatically by the GPU at the end of each workload, before the next workload starts. This ensures that all GPU resources are in a clean state before the start of each workload. This addition is part of the cleaner shader feature implementation. The cleaner shader feature helps improve resource utilization by cleaning up GPU resources after they are used. It also enhances security and reliability by preventing data leaks between workloads. 
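As a side note, the enablement logic this patch adds is essentially a firmware-version gate around the shader binary. The following standalone C sketch (not driver code; the version thresholds mirror the GFX10.1.10 hunk below, everything else is made up for illustration) shows the shape of that check:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative sketch of the cleaner-shader enablement gate: the shader
 * binary is only enabled when every firmware component is new enough to
 * execute it safely. */
struct fw_versions {
	uint32_t me;
	uint32_t pfp;
	uint32_t mec;
};

static bool cleaner_shader_supported(const struct fw_versions *fw)
{
	return fw->me >= 101 && fw->pfp >= 158 && fw->mec >= 152;
}

int main(void)
{
	struct fw_versions fw = { .me = 101, .pfp = 158, .mec = 152 };

	printf("cleaner shader %s\n",
	       cleaner_shader_supported(&fw) ? "enabled" : "disabled");
	return 0;
}

If any component is too old, the driver simply leaves the feature off rather than failing device init, which matches the fallback path in the hunk below.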
Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Alex Deucher Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 14 ++ .../drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h | 35 +++++ .../amdgpu/gfx_v10_1_10_cleaner_shader.asm | 126 ++++++++++++++++++ 3 files changed, 175 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 4b5e65affb81..1878c83ff7e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4794,6 +4794,20 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) break; } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(10, 1, 10): + adev->gfx.cleaner_shader_ptr = gfx_10_1_10_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 101 && + adev->gfx.pfp_fw_version >= 158 && + adev->gfx.mec_fw_version >= 152) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 4): diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h index 663c2572d440..5255378af53c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h @@ -21,6 +21,41 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +/* Define the cleaner shader gfx_10_1_10 */ +static const u32 gfx_10_1_10_cleaner_shader_hex[] = { + 0xb0804004, 0xbf8a0000, + 0xbf068100, 0xbf840023, + 0xbe8203b8, 0xbefc0380, + 0x7e008480, 0x7e028480, + 0x7e048480, 0x7e068480, + 0x7e088480, 0x7e0a8480, + 0x7e0c8480, 0x7e0e8480, + 0xbefc0302, 0x80828802, + 0xbf84fff5, 0xbe8203ff, + 0x80000000, 0x87020102, + 0xbf840012, 0xbefe03c1, + 0xbeff03c1, 0xd7650001, + 0x0001007f, 0xd7660001, + 0x0002027e, 0x16020288, + 0xbe8203bf, 0xbefc03c1, + 0xd9382000, 0x00020201, + 0xd9386040, 0x00040401, + 0xd70f6a01, 0x000202ff, + 0x00000400, 0x80828102, + 0xbf84fff7, 0xbefc03ff, + 0x00000068, 0xbe803080, + 0xbe813080, 0xbe823080, + 0xbe833080, 0x80fc847c, + 0xbf84fffa, 0xbeea0480, + 0xbeec0480, 0xbeee0480, + 0xbef00480, 0xbef20480, + 0xbef40480, 0xbef60480, + 0xbef80480, 0xbefa0480, + 0xbf810000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, +}; + /* Define the cleaner shader gfx_10_3_0 */ static const u32 gfx_10_3_0_cleaner_shader_hex[] = { 0xb0804004, 0xbf8a0000, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm new file mode 100644 index 000000000000..9ba3359253c9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// This shader is to clean LDS, SGPRs and VGPRs. It is the first 64 Dwords (256 bytes) of the 256-Dword cleaner shader. + +// GFX10.1 : Clear SGPRs, VGPRs and LDS +// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot +// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD +// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS) +// It takes 2 workgroups to use all of LDS: one on each CU of the WGP +// Each wave clears SGPRs 0 - 107 +// Each wave clears VGPRs 0 - 63 +// The first wave of the workgroup clears its 64KB of LDS +// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup +// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared. + + +shader main + asic(GFX10.1) + type(CS) + wave_size(32) +// Note: original source code from SQ team + +// +// Create 32 waves in a threadgroup (CS waves) +// Each allocates 64 VGPRs +// The workgroup allocates all of LDS (64kbytes) +// +// Takes about 2500 clocks to run. 
+// (theoretical fastest = 1024clks vgpr + 640lds = 1660 clks) +// + S_BARRIER + s_cmp_eq_u32 s0, 1 // If bit0 of sgpr0 is set (FW sets COMPUTE_USER_DATA_0), clear VGPRs and LDS + s_cbranch_scc0 label_0023 // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s0 == 1) + + s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance) + s_mov_b32 m0, 0 + // + // CLEAR VGPRs + // +label_0005: + v_movreld_b32 v0, 0 + v_movreld_b32 v1, 0 + v_movreld_b32 v2, 0 + v_movreld_b32 v3, 0 + v_movreld_b32 v4, 0 + v_movreld_b32 v5, 0 + v_movreld_b32 v6, 0 + v_movreld_b32 v7, 0 + s_mov_b32 m0, s2 + s_sub_u32 s2, s2, 8 + s_cbranch_scc0 label_0005 + // + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_cbranch_scc0 label_0023 // Clean LDS if it's the first wave of ThreadGroup/WorkGroup + // CLEAR LDS + // + s_mov_b32 exec_lo, 0xffffffff + s_mov_b32 exec_hi, 0xffffffff + v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63) + v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63) + v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte) + s_mov_b32 s2, 0x0000003f // 64 loop iterations + s_mov_b32 m0, 0xffffffff // Clear all of LDS space // Each FirstWave of WorkGroup clears 64kbyte block + +label_001F: + ds_write2_b64 v1, v[2:3], v[2:3] offset1:32 + ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96 + v_add_co_u32 v1, vcc, 0x00000400, v1 + s_sub_u32 s2, s2, 1 + s_cbranch_scc0 label_001F + + // + // CLEAR SGPRs + // +label_0023: + s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance) +label_sgpr_loop: + s_movreld_b32 s0, 0 + s_movreld_b32 s1, 0 + s_movreld_b32 s2, 0 + s_movreld_b32 s3, 0 + s_sub_u32 m0, m0, 4 + s_cbranch_scc0 label_sgpr_loop + + //clear vcc + s_mov_b64 vcc, 0 //clear vcc + //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_lo, 0 //clear flat scratch lo SGPR + //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_hi, 0 //clear flat scratch hi SGPR + s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1 + s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3 + s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5 + s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7 + s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9 + s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11 + s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13 + s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15 + + s_endpgm + +end + + -- 2.51.0 From d8c782cac5007e68e7484d420168f12d3490def6 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Mon, 13 Jan 2025 14:22:31 +0800 Subject: [PATCH 11/16] drm/amd/display: Initialize psr_version with correct setting [Why & How] The initial setting for psr_version is not correct while creating a virtual link. The default psr_version should be DC_PSR_VERSION_UNSUPPORTED. 
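For context, the bug class here is a zero-initialized enum field. A minimal standalone C sketch of it, assuming (as the fix implies) that the enum's zero value denotes a supported PSR version, so a zero-allocated virtual link would otherwise advertise PSR support; the enum values below are illustrative, not copied from dc_types.h:

#include <stdio.h>
#include <stdlib.h>

enum psr_version {
	PSR_VERSION_1 = 0,	/* 0 aliases a valid, supported version */
	PSR_VERSION_SU_1,
	PSR_VERSION_UNSUPPORTED,
};

struct link {
	enum psr_version psr_version;
};

int main(void)
{
	struct link *link = calloc(1, sizeof(*link));

	if (!link)
		return 1;

	/* Zero-allocated: the field silently reads as PSR_VERSION_1. */
	printf("before: psr_version = %d\n", link->psr_version);

	/* The fix: explicitly mark virtual links as PSR-unsupported. */
	link->psr_version = PSR_VERSION_UNSUPPORTED;
	printf("after: psr_version = %d\n", link->psr_version);
	free(link);
	return 0;
}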
Reviewed-by: Roman Li Signed-off-by: Tom Chung Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index ba3a34fad26a..f08d3d467372 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -276,6 +276,7 @@ static bool create_links( link->link_id.type = OBJECT_TYPE_CONNECTOR; link->link_id.id = CONNECTOR_ID_VIRTUAL; link->link_id.enum_id = ENUM_ID_1; + link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED; link->link_enc = kzalloc(sizeof(*link->link_enc), GFP_KERNEL); if (!link->link_enc) { -- 2.51.0 From 6a7fde433231c18164c117592d3e18ced648ad58 Mon Sep 17 00:00:00 2001 From: George Shen Date: Fri, 10 Jan 2025 11:35:46 -0500 Subject: [PATCH 12/16] drm/amd/display: Update CR AUX RD interval interpretation [Why] DP spec updated to have the CR AUX RD interval match the EQ AUX RD interval interpretation of DPCD 0000Eh/0220Eh for 8b/10b non-LTTPR mode and LTTPR transparent mode cases. [How] Update interpretation of DPCD 0000Eh/0220Eh for CR AUX RD interval during 8b/10b link training. Reviewed-by: Michael Strauss Reviewed-by: Wenjing Liu Signed-off-by: George Shen Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/link/protocols/link_dp_training_8b_10b.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c index 3bdce32a85e3..ae95ec48e572 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c @@ -36,7 +36,8 @@ link->ctx->logger static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, - const struct dc_link_settings *link_settings) + const struct dc_link_settings *link_settings, + enum lttpr_mode lttpr_mode) { union training_aux_rd_interval training_rd_interval; uint32_t wait_in_micro_secs = 100; @@ -49,6 +50,8 @@ static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, DP_TRAINING_AUX_RD_INTERVAL, (uint8_t *)&training_rd_interval, sizeof(training_rd_interval)); + if (lttpr_mode != LTTPR_MODE_NON_TRANSPARENT) + wait_in_micro_secs = 400; if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000; } @@ -110,7 +113,6 @@ void decide_8b_10b_training_settings( */ lt_settings->link_settings.link_spread = link->dp_ss_off ? 
LINK_SPREAD_DISABLED : LINK_SPREAD_05_DOWNSPREAD_30KHZ; - lt_settings->cr_pattern_time = get_cr_training_aux_rd_interval(link, link_setting); lt_settings->eq_pattern_time = get_eq_training_aux_rd_interval(link, link_setting); lt_settings->pattern_for_cr = decide_cr_training_pattern(link_setting); lt_settings->pattern_for_eq = decide_eq_training_pattern(link, link_setting); @@ -119,6 +121,7 @@ void decide_8b_10b_training_settings( lt_settings->disallow_per_lane_settings = true; lt_settings->always_match_dpcd_with_hw_lane_settings = true; lt_settings->lttpr_mode = dp_decide_8b_10b_lttpr_mode(link); + lt_settings->cr_pattern_time = get_cr_training_aux_rd_interval(link, link_setting, lt_settings->lttpr_mode); dp_hw_to_dpcd_lane_settings(lt_settings, lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); } -- 2.51.0 From 4a4077b4b63a8404efd6d37fc2926f03fb25bace Mon Sep 17 00:00:00 2001 From: Zhikai Zhai Date: Thu, 9 Jan 2025 16:11:48 +0800 Subject: [PATCH 13/16] drm/amd/display: Update Cursor request mode to the beginning prefetch always [Why] The double buffer cursor registers are updated by the cursor vupdate event. There is a gap between vupdate and the cursor data fetch if the cursor fetches data relative to the cursor position. Cursor corruption will happen if we update the cursor surface in this gap. [How] Modify the cursor request mode to always prefetch at the beginning, and avoid wraparound calculation issues. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Zhikai Zhai Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hubp/dcn31/dcn31_hubp.c | 2 +- .../amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 22 ++++++++----------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c index c2900c79a2d3..7fd582a8a4ba 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c @@ -44,7 +44,7 @@ void hubp31_set_unbounded_requesting(struct hubp *hubp, bool enable) struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_UPDATE(DCHUBP_CNTL, HUBP_UNBOUNDED_REQ_MODE, enable); - REG_UPDATE(CURSOR_CONTROL, CURSOR_REQ_MODE, enable); + REG_UPDATE(CURSOR_CONTROL, CURSOR_REQ_MODE, 1); } void hubp31_soft_reset(struct hubp *hubp, bool reset) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 906934128912..35c0d101d7c8 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1993,20 +1993,11 @@ static void delay_cursor_until_vupdate(struct dc *dc, struct pipe_ctx *pipe_ctx) dc->hwss.get_position(&pipe_ctx, 1, &position); vpos = position.vertical_count; - /* Avoid wraparound calculation issues */ - vupdate_start += stream->timing.v_total; - vupdate_end += stream->timing.v_total; - vpos += stream->timing.v_total; - if (vpos <= vupdate_start) { /* VPOS is in VACTIVE or back porch. */ lines_to_vupdate = vupdate_start - vpos; - } else if (vpos > vupdate_end) { - /* VPOS is in the front porch. */ - return; } else { - /* VPOS is in VUPDATE. */ - lines_to_vupdate = 0; + lines_to_vupdate = stream->timing.v_total - vpos + vupdate_start; } /* Calculate time until VUPDATE in microseconds. 
*/ @@ -2014,13 +2005,18 @@ static void delay_cursor_until_vupdate(struct dc *dc, struct pipe_ctx *pipe_ctx) stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz; us_to_vupdate = lines_to_vupdate * us_per_line; + /* Stall out until the cursor update completes. */ + if (vupdate_end < vupdate_start) + vupdate_end += stream->timing.v_total; + + /* Position is in the range of vupdate start and end */ + if (lines_to_vupdate > stream->timing.v_total - vupdate_end + vupdate_start) + us_to_vupdate = 0; + /* 70 us is a conservative estimate of cursor update time */ if (us_to_vupdate > 70) return; - /* Stall out until the cursor update completes. */ - if (vupdate_end < vupdate_start) - vupdate_end += stream->timing.v_total; us_vupdate = (vupdate_end - vupdate_start + 1) * us_per_line; udelay(us_to_vupdate + us_vupdate); } -- 2.51.0 From 36681f15bb12b5c01df924379cdab9234259825c Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Mon, 13 Jan 2025 14:13:51 -0500 Subject: [PATCH 14/16] drm/amd/display: Account For OTO Prefetch Bandwidth When Calculating Urgent Bandwidth [Why] 1) The current calculations for OTO prefetch bandwidth do not consider the number of DPP pipes in use. As a result, OTO prefetch bandwidth may be larger than the vactive bandwidth if multiple DPP pipes are used. OTO prefetch bandwidth should never exceed the vactive bandwidth. 2) Mode programming may be mismatched with mode support. In cases where mode support has chosen to use the equalized (equ) prefetch schedule, mode programming may end up using the oto prefetch schedule instead. The bandwidth required to do the oto schedule may end up being higher than that of the equ schedule. This can cause the required urgent bandwidth to exceed the available urgent bandwidth. [How] Output the oto prefetch bandwidth and incorporate it into the urgent bandwidth calculations even if the prefetch schedule being used is not the oto schedule. 
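To illustrate the [How] above: the per-surface urgent requirement is a max over several bandwidth terms, and the fix adds the oto prefetch bandwidth as one more term. A minimal standalone C sketch of that change (names and numbers are illustrative, not the DML code):

#include <stdio.h>

static double max2(double a, double b) { return a > b ? a : b; }

/* Per-surface required bandwidth: max over all contributors.  The last
 * argument is the new oto prefetch term, so mode programming can never
 * require more bandwidth than mode support accounted for. */
static double required_bw(double vm_row_bw, double flip_and_active_bw,
			  double flip_and_prefetch_bw,
			  double active_and_excess_bw,
			  double flip_and_prefetch_bw_oto)
{
	double bw = max2(vm_row_bw, flip_and_active_bw);

	bw = max2(bw, flip_and_prefetch_bw);
	bw = max2(bw, active_and_excess_bw);
	/* New term: keeps equ-based mode support honest about oto cost. */
	return max2(bw, flip_and_prefetch_bw_oto);
}

int main(void)
{
	/* The oto term (3.5) dominates here, raising the requirement. */
	printf("required bw = %.1f\n", required_bw(1.0, 2.0, 3.0, 2.5, 3.5));
	return 0;
}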
Reviewed-by: Dillon Varone Signed-off-by: Austin Zheng Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../src/dml2_core/dml2_core_dcn4_calcs.c | 25 ++++++++++++++++++- .../src/dml2_core/dml2_core_shared_types.h | 5 ++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 51b457b6d66f..e96a13dc43d4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -4909,6 +4909,7 @@ static double get_urgent_bandwidth_required( double ReadBandwidthChroma[], double PrefetchBandwidthLuma[], double PrefetchBandwidthChroma[], + double PrefetchBandwidthOto[], double excess_vactive_fill_bw_l[], double excess_vactive_fill_bw_c[], double cursor_bw[], @@ -4972,8 +4973,9 @@ static double get_urgent_bandwidth_required( l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k]; l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur; l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; + l->flip_and_prefetch_bw_oto = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthOto[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]; - surface_required_bw[k] = math_max4(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw); + surface_required_bw[k] = math_max5(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw, l->flip_and_prefetch_bw_oto); /* export peak required bandwidth for the surface */ surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]); @@ -5171,6 +5173,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->Tsw_est3 = 0.0; s->cursor_prefetch_bytes = 0; *p->prefetch_cursor_bw = 0; + *p->RequiredPrefetchBWOTO = 0.0; dcc_mrq_enable = (p->dcc_enable && p->mrq_present); @@ -5384,6 +5387,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime; } + /* oto prefetch bw should always be less than total vactive bw */ + DML2_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface); + s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor; s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime)); @@ -5394,6 +5400,12 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); + /* oto bw needs to be output even if 
the oto schedule isn't being used to avoid ms/mp mismatch. + * mp will fail if ms decides to use equ schedule and mp decides to use oto schedule + * and the required bandwidth increases when going from ms to mp + */ + *p->RequiredPrefetchBWOTO = s->prefetch_bw_oto; + #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l); dml2_printf("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c); @@ -6154,6 +6166,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, l->zero_array, //PrefetchBandwidthLuma, l->zero_array, //PrefetchBandwidthChroma, + l->zero_array, //PrefetchBWOTO l->zero_array, l->zero_array, l->zero_array, @@ -6190,6 +6203,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, l->zero_array, //PrefetchBandwidthLuma, l->zero_array, //PrefetchBandwidthChroma, + l->zero_array, //PrefetchBWOTO p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -6226,6 +6240,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, p->prefetch_bandwidth_l, p->prefetch_bandwidth_c, + p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -6262,6 +6277,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, p->prefetch_bandwidth_l, p->prefetch_bandwidth_c, + p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -6298,6 +6314,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, p->prefetch_bandwidth_l, p->prefetch_bandwidth_c, + p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -9060,6 +9077,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k]; CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c + CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &mode_lib->ms.RequiredPrefetchBWOTO[k]; CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k]; CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k]; @@ -9204,6 +9222,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; @@ -9370,6 +9389,7 @@ static bool 
dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; @@ -11286,6 +11306,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k]; CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]; CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &s->dummy_single_array[0][k]; CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]; CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k]; CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k]; @@ -11428,6 +11449,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = s->dummy_single_array[0]; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c; calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw; @@ -11560,6 +11582,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = s->dummy_single_array[k]; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c; calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index 23c0fca5515f..b7cb017b59ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -484,6 +484,8 @@ struct dml2_core_internal_mode_support { double WriteBandwidth[DML2_MAX_PLANES][DML2_MAX_WRITEBACK]; double 
RequiredPrefetchPixelDataBWLuma[DML2_MAX_PLANES]; double RequiredPrefetchPixelDataBWChroma[DML2_MAX_PLANES]; + /* oto bw should also be considered when calculating urgent bw to avoid situations where oto/equ schedules mismatch between ms and mp */ + double RequiredPrefetchBWOTO[DML2_MAX_PLANES]; double cursor_bw[DML2_MAX_PLANES]; double prefetch_cursor_bw[DML2_MAX_PLANES]; double prefetch_vmrow_bw[DML2_MAX_PLANES]; @@ -1381,6 +1383,7 @@ struct dml2_core_shared_get_urgent_bandwidth_required_locals { double vm_row_bw; double flip_and_active_bw; double flip_and_prefetch_bw; + double flip_and_prefetch_bw_oto; double active_and_excess_bw; }; @@ -1792,6 +1795,7 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params { double *VRatioPrefetchC; double *RequiredPrefetchPixelDataBWLuma; double *RequiredPrefetchPixelDataBWChroma; + double *RequiredPrefetchBWOTO; bool *NotEnoughTimeForDynamicMetadata; double *Tno_bw; double *Tno_bw_flip; @@ -2025,6 +2029,7 @@ struct dml2_core_calcs_calculate_peak_bandwidth_required_params { double *surface_read_bandwidth_c; double *prefetch_bandwidth_l; double *prefetch_bandwidth_c; + double *prefetch_bandwidth_oto; double *excess_vactive_fill_bw_l; double *excess_vactive_fill_bw_c; double *cursor_bw; -- 2.51.0 From c31b41f1cb32450d8ac176eef9bda979760040e7 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Fri, 10 Jan 2025 16:09:45 +0800 Subject: [PATCH 15/16] drm/amd/display: Disable PSR-SU on some OLED panels [Why] PSR-SU may cause some glitching randomly on some OLED panels. [How] Disable PSR-SU for certain PSR-SU OLED panels. Reviewed-by: Sun peng Li Signed-off-by: Tom Chung Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index 45858bf1523d..104f03868266 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -30,6 +30,23 @@ #include "amdgpu_dm.h" #include "modules/power/power_helpers.h" +static bool is_specific_oled_panel(struct dc_link *link) +{ + if (!link->dpcd_sink_ext_caps.bits.oled) + return false; + + /* Disable PSR-SU for some OLED panels to avoid glitches */ + if (link->dpcd_caps.sink_dev_id == 0xBA4159) { + uint8_t sink_dev_id_str1[] = {'4', '0', 'C', 'U', '1'}; + + if (!memcmp(link->dpcd_caps.sink_dev_id_str, sink_dev_id_str1, + sizeof(sink_dev_id_str1))) + return true; + } + + return false; +} + static bool link_supports_psrsu(struct dc_link *link) { struct dc *dc = link->ctx->dc; @@ -40,6 +57,9 @@ static bool link_supports_psrsu(struct dc_link *link) if (dc->ctx->dce_version < DCN_VERSION_3_1) return false; + if (is_specific_oled_panel(link)) + return false; + if (!is_psr_su_specific_panel(link)) return false; -- 2.51.0 From cbd97d621ece1d92c3542e52f8af7c04cd2c6afb Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Tue, 14 Jan 2025 12:14:26 -0500 Subject: [PATCH 16/16] drm/amd/display: Amend DCPG IP control sequences to align with HW guidance [WHY&HOW] IP_REQUEST_CNTL should only be toggled off when it was originally off, never unconditionally. 
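The guidance above boils down to a save/conditional-restore pattern on the enable bit: read it first, set it only if it was clear, and clear it again only if we were the ones who set it, so a caller that already holds the enable is not disturbed. A minimal standalone C sketch of the pattern (a plain variable stands in for the register; this is not the driver's REG_GET/REG_SET API):

#include <stdint.h>
#include <stdio.h>

static uint32_t ip_request_cntl;	/* stand-in for the HW register */

static void power_gate_with_save_restore(void)
{
	uint32_t org = ip_request_cntl & 1;	/* read original enable bit */

	if (org == 0)
		ip_request_cntl |= 1;		/* enable only if it was off */

	/* ... program the power-gating controls here ... */

	if (org == 0)
		ip_request_cntl &= ~1u;		/* restore only what we set */
}

int main(void)
{
	ip_request_cntl = 1;	/* caller already enabled the request */
	power_gate_with_save_restore();
	/* The bit is left untouched because we did not set it ourselves. */
	printf("IP_REQUEST_EN after sequence: %u\n", ip_request_cntl & 1);
	return 0;
}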
Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 14 +++++--- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 34 +++++++++++++++++++ .../amd/display/dc/hwss/dcn401/dcn401_hwseq.h | 3 ++ .../amd/display/dc/hwss/dcn401/dcn401_init.c | 2 +- 4 files changed, 48 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index a5e18ab72394..dec732c0c59c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1266,14 +1266,18 @@ static void dcn20_power_on_plane_resources( struct dce_hwseq *hws, struct pipe_ctx *pipe_ctx) { + uint32_t org_ip_request_cntl = 0; + DC_LOGGER_INIT(hws->ctx->logger); if (hws->funcs.dpp_root_clock_control) hws->funcs.dpp_root_clock_control(hws, pipe_ctx->plane_res.dpp->inst, true); if (REG(DC_IP_REQUEST_CNTL)) { - REG_SET(DC_IP_REQUEST_CNTL, 0, - IP_REQUEST_EN, 1); + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, + IP_REQUEST_EN, 1); if (hws->funcs.dpp_pg_control) hws->funcs.dpp_pg_control(hws, pipe_ctx->plane_res.dpp->inst, true); @@ -1281,8 +1285,10 @@ static void dcn20_power_on_plane_resources( if (hws->funcs.hubp_pg_control) hws->funcs.hubp_pg_control(hws, pipe_ctx->plane_res.hubp->inst, true); - REG_SET(DC_IP_REQUEST_CNTL, 0, - IP_REQUEST_EN, 0); + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, + IP_REQUEST_EN, 0); + DC_LOG_DEBUG( "Un-gated front end for pipe %d\n", pipe_ctx->plane_res.hubp->inst); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 92bb820817b9..8ad0ff669b7a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -2610,3 +2610,37 @@ void dcn401_detect_pipe_changes(struct dc_state *old_state, new_pipe->update_flags.bits.test_pattern_changed = 1; } } + +void dcn401_plane_atomic_power_down(struct dc *dc, + struct dpp *dpp, + struct hubp *hubp) +{ + struct dce_hwseq *hws = dc->hwseq; + uint32_t org_ip_request_cntl = 0; + + DC_LOGGER_INIT(dc->ctx->logger); + + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, + IP_REQUEST_EN, 1); + + if (hws->funcs.dpp_pg_control) + hws->funcs.dpp_pg_control(hws, dpp->inst, false); + + if (hws->funcs.hubp_pg_control) + hws->funcs.hubp_pg_control(hws, hubp->inst, false); + + hubp->funcs->hubp_reset(hubp); + dpp->funcs->dpp_reset(dpp); + + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, + IP_REQUEST_EN, 0); + + DC_LOG_DEBUG( + "Power gated front end %d\n", hubp->inst); + + if (hws->funcs.dpp_root_clock_control) + hws->funcs.dpp_root_clock_control(hws, dpp->inst, false); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 17cea748789e..dbd69d215b8b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -102,4 +102,7 @@ void dcn401_detect_pipe_changes( struct dc_state *new_state, struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe); +void dcn401_plane_atomic_power_down(struct dc *dc, + 
struct dpp *dpp, + struct hubp *hubp); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 44cb376f97c1..a4e3501fadbb 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -123,7 +123,7 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .disable_vga = dcn20_disable_vga, .bios_golden_init = dcn10_bios_golden_init, .plane_atomic_disable = dcn20_plane_atomic_disable, - .plane_atomic_power_down = dcn10_plane_atomic_power_down, + .plane_atomic_power_down = dcn401_plane_atomic_power_down, .enable_power_gating_plane = dcn32_enable_power_gating_plane, .hubp_pg_control = dcn32_hubp_pg_control, .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, -- 2.51.0