From 91314e7dfd83345b8b820b782b2511c9c32866cd Mon Sep 17 00:00:00 2001 From: Hamish Claxton Date: Tue, 5 Nov 2024 10:42:31 +1000 Subject: [PATCH 01/16] drm/amd/display: Fix failure to read vram info due to static BP_RESULT The static declaration causes the check to fail. Remove it. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3678 Fixes: 00c391102abc ("drm/amd/display: Add misc DC changes for DCN401") Reviewed-by: Harry Wentland Signed-off-by: Hamish Claxton Signed-off-by: Alex Deucher Cc: aurabindo.pillai@amd.com Cc: hamishclaxton@gmail.com --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index be8fbb04ad98..902491669cbc 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -3122,7 +3122,7 @@ static enum bp_result bios_parser_get_vram_info( struct dc_vram_info *info) { struct bios_parser *bp = BP_FROM_DCB(dcb); - static enum bp_result result = BP_RESULT_BADBIOSTABLE; + enum bp_result result = BP_RESULT_BADBIOSTABLE; struct atom_common_table_header *header; struct atom_data_revision revision; -- 2.51.0 From 3c2296b1eec55b50c64509ba15406142d4a958dc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 8 Nov 2024 09:34:46 -0500 Subject: [PATCH 02/16] Revert "drm/amd/display: parse umc_info or vram_info based on ASIC" This reverts commit 2551b4a321a68134360b860113dd460133e856e5. This was not the root cause. Revert. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3678 Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: aurabindo.pillai@amd.com Cc: hamishclaxton@gmail.com --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 902491669cbc..c9a6de110b74 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -3127,9 +3127,7 @@ static enum bp_result bios_parser_get_vram_info( struct atom_data_revision revision; // vram info moved to umc_info for DCN4x - if (dcb->ctx->dce_version >= DCN_VERSION_4_01 && - dcb->ctx->dce_version < DCN_VERSION_MAX && - info && DATA_TABLES(umc_info)) { + if (info && DATA_TABLES(umc_info)) { header = GET_IMAGE(struct atom_common_table_header, DATA_TABLES(umc_info)); -- 2.51.0 From 447a54a0f79c9a409ceaa17804bdd2e0206397b9 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Tue, 12 Nov 2024 10:11:42 -0600 Subject: [PATCH 03/16] drm/amd: Fix initialization mistake for NBIO 7.7.0 There is a strapping issue on NBIO 7.7.0 that can lead to spurious PME events while in the D0 state. Co-developed-by: Mario Limonciello Signed-off-by: Vijendar Mukunda Signed-off-by: Mario Limonciello Acked-by: Alex Deucher Link: https://lore.kernel.org/r/20241112161142.28974-1-mario.limonciello@amd.com Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c index fb37e354a9d5..1ac730328516 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c @@ -247,6 +247,12 @@ static void nbio_v7_7_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_SOC15(NBIO, 0, regBIF0_PCIE_MST_CTRL_3, data); + switch (adev->ip_versions[NBIO_HWIP][0]) { + case IP_VERSION(7, 7, 0): + data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23); + WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data); + break; + } } static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, -- 2.51.0 From 6d9f9115c091c88cacf78734d8ea34c8609e8680 Mon Sep 17 00:00:00 2001 From: "Everest K.C." Date: Wed, 23 Oct 2024 17:33:55 -0600 Subject: [PATCH 04/16] drm/xe/guc: Fix dereference before NULL check MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The pointer list->list is dereferenced before the NULL check. Fix this by moving the NULL check outside the for loop, so that the check is performed before the dereferencing. The list->list pointer cannot be NULL so this has no effect on runtime. It's just a correctness issue. This issue was reported by Coverity Scan. https://scan7.scan.coverity.com/#/project-view/51525/11354?selectedIssue=1600335 Fixes: 0f1fdf559225 ("drm/xe/guc: Save manual engine capture into capture list") Signed-off-by: Everest K.C. Reviewed-by: Dan Carpenter Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20241023233356.5479-1-everestkc@everestkc.com.np (cherry picked from commit 2aff81e039de5b0b7ef6bdcb2c320f121f69e2b4) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_guc_capture.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 8b6cb786a2aa..cc72446a5de1 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -1531,7 +1531,7 @@ read_reg_to_node(struct xe_hw_engine *hwe, const struct __guc_mmio_reg_descr_gro { int i; - if (!list || list->num_regs == 0) + if (!list || !list->list || list->num_regs == 0) return; if (!regs) @@ -1541,9 +1541,6 @@ read_reg_to_node(struct xe_hw_engine *hwe, const struct __guc_mmio_reg_descr_gro struct __guc_mmio_reg_descr desc = list->list[i]; u32 value; - if (!list->list) - return; - if (list->type == GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE) { value = xe_hw_engine_mmio_read32(hwe, desc.reg); } else { -- 2.51.0 From 85270776f65d27b1c9720324745ab7da3ed71b3e Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Wed, 13 Nov 2024 17:17:23 +0800 Subject: [PATCH 05/16] drm/amd/pm: Update data types used for uapi i/f Update member's data type in amdgpu_xcp_metrics from linux specific to the ones compatible to uapi interface Fixes: 4c07ff7d07f7 ("drm/amd/pm: Add gpu_metrics_v1_6") Signed-off-by: Asad Kamal Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/kgd_pp_interface.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index bb27c0d2a9ae..640f6dcdbe1d 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -357,11 +357,11 @@ struct dpm_clocks; struct amdgpu_xcp_metrics { /* Utilization Instantaneous (%) */ - u32 gfx_busy_inst[MAX_XCC]; - u16 jpeg_busy[NUM_JPEG_ENG]; - u16 vcn_busy[NUM_VCN]; + uint32_t gfx_busy_inst[MAX_XCC]; + uint16_t jpeg_busy[NUM_JPEG_ENG]; + uint16_t vcn_busy[NUM_VCN]; /* Utilization Accumulated (%) */ - u64 gfx_busy_acc[MAX_XCC]; + uint64_t gfx_busy_acc[MAX_XCC]; }; struct amd_pm_funcs { -- 2.51.0 From e2259b5a8c2754d9134fa5a92f69a9de75d7536c Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Mon, 11 Nov 2024 20:11:48 +0800 Subject: [PATCH 06/16] drm/amd/pm: Add gpu_metrics_v1_7 Add new gpu_metrics_v1_7 to acquire xgmi link status, application counter and max vram bandwidth v2: Use gpu_metrics_v1_7 for SMU_v_13_0_6 (Lijo) Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/include/kgd_pp_interface.h | 110 ++++++++++++++++++ .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 8 +- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 3 + 3 files changed, 117 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 640f6dcdbe1d..67a5de573943 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -364,6 +364,17 @@ struct amdgpu_xcp_metrics { uint64_t gfx_busy_acc[MAX_XCC]; }; +struct amdgpu_xcp_metrics_v1_1 { + /* Utilization Instantaneous (%) */ + uint32_t gfx_busy_inst[MAX_XCC]; + uint16_t jpeg_busy[NUM_JPEG_ENG]; + uint16_t vcn_busy[NUM_VCN]; + /* Utilization Accumulated (%) */ + uint64_t gfx_busy_acc[MAX_XCC]; + /* Total App Clock Counter Accumulated */ + uint64_t gfx_below_host_limit_acc[MAX_XCC]; +}; + struct amd_pm_funcs { /* export for dpm on ci and si */ int (*pre_set_power_state)(void *handle); @@ -977,6 +988,105 @@ struct gpu_metrics_v1_6 { uint32_t pcie_lc_perf_other_end_recovery; }; +struct gpu_metrics_v1_7 { + struct metrics_table_header common_header; + + /* Temperature (Celsius) */ + uint16_t temperature_hotspot; + uint16_t temperature_mem; + uint16_t temperature_vrsoc; + + /* Power (Watts) */ + uint16_t curr_socket_power; + + /* Utilization (%) */ + uint16_t average_gfx_activity; + uint16_t average_umc_activity; // memory controller + + /* VRAM max bandwidthi (in GB/sec) at max memory clock */ + uint64_t mem_max_bandwidth; + + /* Energy (15.259uJ (2^-16) units) */ + uint64_t energy_accumulator; + + /* Driver attached timestamp (in ns) */ + uint64_t system_clock_counter; + + /* Accumulation cycle counter */ + uint32_t accumulation_counter; + + /* Accumulated throttler residencies */ + uint32_t prochot_residency_acc; + uint32_t ppt_residency_acc; + uint32_t socket_thm_residency_acc; + uint32_t vr_thm_residency_acc; + uint32_t hbm_thm_residency_acc; + + /* Clock Lock Status. Each bit corresponds to clock instance */ + uint32_t gfxclk_lock_status; + + /* Link width (number of lanes) and speed (in 0.1 GT/s) */ + uint16_t pcie_link_width; + uint16_t pcie_link_speed; + + /* XGMI bus width and bitrate (in Gbps) */ + uint16_t xgmi_link_width; + uint16_t xgmi_link_speed; + + /* Utilization Accumulated (%) */ + uint32_t gfx_activity_acc; + uint32_t mem_activity_acc; + + /*PCIE accumulated bandwidth (GB/sec) */ + uint64_t pcie_bandwidth_acc; + + /*PCIE instantaneous bandwidth (GB/sec) */ + uint64_t pcie_bandwidth_inst; + + /* PCIE L0 to recovery state transition accumulated count */ + uint64_t pcie_l0_to_recov_count_acc; + + /* PCIE replay accumulated count */ + uint64_t pcie_replay_count_acc; + + /* PCIE replay rollover accumulated count */ + uint64_t pcie_replay_rover_count_acc; + + /* PCIE NAK sent accumulated count */ + uint32_t pcie_nak_sent_count_acc; + + /* PCIE NAK received accumulated count */ + uint32_t pcie_nak_rcvd_count_acc; + + /* XGMI accumulated data transfer size(KiloBytes) */ + uint64_t xgmi_read_data_acc[NUM_XGMI_LINKS]; + uint64_t xgmi_write_data_acc[NUM_XGMI_LINKS]; + + /* XGMI link status(active/inactive) */ + uint16_t xgmi_link_status[NUM_XGMI_LINKS]; + + uint16_t padding; + + /* PMFW attached timestamp (10ns resolution) */ + uint64_t firmware_timestamp; + + /* Current clocks (Mhz) */ + uint16_t current_gfxclk[MAX_GFX_CLKS]; + uint16_t current_socclk[MAX_CLKS]; + uint16_t current_vclk0[MAX_CLKS]; + uint16_t current_dclk0[MAX_CLKS]; + uint16_t current_uclk; + + /* Number of current partition */ + uint16_t num_partition; + + /* XCP metrics stats */ + struct amdgpu_xcp_metrics_v1_1 xcp_stats[NUM_XCP]; + + /* PCIE other end recovery counter */ + uint32_t pcie_lc_perf_other_end_recovery; +}; + /* * gpu_metrics_v2_0 is not recommended as it's not naturally aligned. * Use gpu_metrics_v2_1 or later instead. diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index fa30a9e1f27a..11ecaa62f419 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -370,7 +370,7 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) return -ENOMEM; smu_table->metrics_time = 0; - smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_6); + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_7); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) { @@ -2321,8 +2321,8 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table { bool per_inst, smu_13_0_6_per_inst, smu_13_0_14_per_inst, apu_per_inst; struct smu_table_context *smu_table = &smu->smu_table; - struct gpu_metrics_v1_6 *gpu_metrics = - (struct gpu_metrics_v1_6 *)smu_table->gpu_metrics_table; + struct gpu_metrics_v1_7 *gpu_metrics = + (struct gpu_metrics_v1_7 *)smu_table->gpu_metrics_table; bool flag = smu_v13_0_6_is_unified_metrics(smu); int ret = 0, xcc_id, inst, i, j, k, idx; struct amdgpu_device *adev = smu->adev; @@ -2341,7 +2341,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table metrics_a = (MetricsTableA_t *)metrics_x; - smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 6); + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 7); gpu_metrics->temperature_hotspot = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, flag)); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index f1ab1a6bb467..dbbd3759bff3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -1081,6 +1081,9 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev) case METRICS_VERSION(1, 6): structure_size = sizeof(struct gpu_metrics_v1_6); break; + case METRICS_VERSION(1, 7): + structure_size = sizeof(struct gpu_metrics_v1_7); + break; case METRICS_VERSION(2, 0): structure_size = sizeof(struct gpu_metrics_v2_0); break; -- 2.51.0 From 466a59abacc6590487faf21bd572d704f7283d47 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Mon, 11 Nov 2024 21:16:01 +0800 Subject: [PATCH 07/16] drm/amd/pm: Get xgmi link status for XGMI_v_6_4_0 Get XGMI_v_6_4_0 link status and populate it to metrics v1_7 for SMU_v_13_0_6 v2: Get link status register value for each soc from separate function (Lijo) Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 41 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 2 + .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 +- 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index b47422b0b5b1..74b4349e345a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -40,6 +40,11 @@ #define smnPCS_GOPX1_PCS_ERROR_STATUS 0x12200210 #define smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK 0x12200218 +#define XGMI_STATE_DISABLE 0xD1 +#define XGMI_STATE_LS0 0x81 +#define XGMI_LINK_ACTIVE 1 +#define XGMI_LINK_INACTIVE 0 + static DEFINE_MUTEX(xgmi_mutex); #define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4 @@ -289,6 +294,42 @@ static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = { SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxCMDPktErr)}, }; +static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link_num) +{ + const u32 smnpcs_xgmi3x16_pcs_state_hist1 = 0x11a00070; + const int xgmi_inst = 2; + u32 link_inst; + u64 addr; + + link_inst = global_link_num % xgmi_inst; + + addr = (smnpcs_xgmi3x16_pcs_state_hist1 | (link_inst << 20)) + + adev->asic_funcs->encode_ext_smn_addressing(global_link_num / xgmi_inst); + + return RREG32_PCIE_EXT(addr); +} + +int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num) +{ + u32 xgmi_state_reg_val; + + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + xgmi_state_reg_val = xgmi_v6_4_get_link_status(adev, global_link_num); + break; + default: + return -EOPNOTSUPP; + } + + if ((xgmi_state_reg_val & 0xFF) == XGMI_STATE_DISABLE) + return -ENOLINK; + + if ((xgmi_state_reg_val & 0xFF) == XGMI_STATE_LS0) + return XGMI_LINK_ACTIVE; + + return XGMI_LINK_INACTIVE; +} + /** * DOC: AMDGPU XGMI Support * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 8cc7ab38db7c..d1282b4c6348 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -84,5 +84,7 @@ int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev); int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev, struct amdgpu_hive_info *hive, int req_nps_mode); +int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, + int global_link_num); #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 11ecaa62f419..ab3c93ddce46 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -96,7 +96,6 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_14.bin"); #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK 0xE0 #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5 #define LINK_SPEED_MAX 4 - #define SMU_13_0_6_DSCLK_THRESHOLD 140 #define MCA_BANK_IPID(_ip, _hwid, _type) \ @@ -2448,6 +2447,9 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc, flag)[i]); gpu_metrics->xgmi_write_data_acc[i] = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc, flag)[i]); + ret = amdgpu_get_xgmi_link_status(adev, i); + if (ret >= 0) + gpu_metrics->xgmi_link_status[i] = ret; } gpu_metrics->num_partition = adev->xcp_mgr->num_xcps; -- 2.51.0 From 18ab7e88778fdbee3221d6ce8acefe55feaa09d1 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 13 Nov 2024 20:51:58 +0800 Subject: [PATCH 08/16] drm/radeon: Use ttm_bo_move_null() in radeon_bo_move() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Since ttm_bo_move_null() is exactly the same as ttm_resource_free() + ttm_bo_assign_mem(), we use ttm_bo_move_null() for the GTT --> SYSTEM move case too. Then the code is more consistent as the SYSTEM --> GTT move case. Acked-by: Christian König Signed-off-by: Huacai Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_ttm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 69d0c12fa419..616d25c8c2de 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -219,8 +219,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict, if (old_mem->mem_type == TTM_PL_TT && new_mem->mem_type == TTM_PL_SYSTEM) { radeon_ttm_tt_unbind(bo->bdev, bo->ttm); - ttm_resource_free(bo, &bo->resource); - ttm_bo_assign_mem(bo, new_mem); + ttm_bo_move_null(bo, new_mem); goto out; } if (rdev->ring[radeon_copy_ring_index(rdev)].ready && -- 2.51.0 From 2abf2f7032df4c4e7f6cf7906da59d0e614897d6 Mon Sep 17 00:00:00 2001 From: Umio Yasuno Date: Thu, 14 Nov 2024 16:15:27 +0900 Subject: [PATCH 09/16] drm/amd/pm: update current_socclk and current_uclk in gpu_metrics on smu v13.0.7 These were missed before. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3751 Signed-off-by: Umio Yasuno Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index c5d3e25cc967..4fd0354bd312 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2147,6 +2147,8 @@ static ssize_t smu_v13_0_7_get_gpu_metrics(struct smu_context *smu, gpu_metrics->average_dclk1_frequency = metrics->AverageDclk1Frequency; gpu_metrics->current_gfxclk = metrics->CurrClock[PPCLK_GFXCLK]; + gpu_metrics->current_socclk = metrics->CurrClock[PPCLK_SOCCLK]; + gpu_metrics->current_uclk = metrics->CurrClock[PPCLK_UCLK]; gpu_metrics->current_vclk0 = metrics->CurrClock[PPCLK_VCLK_0]; gpu_metrics->current_dclk0 = metrics->CurrClock[PPCLK_DCLK_0]; gpu_metrics->current_vclk1 = metrics->CurrClock[PPCLK_VCLK_1]; -- 2.51.0 From 6ecccc093ec439c04d62b40bda76240389d104a8 Mon Sep 17 00:00:00 2001 From: Bhavin Sharma Date: Thu, 14 Nov 2024 20:41:12 +0530 Subject: [PATCH 10/16] drm/amd/pm: remove redundant tools_size check The check for tools_size being non-zero is redundant as tools_size is explicitly set to a non-zero value (0x19000). Removing the if condition simplifies the code without altering functionality. Signed-off-by: Bhavin Sharma Signed-off-by: Alex Deucher --- .../amd/pm/powerplay/smumgr/vega12_smumgr.c | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega12_smumgr.c index b52ce135d84d..d3ff6a831ed5 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega12_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/vega12_smumgr.c @@ -257,20 +257,18 @@ static int vega12_smu_init(struct pp_hwmgr *hwmgr) priv->smu_tables.entry[TABLE_WATERMARKS].size = sizeof(Watermarks_t); tools_size = 0x19000; - if (tools_size) { - ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, - tools_size, - PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &priv->smu_tables.entry[TABLE_PMSTATUSLOG].handle, - &priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr, - &priv->smu_tables.entry[TABLE_PMSTATUSLOG].table); - if (ret) - goto err1; + ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, + tools_size, + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &priv->smu_tables.entry[TABLE_PMSTATUSLOG].handle, + &priv->smu_tables.entry[TABLE_PMSTATUSLOG].mc_addr, + &priv->smu_tables.entry[TABLE_PMSTATUSLOG].table); + if (ret) + goto err1; - priv->smu_tables.entry[TABLE_PMSTATUSLOG].version = 0x01; - priv->smu_tables.entry[TABLE_PMSTATUSLOG].size = tools_size; - } + priv->smu_tables.entry[TABLE_PMSTATUSLOG].version = 0x01; + priv->smu_tables.entry[TABLE_PMSTATUSLOG].size = tools_size; /* allocate space for AVFS Fuse table */ ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev, -- 2.51.0 From 6104112693011990a19d971c4c419de6c29adc54 Mon Sep 17 00:00:00 2001 From: Bhavin Sharma Date: Thu, 14 Nov 2024 20:41:11 +0530 Subject: [PATCH 11/16] drm/amd/display: remove redundant is_dsc_possible check Since is_dsc_possible is already checked just above, there's no need to check it again before filling out the DSC settings. Signed-off-by: Bhavin Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index ebd5df1a36e8..d9aaebfa3a0a 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -1093,14 +1093,11 @@ static bool setup_dsc_config( if (!is_dsc_possible) goto done; - // Final decission: can we do DSC or not? - if (is_dsc_possible) { - // Fill out the rest of DSC settings - dsc_cfg->block_pred_enable = dsc_common_caps.is_block_pred_supported; - dsc_cfg->linebuf_depth = dsc_common_caps.lb_bit_depth; - dsc_cfg->version_minor = (dsc_common_caps.dsc_version & 0xf0) >> 4; - dsc_cfg->is_dp = dsc_sink_caps->is_dp; - } + /* Fill out the rest of DSC settings */ + dsc_cfg->block_pred_enable = dsc_common_caps.is_block_pred_supported; + dsc_cfg->linebuf_depth = dsc_common_caps.lb_bit_depth; + dsc_cfg->version_minor = (dsc_common_caps.dsc_version & 0xf0) >> 4; + dsc_cfg->is_dp = dsc_sink_caps->is_dp; done: if (!is_dsc_possible) -- 2.51.0 From 8fef253c94a5312b9150b2ff8e633b331bac7e88 Mon Sep 17 00:00:00 2001 From: Yihan Zhu Date: Wed, 30 Oct 2024 16:20:21 -0400 Subject: [PATCH 12/16] drm/amd/display: update pipe selection policy to check head pipe [Why] No check on head pipe during the dml to dc hw mapping will allow illegal pipe usage. This will result in a wrong pipe topology to cause mpcc tree totally mess up then cause a display hang. [How] Avoid to use the pipe is head in all check and avoid ODM slice during preferred pipe check. Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas Signed-off-by: Yihan Zhu Signed-off-by: Hamza Mahfooz Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml2_dc_resource_mgmt.c | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c index 6eccf0241d85..1ed21c1b86a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_dc_resource_mgmt.c @@ -258,12 +258,25 @@ static unsigned int find_preferred_pipe_candidates(const struct dc_state *existi * However this condition comes with a caveat. We need to ignore pipes that will * require a change in OPP but still have the same stream id. For example during * an MPC to ODM transiton. + * + * Adding check to avoid pipe select on the head pipe by utilizing dc resource + * helper function resource_get_primary_dpp_pipe and comparing the pipe index. */ if (existing_state) { for (i = 0; i < pipe_count; i++) { if (existing_state->res_ctx.pipe_ctx[i].stream && existing_state->res_ctx.pipe_ctx[i].stream->stream_id == stream_id) { + struct pipe_ctx *head_pipe = + resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ? + resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) : + NULL; + + // we should always respect the head pipe from selection + if (head_pipe && head_pipe->pipe_idx == i) + continue; if (existing_state->res_ctx.pipe_ctx[i].plane_res.hubp && - existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i) + existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i && + (existing_state->res_ctx.pipe_ctx[i].prev_odm_pipe || + existing_state->res_ctx.pipe_ctx[i].next_odm_pipe)) continue; preferred_pipe_candidates[num_preferred_candidates++] = i; @@ -292,6 +305,14 @@ static unsigned int find_last_resort_pipe_candidates(const struct dc_state *exis */ if (existing_state) { for (i = 0; i < pipe_count; i++) { + struct pipe_ctx *head_pipe = + resource_is_pipe_type(&existing_state->res_ctx.pipe_ctx[i], DPP_PIPE) ? + resource_get_primary_dpp_pipe(&existing_state->res_ctx.pipe_ctx[i]) : + NULL; + + // we should always respect the head pipe from selection + if (head_pipe && head_pipe->pipe_idx == i) + continue; if ((existing_state->res_ctx.pipe_ctx[i].plane_res.hubp && existing_state->res_ctx.pipe_ctx[i].plane_res.hubp->opp_id != i) || existing_state->res_ctx.pipe_ctx[i].stream_res.tg) -- 2.51.0 From c33a93201ca07119de90e8c952fbdf65920ab55d Mon Sep 17 00:00:00 2001 From: Chris Park Date: Mon, 4 Nov 2024 13:18:39 -0500 Subject: [PATCH 13/16] drm/amd/display: Ignore scalar validation failure if pipe is phantom [Why] There are some pipe scaler validation failure when the pipe is phantom and causes crash in DML validation. Since, scalar parameters are not as important in phantom pipe and we require this plane to do successful MCLK switches, the failure condition can be ignored. [How] Ignore scalar validation failure if the pipe validation is marked as phantom pipe. Cc: stable@vger.kernel.org # 6.11+ Reviewed-by: Dillon Varone Signed-off-by: Chris Park Signed-off-by: Hamza Mahfooz Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 33125b95c3a1..619fad17de55 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1501,6 +1501,10 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) res = spl_calculate_scaler_params(spl_in, spl_out); // Convert respective out params from SPL to scaler data translate_SPL_out_params_to_pipe_ctx(pipe_ctx, spl_out); + + /* Ignore scaler failure if pipe context plane is phantom plane */ + if (!res && plane_state->is_phantom) + res = true; } else { #endif /* depends on h_active */ @@ -1571,6 +1575,10 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) &plane_state->scaling_quality); } + /* Ignore scaler failure if pipe context plane is phantom plane */ + if (!res && plane_state->is_phantom) + res = true; + if (res && (pipe_ctx->plane_res.scl_data.taps.v_taps != temp.v_taps || pipe_ctx->plane_res.scl_data.taps.h_taps != temp.h_taps || pipe_ctx->plane_res.scl_data.taps.v_taps_c != temp.v_taps_c || -- 2.51.0 From 27227a234c1487cb7a684615f0749c455218833a Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Mon, 28 Oct 2024 17:12:22 -0400 Subject: [PATCH 14/16] drm/amd/display: Fix handling of plane refcount [Why] The mechanism to backup and restore plane states doesn't maintain refcount, which can cause issues if the refcount of the plane changes in between backup and restore operations, such as memory leaks if the refcount was supposed to go down, or double frees / invalid memory accesses if the refcount was supposed to go up. [How] Cache and re-apply current refcount when restoring plane states. Cc: stable@vger.kernel.org Reviewed-by: Josip Pavic Signed-off-by: Joshua Aberback Signed-off-by: Hamza Mahfooz Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 7872c6cabb14..0c1875d35a95 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3141,7 +3141,10 @@ static void restore_planes_and_stream_state( return; for (i = 0; i < status->plane_count; i++) { + /* refcount will always be valid, restore everything else */ + struct kref refcount = status->plane_states[i]->refcount; *status->plane_states[i] = scratch->plane_states[i]; + status->plane_states[i]->refcount = refcount; } *stream = scratch->stream_state; } -- 2.51.0 From 89713ce5518eda6b370c7a17edbcab4f97a39f68 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Fri, 1 Nov 2024 10:51:02 -0400 Subject: [PATCH 15/16] drm/amd/display: Enable Request rate limiter during C-State on dcn401 [WHY] When C-State entry is requested, the rate limiter will be disabled which can result in high contention in the DCHUB return path. [HOW] Enable the rate limiter during C-state requests to prevent contention. Cc: stable@vger.kernel.org # 6.11+ Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Hamza Mahfooz Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../src/dml2_core/dml2_core_dcn4_calcs.c | 6 +++++ .../display/dc/hubbub/dcn10/dcn10_hubbub.h | 8 ++++++- .../display/dc/hubbub/dcn20/dcn20_hubbub.h | 1 + .../display/dc/hubbub/dcn401/dcn401_hubbub.c | 24 +++++++++++++++++-- .../display/dc/hubbub/dcn401/dcn401_hubbub.h | 7 +++++- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 13 ++++++---- .../gpu/drm/amd/display/dc/inc/hw/dchubbub.h | 2 +- .../dc/resource/dcn401/dcn401_resource.h | 3 ++- 8 files changed, 53 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 92e43a1e4dd4..601320b1be81 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -11,6 +11,7 @@ #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4 +#define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) { @@ -3886,6 +3887,10 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch #endif *p->hw_debug5 = false; +#ifdef ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE + if (p->NumberOfActiveSurfaces > 1) + *p->hw_debug5 = true; +#else for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1) && p->display_cfg->plane_descriptors[k].surface.dcc.enable @@ -3901,6 +3906,7 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5); #endif } +#endif } static enum dml2_odm_mode DecideODMMode(unsigned int HActive, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h index 4bd1dda07719..9fbd45c7dfef 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h @@ -200,6 +200,7 @@ struct dcn_hubbub_registers { uint32_t DCHUBBUB_ARB_FRAC_URG_BW_MALL_B; uint32_t DCHUBBUB_TIMEOUT_DETECTION_CTRL1; uint32_t DCHUBBUB_TIMEOUT_DETECTION_CTRL2; + uint32_t DCHUBBUB_CTRL_STATUS; }; #define HUBBUB_REG_FIELD_LIST_DCN32(type) \ @@ -320,7 +321,12 @@ struct dcn_hubbub_registers { type DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD;\ type DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD;\ type DCHUBBUB_TIMEOUT_DETECTION_EN;\ - type DCHUBBUB_TIMEOUT_TIMER_RESET + type DCHUBBUB_TIMEOUT_TIMER_RESET;\ + type ROB_UNDERFLOW_STATUS;\ + type ROB_OVERFLOW_STATUS;\ + type ROB_OVERFLOW_CLEAR;\ + type DCHUBBUB_HW_DEBUG;\ + type CSTATE_SWATH_CHK_GOOD_MODE #define HUBBUB_STUTTER_REG_FIELD_LIST(type) \ type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A;\ diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h index 036bb3e6c957..46d8f5c70750 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn20/dcn20_hubbub.h @@ -96,6 +96,7 @@ struct dcn20_hubbub { unsigned int det1_size; unsigned int det2_size; unsigned int det3_size; + bool allow_sdpif_rate_limit_when_cstate_req; }; void hubbub2_construct(struct dcn20_hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c index 5d658e9bef64..92fab471b183 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c @@ -1192,15 +1192,35 @@ static void dcn401_wait_for_det_update(struct hubbub *hubbub, int hubp_inst) } } -static void dcn401_program_timeout_thresholds(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs) +static bool dcn401_program_arbiter(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower) { struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + bool wm_pending = false; + uint32_t temp; + /* request backpressure and outstanding return threshold (unused)*/ //REG_UPDATE(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD, arb_regs->req_stall_threshold); /* P-State stall threshold */ REG_UPDATE(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD, arb_regs->pstate_stall_threshold); + + if (safe_to_lower || arb_regs->allow_sdpif_rate_limit_when_cstate_req > hubbub2->allow_sdpif_rate_limit_when_cstate_req) { + hubbub2->allow_sdpif_rate_limit_when_cstate_req = arb_regs->allow_sdpif_rate_limit_when_cstate_req; + + /* only update the required bits */ + REG_GET(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, &temp); + if (hubbub2->allow_sdpif_rate_limit_when_cstate_req) { + temp |= (1 << 5); + } else { + temp &= ~(1 << 5); + } + REG_UPDATE(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, temp); + } else { + wm_pending = true; + } + + return wm_pending; } static const struct hubbub_funcs hubbub4_01_funcs = { @@ -1226,7 +1246,7 @@ static const struct hubbub_funcs hubbub4_01_funcs = { .program_det_segments = dcn401_program_det_segments, .program_compbuf_segments = dcn401_program_compbuf_segments, .wait_for_det_update = dcn401_wait_for_det_update, - .program_timeout_thresholds = dcn401_program_timeout_thresholds, + .program_arbiter = dcn401_program_arbiter, }; void hubbub401_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h index 5f1960722ebd..b1d9ea9d1c3d 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h @@ -128,7 +128,12 @@ HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD, mask_sh),\ HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD, mask_sh),\ HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_DETECTION_EN, mask_sh),\ - HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_TIMER_RESET, mask_sh) + HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_TIMER_RESET, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_UNDERFLOW_STATUS, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_OVERFLOW_STATUS, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, ROB_OVERFLOW_CLEAR, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, DCHUBBUB_HW_DEBUG, mask_sh),\ + HUBBUB_SF(DCHUBBUB_CTRL_STATUS, CSTATE_SWATH_CHK_GOOD_MODE, mask_sh) bool hubbub401_program_urgent_watermarks( struct hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index e8cc1bfa73f3..5de11e2837c0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1488,6 +1488,10 @@ void dcn401_prepare_bandwidth(struct dc *dc, &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, false); + /* update timeout thresholds */ + if (hubbub->funcs->program_arbiter) { + dc->wm_optimized_required |= hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, false); + } /* decrease compbuf size */ if (hubbub->funcs->program_compbuf_segments) { @@ -1529,6 +1533,10 @@ void dcn401_optimize_bandwidth( &context->bw_ctx.bw.dcn.watermarks, dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000, true); + /* update timeout thresholds */ + if (hubbub->funcs->program_arbiter) { + hubbub->funcs->program_arbiter(hubbub, &context->bw_ctx.bw.dcn.arb_regs, true); + } if (dc->clk_mgr->dc_mode_softmax_enabled) if (dc->clk_mgr->clks.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 && @@ -1554,11 +1562,6 @@ void dcn401_optimize_bandwidth( pipe_ctx->dlg_regs.min_dst_y_next_start); } } - - /* update timeout thresholds */ - if (hubbub->funcs->program_timeout_thresholds) { - hubbub->funcs->program_timeout_thresholds(hubbub, &context->bw_ctx.bw.dcn.arb_regs); - } } void dcn401_fams2_global_control_lock(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index 6c1d41c0f099..52b745667ef7 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -228,7 +228,7 @@ struct hubbub_funcs { void (*program_det_segments)(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_seg); void (*program_compbuf_segments)(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase); void (*wait_for_det_update)(struct hubbub *hubbub, int hubp_inst); - void (*program_timeout_thresholds)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs); + bool (*program_arbiter)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs, bool safe_to_lower); }; struct hubbub { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 7c8d61db153d..19568c359669 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -612,7 +612,8 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SR(DCHUBBUB_SDPIF_CFG1), \ SR(DCHUBBUB_MEM_PWR_MODE_CTRL), \ SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL1), \ - SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL2) + SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL2), \ + SR(DCHUBBUB_CTRL_STATUS) /* DCCG */ -- 2.51.0 From e0179588d6eeb74eb87981c07a405524a1f0a677 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Tue, 5 Nov 2024 10:15:19 -0500 Subject: [PATCH 16/16] drm/amd/display: add public taps API in SPL [Why] Add public API to obtain number of taps in SPL. [How] Isolate function to calculate recout, ratios and viewport before calculating taps. Call function in both public taps API call and private scaling call. Reviewed-by: Jun Lei Signed-off-by: Samson Tam Signed-off-by: Hamza Mahfooz Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/spl/dc_spl.c | 63 +++++++++++++++------ drivers/gpu/drm/amd/display/dc/spl/dc_spl.h | 2 + 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index 614276200aa0..da477406a4b7 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -99,7 +99,7 @@ static struct spl_rect calculate_plane_rec_in_timing_active( * * recout_x = 128 + round(plane_x * 2304 / 1920) * recout_w = 128 + round((plane_x + plane_w) * 2304 / 1920) - recout_x - * recout_y = 0 + round(plane_y * 1440 / 1280) + * recout_y = 0 + round(plane_y * 1440 / 1200) * recout_h = 0 + round((plane_y + plane_h) * 1440 / 1200) - recout_y * * NOTE: fixed point division is not error free. To reduce errors @@ -1746,6 +1746,32 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, spl_set_blur_scale_data(dscl_prog_data, data); } +/* Calculate recout, scaling ratio, and viewport, then get optimal number of taps */ +static bool spl_calculate_number_of_taps(struct spl_in *spl_in, struct spl_scratch *spl_scratch, struct spl_out *spl_out, + bool *enable_easf_v, bool *enable_easf_h, bool *enable_isharp) +{ + bool res = false; + + memset(spl_scratch, 0, sizeof(struct spl_scratch)); + spl_scratch->scl_data.h_active = spl_in->h_active; + spl_scratch->scl_data.v_active = spl_in->v_active; + + // All SPL calls + /* recout calculation */ + /* depends on h_active */ + spl_calculate_recout(spl_in, spl_scratch, spl_out); + /* depends on pixel format */ + spl_calculate_scaling_ratios(spl_in, spl_scratch, spl_out); + /* depends on scaling ratios and recout, does not calculate offset yet */ + spl_calculate_viewport_size(spl_in, spl_scratch); + + res = spl_get_optimal_number_of_taps( + spl_in->basic_out.max_downscale_src_width, spl_in, + spl_scratch, &spl_in->scaling_quality, enable_easf_v, + enable_easf_h, enable_isharp); + return res; +} + /* Calculate scaler parameters */ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) { @@ -1760,23 +1786,9 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) bool enable_isharp = false; const struct spl_scaler_data *data = &spl_scratch.scl_data; - memset(&spl_scratch, 0, sizeof(struct spl_scratch)); - spl_scratch.scl_data.h_active = spl_in->h_active; - spl_scratch.scl_data.v_active = spl_in->v_active; - - // All SPL calls - /* recout calculation */ - /* depends on h_active */ - spl_calculate_recout(spl_in, &spl_scratch, spl_out); - /* depends on pixel format */ - spl_calculate_scaling_ratios(spl_in, &spl_scratch, spl_out); - /* depends on scaling ratios and recout, does not calculate offset yet */ - spl_calculate_viewport_size(spl_in, &spl_scratch); + res = spl_calculate_number_of_taps(spl_in, &spl_scratch, spl_out, + &enable_easf_v, &enable_easf_h, &enable_isharp); - res = spl_get_optimal_number_of_taps( - spl_in->basic_out.max_downscale_src_width, spl_in, - &spl_scratch, &spl_in->scaling_quality, &enable_easf_v, - &enable_easf_h, &enable_isharp); /* * Depends on recout, scaling ratios, h_active and taps * May need to re-check lb size after this in some obscure scenario @@ -1824,3 +1836,20 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) return res; } + +/* External interface to get number of taps only */ +bool spl_get_number_of_taps(struct spl_in *spl_in, struct spl_out *spl_out) +{ + bool res = false; + bool enable_easf_v = false; + bool enable_easf_h = false; + bool enable_isharp = false; + struct spl_scratch spl_scratch; + struct dscl_prog_data *dscl_prog_data = spl_out->dscl_prog_data; + const struct spl_scaler_data *data = &spl_scratch.scl_data; + + res = spl_calculate_number_of_taps(spl_in, &spl_scratch, spl_out, + &enable_easf_v, &enable_easf_h, &enable_isharp); + spl_set_taps_data(dscl_prog_data, data); + return res; +} diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h index 205e59a2a8ee..02a2d6725ed5 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.h @@ -13,4 +13,6 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out); +bool spl_get_number_of_taps(struct spl_in *spl_in, struct spl_out *spl_out); + #endif /* __DC_SPL_H__ */ -- 2.51.0