From 6a7fde433231c18164c117592d3e18ced648ad58 Mon Sep 17 00:00:00 2001 From: George Shen Date: Fri, 10 Jan 2025 11:35:46 -0500 Subject: [PATCH 01/16] drm/amd/display: Update CR AUX RD interval interpretation [Why] DP spec updated to have the CR AUX RD interval match the EQ AUX RD interval interpretation of DPCD 0000Eh/0220Eh for 8b/10b non-LTTPR mode and LTTPR transparent mode cases. [How] Update interpretation of DPCD 0000Eh/0220Eh for CR AUX RD interval during 8b/10b link training. Reviewed-by: Michael Strauss Reviewed-by: Wenjing Liu Signed-off-by: George Shen Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/link/protocols/link_dp_training_8b_10b.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c index 3bdce32a85e3..ae95ec48e572 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c @@ -36,7 +36,8 @@ link->ctx->logger static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, - const struct dc_link_settings *link_settings) + const struct dc_link_settings *link_settings, + enum lttpr_mode lttpr_mode) { union training_aux_rd_interval training_rd_interval; uint32_t wait_in_micro_secs = 100; @@ -49,6 +50,8 @@ static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, DP_TRAINING_AUX_RD_INTERVAL, (uint8_t *)&training_rd_interval, sizeof(training_rd_interval)); + if (lttpr_mode != LTTPR_MODE_NON_TRANSPARENT) + wait_in_micro_secs = 400; if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000; } @@ -110,7 +113,6 @@ void decide_8b_10b_training_settings( */ lt_settings->link_settings.link_spread = link->dp_ss_off ? LINK_SPREAD_DISABLED : LINK_SPREAD_05_DOWNSPREAD_30KHZ; - lt_settings->cr_pattern_time = get_cr_training_aux_rd_interval(link, link_setting); lt_settings->eq_pattern_time = get_eq_training_aux_rd_interval(link, link_setting); lt_settings->pattern_for_cr = decide_cr_training_pattern(link_setting); lt_settings->pattern_for_eq = decide_eq_training_pattern(link, link_setting); @@ -119,6 +121,7 @@ void decide_8b_10b_training_settings( lt_settings->disallow_per_lane_settings = true; lt_settings->always_match_dpcd_with_hw_lane_settings = true; lt_settings->lttpr_mode = dp_decide_8b_10b_lttpr_mode(link); + lt_settings->cr_pattern_time = get_cr_training_aux_rd_interval(link, link_setting, lt_settings->lttpr_mode); dp_hw_to_dpcd_lane_settings(lt_settings, lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); } -- 2.50.1 From 4a4077b4b63a8404efd6d37fc2926f03fb25bace Mon Sep 17 00:00:00 2001 From: Zhikai Zhai Date: Thu, 9 Jan 2025 16:11:48 +0800 Subject: [PATCH 02/16] drm/amd/display: Update Cursor request mode to the beginning prefetch always [Why] The double buffer cursor registers is updated by the cursor vupdate event. There is a gap between vupdate and cursor data fetch if cursor fetch data reletive to cursor position. Cursor corruption will happen if we update the cursor surface in this gap. [How] Modify the cursor request mode to the beginning prefetch always and avoid wraparound calculation issues. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Zhikai Zhai Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hubp/dcn31/dcn31_hubp.c | 2 +- .../amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 22 ++++++++----------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c index c2900c79a2d3..7fd582a8a4ba 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c @@ -44,7 +44,7 @@ void hubp31_set_unbounded_requesting(struct hubp *hubp, bool enable) struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_UPDATE(DCHUBP_CNTL, HUBP_UNBOUNDED_REQ_MODE, enable); - REG_UPDATE(CURSOR_CONTROL, CURSOR_REQ_MODE, enable); + REG_UPDATE(CURSOR_CONTROL, CURSOR_REQ_MODE, 1); } void hubp31_soft_reset(struct hubp *hubp, bool reset) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 906934128912..35c0d101d7c8 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1993,20 +1993,11 @@ static void delay_cursor_until_vupdate(struct dc *dc, struct pipe_ctx *pipe_ctx) dc->hwss.get_position(&pipe_ctx, 1, &position); vpos = position.vertical_count; - /* Avoid wraparound calculation issues */ - vupdate_start += stream->timing.v_total; - vupdate_end += stream->timing.v_total; - vpos += stream->timing.v_total; - if (vpos <= vupdate_start) { /* VPOS is in VACTIVE or back porch. */ lines_to_vupdate = vupdate_start - vpos; - } else if (vpos > vupdate_end) { - /* VPOS is in the front porch. */ - return; } else { - /* VPOS is in VUPDATE. */ - lines_to_vupdate = 0; + lines_to_vupdate = stream->timing.v_total - vpos + vupdate_start; } /* Calculate time until VUPDATE in microseconds. */ @@ -2014,13 +2005,18 @@ static void delay_cursor_until_vupdate(struct dc *dc, struct pipe_ctx *pipe_ctx) stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz; us_to_vupdate = lines_to_vupdate * us_per_line; + /* Stall out until the cursor update completes. */ + if (vupdate_end < vupdate_start) + vupdate_end += stream->timing.v_total; + + /* Position is in the range of vupdate start and end*/ + if (lines_to_vupdate > stream->timing.v_total - vupdate_end + vupdate_start) + us_to_vupdate = 0; + /* 70 us is a conservative estimate of cursor update time*/ if (us_to_vupdate > 70) return; - /* Stall out until the cursor update completes. */ - if (vupdate_end < vupdate_start) - vupdate_end += stream->timing.v_total; us_vupdate = (vupdate_end - vupdate_start + 1) * us_per_line; udelay(us_to_vupdate + us_vupdate); } -- 2.50.1 From 36681f15bb12b5c01df924379cdab9234259825c Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Mon, 13 Jan 2025 14:13:51 -0500 Subject: [PATCH 03/16] drm/amd/display: Account For OTO Prefetch Bandwidth When Calculating Urgent Bandwidth [Why] 1) The current calculations for OTO prefetch bandwidth do not consider the number of DPP pipes in use. As a result, OTO prefetch bandwidth may be larger than the vactive bandwidth if multiple DPP pipes are used. OTO prefetch bandwidth should never exceed the vactive bandwidth. 2) Mode programming may be mismatched with mode support In cases where mode support has chosen to use the equalized (equ) prefetch schedule, mode programming may end up using oto prefetch schedule instead. The bandwidth required to do the oto schedule may end up being higher than the equ schedule. This can cause the required urgent bandwidth to exceed the available urgent bandwidth. [How] Output the oto prefetch bandwidth and incorperate it into the urgent bandwidth calculations even if the prefetch schedule being used is not the oto schedule. Reviewed-by: Dillon Varone Signed-off-by: Austin Zheng Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../src/dml2_core/dml2_core_dcn4_calcs.c | 25 ++++++++++++++++++- .../src/dml2_core/dml2_core_shared_types.h | 5 ++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 51b457b6d66f..e96a13dc43d4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -4909,6 +4909,7 @@ static double get_urgent_bandwidth_required( double ReadBandwidthChroma[], double PrefetchBandwidthLuma[], double PrefetchBandwidthChroma[], + double PrefetchBandwidthOto[], double excess_vactive_fill_bw_l[], double excess_vactive_fill_bw_c[], double cursor_bw[], @@ -4972,8 +4973,9 @@ static double get_urgent_bandwidth_required( l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k]; l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur; l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; + l->flip_and_prefetch_bw_oto = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthOto[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre; l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k]; - surface_required_bw[k] = math_max4(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw); + surface_required_bw[k] = math_max5(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw, l->flip_and_prefetch_bw_oto); /* export peak required bandwidth for the surface */ surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]); @@ -5171,6 +5173,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->Tsw_est3 = 0.0; s->cursor_prefetch_bytes = 0; *p->prefetch_cursor_bw = 0; + *p->RequiredPrefetchBWOTO = 0.0; dcc_mrq_enable = (p->dcc_enable && p->mrq_present); @@ -5384,6 +5387,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime; } + /* oto prefetch bw should be always be less than total vactive bw */ + DML2_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface); + s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor; s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime)); @@ -5394,6 +5400,12 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); + /* oto bw needs to be outputted even if the oto schedule isn't being used to avoid ms/mp mismatch. + * mp will fail if ms decides to use equ schedule and mp decides to use oto schedule + * and the required bandwidth increases when going from ms to mp + */ + *p->RequiredPrefetchBWOTO = s->prefetch_bw_oto; + #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l); dml2_printf("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c); @@ -6154,6 +6166,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, l->zero_array, //PrefetchBandwidthLuma, l->zero_array, //PrefetchBandwidthChroma, + l->zero_array, //PrefetchBWOTO l->zero_array, l->zero_array, l->zero_array, @@ -6190,6 +6203,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, l->zero_array, //PrefetchBandwidthLuma, l->zero_array, //PrefetchBandwidthChroma, + l->zero_array, //PrefetchBWOTO p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -6226,6 +6240,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, p->prefetch_bandwidth_l, p->prefetch_bandwidth_c, + p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -6262,6 +6277,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, p->prefetch_bandwidth_l, p->prefetch_bandwidth_c, + p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -6298,6 +6314,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, p->prefetch_bandwidth_l, p->prefetch_bandwidth_c, + p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -9060,6 +9077,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k]; CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c + CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &mode_lib->ms.RequiredPrefetchBWOTO[k]; CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k]; CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k]; @@ -9204,6 +9222,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; @@ -9370,6 +9389,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; @@ -11286,6 +11306,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k]; CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]; CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &s->dummy_single_array[0][k]; CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]; CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k]; CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k]; @@ -11428,6 +11449,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = s->dummy_single_array[0]; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c; calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw; @@ -11560,6 +11582,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = s->dummy_single_array[k]; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c; calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index 23c0fca5515f..b7cb017b59ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -484,6 +484,8 @@ struct dml2_core_internal_mode_support { double WriteBandwidth[DML2_MAX_PLANES][DML2_MAX_WRITEBACK]; double RequiredPrefetchPixelDataBWLuma[DML2_MAX_PLANES]; double RequiredPrefetchPixelDataBWChroma[DML2_MAX_PLANES]; + /* oto bw should also be considered when calculating urgent bw to avoid situations oto/equ mismatches between ms and mp */ + double RequiredPrefetchBWOTO[DML2_MAX_PLANES]; double cursor_bw[DML2_MAX_PLANES]; double prefetch_cursor_bw[DML2_MAX_PLANES]; double prefetch_vmrow_bw[DML2_MAX_PLANES]; @@ -1381,6 +1383,7 @@ struct dml2_core_shared_get_urgent_bandwidth_required_locals { double vm_row_bw; double flip_and_active_bw; double flip_and_prefetch_bw; + double flip_and_prefetch_bw_oto; double active_and_excess_bw; }; @@ -1792,6 +1795,7 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params { double *VRatioPrefetchC; double *RequiredPrefetchPixelDataBWLuma; double *RequiredPrefetchPixelDataBWChroma; + double *RequiredPrefetchBWOTO; bool *NotEnoughTimeForDynamicMetadata; double *Tno_bw; double *Tno_bw_flip; @@ -2025,6 +2029,7 @@ struct dml2_core_calcs_calculate_peak_bandwidth_required_params { double *surface_read_bandwidth_c; double *prefetch_bandwidth_l; double *prefetch_bandwidth_c; + double *prefetch_bandwidth_oto; double *excess_vactive_fill_bw_l; double *excess_vactive_fill_bw_c; double *cursor_bw; -- 2.50.1 From c31b41f1cb32450d8ac176eef9bda979760040e7 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Fri, 10 Jan 2025 16:09:45 +0800 Subject: [PATCH 04/16] drm/amd/display: Disable PSR-SU on some OLED panel [Why] PSR-SU may cause some glitching randomly on some OLED panel. [How] Disable the PSR-SU for certain PSR-SU OLED panel. Reviewed-by: Sun peng Li Signed-off-by: Tom Chung Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index 45858bf1523d..104f03868266 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -30,6 +30,23 @@ #include "amdgpu_dm.h" #include "modules/power/power_helpers.h" +static bool is_specific_oled_panel(struct dc_link *link) +{ + if (!link->dpcd_sink_ext_caps.bits.oled) + return false; + + /* Disable PSR-SU for some OLED panels to avoid glitches */ + if (link->dpcd_caps.sink_dev_id == 0xBA4159) { + uint8_t sink_dev_id_str1[] = {'4', '0', 'C', 'U', '1'}; + + if (!memcmp(link->dpcd_caps.sink_dev_id_str, sink_dev_id_str1, + sizeof(sink_dev_id_str1))) + return true; + } + + return false; +} + static bool link_supports_psrsu(struct dc_link *link) { struct dc *dc = link->ctx->dc; @@ -40,6 +57,9 @@ static bool link_supports_psrsu(struct dc_link *link) if (dc->ctx->dce_version < DCN_VERSION_3_1) return false; + if (is_specific_oled_panel(link)) + return false; + if (!is_psr_su_specific_panel(link)) return false; -- 2.50.1 From cbd97d621ece1d92c3542e52f8af7c04cd2c6afb Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Tue, 14 Jan 2025 12:14:26 -0500 Subject: [PATCH 05/16] drm/amd/display: Ammend DCPG IP control sequences to align with HW guidance [WHY&HOW] IP_REQUEST_CNTL should only be toggled off when it was originally, never unconditionally. Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 14 +++++--- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 34 +++++++++++++++++++ .../amd/display/dc/hwss/dcn401/dcn401_hwseq.h | 3 ++ .../amd/display/dc/hwss/dcn401/dcn401_init.c | 2 +- 4 files changed, 48 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index a5e18ab72394..dec732c0c59c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1266,14 +1266,18 @@ static void dcn20_power_on_plane_resources( struct dce_hwseq *hws, struct pipe_ctx *pipe_ctx) { + uint32_t org_ip_request_cntl = 0; + DC_LOGGER_INIT(hws->ctx->logger); if (hws->funcs.dpp_root_clock_control) hws->funcs.dpp_root_clock_control(hws, pipe_ctx->plane_res.dpp->inst, true); if (REG(DC_IP_REQUEST_CNTL)) { - REG_SET(DC_IP_REQUEST_CNTL, 0, - IP_REQUEST_EN, 1); + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, + IP_REQUEST_EN, 1); if (hws->funcs.dpp_pg_control) hws->funcs.dpp_pg_control(hws, pipe_ctx->plane_res.dpp->inst, true); @@ -1281,8 +1285,10 @@ static void dcn20_power_on_plane_resources( if (hws->funcs.hubp_pg_control) hws->funcs.hubp_pg_control(hws, pipe_ctx->plane_res.hubp->inst, true); - REG_SET(DC_IP_REQUEST_CNTL, 0, - IP_REQUEST_EN, 0); + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, + IP_REQUEST_EN, 0); + DC_LOG_DEBUG( "Un-gated front end for pipe %d\n", pipe_ctx->plane_res.hubp->inst); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 92bb820817b9..8ad0ff669b7a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -2610,3 +2610,37 @@ void dcn401_detect_pipe_changes(struct dc_state *old_state, new_pipe->update_flags.bits.test_pattern_changed = 1; } } + +void dcn401_plane_atomic_power_down(struct dc *dc, + struct dpp *dpp, + struct hubp *hubp) +{ + struct dce_hwseq *hws = dc->hwseq; + uint32_t org_ip_request_cntl = 0; + + DC_LOGGER_INIT(dc->ctx->logger); + + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, + IP_REQUEST_EN, 1); + + if (hws->funcs.dpp_pg_control) + hws->funcs.dpp_pg_control(hws, dpp->inst, false); + + if (hws->funcs.hubp_pg_control) + hws->funcs.hubp_pg_control(hws, hubp->inst, false); + + hubp->funcs->hubp_reset(hubp); + dpp->funcs->dpp_reset(dpp); + + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, + IP_REQUEST_EN, 0); + + DC_LOG_DEBUG( + "Power gated front end %d\n", hubp->inst); + + if (hws->funcs.dpp_root_clock_control) + hws->funcs.dpp_root_clock_control(hws, dpp->inst, false); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 17cea748789e..dbd69d215b8b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -102,4 +102,7 @@ void dcn401_detect_pipe_changes( struct dc_state *new_state, struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe); +void dcn401_plane_atomic_power_down(struct dc *dc, + struct dpp *dpp, + struct hubp *hubp); #endif /* __DC_HWSS_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 44cb376f97c1..a4e3501fadbb 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -123,7 +123,7 @@ static const struct hwseq_private_funcs dcn401_private_funcs = { .disable_vga = dcn20_disable_vga, .bios_golden_init = dcn10_bios_golden_init, .plane_atomic_disable = dcn20_plane_atomic_disable, - .plane_atomic_power_down = dcn10_plane_atomic_power_down, + .plane_atomic_power_down = dcn401_plane_atomic_power_down, .enable_power_gating_plane = dcn32_enable_power_gating_plane, .hubp_pg_control = dcn32_hubp_pg_control, .program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree, -- 2.50.1 From 871f65a59f3cca534e54ab0efe9d976cdd05ac9a Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Wed, 15 Jan 2025 14:21:24 -0500 Subject: [PATCH 06/16] drm/amd/display: Add boot option to reduce PHY SSC for HBR3 [Why] Spread on DPREFCLK by 0.3 percent can have a negative effect on sink when PHY SSC is also spread by 0.3 percent [How] Add boot option for DMU to lower PHY SSC Reviewed-by: Nicholas Kazlauskas Signed-off-by: Hansen Dsouza Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index d9f31b191c69..3d0bba602b53 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -371,6 +371,7 @@ void dmub_dcn31_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmu boot_options.bits.usb4_cm_version = params->usb4_cm_version; boot_options.bits.dpia_hpd_int_enable_supported = params->dpia_hpd_int_enable_supported; boot_options.bits.power_optimization = params->power_optimization; + boot_options.bits.lower_hbr3_phy_ssc = params->lower_hbr3_phy_ssc; boot_options.bits.sel_mux_phy_c_d_phy_f_g = (dmub->asic == DMUB_ASIC_DCN31B) ? 1 : 0; -- 2.50.1 From c87d202692de34ee71d1fd4679a549a29095658a Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Thu, 16 Jan 2025 09:45:54 -0500 Subject: [PATCH 07/16] drm/amd/display: Guard Possible Null Pointer Dereference [WHY] In some situations, dc->res_pool may be null. [HOW] Check if pointer is null before dereference. Reviewed-by: Joshua Aberback Signed-off-by: Sung Lee Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f08d3d467372..ce917714c7e0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5611,9 +5611,11 @@ void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->get_hard_min_memclk) idle_dramclk_khz = dc->clk_mgr->funcs->get_hard_min_memclk(dc->clk_mgr); - for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; - subvp_pipe_type[i] = dc_state_get_pipe_subvp_type(context, pipe); + if (dc->res_pool && context) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + subvp_pipe_type[i] = dc_state_get_pipe_subvp_type(context, pipe); + } } DC_LOG_DC("%s: allow_idle=%d\n HardMinUClk_Khz=%d HardMinDramclk_Khz=%d\n Pipe_0=%d Pipe_1=%d Pipe_2=%d Pipe_3=%d Pipe_4=%d Pipe_5=%d (caller=%s)\n", -- 2.50.1 From a1d79eae960ce1642aed476d98e311aae46bfb82 Mon Sep 17 00:00:00 2001 From: Peichen Huang Date: Mon, 23 Dec 2024 11:09:52 +0800 Subject: [PATCH 08/16] drm/amd/display: refactor dio link encoder assigning [WHY] We would like to have new dio encoder assigning flow. Which should be aligned with hpo assigning and have simple logic and data representation. [HOW} 1. A new config option to enable/disable the new code. 2. Encoder-link mapping is in res_ctx and assigned encoder. is accessed through pipe_ctx. 3. assign dio encoder when add stream to ctx Reviewed-by: Jun Lei Reviewed-by: Meenakshikumar Somasundaram Signed-off-by: Peichen Huang Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- .../gpu/drm/amd/display/dc/core/dc_resource.c | 202 +++++++++++++++++- drivers/gpu/drm/amd/display/dc/dc.h | 2 + .../gpu/drm/amd/display/dc/inc/core_types.h | 3 + 4 files changed, 206 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index ce917714c7e0..a2b0331ef579 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2325,7 +2325,7 @@ enum dc_status dc_commit_streams(struct dc *dc, struct dc_commit_streams_params /* * Only update link encoder to stream assignment after bandwidth validation passed. */ - if (res == DC_OK && dc->res_pool->funcs->link_encs_assign) + if (res == DC_OK && dc->res_pool->funcs->link_encs_assign && !dc->config.unify_link_enc_assignment) dc->res_pool->funcs->link_encs_assign( dc, context, context->streams, context->stream_count); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 7251587c3fb6..f59722e17abd 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2683,6 +2683,162 @@ static void remove_hpo_dp_link_enc_from_ctx(struct resource_context *res_ctx, } } +static inline int find_acquired_dio_link_enc_for_link( + const struct resource_context *res_ctx, + const struct dc_link *link) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(res_ctx->dio_link_enc_ref_cnts); i++) + if (res_ctx->dio_link_enc_ref_cnts[i] > 0 && + res_ctx->dio_link_enc_to_link_idx[i] == link->link_index) + return i; + + return -1; +} + +static inline int find_fixed_dio_link_enc(const struct dc_link *link) +{ + /* the 8b10b dp phy can only use fixed link encoder */ + return link->eng_id; +} + +static inline int find_free_dio_link_enc(const struct resource_context *res_ctx, + const struct dc_link *link, const struct resource_pool *pool) +{ + int i; + int enc_count = pool->dig_link_enc_count; + + /* for dpia, check preferred encoder first and then the next one */ + for (i = 0; i < enc_count; i++) + if (res_ctx->dio_link_enc_ref_cnts[(link->dpia_preferred_eng_id + i) % enc_count] == 0) + break; + + return (i >= 0 && i < enc_count) ? (link->dpia_preferred_eng_id + i) % enc_count : -1; +} + +static inline void acquire_dio_link_enc( + struct resource_context *res_ctx, + unsigned int link_index, + int enc_index) +{ + res_ctx->dio_link_enc_to_link_idx[enc_index] = link_index; + res_ctx->dio_link_enc_ref_cnts[enc_index] = 1; +} + +static inline void retain_dio_link_enc( + struct resource_context *res_ctx, + int enc_index) +{ + res_ctx->dio_link_enc_ref_cnts[enc_index]++; +} + +static inline void release_dio_link_enc( + struct resource_context *res_ctx, + int enc_index) +{ + ASSERT(res_ctx->dio_link_enc_ref_cnts[enc_index] > 0); + res_ctx->dio_link_enc_ref_cnts[enc_index]--; +} + +static bool is_dio_enc_acquired_by_other_link(const struct dc_link *link, + int enc_index, + int *link_index) +{ + const struct dc *dc = link->dc; + const struct resource_context *res_ctx = &dc->current_state->res_ctx; + + /* pass the link_index that acquired the enc_index */ + if (res_ctx->dio_link_enc_ref_cnts[enc_index] > 0 && + res_ctx->dio_link_enc_to_link_idx[enc_index] != link->link_index) { + *link_index = res_ctx->dio_link_enc_to_link_idx[enc_index]; + return true; + } + + return false; +} + +static void swap_dio_link_enc_to_muxable_ctx(struct dc_state *context, + const struct resource_pool *pool, + int new_encoder, + int old_encoder) +{ + struct resource_context *res_ctx = &context->res_ctx; + int stream_count = context->stream_count; + int i = 0; + + res_ctx->dio_link_enc_ref_cnts[new_encoder] = res_ctx->dio_link_enc_ref_cnts[old_encoder]; + res_ctx->dio_link_enc_to_link_idx[new_encoder] = res_ctx->dio_link_enc_to_link_idx[old_encoder]; + res_ctx->dio_link_enc_ref_cnts[old_encoder] = 0; + + for (i = 0; i < stream_count; i++) { + struct dc_stream_state *stream = context->streams[i]; + struct pipe_ctx *pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream); + + if (pipe_ctx && pipe_ctx->link_res.dio_link_enc == pool->link_encoders[old_encoder]) + pipe_ctx->link_res.dio_link_enc = pool->link_encoders[new_encoder]; + } +} + +static bool add_dio_link_enc_to_ctx(const struct dc *dc, + struct dc_state *context, + const struct resource_pool *pool, + struct pipe_ctx *pipe_ctx, + struct dc_stream_state *stream) +{ + struct resource_context *res_ctx = &context->res_ctx; + int enc_index; + + enc_index = find_acquired_dio_link_enc_for_link(res_ctx, stream->link); + + if (enc_index >= 0) { + retain_dio_link_enc(res_ctx, enc_index); + } else { + if (stream->link->is_dig_mapping_flexible) + enc_index = find_free_dio_link_enc(res_ctx, stream->link, pool); + else { + int link_index = 0; + + enc_index = find_fixed_dio_link_enc(stream->link); + /* Fixed mapping link can only use its fixed link encoder. + * If the encoder is acquired by other link then get a new free encoder and swap the new + * one into the acquiring link. + */ + if (enc_index >= 0 && is_dio_enc_acquired_by_other_link(stream->link, enc_index, &link_index)) { + int new_enc_index = find_free_dio_link_enc(res_ctx, dc->links[link_index], pool); + + if (new_enc_index >= 0) + swap_dio_link_enc_to_muxable_ctx(context, pool, new_enc_index, enc_index); + else + return false; + } + } + + if (enc_index >= 0) + acquire_dio_link_enc(res_ctx, stream->link->link_index, enc_index); + } + + if (enc_index >= 0) + pipe_ctx->link_res.dio_link_enc = pool->link_encoders[enc_index]; + + return pipe_ctx->link_res.dio_link_enc != NULL; +} + +static void remove_dio_link_enc_from_ctx(struct resource_context *res_ctx, + struct pipe_ctx *pipe_ctx, + struct dc_stream_state *stream) +{ + int enc_index = -1; + + if (stream->link) + enc_index = find_acquired_dio_link_enc_for_link(res_ctx, stream->link); + + if (enc_index >= 0) { + release_dio_link_enc(res_ctx, enc_index); + pipe_ctx->link_res.dio_link_enc = NULL; + } +} + static int get_num_of_free_pipes(const struct resource_pool *pool, const struct dc_state *context) { int i; @@ -2730,6 +2886,10 @@ void resource_remove_otg_master_for_stream_output(struct dc_state *context, remove_hpo_dp_link_enc_from_ctx( &context->res_ctx, otg_master, stream); } + + if (stream->ctx->dc->config.unify_link_enc_assignment) + remove_dio_link_enc_from_ctx(&context->res_ctx, otg_master, stream); + if (otg_master->stream_res.audio) update_audio_usage( &context->res_ctx, @@ -2744,6 +2904,7 @@ void resource_remove_otg_master_for_stream_output(struct dc_state *context, if (pool->funcs->remove_stream_from_ctx) pool->funcs->remove_stream_from_ctx( stream->ctx->dc, context, stream); + memset(otg_master, 0, sizeof(*otg_master)); } @@ -3716,6 +3877,7 @@ enum dc_status resource_map_pool_resources( struct pipe_ctx *pipe_ctx = NULL; int pipe_idx = -1; bool acquired = false; + bool is_dio_encoder = true; calculate_phy_pix_clks(stream); @@ -3781,6 +3943,10 @@ enum dc_status resource_map_pool_resources( } } + if (dc->config.unify_link_enc_assignment && is_dio_encoder) + if (!add_dio_link_enc_to_ctx(dc, context, pool, pipe_ctx, stream)) + return DC_NO_LINK_ENC_RESOURCE; + /* TODO: Add check if ASIC support and EDID audio */ if (!stream->converter_disable_audio && dc_is_audio_capable_signal(pipe_ctx->stream->signal) && @@ -5017,6 +5183,28 @@ void get_audio_check(struct audio_info *aud_modes, } } +static struct link_encoder *get_temp_dio_link_enc( + const struct resource_context *res_ctx, + const struct resource_pool *const pool, + const struct dc_link *link) +{ + struct link_encoder *link_enc = NULL; + int enc_index; + + if (link->is_dig_mapping_flexible) + enc_index = find_acquired_dio_link_enc_for_link(res_ctx, link); + else + enc_index = link->eng_id; + + if (enc_index < 0) + enc_index = find_free_dio_link_enc(res_ctx, link, pool); + + if (enc_index >= 0) + link_enc = pool->link_encoders[enc_index]; + + return link_enc; +} + static struct hpo_dp_link_encoder *get_temp_hpo_dp_link_enc( const struct resource_context *res_ctx, const struct resource_pool *const pool, @@ -5046,11 +5234,17 @@ bool get_temp_dp_link_res(struct dc_link *link, memset(link_res, 0, sizeof(*link_res)); if (dc->link_srv->dp_get_encoding_format(link_settings) == DP_128b_132b_ENCODING) { - link_res->hpo_dp_link_enc = get_temp_hpo_dp_link_enc(res_ctx, - dc->res_pool, link); + link_res->hpo_dp_link_enc = get_temp_hpo_dp_link_enc(res_ctx, dc->res_pool, link); if (!link_res->hpo_dp_link_enc) return false; + } else if (dc->link_srv->dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING && + dc->config.unify_link_enc_assignment) { + link_res->dio_link_enc = get_temp_dio_link_enc(res_ctx, + dc->res_pool, link); + if (!link_res->dio_link_enc) + return false; } + return true; } @@ -5322,6 +5516,10 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc, remove_hpo_dp_link_enc_from_ctx(&context->res_ctx, pipe_ctx, pipe_ctx->stream); } + if (pipe_ctx->link_res.dio_link_enc == NULL && dc->config.unify_link_enc_assignment) + if (!add_dio_link_enc_to_ctx(dc, context, dc->res_pool, pipe_ctx, pipe_ctx->stream)) + return DC_NO_LINK_ENC_RESOURCE; + return DC_OK; } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 019459dfd6fe..06d9cf0a7edc 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -473,6 +473,7 @@ struct dc_config { bool consolidated_dpia_dp_lt; bool set_pipe_unlock_order; bool enable_dpia_pre_training; + bool unify_link_enc_assignment; }; enum visual_confirm { @@ -778,6 +779,7 @@ union dpia_debug_options { uint32_t disable_usb4_pm_support:1; /* bit 5 */ uint32_t enable_consolidated_dpia_dp_lt:1; /* bit 6 */ uint32_t enable_dpia_pre_training:1; /* bit 7 */ + uint32_t unify_link_enc_assignment:1; /* bit 8 */ uint32_t reserved:24; } bits; uint32_t raw; diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 652d52040f4e..37632be09e09 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -376,6 +376,7 @@ struct plane_resource { /* all mappable hardware resources used to enable a link */ struct link_resource { + struct link_encoder *dio_link_enc; struct hpo_dp_link_encoder *hpo_dp_link_enc; }; @@ -500,6 +501,8 @@ struct resource_context { uint8_t dp_clock_source_ref_count; bool is_dsc_acquired[MAX_PIPES]; struct link_enc_cfg_context link_enc_cfg_ctx; + unsigned int dio_link_enc_to_link_idx[MAX_DIG_LINK_ENCODERS]; + int dio_link_enc_ref_cnts[MAX_DIG_LINK_ENCODERS]; bool is_hpo_dp_stream_enc_acquired[MAX_HPO_DP2_ENCODERS]; unsigned int hpo_dp_link_enc_to_link_idx[MAX_HPO_DP2_LINK_ENCODERS]; int hpo_dp_link_enc_ref_cnts[MAX_HPO_DP2_LINK_ENCODERS]; -- 2.50.1 From 942bd112c92a13611899cdb075944b6e0a3b4165 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 19 Jan 2025 21:45:59 -0500 Subject: [PATCH 09/16] drm/amd/display: 3.2.318 This version brings along the following fixes: - Fixes on psr_version, dcn35 register address, DCPG OP control sequences - Imporvements to CR AUX RD interval interpretation, dio link encoder - Disable PSR-SU on some OLED panels Acked-by: Aurabindo Pillai Signed-off-by: Aric Cyr Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 06d9cf0a7edc..559446dcd431 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.317" +#define DC_VER "3.2.318" #define MAX_SURFACES 4 #define MAX_PLANES 6 -- 2.50.1 From e818635a31d28de9c991c27b663f3a222d9b6723 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 20 Jan 2025 14:30:59 -0500 Subject: [PATCH 10/16] drm/amdgpu: update and cleanup PM4 headers Consolidate PM4 definitions. Most of these were previously only defined in UMDs. Add them here as well and sync with latest packets. Also no need to include soc15d.h on gfx10+. Reviewed-by: Feifei Xu Suggested-by: Saurabh Verma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 - drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 1 - drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 1 - drivers/gpu/drm/amd/amdgpu/nvd.h | 208 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/soc15d.h | 139 +++++++++++++++++ 5 files changed, 347 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 1878c83ff7e3..a2b26551314a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -40,7 +40,6 @@ #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" #include "soc15.h" -#include "soc15d.h" #include "soc15_common.h" #include "clearstate_gfx10.h" #include "v10_structs.h" diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index e27b5f8705c1..1ecdb4268ec6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -42,7 +42,6 @@ #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" #include "soc15.h" -#include "soc15d.h" #include "clearstate_gfx11.h" #include "v11_structs.h" #include "gfx_v11_0.h" diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index b5bddebf528e..37c522791035 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -40,7 +40,6 @@ #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" #include "soc15.h" -#include "soc15d.h" #include "clearstate_gfx12.h" #include "v12_structs.h" #include "gfx_v12_0.h" diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h index 631dafb92299..56f1bfac0b20 100644 --- a/drivers/gpu/drm/amd/amdgpu/nvd.h +++ b/drivers/gpu/drm/amd/amdgpu/nvd.h @@ -64,6 +64,24 @@ #define PACKET3_INDIRECT_BUFFER_CNST_END 0x19 #define PACKET3_ATOMIC_GDS 0x1D #define PACKET3_ATOMIC_MEM 0x1E +#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x7F) << 0) +#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x))) +#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x))) +#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x))) +#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x))) +#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x))) +#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x))) +#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0) +#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0 +#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1 +#define PACKET3_ATOMIC_MEM__COMMAND__WAIT_FOR_WRITE_CONFIRMATION 2 +#define PACKET3_ATOMIC_MEM__COMMAND__SEND_AND_CONTINUE 3 +#define PACKET3_ATOMIC_MEM__CACHE_POLICY__LRU 0 +#define PACKET3_ATOMIC_MEM__CACHE_POLICY__STREAM 1 +#define PACKET3_ATOMIC_MEM__CACHE_POLICY__NOA 2 +#define PACKET3_ATOMIC_MEM__CACHE_POLICY__BYPASS 3 #define PACKET3_OCCLUSION_QUERY 0x1F #define PACKET3_SET_PREDICATION 0x20 #define PACKET3_REG_RMW 0x21 @@ -105,6 +123,38 @@ * 1 - pfp * 2 - ce */ +#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16) +#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20) +#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x)) +#define PACKET3_WRITE_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21) +#define PACKET3_WRITE_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 22) +#define PACKET3_WRITE_DATA__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 24) +#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_LO(x) ((unsigned)(x)) +#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_HI(x) ((((unsigned)(x)) & 0xFF) << 0) +#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0 +#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2 +#define PACKET3_WRITE_DATA__DST_SEL__GDS 3 +#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5 +#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6 +#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0 +#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1 +#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0 +#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1 +#define PACKET3_WRITE_DATA__MODE__PF_VF_DISABLED 0 +#define PACKET3_WRITE_DATA__MODE__PF_VF_ENABLED 1 +#define PACKET3_WRITE_DATA__TEMPORAL__RT 0 +#define PACKET3_WRITE_DATA__TEMPORAL__NT 1 +#define PACKET3_WRITE_DATA__TEMPORAL__HT 2 +#define PACKET3_WRITE_DATA__TEMPORAL__LU 3 +#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0 +#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1 +#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2 +#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3 #define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 #define PACKET3_MEM_SEMAPHORE 0x39 # define PACKET3_SEM_USE_MAILBOX (0x1 << 16) @@ -135,6 +185,42 @@ /* 0 - me * 1 - pfp */ +#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0) +#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4) +#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6) +#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22) +#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24) +#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_WAIT_REG_MEM__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0X3FFFF) << 0) +#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0X3FFFF) << 0) +#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x)) +#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x)) +#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x)) +#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31) +#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0 +#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1 +#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2 +#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3 +#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4 +#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5 +#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6 +#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0 +#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1 +#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0 +#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1 +#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3 +#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__LRU 0 +#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__STREAM 1 +#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__NOA 2 +#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__BYPASS 3 +#define PACKET3_WAIT_REG_MEM__TEMPORAL__RT 0 +#define PACKET3_WAIT_REG_MEM__TEMPORAL__NT 1 +#define PACKET3_WAIT_REG_MEM__TEMPORAL__HT 2 +#define PACKET3_WAIT_REG_MEM__TEMPORAL__LU 3 #define PACKET3_INDIRECT_BUFFER 0x3F #define INDIRECT_BUFFER_VALID (1 << 23) #define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28) @@ -144,8 +230,94 @@ */ #define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21) #define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30) +#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x)) +#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0) +#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20) +#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21) +#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23) +#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24) +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28) +#define PACKET3_INDIRECT_BUFFER__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 28) +#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31) +#define PACKET3_INDIRECT_BUFFER__TEMPORAL__RT 0 +#define PACKET3_INDIRECT_BUFFER__TEMPORAL__NT 1 +#define PACKET3_INDIRECT_BUFFER__TEMPORAL__HT 2 +#define PACKET3_INDIRECT_BUFFER__TEMPORAL__LU 3 +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0 +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1 +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__NOA 2 +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__BYPASS 3 #define PACKET3_COND_INDIRECT_BUFFER 0x3F #define PACKET3_COPY_DATA 0x40 +#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0) +#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13) +#define PACKET3_COPY_DATA__SRC_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 13) +#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16) +#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20) +#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29) +#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3) +#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3) +#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21) +#define PACKET3_COPY_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 23) +#define PACKET3_COPY_DATA__DST_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_COPY_DATA__SRC_REG_OFFSET_LO(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__SRC_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0) +#define PACKET3_COPY_DATA__DST_REG_OFFSET_LO(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__DST_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0) +#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0 +#define PACKET3_COPY_DATA__SRC_SEL__TC_L2_OBSOLETE 1 +#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2 +#define PACKET3_COPY_DATA__SRC_SEL__GDS 3 +#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4 +#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5 +#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6 +#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7 +#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8 +#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9 +#define PACKET3_COPY_DATA__SRC_SEL__SYSTEM_CLOCK_COUNT 10 +#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0 +#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2 +#define PACKET3_COPY_DATA__DST_SEL__GDS 3 +#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4 +#define PACKET3_COPY_DATA__DST_SEL__TC_L2_OBSOLETE 5 +#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6 +#define PACKET3_COPY_DATA__SRC_TEMPORAL__RT 0 +#define PACKET3_COPY_DATA__SRC_TEMPORAL__NT 1 +#define PACKET3_COPY_DATA__SRC_TEMPORAL__HT 2 +#define PACKET3_COPY_DATA__SRC_TEMPORAL__LU 3 +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0 +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1 +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__NOA 2 +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__BYPASS 3 +#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0 +#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1 +#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0 +#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1 +#define PACKET3_COPY_DATA__MODE__PF_VF_DISABLED 0 +#define PACKET3_COPY_DATA__MODE__PF_VF_ENABLED 1 +#define PACKET3_COPY_DATA__DST_TEMPORAL__RT 0 +#define PACKET3_COPY_DATA__DST_TEMPORAL__NT 1 +#define PACKET3_COPY_DATA__DST_TEMPORAL__HT 2 +#define PACKET3_COPY_DATA__DST_TEMPORAL__LU 3 +#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0 +#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1 +#define PACKET3_COPY_DATA__DST_CACHE_POLICY__NOA 2 +#define PACKET3_COPY_DATA__DST_CACHE_POLICY__BYPASS 3 +#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0 +#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1 #define PACKET3_CP_DMA 0x41 #define PACKET3_PFP_SYNC_ME 0x42 #define PACKET3_SURFACE_SYNC 0x43 @@ -160,6 +332,23 @@ * 3 - SAMPLE_STREAMOUTSTAT* * 4 - *S_PARTIAL_FLUSH */ +#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0) +#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29) +#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 0) +#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3) +#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) ((unsigned)(x)) +#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTAT 2 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS 8 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS1 9 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS2 10 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS3 11 +#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__LEGACY_MODE 0 +#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE1 1 +#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__NEW_MODE 2 +#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE3 3 #define PACKET3_EVENT_WRITE_EOP 0x47 #define PACKET3_EVENT_WRITE_EOS 0x48 #define PACKET3_RELEASE_MEM 0x49 @@ -304,6 +493,12 @@ * 2: REVERSE */ #define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18) +#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x)) +#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0) +#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x)) +#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0) +#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FFFF) << 0) #define PACKET3_REWIND 0x59 #define PACKET3_INTERRUPT 0x5A #define PACKET3_GEN_PDEPTE 0x5B @@ -330,11 +525,17 @@ #define PACKET3_SET_SH_REG 0x76 #define PACKET3_SET_SH_REG_START 0x00002c00 #define PACKET3_SET_SH_REG_END 0x00003000 +#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23) +#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28) +#define PACKET3_SET_SH_REG__INDEX__DEFAULT 0 +#define PACKET3_SET_SH_REG__INDEX__INSERT_VMID 1 #define PACKET3_SET_SH_REG_OFFSET 0x77 #define PACKET3_SET_QUEUE_REG 0x78 #define PACKET3_SET_UCONFIG_REG 0x79 #define PACKET3_SET_UCONFIG_REG_START 0x0000c000 #define PACKET3_SET_UCONFIG_REG_END 0x0000c400 +#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0) #define PACKET3_SET_UCONFIG_REG_INDEX 0x7A #define PACKET3_FORWARD_HEADER 0x7C #define PACKET3_SCRATCH_RAM_WRITE 0x7D @@ -369,6 +570,7 @@ # define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0) # define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4) # define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5) +# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29) #define PACKET3_AQL_PACKET 0x99 #define PACKET3_DMA_DATA_FILL_MULTI 0x9A #define PACKET3_SET_SH_REG_INDEX 0x9B @@ -462,6 +664,12 @@ # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) #define PACKET3_RUN_LIST 0xA5 #define PACKET3_MAP_PROCESS_VM 0xA6 + +#define PACKET3_RUN_CLEANER_SHADER 0xD2 +/* 1. header + * 2. RESERVED [31:0] + */ + /* GFX11 */ #define PACKET3_SET_Q_PREEMPTION_MODE 0xF0 # define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index b9cbeb389edc..a5000c171c02 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -93,11 +93,25 @@ #define PACKET3_DISPATCH_INDIRECT 0x16 #define PACKET3_ATOMIC_GDS 0x1D #define PACKET3_ATOMIC_MEM 0x1E +#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x3F) << 0) +#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)) << 0) +#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)) << 0) +#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)) << 0) +#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)) << 0) +#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)) << 0) +#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)) << 0) +#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0) +#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0 +#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1 #define PACKET3_OCCLUSION_QUERY 0x1F #define PACKET3_SET_PREDICATION 0x20 #define PACKET3_REG_RMW 0x21 #define PACKET3_COND_EXEC 0x22 #define PACKET3_PRED_EXEC 0x23 +#define PACKET3_PRED_EXEC__EXEC_COUNT(x) ((((unsigned)(x)) & 0x3FFF) << 0) +#define PACKET3_PRED_EXEC__VIRTUAL_XCC_ID_SELECT(x) ((((unsigned)(x)) & 0xFF) << 24) #define PACKET3_DRAW_INDIRECT 0x24 #define PACKET3_DRAW_INDEX_INDIRECT 0x25 #define PACKET3_INDEX_BASE 0x26 @@ -132,6 +146,28 @@ * 1 - pfp * 2 - ce */ +#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16) +#define PACKET3_WRITE_DATA__RESUME_VF_MI300(x) ((((unsigned)(x)) & 0x1) << 19) +#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20) +#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x)) +#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0 +#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2 +#define PACKET3_WRITE_DATA__DST_SEL__GDS 3 +#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5 +#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6 +#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0 +#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1 +#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0 +#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1 +#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0 +#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1 +#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2 +#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3 #define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 #define PACKET3_MEM_SEMAPHORE 0x39 # define PACKET3_SEM_USE_MAILBOX (0x1 << 16) @@ -160,6 +196,33 @@ /* 0 - me * 1 - pfp */ +#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0) +#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4) +#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6) +#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22) +#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24) +#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x)) +#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x)) +#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x)) +#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31) +#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0 +#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1 +#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2 +#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3 +#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4 +#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5 +#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6 +#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0 +#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1 +#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0 +#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1 +#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3 #define PACKET3_INDIRECT_BUFFER 0x3F #define INDIRECT_BUFFER_VALID (1 << 23) #define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28) @@ -169,7 +232,63 @@ */ #define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21) #define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30) +#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x)) +#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0) +#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20) +#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21) +#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23) +#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24) +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28) +#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31) +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0 +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1 #define PACKET3_COPY_DATA 0x40 +#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0) +#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13) +#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16) +#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20) +#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29) +#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3) +#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3) +#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0 +#define PACKET3_COPY_DATA__SRC_SEL__MEMORY 1 +#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2 +#define PACKET3_COPY_DATA__SRC_SEL__GDS 3 +#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4 +#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5 +#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6 +#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7 +#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8 +#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9 +#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0 +#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2 +#define PACKET3_COPY_DATA__DST_SEL__GDS 3 +#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4 +#define PACKET3_COPY_DATA__DST_SEL__MEMORY 5 +#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6 +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0 +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1 +#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0 +#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1 +#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0 +#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1 +#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0 +#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1 +#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0 +#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1 #define PACKET3_PFP_SYNC_ME 0x42 #define PACKET3_COND_WRITE 0x45 #define PACKET3_EVENT_WRITE 0x46 @@ -181,6 +300,15 @@ * 3 - SAMPLE_STREAMOUTSTAT* * 4 - *S_PARTIAL_FLUSH */ +#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0) +#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 31) +#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29) +#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3) +#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) (((unsigned)(x)) << 0) +#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTATS 2 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4 #define PACKET3_RELEASE_MEM 0x49 #define EVENT_TYPE(x) ((x) << 0) #define EVENT_INDEX(x) ((x) << 8) @@ -286,6 +414,13 @@ #define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(x) ((x) << 29) #define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_WB_ACTION_ENA(x) ((x) << 30) #define PACKET3_REWIND 0x59 +#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x)) +#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0) +#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI_VG10(x) ((((unsigned)(x)) & 0xFFFFFF) << 0) +#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x)) +#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0) +#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FF) << 0) #define PACKET3_LOAD_UCONFIG_REG 0x5E #define PACKET3_LOAD_SH_REG 0x5F #define PACKET3_LOAD_CONFIG_REG 0x60 @@ -300,12 +435,16 @@ #define PACKET3_SET_SH_REG 0x76 #define PACKET3_SET_SH_REG_START 0x00002c00 #define PACKET3_SET_SH_REG_END 0x00003000 +#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23) +#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28) #define PACKET3_SET_SH_REG_OFFSET 0x77 #define PACKET3_SET_QUEUE_REG 0x78 #define PACKET3_SET_UCONFIG_REG 0x79 #define PACKET3_SET_UCONFIG_REG_START 0x0000c000 #define PACKET3_SET_UCONFIG_REG_END 0x0000c400 #define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28) +#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0) #define PACKET3_SCRATCH_RAM_WRITE 0x7D #define PACKET3_SCRATCH_RAM_READ 0x7E #define PACKET3_LOAD_CONST_RAM 0x80 -- 2.50.1 From 17585c07c20b063d0b6a2740a5696388d009e9ff Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 24 Jan 2025 11:51:53 +0530 Subject: [PATCH 11/16] drm/amdgpu/gfx10: Enable cleaner shader for GFX10.1.1/10.1.2 GPUs MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Enable the cleaner shader for GFX10.1.1/10.1.2 GPUs to provide data isolation between GPU workloads. The cleaner shader is responsible for clearing the Local Data Store (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs), which helps prevent data leakage and ensures accurate computation results. This update extends cleaner shader support to GFX10.1.1/10.1.2 GPUs, previously available for GFX10.1.10. It enhances security by clearing GPU memory between processes and maintains a consistent GPU state across KGD and KFD workloads. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a2b26551314a..d70574a25326 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4794,6 +4794,8 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): + case IP_VERSION(10, 1, 1): + case IP_VERSION(10, 1, 2): adev->gfx.cleaner_shader_ptr = gfx_10_1_10_cleaner_shader_hex; adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex); if (adev->gfx.me_fw_version >= 101 && -- 2.50.1 From 31f9ed58827f682ff00c70ef482442149603a16c Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 28 Jan 2025 11:24:36 +0530 Subject: [PATCH 12/16] drm/amdgpu: Pass IP instance/hwid as parameters Use IP instance number and hwid as function args for validation checks. Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index fbe1e23526f0..d34b97a081d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -587,16 +587,19 @@ void amdgpu_discovery_fini(struct amdgpu_device *adev) adev->mman.discovery_bin = NULL; } -static int amdgpu_discovery_validate_ip(const struct ip_v4 *ip) +static int amdgpu_discovery_validate_ip(struct amdgpu_device *adev, + uint8_t instance, uint16_t hw_id) { - if (ip->instance_number >= HWIP_MAX_INSTANCE) { - DRM_ERROR("Unexpected instance_number (%d) from ip discovery blob\n", - ip->instance_number); + if (instance >= HWIP_MAX_INSTANCE) { + dev_err(adev->dev, + "Unexpected instance_number (%d) from ip discovery blob\n", + instance); return -EINVAL; } - if (le16_to_cpu(ip->hw_id) >= HW_ID_MAX) { - DRM_ERROR("Unexpected hw_id (%d) from ip discovery blob\n", - le16_to_cpu(ip->hw_id)); + if (hw_id >= HW_ID_MAX) { + dev_err(adev->dev, + "Unexpected hw_id (%d) from ip discovery blob\n", + hw_id); return -EINVAL; } @@ -611,6 +614,8 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev, struct die_header *dhdr; struct ip_v4 *ip; uint16_t die_offset, ip_offset, num_dies, num_ips; + uint16_t hw_id; + uint8_t inst; int i, j; bhdr = (struct binary_header *)adev->mman.discovery_bin; @@ -627,15 +632,16 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev, for (j = 0; j < num_ips; j++) { ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset); - - if (amdgpu_discovery_validate_ip(ip)) + inst = ip->instance_number; + hw_id = le16_to_cpu(ip->hw_id); + if (amdgpu_discovery_validate_ip(adev, inst, hw_id)) goto next_ip; if (le16_to_cpu(ip->variant) == 1) { - switch (le16_to_cpu(ip->hw_id)) { + switch (hw_id) { case VCN_HWID: (*vcn_harvest_count)++; - if (ip->instance_number == 0) { + if (inst == 0) { adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0; adev->vcn.inst_mask &= ~AMDGPU_VCN_HARVEST_VCN0; @@ -1019,6 +1025,8 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev, bool reg_base_64) { int ii, jj, kk, res; + uint16_t hw_id; + uint8_t inst; DRM_DEBUG("num_ips:%d", num_ips); @@ -1034,8 +1042,10 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev, struct ip_hw_instance *ip_hw_instance; ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset); - if (amdgpu_discovery_validate_ip(ip) || - le16_to_cpu(ip->hw_id) != ii) + inst = ip->instance_number; + hw_id = le16_to_cpu(ip->hw_id); + if (amdgpu_discovery_validate_ip(adev, inst, hw_id) || + hw_id != ii) goto next_ip; DRM_DEBUG("match:%d @ ip_offset:%zu", ii, ip_offset); @@ -1282,6 +1292,8 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) uint16_t ip_offset; uint16_t num_dies; uint16_t num_ips; + uint16_t hw_id; + uint8_t inst; int hw_ip; int i, j, k; int r; @@ -1321,7 +1333,9 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) for (j = 0; j < num_ips; j++) { ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset); - if (amdgpu_discovery_validate_ip(ip)) + inst = ip->instance_number; + hw_id = le16_to_cpu(ip->hw_id); + if (amdgpu_discovery_validate_ip(adev, inst, hw_id)) goto next_ip; num_base_address = ip->num_base_address; -- 2.50.1 From a01e934242f37b52e498958462a04f8ac30b4d66 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 28 Jan 2025 11:39:13 +0530 Subject: [PATCH 13/16] drm/amdgpu: Use version to figure out harvest info IP tables with version <=2 may use harvest bit. For version 3 and above, harvest bit is not applicable, instead uses harvest table. Fix the logic accordingly. Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index d34b97a081d8..bb09ae9a67b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -612,7 +612,7 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev, struct binary_header *bhdr; struct ip_discovery_header *ihdr; struct die_header *dhdr; - struct ip_v4 *ip; + struct ip *ip; uint16_t die_offset, ip_offset, num_dies, num_ips; uint16_t hw_id; uint8_t inst; @@ -631,13 +631,14 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev, ip_offset = die_offset + sizeof(*dhdr); for (j = 0; j < num_ips; j++) { - ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset); - inst = ip->instance_number; + ip = (struct ip *)(adev->mman.discovery_bin + + ip_offset); + inst = ip->number_instance; hw_id = le16_to_cpu(ip->hw_id); if (amdgpu_discovery_validate_ip(adev, inst, hw_id)) goto next_ip; - if (le16_to_cpu(ip->variant) == 1) { + if (ip->harvest == 1) { switch (hw_id) { case VCN_HWID: (*vcn_harvest_count)++; @@ -663,10 +664,8 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev, } } next_ip: - if (ihdr->base_addr_64_bit) - ip_offset += struct_size(ip, base_address_64, ip->num_base_address); - else - ip_offset += struct_size(ip, base_address, ip->num_base_address); + ip_offset += struct_size(ip, base_address, + ip->num_base_address); } } } @@ -1474,18 +1473,24 @@ next_ip: static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) { + struct ip_discovery_header *ihdr; + struct binary_header *bhdr; int vcn_harvest_count = 0; int umc_harvest_count = 0; + uint16_t offset, ihdr_ver; + bhdr = (struct binary_header *)adev->mman.discovery_bin; + offset = le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset); + ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + + offset); + ihdr_ver = le16_to_cpu(ihdr->version); /* * Harvest table does not fit Navi1x and legacy GPUs, * so read harvest bit per IP data structure to set * harvest configuration. */ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 2, 0) && - amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3) && - amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4) && - amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 5, 0)) { + ihdr_ver <= 2) { if ((adev->pdev->device == 0x731E && (adev->pdev->revision == 0xC6 || adev->pdev->revision == 0xC7)) || -- 2.50.1 From a52e6cb06bbb6e2f3d9b9d3cb4b1cf165009fc3a Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 28 Jan 2025 11:02:32 +0530 Subject: [PATCH 14/16] drm/amdgpu: Clean up GFX v9.4.3 IP version checks Remove unnecessary IP version checks for GFX 9.4.3 and similar variants. Wrap checks inside meaningful function. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 68 ++++++------------------ drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 22 ++++---- 2 files changed, 29 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 2ba185875baa..f4635fc8a7ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -942,21 +942,12 @@ static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs; adev->gfx.ras = &gfx_v9_4_3_ras; - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { - case IP_VERSION(9, 4, 3): - case IP_VERSION(9, 4, 4): - case IP_VERSION(9, 5, 0): - adev->gfx.config.max_hw_contexts = 8; - adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; - adev->gfx.config.sc_prim_fifo_size_backend = 0x100; - adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; - adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; - gb_addr_config = RREG32_SOC15(GC, GET_INST(GC, 0), regGB_ADDR_CONFIG); - break; - default: - BUG(); - break; - } + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, GET_INST(GC, 0), regGB_ADDR_CONFIG); adev->gfx.config.gb_addr_config = gb_addr_config; @@ -2795,16 +2786,10 @@ static int gfx_v9_4_3_set_clockgating_state(struct amdgpu_ip_block *ip_block, return 0; num_xcc = NUM_XCC(adev->gfx.xcc_mask); - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { - case IP_VERSION(9, 4, 3): - case IP_VERSION(9, 4, 4): - for (i = 0; i < num_xcc; i++) - gfx_v9_4_3_xcc_update_gfx_clock_gating( - adev, state == AMD_CG_STATE_GATE, i); - break; - default: - break; - } + for (i = 0; i < num_xcc; i++) + gfx_v9_4_3_xcc_update_gfx_clock_gating( + adev, state == AMD_CG_STATE_GATE, i); + return 0; } @@ -4867,34 +4852,13 @@ static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev) static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) { - /* init asci gds info */ - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { - case IP_VERSION(9, 4, 3): - case IP_VERSION(9, 4, 4): - case IP_VERSION(9, 5, 0): - /* 9.4.3 removed all the GDS internal memory, - * only support GWS opcode in kernel, like barrier - * semaphore.etc */ - adev->gds.gds_size = 0; - break; - default: - adev->gds.gds_size = 0x10000; - break; - } - - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { - case IP_VERSION(9, 4, 3): - case IP_VERSION(9, 4, 4): - case IP_VERSION(9, 5, 0): - /* deprecated for 9.4.3, no usage at all */ - adev->gds.gds_compute_max_wave_id = 0; - break; - default: - /* this really depends on the chip */ - adev->gds.gds_compute_max_wave_id = 0x7ff; - break; - } + /* 9.4.3 variants removed all the GDS internal memory, + * only support GWS opcode in kernel, like barrier + * semaphore.etc */ + /* init asic gds info */ + adev->gds.gds_size = 0; + adev->gds.gds_compute_max_wave_id = 0; adev->gds.gws_size = 64; adev->gds.oa_size = 16; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 5470cef7e9bd..cb25f7f0dfc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -313,6 +313,16 @@ gfxhub_v1_2_xcc_disable_identity_aperture(struct amdgpu_device *adev, } } +static inline bool +gfxhub_v1_2_per_process_xnack_support(struct amdgpu_device *adev) +{ + /* + * TODO: Check if this function is really needed, so far only 9.4.3 + * variants use GFXHUB 1.2 + */ + return !!adev->aid_mask; +} + static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev, uint32_t xcc_mask) { @@ -355,7 +365,7 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev, PAGE_TABLE_BLOCK_SIZE, block_size); /* Send no-retry XNACK on fault to suppress VM fault storm. - * On 9.4.2 and 9.4.3, XNACK can be enabled in + * On 9.4.3 variants, XNACK can be enabled in * the SQ per-process. * Retry faults need to be enabled for that to work. */ @@ -363,14 +373,8 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev, tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !adev->gmc.noretry || - amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 2) || - amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 4) || - amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 5, 0)); + gfxhub_v1_2_per_process_xnack_support( + adev)); WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), -- 2.50.1 From 2f9a32b58927cc729f27afc622487db66de03e2e Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 28 Jan 2025 12:55:39 +0530 Subject: [PATCH 15/16] drm/amdgpu: Clean up IP version checks in gmcv9.0 Clean up some IP version checks in gmcv9.0 Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 49 ++++++++++----------------- 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 71b8ae7f2194..795d078c359b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -411,6 +411,11 @@ static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = { (0x001d43e0 + 0x00001800), }; +static inline bool gmc_v9_0_is_multi_chiplet(struct amdgpu_device *adev) +{ + return !!adev->aid_mask; +} + static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned int type, @@ -647,9 +652,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr, entry->client_id, soc15_ih_clientid_name[entry->client_id]); - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) + if (gmc_v9_0_is_multi_chiplet(adev)) dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n", node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4, node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : ""); @@ -798,9 +801,7 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, uint32_t vmhub) { if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) + gmc_v9_0_is_multi_chiplet(adev)) return false; return ((vmhub == AMDGPU_MMHUB0(0) || @@ -1278,9 +1279,8 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, /* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system * memory can use more efficient MTYPEs. */ - if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3) && - amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4) && - amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 5, 0)) + if (!(adev->flags & AMD_IS_APU) || + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3)) return; /* Only direct-mapped memory allows us to determine the NUMA node from @@ -1555,9 +1555,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) { - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) + if (gmc_v9_0_is_multi_chiplet(adev)) adev->gfxhub.funcs = &gfxhub_v1_2_funcs; else adev->gfxhub.funcs = &gfxhub_v1_0_funcs; @@ -1624,9 +1622,7 @@ static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block) */ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) + gmc_v9_0_is_multi_chiplet(adev)) adev->gmc.xgmi.supported = true; if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) { @@ -1635,8 +1631,7 @@ static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block) adev->smuio.funcs->is_host_gpu_xgmi_supported(adev); } - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) { enum amdgpu_pkg_type pkg_type = adev->smuio.funcs->get_pkg_type(adev); /* On GFXIP 9.4.3. APU, there is no physical VRAM domain present @@ -2077,9 +2072,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) spin_lock_init(&adev->gmc.invalidate_lock); - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) { + if (gmc_v9_0_is_multi_chiplet(adev)) { gmc_v9_4_3_init_vram_info(adev); } else if (!adev->bios) { if (adev->flags & AMD_IS_APU) { @@ -2229,9 +2222,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_gmc_get_vbios_allocations(adev); - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) { + if (gmc_v9_0_is_multi_chiplet(adev)) { r = gmc_v9_0_init_mem_ranges(adev); if (r) return r; @@ -2260,9 +2251,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vm_manager.first_kfd_vmid = (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) ? + gmc_v9_0_is_multi_chiplet(adev)) ? 3 : 8; @@ -2274,9 +2263,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) + if (gmc_v9_0_is_multi_chiplet(adev)) amdgpu_gmc_sysfs_init(adev); return 0; @@ -2286,9 +2273,7 @@ static int gmc_v9_0_sw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) + if (gmc_v9_0_is_multi_chiplet(adev)) amdgpu_gmc_sysfs_fini(adev); amdgpu_gmc_ras_fini(adev); -- 2.50.1 From c909a49128a31bced8cfbd2dfb0a4fe56e01a6d0 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Fri, 17 Jan 2025 17:49:41 -0500 Subject: [PATCH 16/16] drm/amd/display: Fixes for mcache programming in DML21 [WHY & HOW] - Fix indexing phantom planes for mcache programming in the wrapper - Fix phantom mcache allocations to align with HW guidance - Fix mcache assignment for chroma plane for multi-planar formats Reviewed-by: Austin Zheng Signed-off-by: Dillon Varone Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/dml2/dml21/dml21_utils.c | 1 - .../amd/display/dc/dml2/dml21/dml21_wrapper.c | 14 +++++++++ .../display/dc/dml2/dml21/inc/dml_top_types.h | 1 + .../dml2/dml21/src/dml2_core/dml2_core_dcn4.c | 30 ++++++++++++++++++- .../src/dml2_core/dml2_core_dcn4_calcs.c | 3 ++ .../dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c | 21 +++++++++++++ .../dml2/dml21/src/dml2_top/dml2_top_soc15.c | 8 ----- 7 files changed, 68 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c index 1e56d995cd0e..930e86cdb88a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c @@ -232,7 +232,6 @@ void dml21_program_dc_pipe(struct dml2_context *dml_ctx, struct dc_state *contex context->bw_ctx.bw.dcn.clk.dppclk_khz = pipe_ctx->plane_res.bw.dppclk_khz; dml21_populate_mall_allocation_size(context, dml_ctx, pln_prog, pipe_ctx); - memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[pipe_ctx->pipe_idx], &pln_prog->mcache_allocation, sizeof(struct dml2_mcache_surface_allocation)); bool sub_vp_enabled = is_sub_vp_enabled(pipe_ctx->stream->ctx->dc, context); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index fb80ba9287b6..be54f0e696ce 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -124,6 +124,7 @@ static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_sta struct pipe_ctx *dc_main_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__]; struct pipe_ctx *dc_phantom_pipes[__DML2_WRAPPER_MAX_STREAMS_PLANES__] = {0}; int num_pipes; + unsigned int dml_phantom_prog_idx; context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; @@ -137,6 +138,9 @@ static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_sta context->bw_ctx.bw.dcn.mall_ss_psr_active_size_bytes = 0; context->bw_ctx.bw.dcn.mall_subvp_size_bytes = 0; + /* phantom's start after main planes */ + dml_phantom_prog_idx = in_ctx->v21.mode_programming.programming->display_config.num_planes; + for (dml_prog_idx = 0; dml_prog_idx < DML2_MAX_PLANES; dml_prog_idx++) { pln_prog = &in_ctx->v21.mode_programming.programming->plane_programming[dml_prog_idx]; @@ -162,6 +166,16 @@ static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_sta dml21_program_dc_pipe(in_ctx, context, dc_phantom_pipes[dc_pipe_index], pln_prog, stream_prog); } } + + /* copy per plane mcache allocation */ + memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[dml_prog_idx], &pln_prog->mcache_allocation, sizeof(struct dml2_mcache_surface_allocation)); + if (pln_prog->phantom_plane.valid) { + memcpy(&context->bw_ctx.bw.dcn.mcache_allocations[dml_phantom_prog_idx], + &pln_prog->phantom_plane.mcache_allocation, + sizeof(struct dml2_mcache_surface_allocation)); + + dml_phantom_prog_idx++; + } } /* assign global clocks */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index d2d053f2354d..0ab19cf4d242 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -245,6 +245,7 @@ struct dml2_per_plane_programming { struct { bool valid; struct dml2_plane_parameters descriptor; + struct dml2_mcache_surface_allocation mcache_allocation; struct dml2_dchub_per_pipe_register_set *pipe_regs[DML2_MAX_PLANES]; } phantom_plane; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index d68b4567e218..ec0beb139200 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -254,7 +254,8 @@ static void expand_implict_subvp(const struct display_configuation_with_meta *di static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_instance *core, const struct display_configuation_with_meta *display_cfg, const struct dml2_display_cfg *svp_expanded_display_cfg, struct dml2_display_cfg_programming *programming, struct dml2_core_scratch *scratch) { - unsigned int stream_index, plane_index, pipe_offset, stream_already_populated_mask, main_plane_index; + unsigned int stream_index, plane_index, pipe_offset, stream_already_populated_mask, main_plane_index, mcache_index; + unsigned int total_main_mcaches_required = 0; int total_pipe_regs_copied = 0; int dml_internal_pipe_index = 0; const struct dml2_plane_parameters *main_plane; @@ -325,6 +326,13 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); + memcpy(&programming->plane_programming[plane_index].mcache_allocation, + &display_cfg->stage2.mcache_allocations[plane_index], + sizeof(struct dml2_mcache_surface_allocation)); + total_main_mcaches_required += programming->plane_programming[plane_index].mcache_allocation.num_mcaches_plane0 + + programming->plane_programming[plane_index].mcache_allocation.num_mcaches_plane1 - + (programming->plane_programming[plane_index].mcache_allocation.last_slice_sharing.plane0_plane1 ? 1 : 0); + for (pipe_offset = 0; pipe_offset < programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) { // Assign storage for this pipe's register values programming->plane_programming[plane_index].pipe_regs[pipe_offset] = &programming->pipe_regs[total_pipe_regs_copied]; @@ -363,6 +371,22 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in memcpy(&programming->plane_programming[main_plane_index].phantom_plane.descriptor, phantom_plane, sizeof(struct dml2_plane_parameters)); dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[main_plane_index].svp_size_mall_bytes, dml_internal_pipe_index); + + /* generate mcache allocation, phantoms use identical mcache configuration, but in the MALL set and unique mcache ID's beginning after all main ID's */ + memcpy(&programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation, + &programming->plane_programming[main_plane_index].mcache_allocation, + sizeof(struct dml2_mcache_surface_allocation)); + for (mcache_index = 0; mcache_index < programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.num_mcaches_plane0; mcache_index++) { + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane0[mcache_index] += total_main_mcaches_required; + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_mall_plane0[mcache_index] = + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane0[mcache_index]; + } + for (mcache_index = 0; mcache_index < programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.num_mcaches_plane1; mcache_index++) { + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane1[mcache_index] += total_main_mcaches_required; + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_mall_plane1[mcache_index] = + programming->plane_programming[main_plane_index].phantom_plane.mcache_allocation.global_mcache_ids_plane1[mcache_index]; + } + for (pipe_offset = 0; pipe_offset < programming->plane_programming[main_plane_index].num_dpps_required; pipe_offset++) { // Assign storage for this pipe's register values programming->plane_programming[main_plane_index].phantom_plane.pipe_regs[pipe_offset] = &programming->pipe_regs[total_pipe_regs_copied]; @@ -572,6 +596,10 @@ bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &in_out->programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); + memcpy(&in_out->programming->plane_programming[plane_index].mcache_allocation, + &in_out->display_cfg->stage2.mcache_allocations[plane_index], + sizeof(struct dml2_mcache_surface_allocation)); + for (pipe_offset = 0; pipe_offset < in_out->programming->plane_programming[plane_index].num_dpps_required; pipe_offset++) { in_out->programming->plane_programming[plane_index].plane_descriptor = &in_out->programming->display_config.plane_descriptors[plane_index]; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index e96a13dc43d4..47872c6f657e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -2637,6 +2637,9 @@ static void calculate_mcache_setting( // Luma/Chroma combine in the last mcache // In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible, when stay aligned to mvmpg boundary if (*p->lc_comb_mcache && l->is_dual_plane) { + /* if luma and chroma planes share an mcache, increase total chroma mcache count */ + *p->num_mcaches_c = *p->num_mcaches_c + 1; + for (n = 0; n < *p->num_mcaches_l - 1; n++) p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l; p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index a3324f7b9ba6..15c906c42ec4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -1082,12 +1082,21 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, const struct dml2_fams2_meta *stream_fams2_meta; unsigned int microschedule_vlines; unsigned int i; + unsigned int mcaches_per_plane; + unsigned int total_mcaches_required = 0; unsigned int num_planes_per_stream[DML2_MAX_PLANES] = { 0 }; /* confirm timing it is not a centered timing */ for (i = 0; i < display_config->display_config.num_planes; i++) { plane_descriptor = &display_config->display_config.plane_descriptors[i]; + mcaches_per_plane = 0; + + if (plane_descriptor->surface.dcc.enable) { + mcaches_per_plane += display_config->stage2.mcache_allocations[i].num_mcaches_plane0 + + display_config->stage2.mcache_allocations[i].num_mcaches_plane1 - + (display_config->stage2.mcache_allocations[i].last_slice_sharing.plane0_plane1 ? 1 : 0); + } if (is_bit_set_in_bitfield(mask, (unsigned char)plane_descriptor->stream_index)) { num_planes_per_stream[plane_descriptor->stream_index]++; @@ -1098,7 +1107,19 @@ static bool all_timings_support_svp(const struct dml2_pmo_instance *pmo, plane_descriptor->composition.rotation_angle != dml2_rotation_0) { return false; } + + /* phantom requires same number of mcaches as main */ + if (plane_descriptor->surface.dcc.enable) { + mcaches_per_plane *= 2; + } } + + total_mcaches_required += mcaches_per_plane; + } + + if (total_mcaches_required > pmo->soc_bb->num_dcc_mcaches) { + /* too many mcaches required */ + return false; } for (i = 0; i < DML2_MAX_PLANES; i++) { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c index a8f58f8448e4..dc2ce5e77f57 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c @@ -831,7 +831,6 @@ static bool dml2_top_soc15_build_mode_programming(struct dml2_build_mode_program bool uclk_pstate_success = false; bool vmin_success = false; bool stutter_success = false; - unsigned int i; memset(l, 0, sizeof(struct dml2_build_mode_programming_locals)); memset(in_out->programming, 0, sizeof(struct dml2_display_cfg_programming)); @@ -976,13 +975,6 @@ static bool dml2_top_soc15_build_mode_programming(struct dml2_build_mode_program l->base_display_config_with_meta.stage5.success = true; } - /* - * Populate mcache programming - */ - for (i = 0; i < in_out->display_config->num_planes; i++) { - in_out->programming->plane_programming[i].mcache_allocation = l->base_display_config_with_meta.stage2.mcache_allocations[i]; - } - /* * Call DPMM to map all requirements to minimum clock state */ -- 2.50.1