From c003b5ccaf625c4e8077a0e7a8a1d9e6e403d603 Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Fri, 17 Jan 2025 17:08:33 +0800 Subject: [PATCH 01/16] drm/amd/pm: Update pm attr for gc_9_5_0 Update power management & clk attributes for gc_v_9_5_0 Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index e8ae7681bf0a..0aca0803514e 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2006,9 +2006,10 @@ static int pp_od_clk_voltage_attr_update(struct amdgpu_device *adev, struct amdg return 0; } - /* Enable pp_od_clk_voltage node for gc 9.4.3 SRIOV/BM support */ + /* Enable pp_od_clk_voltage node for gc 9.4.3, 9.4.4, 9.5.0 SRIOV/BM support */ if (gc_ver == IP_VERSION(9, 4, 3) || - gc_ver == IP_VERSION(9, 4, 4)) { + gc_ver == IP_VERSION(9, 4, 4) || + gc_ver == IP_VERSION(9, 5, 0)) { if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) *states = ATTR_STATE_UNSUPPORTED; return 0; @@ -2087,7 +2088,8 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd gc_ver == IP_VERSION(11, 0, 2) || gc_ver == IP_VERSION(11, 0, 3) || gc_ver == IP_VERSION(9, 4, 3) || - gc_ver == IP_VERSION(9, 4, 4))) + gc_ver == IP_VERSION(9, 4, 4) || + gc_ver == IP_VERSION(9, 5, 0))) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_dpm_vclk1)) { if (!((gc_ver == IP_VERSION(10, 3, 1) || @@ -2109,7 +2111,8 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd gc_ver == IP_VERSION(11, 0, 2) || gc_ver == IP_VERSION(11, 0, 3) || gc_ver == IP_VERSION(9, 4, 3) || - gc_ver == IP_VERSION(9, 4, 4))) + gc_ver == IP_VERSION(9, 4, 4) || + gc_ver == IP_VERSION(9, 5, 0))) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_dpm_dclk1)) { if (!((gc_ver == IP_VERSION(10, 3, 1) || @@ -2120,7 +2123,8 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd } else if (DEVICE_ATTR_IS(pp_dpm_pcie)) { if (gc_ver == IP_VERSION(9, 4, 2) || gc_ver == IP_VERSION(9, 4, 3) || - gc_ver == IP_VERSION(9, 4, 4)) + gc_ver == IP_VERSION(9, 4, 4) || + gc_ver == IP_VERSION(9, 5, 0)) *states = ATTR_STATE_UNSUPPORTED; } @@ -2416,6 +2420,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): case IP_VERSION(10, 3, 0): case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): @@ -3530,7 +3535,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, /* Skip crit temp on APU */ if ((((adev->flags & AMD_IS_APU) && (adev->family >= AMDGPU_FAMILY_CZ)) || - (gc_ver == IP_VERSION(9, 4, 3) || gc_ver == IP_VERSION(9, 4, 4))) && + (gc_ver == IP_VERSION(9, 4, 3) || gc_ver == IP_VERSION(9, 4, 4) || + gc_ver == IP_VERSION(9, 5, 0))) && (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr || attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr)) return 0; @@ -3605,7 +3611,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, if ((adev->family == AMDGPU_FAMILY_SI || /* not implemented yet */ adev->family == AMDGPU_FAMILY_KV || /* not implemented yet */ (gc_ver == IP_VERSION(9, 4, 3) || - gc_ver == IP_VERSION(9, 4, 4))) && + gc_ver == IP_VERSION(9, 4, 4) || + gc_ver == IP_VERSION(9, 5, 0))) && (attr == &sensor_dev_attr_in0_input.dev_attr.attr || 
attr == &sensor_dev_attr_in0_label.dev_attr.attr))
return 0;
@@ -3613,7 +3620,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
/* only APUs other than gc 9,4,3 have vddnb */
if ((!(adev->flags & AMD_IS_APU) ||
(gc_ver == IP_VERSION(9, 4, 3) ||
- gc_ver == IP_VERSION(9, 4, 4))) &&
+ gc_ver == IP_VERSION(9, 4, 4) ||
+ gc_ver == IP_VERSION(9, 5, 0))) &&
(attr == &sensor_dev_attr_in1_input.dev_attr.attr ||
attr == &sensor_dev_attr_in1_label.dev_attr.attr))
return 0;
@@ -3636,7 +3644,8 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
/* hotspot temperature for gc 9,4,3 */
if (gc_ver == IP_VERSION(9, 4, 3) ||
- gc_ver == IP_VERSION(9, 4, 4)) {
+ gc_ver == IP_VERSION(9, 4, 4) ||
+ gc_ver == IP_VERSION(9, 5, 0)) {
if (attr == &sensor_dev_attr_temp1_input.dev_attr.attr ||
attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
attr == &sensor_dev_attr_temp1_label.dev_attr.attr)
--
2.51.0

From 16b85a0942c0b0f1611bcaa42cc98f020e34b1cf Mon Sep 17 00:00:00 2001
From: Hawking Zhang
Date: Wed, 22 Jan 2025 19:34:33 +0800
Subject: [PATCH 02/16] drm/amdgpu: Update usage for bad page threshold

The driver's behavior varies based on the configuration of the
amdgpu_bad_page_threshold setting.

Signed-off-by: Hawking Zhang
Reviewed-by: Tao Zhou
Signed-off-by: Alex Deucher
---
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 40 +++++++++---------
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +-
.../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 41 +++++++++++--------
4 files changed, 45 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index e789f6790a1c..f52f674477eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -964,7 +964,7 @@ module_param_named_unsafe(reset_method, amdgpu_reset_method, int, 0644);
* result in the GPU entering bad status when the number of total
* faulty pages by ECC exceeds the threshold value.
*/
-MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = driver sets threshold)");
+MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold (-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = threshold determined by a formula, 0 < threshold < max records = user-defined threshold)");
module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);
MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index f0924aa3f4e4..90394f89aba6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3080,31 +3080,29 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

/*
- * Justification of value bad_page_cnt_threshold in ras structure
- *
- * Generally, 0 <= amdgpu_bad_page_threshold <= max record length
- * in eeprom or amdgpu_bad_page_threshold == -2, introduce two
- * scenarios accordingly.
- *
- * Bad page retirement enablement:
- * - If amdgpu_bad_page_threshold = -2,
- * bad_page_cnt_threshold = typical value by formula.
- *
- * - When the value from user is 0 < amdgpu_bad_page_threshold <
- * max record length in eeprom, use it directly.
- *
- * Bad page retirement disablement:
- * - If amdgpu_bad_page_threshold = 0, bad page retirement
- * functionality is disabled, and bad_page_cnt_threshold will
- * take no effect.
+ * amdgpu_bad_page_threshold is used to configure
+ * the threshold for the number of bad pages.
+ * -1: Threshold is set to default value
+ * Driver will issue a warning message when threshold is reached
+ * and continue runtime services.
+ * 0: Disable bad page retirement
+ * Driver will not retire bad pages,
+ * which is intended for debugging purposes.
+ * -2: Threshold is determined by a formula
+ * that assumes 1 bad page per 100M of local memory.
+ * Driver will continue runtime services when threshold is reached.
+ * 0 < threshold < max number of bad page records in EEPROM:
+ * A user-defined threshold is set
+ * Driver will halt runtime services when this custom threshold is reached.
*/
-
- if (amdgpu_bad_page_threshold < 0) {
+ if (amdgpu_bad_page_threshold == -2) {
u64 val = adev->gmc.mc_vram_size;

do_div(val, RAS_BAD_PAGE_COVER);
con->bad_page_cnt_threshold = min(lower_32_bits(val), max_count);
+ } else if (amdgpu_bad_page_threshold == -1) {
+ con->bad_page_cnt_threshold = ((con->reserved_pages_in_bytes) >> 21) << 4;
} else {
con->bad_page_cnt_threshold = min_t(int, max_count, amdgpu_bad_page_threshold);
@@ -3848,8 +3846,10 @@ static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 12):
+ con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT;
+ break;
case IP_VERSION(13, 0, 14):
- con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE;
+ con->reserved_pages_in_bytes = (AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT << 1);
break;
default:
break;
}

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 82db986c36a0..cc4586581dba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -65,7 +65,7 @@ struct amdgpu_iv_entry;

/* Reserve 8 physical dram row for possible retirement.
* In worst cases, it will lose 8 * 2MB memory in vram domain */ -#define AMDGPU_RAS_RESERVED_VRAM_SIZE (16ULL << 20) +#define AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT (16ULL << 20) /* The high three bits indicates socketid */ #define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 52c16bfeccaa..723c655bb4d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -558,16 +558,17 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev) return false; if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) { - if (amdgpu_bad_page_threshold == -1) { + if (con->eeprom_control.ras_num_bad_pages > con->bad_page_cnt_threshold) dev_warn(adev->dev, "RAS records:%d exceed threshold:%d", - con->eeprom_control.ras_num_bad_pages, con->bad_page_cnt_threshold); + con->eeprom_control.ras_num_bad_pages, con->bad_page_cnt_threshold); + if ((amdgpu_bad_page_threshold == -1) || + (amdgpu_bad_page_threshold == -2)) { dev_warn(adev->dev, - "But GPU can be operated due to bad_page_threshold = -1.\n"); + "Please consult AMD Service Action Guide (SAG) for appropriate service procedures.\n"); return false; } else { - dev_warn(adev->dev, "This GPU is in BAD status."); - dev_warn(adev->dev, "Please retire it or set a larger " - "threshold value when reloading driver.\n"); + dev_warn(adev->dev, + "Please consider adjusting the customized threshold.\n"); return true; } } @@ -758,7 +759,8 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) control->tbl_rai.health_percent = 0; } - if (amdgpu_bad_page_threshold != -1) + if ((amdgpu_bad_page_threshold != -1) && + (amdgpu_bad_page_threshold != -2)) ras->is_rma = true; /* ignore the -ENOTSUPP return value */ @@ -1428,8 +1430,9 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) res = __verify_ras_table_checksum(control); if (res) - DRM_ERROR("RAS table incorrect checksum or error:%d\n", - res); + dev_err(adev->dev, + "RAS table incorrect checksum or error:%d\n", + res); /* Warn if we are at 90% of the threshold or above */ @@ -1447,8 +1450,9 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) res = __verify_ras_table_checksum(control); if (res) { - dev_err(adev->dev, "RAS Table incorrect checksum or error:%d\n", - res); + dev_err(adev->dev, + "RAS Table incorrect checksum or error:%d\n", + res); return -EINVAL; } if (ras->bad_page_cnt_threshold > control->ras_num_bad_pages) { @@ -1466,17 +1470,18 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) res = amdgpu_ras_eeprom_correct_header_tag(control, RAS_TABLE_HDR_VAL); } else { - dev_err(adev->dev, "RAS records:%d exceed threshold:%d", + dev_warn(adev->dev, + "RAS records:%d exceed threshold:%d\n", control->ras_num_bad_pages, ras->bad_page_cnt_threshold); - if (amdgpu_bad_page_threshold == -1) { - dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -1."); + if ((amdgpu_bad_page_threshold == -1) || + (amdgpu_bad_page_threshold == -2)) { res = 0; + dev_warn(adev->dev, + "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n"); } else { ras->is_rma = true; - dev_err(adev->dev, - "RAS records:%d exceed threshold:%d, " - "GPU will not be initialized. 
Replace this GPU or increase the threshold",
- control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
+ dev_warn(adev->dev,
+ "User-defined threshold is set, runtime service will be halted when threshold is reached\n");
}
}
} else {
--
2.51.0

From 04893397766a2b2f1bc7fe5c6414e4c0846ed171 Mon Sep 17 00:00:00 2001
From: Victor Skvortsov
Date: Mon, 20 Jan 2025 22:00:22 -0500
Subject: [PATCH 03/16] drm/amdgpu: Skip err_count sysfs creation on VF unsupported RAS blocks

VFs are not able to query error counts for all RAS blocks. Rather than
returning an error for queries on these blocks, skip the sysfs creation
altogether.

Signed-off-by: Victor Skvortsov
Reviewed-by: Hawking Zhang
Signed-off-by: Alex Deucher
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 17 ++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 ++
3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 90394f89aba6..44d13a60588d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1864,6 +1864,9 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
if (!obj || obj->attr_inuse)
return -EINVAL;

+ if (amdgpu_sriov_vf(adev) && !amdgpu_virt_ras_telemetry_block_en(adev, head->block))
+ return 0;
+
get_obj(obj);

snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 0af469ec6fcc..2056efaf157d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -1246,7 +1246,8 @@ amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block bloc
case AMDGPU_RAS_BLOCK__MPIO:
return RAS_TELEMETRY_GPU_BLOCK_MPIO;
default:
- dev_err(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n", block);
+ DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n",
+ block);
return RAS_TELEMETRY_GPU_BLOCK_COUNT;
}
}
@@ -1331,3 +1332,17 @@ int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev)

return 0;
}
+
+bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block)
+{
+ enum amd_sriov_ras_telemetry_gpu_block sriov_block;
+
+ sriov_block = amdgpu_ras_block_to_sriov(adev, block);
+
+ if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT ||
+ !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block))
+ return false;
+
+ return true;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 5381b8d596e6..270a032e2d70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -406,4 +406,6 @@ bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev);
int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block, struct ras_err_data *err_data);
int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev);
+bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block);
#endif
--
2.51.0

From d8c782cac5007e68e7484d420168f12d3490def6 Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam
Date: Tue, 21 Jan 2025 12:32:07 +0530
Subject: [PATCH 04/16] drm/amdgpu/gfx10: Add cleaner shader for GFX10.1.10
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This commit adds the cleaner shader microcode for GFX10.1.10 GPUs.
The cleaner shader is a piece of GPU code that is used to clear or initialize certain GPU resources, such as Local Data Share (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs). Clearing these resources is important for ensuring data isolation between different workloads running on the GPU. Without the cleaner shader, residual data from a previous workload could potentially be accessed by a subsequent workload, leading to data leaks and incorrect computation results. The cleaner shader microcode is represented as an array of 32-bit words (`gfx_10_1_0_cleaner_shader_hex`). This array is the binary representation of the cleaner shader code, which is written in a low-level GPU instruction set. When the cleaner shader feature is enabled, the AMDGPU driver loads this array into a specific location in the GPU memory. The GPU then reads this memory location to fetch and execute the cleaner shader instructions. The cleaner shader is executed automatically by the GPU at the end of each workload, before the next workload starts. This ensures that all GPU resources are in a clean state before the start of each workload. This addition is part of the cleaner shader feature implementation. The cleaner shader feature helps resource utilization by cleaning up GPU resources after they are used. It also enhances security and reliability by preventing data leaks between workloads. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Alex Deucher Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 14 ++ .../drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h | 35 +++++ .../amdgpu/gfx_v10_1_10_cleaner_shader.asm | 126 ++++++++++++++++++ 3 files changed, 175 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 4b5e65affb81..1878c83ff7e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4794,6 +4794,20 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) break; } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(10, 1, 10): + adev->gfx.cleaner_shader_ptr = gfx_10_1_10_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 101 && + adev->gfx.pfp_fw_version >= 158 && + adev->gfx.mec_fw_version >= 152) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 4): diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h index 663c2572d440..5255378af53c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h @@ -21,6 +21,41 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +/* Define the cleaner shader gfx_10_1_10 */ +static const u32 gfx_10_1_10_cleaner_shader_hex[] = { + 0xb0804004, 0xbf8a0000, + 0xbf068100, 0xbf840023, + 0xbe8203b8, 0xbefc0380, + 0x7e008480, 0x7e028480, + 0x7e048480, 0x7e068480, + 0x7e088480, 0x7e0a8480, + 0x7e0c8480, 0x7e0e8480, + 0xbefc0302, 0x80828802, + 0xbf84fff5, 0xbe8203ff, + 0x80000000, 0x87020102, + 0xbf840012, 0xbefe03c1, + 0xbeff03c1, 0xd7650001, + 0x0001007f, 0xd7660001, + 0x0002027e, 0x16020288, + 0xbe8203bf, 0xbefc03c1, + 0xd9382000, 0x00020201, + 0xd9386040, 0x00040401, + 0xd70f6a01, 0x000202ff, + 0x00000400, 0x80828102, + 0xbf84fff7, 0xbefc03ff, + 0x00000068, 0xbe803080, + 0xbe813080, 0xbe823080, + 0xbe833080, 0x80fc847c, + 0xbf84fffa, 0xbeea0480, + 0xbeec0480, 0xbeee0480, + 0xbef00480, 0xbef20480, + 0xbef40480, 0xbef60480, + 0xbef80480, 0xbefa0480, + 0xbf810000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, +}; + /* Define the cleaner shader gfx_10_3_0 */ static const u32 gfx_10_3_0_cleaner_shader_hex[] = { 0xb0804004, 0xbf8a0000, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm new file mode 100644 index 000000000000..9ba3359253c9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 256 Dwords cleaner shader. + +// GFX10.1 : Clear SGPRs, VGPRs and LDS +// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot +// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD +// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS) +// It takes 2 workgroups to use all of LDS: one on each CU of the WGP +// Each wave clears SGPRs 0 - 107 +// Each wave clears VGPRs 0 - 63 +// The first wave of the workgroup clears its 64KB of LDS +// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup +// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared. 
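+// Sanity check of the launch geometry described above (derived from the stated numbers):
+//   16 waves/SIMD x 64 VGPRs per wave32 = 1024 VGPRs, i.e. the full VGPR file of a SIMD
+//   2 workgroups x 64KB LDS each = 128KB, i.e. all of the WGP's LDS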
+
+
+shader main
+ asic(GFX10.1)
+ type(CS)
+ wave_size(32)
+// Note: original source code from SQ team
+
+//
+// Create 32 waves in a threadgroup (CS waves)
+// Each allocates 64 VGPRs
+// The workgroup allocates all of LDS (64kbytes)
+//
+// Takes about 2500 clocks to run.
+// (theoretical fastest = 1024clks vgpr + 640lds = 1660 clks)
+//
+ S_BARRIER
+ s_cmp_eq_u32 s0, 1 // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_0
+ s_cbranch_scc0 label_0023 // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s0 == 1)
+
+ s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance)
+ s_mov_b32 m0, 0
+ //
+ // CLEAR VGPRs
+ //
+label_0005:
+ v_movreld_b32 v0, 0
+ v_movreld_b32 v1, 0
+ v_movreld_b32 v2, 0
+ v_movreld_b32 v3, 0
+ v_movreld_b32 v4, 0
+ v_movreld_b32 v5, 0
+ v_movreld_b32 v6, 0
+ v_movreld_b32 v7, 0
+ s_mov_b32 m0, s2
+ s_sub_u32 s2, s2, 8
+ s_cbranch_scc0 label_0005
+ //
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup
+ // CLEAR LDS
+ //
+ s_mov_b32 exec_lo, 0xffffffff
+ s_mov_b32 exec_hi, 0xffffffff
+ v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
+ v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
+ v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
+ s_mov_b32 s2, 0x00000003f // 64 loop iterations
+ s_mov_b32 m0, 0xffffffff
+ // Clear all of LDS space
+ // Each FirstWave of WorkGroup clears 64kbyte block
+
+label_001F:
+ ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
+ ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
+ v_add_co_u32 v1, vcc, 0x00000400, v1
+ s_sub_u32 s2, s2, 1
+ s_cbranch_scc0 label_001F
+
+ //
+ // CLEAR SGPRs
+ //
+label_0023:
+ s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
+label_sgpr_loop:
+ s_movreld_b32 s0, 0
+ s_movreld_b32 s1, 0
+ s_movreld_b32 s2, 0
+ s_movreld_b32 s3, 0
+ s_sub_u32 m0, m0, 4
+ s_cbranch_scc0 label_sgpr_loop
+
+ //clear vcc
+ s_mov_b64 vcc, 0 //clear vcc
+ //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_lo, 0 //clear flat scratch lo SGPR
+ //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_hi, 0 //clear flat scratch hi SGPR
+ s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
+ s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
+ s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
+ s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
+ s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
+ s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
+ s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
+ s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15
+
+ s_endpgm
+
+end
+
+
--
2.51.0

From d8c782cac5007e68e7484d420168f12d3490def6 Mon Sep 17 00:00:00 2001
From: Tom Chung
Date: Mon, 13 Jan 2025 14:22:31 +0800
Subject: [PATCH 05/16] drm/amd/display: Initialize psr_version with correct setting

[Why & How]
The initial setting for psr_version is not correct while creating a
virtual link. The default psr_version should be
DC_PSR_VERSION_UNSUPPORTED.
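In outline, the fix makes the virtual-link creation path state its PSR
capability explicitly; a minimal sketch of the intended initialization,
using the field and constant names from the diff below:

    /* virtual links cannot support PSR; make the default explicit */
    link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED;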
Reviewed-by: Roman Li Signed-off-by: Tom Chung Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index ba3a34fad26a..f08d3d467372 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -276,6 +276,7 @@ static bool create_links( link->link_id.type = OBJECT_TYPE_CONNECTOR; link->link_id.id = CONNECTOR_ID_VIRTUAL; link->link_id.enum_id = ENUM_ID_1; + link->psr_settings.psr_version = DC_PSR_VERSION_UNSUPPORTED; link->link_enc = kzalloc(sizeof(*link->link_enc), GFP_KERNEL); if (!link->link_enc) { -- 2.51.0 From 6a7fde433231c18164c117592d3e18ced648ad58 Mon Sep 17 00:00:00 2001 From: George Shen Date: Fri, 10 Jan 2025 11:35:46 -0500 Subject: [PATCH 06/16] drm/amd/display: Update CR AUX RD interval interpretation [Why] DP spec updated to have the CR AUX RD interval match the EQ AUX RD interval interpretation of DPCD 0000Eh/0220Eh for 8b/10b non-LTTPR mode and LTTPR transparent mode cases. [How] Update interpretation of DPCD 0000Eh/0220Eh for CR AUX RD interval during 8b/10b link training. Reviewed-by: Michael Strauss Reviewed-by: Wenjing Liu Signed-off-by: George Shen Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/link/protocols/link_dp_training_8b_10b.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c index 3bdce32a85e3..ae95ec48e572 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_8b_10b.c @@ -36,7 +36,8 @@ link->ctx->logger static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, - const struct dc_link_settings *link_settings) + const struct dc_link_settings *link_settings, + enum lttpr_mode lttpr_mode) { union training_aux_rd_interval training_rd_interval; uint32_t wait_in_micro_secs = 100; @@ -49,6 +50,8 @@ static int32_t get_cr_training_aux_rd_interval(struct dc_link *link, DP_TRAINING_AUX_RD_INTERVAL, (uint8_t *)&training_rd_interval, sizeof(training_rd_interval)); + if (lttpr_mode != LTTPR_MODE_NON_TRANSPARENT) + wait_in_micro_secs = 400; if (training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL) wait_in_micro_secs = training_rd_interval.bits.TRAINIG_AUX_RD_INTERVAL * 4000; } @@ -110,7 +113,6 @@ void decide_8b_10b_training_settings( */ lt_settings->link_settings.link_spread = link->dp_ss_off ? 
LINK_SPREAD_DISABLED : LINK_SPREAD_05_DOWNSPREAD_30KHZ;
- lt_settings->cr_pattern_time = get_cr_training_aux_rd_interval(link, link_setting);
lt_settings->eq_pattern_time = get_eq_training_aux_rd_interval(link, link_setting);
lt_settings->pattern_for_cr = decide_cr_training_pattern(link_setting);
lt_settings->pattern_for_eq = decide_eq_training_pattern(link, link_setting);
@@ -119,6 +121,7 @@
lt_settings->disallow_per_lane_settings = true;
lt_settings->always_match_dpcd_with_hw_lane_settings = true;
lt_settings->lttpr_mode = dp_decide_8b_10b_lttpr_mode(link);
+ lt_settings->cr_pattern_time = get_cr_training_aux_rd_interval(link, link_setting, lt_settings->lttpr_mode);
dp_hw_to_dpcd_lane_settings(lt_settings, lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings);
}
--
2.51.0

From 4a4077b4b63a8404efd6d37fc2926f03fb25bace Mon Sep 17 00:00:00 2001
From: Zhikai Zhai
Date: Thu, 9 Jan 2025 16:11:48 +0800
Subject: [PATCH 07/16] drm/amd/display: Update Cursor request mode to the beginning prefetch always

[Why]
The double-buffered cursor registers are updated by the cursor vupdate
event. There is a gap between vupdate and the cursor data fetch if the
cursor fetches data relative to the cursor position. Cursor corruption
will happen if we update the cursor surface in this gap.

[How]
Change the cursor request mode to always prefetch at the beginning and
avoid wraparound calculation issues.

Reviewed-by: Nicholas Kazlauskas
Signed-off-by: Zhikai Zhai
Signed-off-by: Zaeem Mohamed
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
.../amd/display/dc/hubp/dcn31/dcn31_hubp.c | 2 +-
.../amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 22 ++++++++-----------
2 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c
index c2900c79a2d3..7fd582a8a4ba 100644
--- a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c
@@ -44,7 +44,7 @@ void hubp31_set_unbounded_requesting(struct hubp *hubp, bool enable)
struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);

REG_UPDATE(DCHUBP_CNTL, HUBP_UNBOUNDED_REQ_MODE, enable);
- REG_UPDATE(CURSOR_CONTROL, CURSOR_REQ_MODE, enable);
+ REG_UPDATE(CURSOR_CONTROL, CURSOR_REQ_MODE, 1);
}

void hubp31_soft_reset(struct hubp *hubp, bool reset)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index 906934128912..35c0d101d7c8 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -1993,20 +1993,11 @@ static void delay_cursor_until_vupdate(struct dc *dc, struct pipe_ctx *pipe_ctx)
dc->hwss.get_position(&pipe_ctx, 1, &position);
vpos = position.vertical_count;

- /* Avoid wraparound calculation issues */
- vupdate_start += stream->timing.v_total;
- vupdate_end += stream->timing.v_total;
- vpos += stream->timing.v_total;
-
if (vpos <= vupdate_start) {
/* VPOS is in VACTIVE or back porch. */
lines_to_vupdate = vupdate_start - vpos;
- } else if (vpos > vupdate_end) {
- /* VPOS is in the front porch. */
- return;
} else {
- /* VPOS is in VUPDATE. */
- lines_to_vupdate = 0;
+ lines_to_vupdate = stream->timing.v_total - vpos + vupdate_start;
}

/* Calculate time until VUPDATE in microseconds.
*/ @@ -2014,13 +2005,18 @@ static void delay_cursor_until_vupdate(struct dc *dc, struct pipe_ctx *pipe_ctx) stream->timing.h_total * 10000u / stream->timing.pix_clk_100hz; us_to_vupdate = lines_to_vupdate * us_per_line; + /* Stall out until the cursor update completes. */ + if (vupdate_end < vupdate_start) + vupdate_end += stream->timing.v_total; + + /* Position is in the range of vupdate start and end*/ + if (lines_to_vupdate > stream->timing.v_total - vupdate_end + vupdate_start) + us_to_vupdate = 0; + /* 70 us is a conservative estimate of cursor update time*/ if (us_to_vupdate > 70) return; - /* Stall out until the cursor update completes. */ - if (vupdate_end < vupdate_start) - vupdate_end += stream->timing.v_total; us_vupdate = (vupdate_end - vupdate_start + 1) * us_per_line; udelay(us_to_vupdate + us_vupdate); } -- 2.51.0 From 36681f15bb12b5c01df924379cdab9234259825c Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Mon, 13 Jan 2025 14:13:51 -0500 Subject: [PATCH 08/16] drm/amd/display: Account For OTO Prefetch Bandwidth When Calculating Urgent Bandwidth [Why] 1) The current calculations for OTO prefetch bandwidth do not consider the number of DPP pipes in use. As a result, OTO prefetch bandwidth may be larger than the vactive bandwidth if multiple DPP pipes are used. OTO prefetch bandwidth should never exceed the vactive bandwidth. 2) Mode programming may be mismatched with mode support In cases where mode support has chosen to use the equalized (equ) prefetch schedule, mode programming may end up using oto prefetch schedule instead. The bandwidth required to do the oto schedule may end up being higher than the equ schedule. This can cause the required urgent bandwidth to exceed the available urgent bandwidth. [How] Output the oto prefetch bandwidth and incorperate it into the urgent bandwidth calculations even if the prefetch schedule being used is not the oto schedule. 
Reviewed-by: Dillon Varone
Signed-off-by: Austin Zheng
Signed-off-by: Zaeem Mohamed
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
.../src/dml2_core/dml2_core_dcn4_calcs.c | 25 ++++++++++++++++++-
.../src/dml2_core/dml2_core_shared_types.h | 5 ++++
2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
index 51b457b6d66f..e96a13dc43d4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
@@ -4909,6 +4909,7 @@ static double get_urgent_bandwidth_required(
double ReadBandwidthChroma[],
double PrefetchBandwidthLuma[],
double PrefetchBandwidthChroma[],
+ double PrefetchBandwidthOto[],
double excess_vactive_fill_bw_l[],
double excess_vactive_fill_bw_c[],
double cursor_bw[],
@@ -4972,8 +4973,9 @@ static double get_urgent_bandwidth_required(
l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k];
l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur;
l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
+ l->flip_and_prefetch_bw_oto = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthOto[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k];
- surface_required_bw[k] = math_max4(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw);
+ surface_required_bw[k] = math_max5(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw, l->flip_and_prefetch_bw_oto);

/* export peak required bandwidth for the surface */
surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]);
@@ -5171,6 +5173,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
s->Tsw_est3 = 0.0;
s->cursor_prefetch_bytes = 0;
*p->prefetch_cursor_bw = 0;
+ *p->RequiredPrefetchBWOTO = 0.0;

dcc_mrq_enable = (p->dcc_enable && p->mrq_present);

@@ -5384,6 +5387,9 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime;
}

+ /* oto prefetch bw should always be less than total vactive bw */
+ DML2_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface);
+
s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor;

s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime));

@@ -5394,6 +5400,12 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch
p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
(p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));

+ /* oto bw needs to be outputted even if
the oto schedule isn't being used to avoid ms/mp mismatch. + * mp will fail if ms decides to use equ schedule and mp decides to use oto schedule + * and the required bandwidth increases when going from ms to mp + */ + *p->RequiredPrefetchBWOTO = s->prefetch_bw_oto; + #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l); dml2_printf("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c); @@ -6154,6 +6166,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, l->zero_array, //PrefetchBandwidthLuma, l->zero_array, //PrefetchBandwidthChroma, + l->zero_array, //PrefetchBWOTO l->zero_array, l->zero_array, l->zero_array, @@ -6190,6 +6203,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, l->zero_array, //PrefetchBandwidthLuma, l->zero_array, //PrefetchBandwidthChroma, + l->zero_array, //PrefetchBWOTO p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -6226,6 +6240,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, p->prefetch_bandwidth_l, p->prefetch_bandwidth_c, + p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -6262,6 +6277,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, p->prefetch_bandwidth_l, p->prefetch_bandwidth_c, + p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -6298,6 +6314,7 @@ static void calculate_peak_bandwidth_required( p->surface_read_bandwidth_c, p->prefetch_bandwidth_l, p->prefetch_bandwidth_c, + p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw p->excess_vactive_fill_bw_l, p->excess_vactive_fill_bw_c, p->cursor_bw, @@ -9060,6 +9077,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k]; CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c + CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &mode_lib->ms.RequiredPrefetchBWOTO[k]; CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k]; CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k]; CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k]; @@ -9204,6 +9222,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; @@ -9370,6 +9389,7 @@ static bool 
dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; @@ -11286,6 +11306,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k]; CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]; CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]; + CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &s->dummy_single_array[0][k]; CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]; CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k]; CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k]; @@ -11428,6 +11449,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = s->dummy_single_array[0]; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c; calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw; @@ -11560,6 +11582,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->prefetch_bandwidth_oto = s->dummy_single_array[k]; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c; calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index 23c0fca5515f..b7cb017b59ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -484,6 +484,8 @@ struct dml2_core_internal_mode_support { double WriteBandwidth[DML2_MAX_PLANES][DML2_MAX_WRITEBACK]; double 
RequiredPrefetchPixelDataBWLuma[DML2_MAX_PLANES];
double RequiredPrefetchPixelDataBWChroma[DML2_MAX_PLANES];
+ /* oto bw should also be considered when calculating urgent bw to avoid oto/equ mismatches between ms and mp */
+ double RequiredPrefetchBWOTO[DML2_MAX_PLANES];
double cursor_bw[DML2_MAX_PLANES];
double prefetch_cursor_bw[DML2_MAX_PLANES];
double prefetch_vmrow_bw[DML2_MAX_PLANES];
@@ -1381,6 +1383,7 @@ struct dml2_core_shared_get_urgent_bandwidth_required_locals {
double vm_row_bw;
double flip_and_active_bw;
double flip_and_prefetch_bw;
+ double flip_and_prefetch_bw_oto;
double active_and_excess_bw;
};

@@ -1792,6 +1795,7 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params {
double *VRatioPrefetchC;
double *RequiredPrefetchPixelDataBWLuma;
double *RequiredPrefetchPixelDataBWChroma;
+ double *RequiredPrefetchBWOTO;
bool *NotEnoughTimeForDynamicMetadata;
double *Tno_bw;
double *Tno_bw_flip;
@@ -2025,6 +2029,7 @@ struct dml2_core_calcs_calculate_peak_bandwidth_required_params {
double *surface_read_bandwidth_c;
double *prefetch_bandwidth_l;
double *prefetch_bandwidth_c;
+ double *prefetch_bandwidth_oto;
double *excess_vactive_fill_bw_l;
double *excess_vactive_fill_bw_c;
double *cursor_bw;
--
2.51.0

From c31b41f1cb32450d8ac176eef9bda979760040e7 Mon Sep 17 00:00:00 2001
From: Tom Chung
Date: Fri, 10 Jan 2025 16:09:45 +0800
Subject: [PATCH 09/16] drm/amd/display: Disable PSR-SU on some OLED panels

[Why]
PSR-SU may randomly cause glitching on some OLED panels.

[How]
Disable PSR-SU for certain PSR-SU-capable OLED panels.

Reviewed-by: Sun peng Li
Signed-off-by: Tom Chung
Signed-off-by: Zaeem Mohamed
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
.../drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c | 20 +++++++++++++++++++
1 file changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
index 45858bf1523d..104f03868266 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c
@@ -30,6 +30,23 @@
#include "amdgpu_dm.h"
#include "modules/power/power_helpers.h"

+static bool is_specific_oled_panel(struct dc_link *link)
+{
+ if (!link->dpcd_sink_ext_caps.bits.oled)
+ return false;
+
+ /* Disable PSR-SU for some OLED panels to avoid glitches */
+ if (link->dpcd_caps.sink_dev_id == 0xBA4159) {
+ uint8_t sink_dev_id_str1[] = {'4', '0', 'C', 'U', '1'};
+
+ if (!memcmp(link->dpcd_caps.sink_dev_id_str, sink_dev_id_str1,
+ sizeof(sink_dev_id_str1)))
+ return true;
+ }
+
+ return false;
+}
+
static bool link_supports_psrsu(struct dc_link *link)
{
struct dc *dc = link->ctx->dc;
@@ -40,6 +57,9 @@ static bool link_supports_psrsu(struct dc_link *link)
if (dc->ctx->dce_version < DCN_VERSION_3_1)
return false;

+ if (is_specific_oled_panel(link))
+ return false;
+
if (!is_psr_su_specific_panel(link))
return false;
--
2.51.0

From cbd97d621ece1d92c3542e52f8af7c04cd2c6afb Mon Sep 17 00:00:00 2001
From: Dillon Varone
Date: Tue, 14 Jan 2025 12:14:26 -0500
Subject: [PATCH 10/16] drm/amd/display: Amend DCPG IP control sequences to align with HW guidance

[WHY&HOW]
IP_REQUEST_CNTL should only be toggled off when it was originally off,
never unconditionally.
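The resulting sequence, sketched with the register helpers used in the
diff below: read the current enable state first, and only toggle it back
off if this function was the one that turned it on:

    uint32_t org_ip_request_cntl = 0;

    REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl);
    if (org_ip_request_cntl == 0)
            REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1);

    /* ... power-gating programming ... */

    if (org_ip_request_cntl == 0)
            REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0);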
Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 14 +++++--- .../amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 34 +++++++++++++++++++ .../amd/display/dc/hwss/dcn401/dcn401_hwseq.h | 3 ++ .../amd/display/dc/hwss/dcn401/dcn401_init.c | 2 +- 4 files changed, 48 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index a5e18ab72394..dec732c0c59c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -1266,14 +1266,18 @@ static void dcn20_power_on_plane_resources( struct dce_hwseq *hws, struct pipe_ctx *pipe_ctx) { + uint32_t org_ip_request_cntl = 0; + DC_LOGGER_INIT(hws->ctx->logger); if (hws->funcs.dpp_root_clock_control) hws->funcs.dpp_root_clock_control(hws, pipe_ctx->plane_res.dpp->inst, true); if (REG(DC_IP_REQUEST_CNTL)) { - REG_SET(DC_IP_REQUEST_CNTL, 0, - IP_REQUEST_EN, 1); + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, + IP_REQUEST_EN, 1); if (hws->funcs.dpp_pg_control) hws->funcs.dpp_pg_control(hws, pipe_ctx->plane_res.dpp->inst, true); @@ -1281,8 +1285,10 @@ static void dcn20_power_on_plane_resources( if (hws->funcs.hubp_pg_control) hws->funcs.hubp_pg_control(hws, pipe_ctx->plane_res.hubp->inst, true); - REG_SET(DC_IP_REQUEST_CNTL, 0, - IP_REQUEST_EN, 0); + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, + IP_REQUEST_EN, 0); + DC_LOG_DEBUG( "Un-gated front end for pipe %d\n", pipe_ctx->plane_res.hubp->inst); } diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 92bb820817b9..8ad0ff669b7a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -2610,3 +2610,37 @@ void dcn401_detect_pipe_changes(struct dc_state *old_state, new_pipe->update_flags.bits.test_pattern_changed = 1; } } + +void dcn401_plane_atomic_power_down(struct dc *dc, + struct dpp *dpp, + struct hubp *hubp) +{ + struct dce_hwseq *hws = dc->hwseq; + uint32_t org_ip_request_cntl = 0; + + DC_LOGGER_INIT(dc->ctx->logger); + + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, + IP_REQUEST_EN, 1); + + if (hws->funcs.dpp_pg_control) + hws->funcs.dpp_pg_control(hws, dpp->inst, false); + + if (hws->funcs.hubp_pg_control) + hws->funcs.hubp_pg_control(hws, hubp->inst, false); + + hubp->funcs->hubp_reset(hubp); + dpp->funcs->dpp_reset(dpp); + + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, + IP_REQUEST_EN, 0); + + DC_LOG_DEBUG( + "Power gated front end %d\n", hubp->inst); + + if (hws->funcs.dpp_root_clock_control) + hws->funcs.dpp_root_clock_control(hws, dpp->inst, false); +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 17cea748789e..dbd69d215b8b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -102,4 +102,7 @@ void dcn401_detect_pipe_changes( struct dc_state *new_state, struct pipe_ctx *old_pipe, struct pipe_ctx *new_pipe); +void dcn401_plane_atomic_power_down(struct dc *dc, + 
struct dpp *dpp,
+ struct hubp *hubp);
#endif /* __DC_HWSS_DCN401_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c
index 44cb376f97c1..a4e3501fadbb 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c
@@ -123,7 +123,7 @@ static const struct hwseq_private_funcs dcn401_private_funcs = {
.disable_vga = dcn20_disable_vga,
.bios_golden_init = dcn10_bios_golden_init,
.plane_atomic_disable = dcn20_plane_atomic_disable,
- .plane_atomic_power_down = dcn10_plane_atomic_power_down,
+ .plane_atomic_power_down = dcn401_plane_atomic_power_down,
.enable_power_gating_plane = dcn32_enable_power_gating_plane,
.hubp_pg_control = dcn32_hubp_pg_control,
.program_all_writeback_pipes_in_tree = dcn30_program_all_writeback_pipes_in_tree,
--
2.51.0

From 871f65a59f3cca534e54ab0efe9d976cdd05ac9a Mon Sep 17 00:00:00 2001
From: Hansen Dsouza
Date: Wed, 15 Jan 2025 14:21:24 -0500
Subject: [PATCH 11/16] drm/amd/display: Add boot option to reduce PHY SSC for HBR3

[Why]
Spread on DPREFCLK by 0.3 percent can have a negative effect on the sink
when PHY SSC is also spread by 0.3 percent.

[How]
Add a boot option for DMU to lower the PHY SSC.

Reviewed-by: Nicholas Kazlauskas
Signed-off-by: Hansen Dsouza
Signed-off-by: Zaeem Mohamed
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
index d9f31b191c69..3d0bba602b53 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c
@@ -371,6 +371,7 @@ void dmub_dcn31_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmu
boot_options.bits.usb4_cm_version = params->usb4_cm_version;
boot_options.bits.dpia_hpd_int_enable_supported = params->dpia_hpd_int_enable_supported;
boot_options.bits.power_optimization = params->power_optimization;
+ boot_options.bits.lower_hbr3_phy_ssc = params->lower_hbr3_phy_ssc;
boot_options.bits.sel_mux_phy_c_d_phy_f_g = (dmub->asic == DMUB_ASIC_DCN31B) ? 1 : 0;
--
2.51.0

From c87d202692de34ee71d1fd4679a549a29095658a Mon Sep 17 00:00:00 2001
From: Sung Lee
Date: Thu, 16 Jan 2025 09:45:54 -0500
Subject: [PATCH 12/16] drm/amd/display: Guard Possible Null Pointer Dereference

[WHY]
In some situations, dc->res_pool may be null.

[HOW]
Check if the pointer is null before dereferencing it.
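In outline, the guard applied in the diff below (names as in
dc_allow_idle_optimizations_internal); the && short-circuits, so neither
pointer is dereferenced when it is null:

    if (dc->res_pool && context) {
            for (i = 0; i < dc->res_pool->pipe_count; i++) {
                    pipe = &context->res_ctx.pipe_ctx[i];
                    subvp_pipe_type[i] = dc_state_get_pipe_subvp_type(context, pipe);
            }
    }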
Reviewed-by: Joshua Aberback
Signed-off-by: Sung Lee
Signed-off-by: Zaeem Mohamed
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
drivers/gpu/drm/amd/display/dc/core/dc.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index f08d3d467372..ce917714c7e0 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -5611,9 +5611,11 @@ void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const
if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->get_hard_min_memclk)
idle_dramclk_khz = dc->clk_mgr->funcs->get_hard_min_memclk(dc->clk_mgr);

- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- pipe = &context->res_ctx.pipe_ctx[i];
- subvp_pipe_type[i] = dc_state_get_pipe_subvp_type(context, pipe);
+ if (dc->res_pool && context) {
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ pipe = &context->res_ctx.pipe_ctx[i];
+ subvp_pipe_type[i] = dc_state_get_pipe_subvp_type(context, pipe);
+ }
}

DC_LOG_DC("%s: allow_idle=%d\n HardMinUClk_Khz=%d HardMinDramclk_Khz=%d\n Pipe_0=%d Pipe_1=%d Pipe_2=%d Pipe_3=%d Pipe_4=%d Pipe_5=%d (caller=%s)\n",
--
2.51.0

From a1d79eae960ce1642aed476d98e311aae46bfb82 Mon Sep 17 00:00:00 2001
From: Peichen Huang
Date: Mon, 23 Dec 2024 11:09:52 +0800
Subject: [PATCH 13/16] drm/amd/display: refactor dio link encoder assigning

[WHY]
We would like to have a new dio encoder assignment flow, which should be
aligned with the hpo assignment and have simple logic and data
representation.

[HOW]
1. Add a new config option to enable/disable the new code.
2. Keep the encoder-link mapping in res_ctx; the assigned encoder is
accessed through pipe_ctx.
3. Assign the dio encoder when adding a stream to the ctx.

Reviewed-by: Jun Lei
Reviewed-by: Meenakshikumar Somasundaram
Signed-off-by: Peichen Huang
Signed-off-by: Zaeem Mohamed
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +-
.../gpu/drm/amd/display/dc/core/dc_resource.c | 202 +++++++++++++++++-
drivers/gpu/drm/amd/display/dc/dc.h | 2 +
.../gpu/drm/amd/display/dc/inc/core_types.h | 3 +
4 files changed, 206 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index ce917714c7e0..a2b0331ef579 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -2325,7 +2325,7 @@ enum dc_status dc_commit_streams(struct dc *dc, struct dc_commit_streams_params
/*
* Only update link encoder to stream assignment after bandwidth validation passed.
*/ - if (res == DC_OK && dc->res_pool->funcs->link_encs_assign) + if (res == DC_OK && dc->res_pool->funcs->link_encs_assign && !dc->config.unify_link_enc_assignment) dc->res_pool->funcs->link_encs_assign( dc, context, context->streams, context->stream_count); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 7251587c3fb6..f59722e17abd 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2683,6 +2683,162 @@ static void remove_hpo_dp_link_enc_from_ctx(struct resource_context *res_ctx, } } +static inline int find_acquired_dio_link_enc_for_link( + const struct resource_context *res_ctx, + const struct dc_link *link) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(res_ctx->dio_link_enc_ref_cnts); i++) + if (res_ctx->dio_link_enc_ref_cnts[i] > 0 && + res_ctx->dio_link_enc_to_link_idx[i] == link->link_index) + return i; + + return -1; +} + +static inline int find_fixed_dio_link_enc(const struct dc_link *link) +{ + /* the 8b10b dp phy can only use fixed link encoder */ + return link->eng_id; +} + +static inline int find_free_dio_link_enc(const struct resource_context *res_ctx, + const struct dc_link *link, const struct resource_pool *pool) +{ + int i; + int enc_count = pool->dig_link_enc_count; + + /* for dpia, check preferred encoder first and then the next one */ + for (i = 0; i < enc_count; i++) + if (res_ctx->dio_link_enc_ref_cnts[(link->dpia_preferred_eng_id + i) % enc_count] == 0) + break; + + return (i >= 0 && i < enc_count) ? (link->dpia_preferred_eng_id + i) % enc_count : -1; +} + +static inline void acquire_dio_link_enc( + struct resource_context *res_ctx, + unsigned int link_index, + int enc_index) +{ + res_ctx->dio_link_enc_to_link_idx[enc_index] = link_index; + res_ctx->dio_link_enc_ref_cnts[enc_index] = 1; +} + +static inline void retain_dio_link_enc( + struct resource_context *res_ctx, + int enc_index) +{ + res_ctx->dio_link_enc_ref_cnts[enc_index]++; +} + +static inline void release_dio_link_enc( + struct resource_context *res_ctx, + int enc_index) +{ + ASSERT(res_ctx->dio_link_enc_ref_cnts[enc_index] > 0); + res_ctx->dio_link_enc_ref_cnts[enc_index]--; +} + +static bool is_dio_enc_acquired_by_other_link(const struct dc_link *link, + int enc_index, + int *link_index) +{ + const struct dc *dc = link->dc; + const struct resource_context *res_ctx = &dc->current_state->res_ctx; + + /* pass the link_index that acquired the enc_index */ + if (res_ctx->dio_link_enc_ref_cnts[enc_index] > 0 && + res_ctx->dio_link_enc_to_link_idx[enc_index] != link->link_index) { + *link_index = res_ctx->dio_link_enc_to_link_idx[enc_index]; + return true; + } + + return false; +} + +static void swap_dio_link_enc_to_muxable_ctx(struct dc_state *context, + const struct resource_pool *pool, + int new_encoder, + int old_encoder) +{ + struct resource_context *res_ctx = &context->res_ctx; + int stream_count = context->stream_count; + int i = 0; + + res_ctx->dio_link_enc_ref_cnts[new_encoder] = res_ctx->dio_link_enc_ref_cnts[old_encoder]; + res_ctx->dio_link_enc_to_link_idx[new_encoder] = res_ctx->dio_link_enc_to_link_idx[old_encoder]; + res_ctx->dio_link_enc_ref_cnts[old_encoder] = 0; + + for (i = 0; i < stream_count; i++) { + struct dc_stream_state *stream = context->streams[i]; + struct pipe_ctx *pipe_ctx = resource_get_otg_master_for_stream(&context->res_ctx, stream); + + if (pipe_ctx && pipe_ctx->link_res.dio_link_enc == pool->link_encoders[old_encoder]) + 
+			pipe_ctx->link_res.dio_link_enc = pool->link_encoders[new_encoder];
+	}
+}
+
+static bool add_dio_link_enc_to_ctx(const struct dc *dc,
+		struct dc_state *context,
+		const struct resource_pool *pool,
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_state *stream)
+{
+	struct resource_context *res_ctx = &context->res_ctx;
+	int enc_index;
+
+	enc_index = find_acquired_dio_link_enc_for_link(res_ctx, stream->link);
+
+	if (enc_index >= 0) {
+		retain_dio_link_enc(res_ctx, enc_index);
+	} else {
+		if (stream->link->is_dig_mapping_flexible)
+			enc_index = find_free_dio_link_enc(res_ctx, stream->link, pool);
+		else {
+			int link_index = 0;
+
+			enc_index = find_fixed_dio_link_enc(stream->link);
+			/* Fixed mapping link can only use its fixed link encoder.
+			 * If the encoder is acquired by other link then get a new free encoder and swap the new
+			 * one into the acquiring link.
+			 */
+			if (enc_index >= 0 && is_dio_enc_acquired_by_other_link(stream->link, enc_index, &link_index)) {
+				int new_enc_index = find_free_dio_link_enc(res_ctx, dc->links[link_index], pool);
+
+				if (new_enc_index >= 0)
+					swap_dio_link_enc_to_muxable_ctx(context, pool, new_enc_index, enc_index);
+				else
+					return false;
+			}
+		}
+
+		if (enc_index >= 0)
+			acquire_dio_link_enc(res_ctx, stream->link->link_index, enc_index);
+	}
+
+	if (enc_index >= 0)
+		pipe_ctx->link_res.dio_link_enc = pool->link_encoders[enc_index];
+
+	return pipe_ctx->link_res.dio_link_enc != NULL;
+}
+
+static void remove_dio_link_enc_from_ctx(struct resource_context *res_ctx,
+		struct pipe_ctx *pipe_ctx,
+		struct dc_stream_state *stream)
+{
+	int enc_index = -1;
+
+	if (stream->link)
+		enc_index = find_acquired_dio_link_enc_for_link(res_ctx, stream->link);
+
+	if (enc_index >= 0) {
+		release_dio_link_enc(res_ctx, enc_index);
+		pipe_ctx->link_res.dio_link_enc = NULL;
+	}
+}
+
 static int get_num_of_free_pipes(const struct resource_pool *pool, const struct dc_state *context)
 {
 	int i;
@@ -2730,6 +2886,10 @@ void resource_remove_otg_master_for_stream_output(struct dc_state *context,
 		remove_hpo_dp_link_enc_from_ctx(
 				&context->res_ctx, otg_master, stream);
 	}
+
+	if (stream->ctx->dc->config.unify_link_enc_assignment)
+		remove_dio_link_enc_from_ctx(&context->res_ctx, otg_master, stream);
+
 	if (otg_master->stream_res.audio)
 		update_audio_usage(
 				&context->res_ctx,
@@ -2744,6 +2904,7 @@ void resource_remove_otg_master_for_stream_output(struct dc_state *context,
 	if (pool->funcs->remove_stream_from_ctx)
 		pool->funcs->remove_stream_from_ctx(
 				stream->ctx->dc, context, stream);
+
 	memset(otg_master, 0, sizeof(*otg_master));
 }
 
@@ -3716,6 +3877,7 @@ enum dc_status resource_map_pool_resources(
 	struct pipe_ctx *pipe_ctx = NULL;
 	int pipe_idx = -1;
 	bool acquired = false;
+	bool is_dio_encoder = true;
 
 	calculate_phy_pix_clks(stream);
 
@@ -3781,6 +3943,10 @@ enum dc_status resource_map_pool_resources(
 		}
 	}
 
+	if (dc->config.unify_link_enc_assignment && is_dio_encoder)
+		if (!add_dio_link_enc_to_ctx(dc, context, pool, pipe_ctx, stream))
+			return DC_NO_LINK_ENC_RESOURCE;
+
 	/* TODO: Add check if ASIC support and EDID audio */
 	if (!stream->converter_disable_audio &&
 	    dc_is_audio_capable_signal(pipe_ctx->stream->signal) &&
@@ -5017,6 +5183,28 @@ void get_audio_check(struct audio_info *aud_modes,
 	}
 }
 
+static struct link_encoder *get_temp_dio_link_enc(
+		const struct resource_context *res_ctx,
+		const struct resource_pool *const pool,
+		const struct dc_link *link)
+{
+	struct link_encoder *link_enc = NULL;
+	int enc_index;
+
+	if (link->is_dig_mapping_flexible)
+		enc_index = find_acquired_dio_link_enc_for_link(res_ctx, link);
+	else
+		enc_index = link->eng_id;
+
+	if (enc_index < 0)
+		enc_index = find_free_dio_link_enc(res_ctx, link, pool);
+
+	if (enc_index >= 0)
+		link_enc = pool->link_encoders[enc_index];
+
+	return link_enc;
+}
+
 static struct hpo_dp_link_encoder *get_temp_hpo_dp_link_enc(
 		const struct resource_context *res_ctx,
 		const struct resource_pool *const pool,
@@ -5046,11 +5234,17 @@ bool get_temp_dp_link_res(struct dc_link *link,
 	memset(link_res, 0, sizeof(*link_res));
 
 	if (dc->link_srv->dp_get_encoding_format(link_settings) == DP_128b_132b_ENCODING) {
-		link_res->hpo_dp_link_enc = get_temp_hpo_dp_link_enc(res_ctx,
-				dc->res_pool, link);
+		link_res->hpo_dp_link_enc = get_temp_hpo_dp_link_enc(res_ctx, dc->res_pool, link);
 		if (!link_res->hpo_dp_link_enc)
 			return false;
+	} else if (dc->link_srv->dp_get_encoding_format(link_settings) == DP_8b_10b_ENCODING &&
+			dc->config.unify_link_enc_assignment) {
+		link_res->dio_link_enc = get_temp_dio_link_enc(res_ctx,
+				dc->res_pool, link);
+		if (!link_res->dio_link_enc)
+			return false;
 	}
+
 	return true;
 }
 
@@ -5322,6 +5516,10 @@ enum dc_status update_dp_encoder_resources_for_test_harness(const struct dc *dc,
 		remove_hpo_dp_link_enc_from_ctx(&context->res_ctx, pipe_ctx, pipe_ctx->stream);
 	}
 
+	if (pipe_ctx->link_res.dio_link_enc == NULL && dc->config.unify_link_enc_assignment)
+		if (!add_dio_link_enc_to_ctx(dc, context, dc->res_pool, pipe_ctx, pipe_ctx->stream))
+			return DC_NO_LINK_ENC_RESOURCE;
+
 	return DC_OK;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 019459dfd6fe..06d9cf0a7edc 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -473,6 +473,7 @@ struct dc_config {
 	bool consolidated_dpia_dp_lt;
 	bool set_pipe_unlock_order;
 	bool enable_dpia_pre_training;
+	bool unify_link_enc_assignment;
 };
 
 enum visual_confirm {
@@ -778,6 +779,7 @@ union dpia_debug_options {
 		uint32_t disable_usb4_pm_support:1; /* bit 5 */
 		uint32_t enable_consolidated_dpia_dp_lt:1; /* bit 6 */
 		uint32_t enable_dpia_pre_training:1; /* bit 7 */
+		uint32_t unify_link_enc_assignment:1; /* bit 8 */
 		uint32_t reserved:24;
 	} bits;
 	uint32_t raw;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 652d52040f4e..37632be09e09 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -376,6 +376,7 @@ struct plane_resource {
 
 /* all mappable hardware resources used to enable a link */
 struct link_resource {
+	struct link_encoder *dio_link_enc;
 	struct hpo_dp_link_encoder *hpo_dp_link_enc;
 };
 
@@ -500,6 +501,8 @@ struct resource_context {
 	uint8_t dp_clock_source_ref_count;
 	bool is_dsc_acquired[MAX_PIPES];
 	struct link_enc_cfg_context link_enc_cfg_ctx;
+	unsigned int dio_link_enc_to_link_idx[MAX_DIG_LINK_ENCODERS];
+	int dio_link_enc_ref_cnts[MAX_DIG_LINK_ENCODERS];
 	bool is_hpo_dp_stream_enc_acquired[MAX_HPO_DP2_ENCODERS];
 	unsigned int hpo_dp_link_enc_to_link_idx[MAX_HPO_DP2_LINK_ENCODERS];
 	int hpo_dp_link_enc_ref_cnts[MAX_HPO_DP2_LINK_ENCODERS];
--
2.51.0

From 942bd112c92a13611899cdb075944b6e0a3b4165 Mon Sep 17 00:00:00 2001
From: Aric Cyr
Date: Sun, 19 Jan 2025 21:45:59 -0500
Subject: [PATCH 14/16] drm/amd/display: 3.2.318

This version brings along the following fixes:
- Fixes to psr_version, dcn35 register address, DCPG OP control sequences
- Improvements to CR AUX RD interval interpretation, dio link encoder
- Disable PSR-SU on some OLED panels

Acked-by: Aurabindo Pillai
Signed-off-by: Aric Cyr
Signed-off-by: Zaeem Mohamed
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 06d9cf0a7edc..559446dcd431 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -55,7 +55,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.317"
+#define DC_VER "3.2.318"
 
 #define MAX_SURFACES 4
 #define MAX_PLANES 6
--
2.51.0

From e818635a31d28de9c991c27b663f3a222d9b6723 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Mon, 20 Jan 2025 14:30:59 -0500
Subject: [PATCH 15/16] drm/amdgpu: update and cleanup PM4 headers

Consolidate PM4 definitions. Most of these were previously only
defined in UMDs. Add them here as well and sync with latest packets.
Also no need to include soc15d.h on gfx10+.

Reviewed-by: Feifei Xu
Suggested-by: Saurabh Verma
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 -
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 1 -
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 1 -
 drivers/gpu/drm/amd/amdgpu/nvd.h | 208 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/soc15d.h | 139 +++++++++++++++++
 5 files changed, 347 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 1878c83ff7e3..a2b26551314a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -40,7 +40,6 @@
 #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
 
 #include "soc15.h"
-#include "soc15d.h"
 #include "soc15_common.h"
 #include "clearstate_gfx10.h"
 #include "v10_structs.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index e27b5f8705c1..1ecdb4268ec6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -42,7 +42,6 @@
 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
 
 #include "soc15.h"
-#include "soc15d.h"
 #include "clearstate_gfx11.h"
 #include "v11_structs.h"
 #include "gfx_v11_0.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index b5bddebf528e..37c522791035 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -40,7 +40,6 @@
 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
 
 #include "soc15.h"
-#include "soc15d.h"
 #include "clearstate_gfx12.h"
 #include "v12_structs.h"
 #include "gfx_v12_0.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h
index 631dafb92299..56f1bfac0b20 100644
--- a/drivers/gpu/drm/amd/amdgpu/nvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/nvd.h
@@ -64,6 +64,24 @@
 #define PACKET3_INDIRECT_BUFFER_CNST_END 0x19
 #define PACKET3_ATOMIC_GDS 0x1D
 #define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x7F) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)))
+#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0
+#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1
+#define PACKET3_ATOMIC_MEM__COMMAND__WAIT_FOR_WRITE_CONFIRMATION 2
+#define PACKET3_ATOMIC_MEM__COMMAND__SEND_AND_CONTINUE 3
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__LRU 0
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__STREAM 1
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__NOA 2
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY__BYPASS 3
 #define PACKET3_OCCLUSION_QUERY 0x1F
 #define PACKET3_SET_PREDICATION 0x20
 #define PACKET3_REG_RMW 0x21
@@ -105,6 +123,38 @@
  * 1 - pfp
  * 2 - ce
  */
+#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_WRITE_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WRITE_DATA__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 24)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_LO(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2
+#define PACKET3_WRITE_DATA__DST_SEL__GDS 3
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6
+#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0
+#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1
+#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0
+#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1
+#define PACKET3_WRITE_DATA__MODE__PF_VF_DISABLED 0
+#define PACKET3_WRITE_DATA__MODE__PF_VF_ENABLED 1
+#define PACKET3_WRITE_DATA__TEMPORAL__RT 0
+#define PACKET3_WRITE_DATA__TEMPORAL__NT 1
+#define PACKET3_WRITE_DATA__TEMPORAL__HT 2
+#define PACKET3_WRITE_DATA__TEMPORAL__LU 3
+#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0
+#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1
+#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2
+#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3
 #define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
 #define PACKET3_MEM_SEMAPHORE 0x39
 # define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
@@ -135,6 +185,42 @@
 /* 0 - me
  * 1 - pfp
  */
+#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4)
+#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6)
+#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24)
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0X3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0X3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2
+#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3
+#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0
+#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__LRU 0
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__STREAM 1
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__NOA 2
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__BYPASS 3
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__RT 0
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__NT 1
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__HT 2
+#define PACKET3_WAIT_REG_MEM__TEMPORAL__LU 3
 #define PACKET3_INDIRECT_BUFFER 0x3F
 #define INDIRECT_BUFFER_VALID (1 << 23)
 #define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
@@ -144,8 +230,94 @@
  */
 #define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
 #define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x))
+#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0)
+#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23)
+#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24)
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__RT 0
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__NT 1
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__HT 2
+#define PACKET3_INDIRECT_BUFFER__TEMPORAL__LU 3
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__NOA 2
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__BYPASS 3
 #define PACKET3_COND_INDIRECT_BUFFER 0x3F
 #define PACKET3_COPY_DATA 0x40
+#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0)
+#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__SRC_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_COPY_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 23)
+#define PACKET3_COPY_DATA__DST_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET_LO(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_COPY_DATA__DST_REG_OFFSET_LO(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2_OBSOLETE 1
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__SRC_SEL__GDS 3
+#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5
+#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8
+#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9
+#define PACKET3_COPY_DATA__SRC_SEL__SYSTEM_CLOCK_COUNT 10
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__DST_SEL__GDS 3
+#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2_OBSOLETE 5
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__RT 0
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__NT 1
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__HT 2
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__LU 3
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__NOA 2
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__BYPASS 3
+#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0
+#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1
+#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0
+#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1
+#define PACKET3_COPY_DATA__MODE__PF_VF_DISABLED 0
+#define PACKET3_COPY_DATA__MODE__PF_VF_ENABLED 1
+#define PACKET3_COPY_DATA__DST_TEMPORAL__RT 0
+#define PACKET3_COPY_DATA__DST_TEMPORAL__NT 1
+#define PACKET3_COPY_DATA__DST_TEMPORAL__HT 2
+#define PACKET3_COPY_DATA__DST_TEMPORAL__LU 3
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__NOA 2
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__BYPASS 3
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1
 #define PACKET3_CP_DMA 0x41
 #define PACKET3_PFP_SYNC_ME 0x42
 #define PACKET3_SURFACE_SYNC 0x43
@@ -160,6 +332,23 @@
  * 3 - SAMPLE_STREAMOUTSTAT*
  * 4 - *S_PARTIAL_FLUSH
  */
+#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29)
+#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 0)
+#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) ((unsigned)(x))
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTAT 2
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS 8
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS1 9
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS2 10
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS3 11
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__LEGACY_MODE 0
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE1 1
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__NEW_MODE 2
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE3 3
 #define PACKET3_EVENT_WRITE_EOP 0x47
 #define PACKET3_EVENT_WRITE_EOS 0x48
 #define PACKET3_RELEASE_MEM 0x49
@@ -304,6 +493,12 @@
  * 2: REVERSE
 */
 #define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18)
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FFFF) << 0)
 #define PACKET3_REWIND 0x59
 #define PACKET3_INTERRUPT 0x5A
 #define PACKET3_GEN_PDEPTE 0x5B
@@ -330,11 +525,17 @@
 #define PACKET3_SET_SH_REG 0x76
 #define PACKET3_SET_SH_REG_START 0x00002c00
 #define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23)
+#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28)
+#define PACKET3_SET_SH_REG__INDEX__DEFAULT 0
+#define PACKET3_SET_SH_REG__INDEX__INSERT_VMID 1
 #define PACKET3_SET_SH_REG_OFFSET 0x77
 #define PACKET3_SET_QUEUE_REG 0x78
 #define PACKET3_SET_UCONFIG_REG 0x79
 #define PACKET3_SET_UCONFIG_REG_START 0x0000c000
 #define PACKET3_SET_UCONFIG_REG_END 0x0000c400
+#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
 #define PACKET3_SET_UCONFIG_REG_INDEX 0x7A
 #define PACKET3_FORWARD_HEADER 0x7C
 #define PACKET3_SCRATCH_RAM_WRITE 0x7D
@@ -369,6 +570,7 @@
 # define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
 # define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
 # define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
+# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
 #define PACKET3_AQL_PACKET 0x99
 #define PACKET3_DMA_DATA_FILL_MULTI 0x9A
 #define PACKET3_SET_SH_REG_INDEX 0x9B
@@ -462,6 +664,12 @@
 # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25)
 #define PACKET3_RUN_LIST 0xA5
 #define PACKET3_MAP_PROCESS_VM 0xA6
+
+#define PACKET3_RUN_CLEANER_SHADER 0xD2
+/* 1. header
+ * 2. RESERVED [31:0]
+ */
+
 /* GFX11 */
 #define PACKET3_SET_Q_PREEMPTION_MODE 0xF0
 # define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index b9cbeb389edc..a5000c171c02 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -93,11 +93,25 @@
 #define PACKET3_DISPATCH_INDIRECT 0x16
 #define PACKET3_ATOMIC_GDS 0x1D
 #define PACKET3_ATOMIC_MEM 0x1E
+#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0)
+#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0
+#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1
 #define PACKET3_OCCLUSION_QUERY 0x1F
 #define PACKET3_SET_PREDICATION 0x20
 #define PACKET3_REG_RMW 0x21
 #define PACKET3_COND_EXEC 0x22
 #define PACKET3_PRED_EXEC 0x23
+#define PACKET3_PRED_EXEC__EXEC_COUNT(x) ((((unsigned)(x)) & 0x3FFF) << 0)
+#define PACKET3_PRED_EXEC__VIRTUAL_XCC_ID_SELECT(x) ((((unsigned)(x)) & 0xFF) << 24)
 #define PACKET3_DRAW_INDIRECT 0x24
 #define PACKET3_DRAW_INDEX_INDIRECT 0x25
 #define PACKET3_INDEX_BASE 0x26
@@ -132,6 +146,28 @@
  * 1 - pfp
  * 2 - ce
 */
+#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_WRITE_DATA__RESUME_VF_MI300(x) ((((unsigned)(x)) & 0x1) << 19)
+#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2
+#define PACKET3_WRITE_DATA__DST_SEL__GDS 3
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5
+#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6
+#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0
+#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1
+#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0
+#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1
+#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0
+#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1
+#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2
+#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3
 #define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
 #define PACKET3_MEM_SEMAPHORE 0x39
 # define PACKET3_SEM_USE_MAILBOX (0x1 << 16)
@@ -160,6 +196,33 @@
 /* 0 - me
  * 1 - pfp
  */
+#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4)
+#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6)
+#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22)
+#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24)
+#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x))
+#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1
+#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2
+#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3
+#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5
+#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0
+#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0
+#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1
+#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3
 #define PACKET3_INDIRECT_BUFFER 0x3F
 #define INDIRECT_BUFFER_VALID (1 << 23)
 #define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28)
@@ -169,7 +232,63 @@
 */
 #define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21)
 #define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x))
+#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0)
+#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21)
+#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23)
+#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24)
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28)
+#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0
+#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1
 #define PACKET3_COPY_DATA 0x40
+#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0)
+#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13)
+#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16)
+#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20)
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25)
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29)
+#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0)
+#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2)
+#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x))
+#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__SRC_SEL__MEMORY 1
+#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__SRC_SEL__GDS 3
+#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5
+#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7
+#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8
+#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0
+#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2
+#define PACKET3_COPY_DATA__DST_SEL__GDS 3
+#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4
+#define PACKET3_COPY_DATA__DST_SEL__MEMORY 5
+#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0
+#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1
+#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0
+#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0
+#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0
+#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1
 #define PACKET3_PFP_SYNC_ME 0x42
 #define PACKET3_COND_WRITE 0x45
 #define PACKET3_EVENT_WRITE 0x46
@@ -181,6 +300,15 @@
  * 3 - SAMPLE_STREAMOUTSTAT*
  * 4 - *S_PARTIAL_FLUSH
  */
+#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8)
+#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 31)
+#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29)
+#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3)
+#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) (((unsigned)(x)) << 0)
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTATS 2
+#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4
 #define PACKET3_RELEASE_MEM 0x49
 #define EVENT_TYPE(x) ((x) << 0)
 #define EVENT_INDEX(x) ((x) << 8)
@@ -286,6 +414,13 @@
 #define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(x) ((x) << 29)
 #define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_WB_ACTION_ENA(x) ((x) << 30)
 #define PACKET3_REWIND 0x59
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0)
+#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI_VG10(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x))
+#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FF) << 0)
 #define PACKET3_LOAD_UCONFIG_REG 0x5E
 #define PACKET3_LOAD_SH_REG 0x5F
 #define PACKET3_LOAD_CONFIG_REG 0x60
@@ -300,12 +435,16 @@
 #define PACKET3_SET_SH_REG 0x76
 #define PACKET3_SET_SH_REG_START 0x00002c00
 #define PACKET3_SET_SH_REG_END 0x00003000
+#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
+#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23)
+#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28)
 #define PACKET3_SET_SH_REG_OFFSET 0x77
 #define PACKET3_SET_QUEUE_REG 0x78
 #define PACKET3_SET_UCONFIG_REG 0x79
 #define PACKET3_SET_UCONFIG_REG_START 0x0000c000
 #define PACKET3_SET_UCONFIG_REG_END 0x0000c400
 #define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28)
+#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0)
 #define PACKET3_SCRATCH_RAM_WRITE 0x7D
 #define PACKET3_SCRATCH_RAM_READ 0x7E
 #define PACKET3_LOAD_CONST_RAM 0x80
--
2.51.0

From 17585c07c20b063d0b6a2740a5696388d009e9ff Mon Sep 17 00:00:00 2001
From: Srinivasan Shanmugam
Date: Fri, 24 Jan 2025 11:51:53 +0530
Subject: [PATCH 16/16] drm/amdgpu/gfx10: Enable cleaner shader for GFX10.1.1/10.1.2 GPUs
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Enable the cleaner shader for GFX10.1.1/10.1.2 GPUs to provide data
isolation between GPU workloads. The cleaner shader is responsible for
clearing the Local Data Store (LDS), Vector General Purpose Registers
(VGPRs), and Scalar General Purpose Registers (SGPRs), which helps
prevent data leakage and ensures accurate computation results.

This update extends cleaner shader support to GFX10.1.1/10.1.2 GPUs,
previously available for GFX10.1.10. It enhances security by clearing
GPU memory between processes and maintains a consistent GPU state
across KGD and KFD workloads.

Cc: Christian König
Cc: Alex Deucher
Signed-off-by: Srinivasan Shanmugam
Reviewed-by: Alex Deucher
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index a2b26551314a..d70574a25326 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4794,6 +4794,8 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
 	}
 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
 	case IP_VERSION(10, 1, 10):
+	case IP_VERSION(10, 1, 1):
+	case IP_VERSION(10, 1, 2):
 		adev->gfx.cleaner_shader_ptr = gfx_10_1_10_cleaner_shader_hex;
 		adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex);
 		if (adev->gfx.me_fw_version >= 101 &&
--
2.51.0
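
Usage note for the PM4 header update in patch 15 (illustrative, not part of
the series): the PACKET3_<PKT>__<FIELD>(x) macros encode each field's mask
and shift, and the PACKET3_<PKT>__<FIELD>__<VALUE> constants name the legal
encodings, so ring-emit code no longer needs magic shift-and-or expressions.
The sketch below shows how a WRITE_DATA and a WAIT_REG_MEM packet could be
assembled with the new nvd.h definitions. PACKET3(), amdgpu_ring_write(),
and lower_32_bits()/upper_32_bits() are the existing kernel helpers; the
example_* function names, the dword-aligned addresses, and the poll interval
value are assumptions made for the example.

	/* Write one dword to GPU memory and wait for write confirmation. */
	static void example_emit_write_data(struct amdgpu_ring *ring,
					    uint64_t dst_addr, uint32_t value)
	{
		/* header: opcode plus number of body dwords minus one */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		/* CONTROL dword: destination select and write confirmation */
		amdgpu_ring_write(ring,
			PACKET3_WRITE_DATA__DST_SEL(PACKET3_WRITE_DATA__DST_SEL__MEMORY) |
			PACKET3_WRITE_DATA__WR_CONFIRM(PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION));
		/* DST_MEM_ADDR_LO() masks the dword address and shifts it back into bits [31:2] */
		amdgpu_ring_write(ring, PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(lower_32_bits(dst_addr) >> 2));
		amdgpu_ring_write(ring, PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(upper_32_bits(dst_addr)));
		amdgpu_ring_write(ring, value);
	}

	/* Poll a memory location until (*poll_addr & mask) == ref. */
	static void example_emit_wait_mem(struct amdgpu_ring *ring,
					  uint64_t poll_addr, uint32_t ref, uint32_t mask)
	{
		amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
		amdgpu_ring_write(ring,
			PACKET3_WAIT_REG_MEM__FUNCTION(PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE) |
			PACKET3_WAIT_REG_MEM__MEM_SPACE(PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE) |
			PACKET3_WAIT_REG_MEM__OPERATION(PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM));
		amdgpu_ring_write(ring, PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(lower_32_bits(poll_addr) >> 2));
		amdgpu_ring_write(ring, PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(upper_32_bits(poll_addr)));
		amdgpu_ring_write(ring, PACKET3_WAIT_REG_MEM__REFERENCE(ref));
		amdgpu_ring_write(ring, PACKET3_WAIT_REG_MEM__MASK(mask));
		/* re-check the poll condition every 0x10 clock intervals */
		amdgpu_ring_write(ring, PACKET3_WAIT_REG_MEM__POLL_INTERVAL(0x10));
	}

Because the field macros mask their argument, out-of-range values are
silently truncated rather than corrupting neighbouring fields, which is the
same behaviour the previous hand-written shift expressions had.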