From 91c9e221fe2553edf2db71627d8453f083de87a1 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 31 Oct 2024 16:28:48 +0100 Subject: [PATCH 01/16] drm/amdgpu: prevent NULL pointer dereference if ATIF is not supported acpi_evaluate_object() may return AE_NOT_FOUND (failure), which would result in dereferencing buffer.pointer (obj) while being NULL. Although this case may be unrealistic for the current code, it is still better to protect against possible bugs. Bail out also when status is AE_NOT_FOUND. This fixes 1 FORWARD_NULL issue reported by Coverity Report: CID 1600951: Null pointer dereferences (FORWARD_NULL) Signed-off-by: Antonio Quartulli Fixes: c9b7c809b89f ("drm/amd: Guard against bad data for ATIF ACPI method") Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20241031152848.4716-1-antonio@mandelbit.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index cce85389427f..b8d4e07d2043 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -172,8 +172,8 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif, &buffer); obj = (union acpi_object *)buffer.pointer; - /* Fail if calling the method fails and ATIF is supported */ - if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { + /* Fail if calling the method fails */ + if (ACPI_FAILURE(status)) { DRM_DEBUG_DRIVER("failed to evaluate ATIF got %s\n", acpi_format_exception(status)); kfree(obj); -- 2.51.0 From 136ce12bd5907388cb4e9aa63ee5c9c8c441640b Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 1 Nov 2024 11:55:25 +0800 Subject: [PATCH 02/16] drm/amd/pm: always pick the pptable from IFWI always pick the pptable from IFWI on smu v14.0.2/3 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 65 +------------------ 1 file changed, 1 insertion(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index cefe10b95d8e..884938d69fca 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -367,54 +367,6 @@ static int smu_v14_0_2_store_powerplay_table(struct smu_context *smu) return 0; } -#ifndef atom_smc_dpm_info_table_14_0_0 -struct atom_smc_dpm_info_table_14_0_0 { - struct atom_common_table_header table_header; - BoardTable_t BoardTable; -}; -#endif - -static int smu_v14_0_2_append_powerplay_table(struct smu_context *smu) -{ - struct smu_table_context *table_context = &smu->smu_table; - PPTable_t *smc_pptable = table_context->driver_pptable; - struct atom_smc_dpm_info_table_14_0_0 *smc_dpm_table; - BoardTable_t *BoardTable = &smc_pptable->BoardTable; - int index, ret; - - index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - smc_dpm_info); - - ret = amdgpu_atombios_get_data_table(smu->adev, index, NULL, NULL, NULL, - (uint8_t **)&smc_dpm_table); - if (ret) - return ret; - - memcpy(BoardTable, &smc_dpm_table->BoardTable, sizeof(BoardTable_t)); - - return 0; -} - -#if 0 -static int smu_v14_0_2_get_pptable_from_pmfw(struct smu_context *smu, - void **table, - uint32_t *size) -{ - struct smu_table_context *smu_table = &smu->smu_table; - void *combo_pptable = smu_table->combo_pptable; - int ret = 0; - - ret = smu_cmn_get_combo_pptable(smu); - if (ret) - return ret; - - *table = combo_pptable; - *size = sizeof(struct smu_14_0_powerplay_table); - - return 0; -} -#endif - static int smu_v14_0_2_get_pptable_from_pmfw(struct smu_context *smu, void **table, uint32_t *size) @@ -436,16 +388,12 @@ static int smu_v14_0_2_get_pptable_from_pmfw(struct smu_context *smu, static int smu_v14_0_2_setup_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; - struct amdgpu_device *adev = smu->adev; int ret = 0; if (amdgpu_sriov_vf(smu->adev)) return 0; - if (!adev->scpm_enabled) - ret = smu_v14_0_setup_pptable(smu); - else - ret = smu_v14_0_2_get_pptable_from_pmfw(smu, + ret = smu_v14_0_2_get_pptable_from_pmfw(smu, &smu_table->power_play_table, &smu_table->power_play_table_size); if (ret) @@ -455,16 +403,6 @@ static int smu_v14_0_2_setup_pptable(struct smu_context *smu) if (ret) return ret; - /* - * With SCPM enabled, the operation below will be handled - * by PSP. Driver involvment is unnecessary and useless. - */ - if (!adev->scpm_enabled) { - ret = smu_v14_0_2_append_powerplay_table(smu); - if (ret) - return ret; - } - ret = smu_v14_0_2_check_powerplay_table(smu); if (ret) return ret; @@ -2786,7 +2724,6 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .check_fw_status = smu_v14_0_check_fw_status, .setup_pptable = smu_v14_0_2_setup_pptable, .check_fw_version = smu_v14_0_check_fw_version, - .write_pptable = smu_cmn_write_pptable, .set_driver_table_location = smu_v14_0_set_driver_table_location, .system_features_control = smu_v14_0_system_features_control, .set_allowed_mask = smu_v14_0_set_allowed_mask, -- 2.51.0 From 922f0e00017b09d9d47e3efac008c8b20ed546a0 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 4 Oct 2024 09:12:39 +0530 Subject: [PATCH 03/16] drm/amdkfd: Use dynamic allocation for CU occupancy array in 'kfd_get_cu_occupancy()' MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The `kfd_get_cu_occupancy` function previously declared a large `cu_occupancy` array as a local variable, which could lead to stack overflows due to excessive stack usage. This commit replaces the static array allocation with dynamic memory allocation using `kcalloc`, thereby reducing the stack size. This change avoids the risk of stack overflows in kernel space, in scenarios where `AMDGPU_MAX_QUEUES` is large. The allocated memory is freed using `kfree` before the function returns to prevent memory leaks. Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_process.c: In function ‘kfd_get_cu_occupancy’: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_process.c:322:1: warning: the frame size of 1056 bytes is larger than 1024 bytes [-Wframe-larger-than=] 322 | } | ^ Fixes: 6ae9e1aba97e ("drm/amdkfd: Update logic for CU occupancy calculations") Cc: Harish Kasiviswanathan Cc: Felix Kuehling Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Mukul Joshi Reviewed-by: Mukul Joshi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d4aa843aacfd..6bab6fc6a35d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -271,11 +271,9 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) struct kfd_process *proc = NULL; struct kfd_process_device *pdd = NULL; int i; - struct kfd_cu_occupancy cu_occupancy[AMDGPU_MAX_QUEUES]; + struct kfd_cu_occupancy *cu_occupancy; u32 queue_format; - memset(cu_occupancy, 0x0, sizeof(cu_occupancy)); - pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy); dev = pdd->dev; if (dev->kfd2kgd->get_cu_occupancy == NULL) @@ -293,6 +291,10 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) wave_cnt = 0; max_waves_per_cu = 0; + cu_occupancy = kcalloc(AMDGPU_MAX_QUEUES, sizeof(*cu_occupancy), GFP_KERNEL); + if (!cu_occupancy) + return -ENOMEM; + /* * For GFX 9.4.3, fetch the CU occupancy from the first XCC in the partition. * For AQL queues, because of cooperative dispatch we multiply the wave count @@ -318,6 +320,7 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) /* Translate wave count to number of compute units */ cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu; + kfree(cu_occupancy); return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt); } -- 2.51.0 From afe260df55ac280cd56306248cb6d8a6b0db095c Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Thu, 24 Oct 2024 13:40:39 +0800 Subject: [PATCH 04/16] drm/amdgpu: skip amdgpu_device_cache_pci_state under sriov Under sriov, host driver will save and restore vf pci cfg space during reset. And during device init, under sriov, pci_restore_state happens after fullaccess released, and it can have race condition with mmio protection enable from host side leading to missing interrupts. So skip amdgpu_device_cache_pci_state for sriov. Signed-off-by: Victor Zhao Acked-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a7b55d6ac5c6..d97417c4c8e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -6451,6 +6451,9 @@ bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) struct amdgpu_device *adev = drm_to_adev(dev); int r; + if (amdgpu_sriov_vf(adev)) + return false; + r = pci_save_state(pdev); if (!r) { kfree(adev->pci_state); -- 2.51.0 From b78612939de33ffd247f3d39eaca7fb2648801ba Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 24 Oct 2024 16:51:05 +0800 Subject: [PATCH 05/16] drm/amdgpu: Fix dummy_read_page overlapping mappings MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Use the dma_map_page_attrs() with DMA_ATTR_SKIP_CPU_SYNC attribute setting to handle the dummy page overlapping mappings. Signed-off-by: Prike Liang Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 256b95232de5..b2033f8352f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -78,8 +78,9 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) if (adev->dummy_page_addr) return 0; - adev->dummy_page_addr = dma_map_page(&adev->pdev->dev, dummy_page, 0, - PAGE_SIZE, DMA_BIDIRECTIONAL); + adev->dummy_page_addr = dma_map_page_attrs(&adev->pdev->dev, dummy_page, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) { dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n"); adev->dummy_page_addr = 0; @@ -99,8 +100,9 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) { if (!adev->dummy_page_addr) return; - dma_unmap_page(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE, - DMA_BIDIRECTIONAL); + dma_unmap_page_attrs(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE, + DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); adev->dummy_page_addr = 0; } -- 2.51.0 From c5c63d9cb5d3bbb2fc5973757616b17629795829 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Tue, 29 Oct 2024 10:11:05 +0800 Subject: [PATCH 06/16] drm/amdgpu: add amdgpu_gfx_sched_mask and amdgpu_compute_sched_mask debugfs compute/gfx may have multiple rings on some hardware. In some cases, userspace wants to run jobs on a specific ring for validation purposes. This debugfs entry helps to disable or enable submitting jobs to a specific ring. This entry is populated only if there are at least two or more cores in the gfx/compute ip. Signed-off-by: Jesse Zhang Suggested-by: Alex Deucher Reviewed-by: Tim Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 141 ++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 + 3 files changed, 145 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 37d8657f0776..6e3f657cab9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2096,6 +2096,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm); amdgpu_debugfs_jpeg_sched_mask_init(adev); + amdgpu_debugfs_gfx_sched_mask_init(adev); + amdgpu_debugfs_compute_sched_mask_init(adev); amdgpu_ras_debugfs_create_all(adev); amdgpu_rap_debugfs_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index b8cc4b146bdc..63f5fa736f32 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1917,3 +1917,144 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) } mutex_unlock(&adev->enforce_isolation_mutex); } + +/* + * debugfs for to enable/disable gfx job submission to specific core. + */ +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + + mask = (1 << adev->gfx.num_gfx_rings) - 1; + if ((val & mask) == 0) + return -EINVAL; + + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { + ring = &adev->gfx.gfx_ring[i]; + if (val & (1 << i)) + ring->sched.ready = true; + else + ring->sched.ready = false; + } + /* publish sched.ready flag update effective immediately across smp */ + smp_rmb(); + return 0; +} + +static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { + ring = &adev->gfx.gfx_ring[i]; + if (ring->sched.ready) + mask |= 1 << i; + } + + *val = mask; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops, + amdgpu_debugfs_gfx_sched_mask_get, + amdgpu_debugfs_gfx_sched_mask_set, "%llx\n"); + +#endif + +void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + if (!(adev->gfx.num_gfx_rings > 1)) + return; + sprintf(name, "amdgpu_gfx_sched_mask"); + debugfs_create_file(name, 0600, root, adev, + &amdgpu_debugfs_gfx_sched_mask_fops); +#endif +} + +/* + * debugfs for to enable/disable compute job submission to specific core. + */ +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + + mask = (1 << adev->gfx.num_compute_rings) - 1; + if ((val & mask) == 0) + return -EINVAL; + + for (i = 0; i < adev->gfx.num_compute_rings; ++i) { + ring = &adev->gfx.compute_ring[i]; + if (val & (1 << i)) + ring->sched.ready = true; + else + ring->sched.ready = false; + } + + /* publish sched.ready flag update effective immediately across smp */ + smp_rmb(); + return 0; +} + +static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + for (i = 0; i < adev->gfx.num_compute_rings; ++i) { + ring = &adev->gfx.compute_ring[i]; + if (ring->sched.ready) + mask |= 1 << i; + } + + *val = mask; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fops, + amdgpu_debugfs_compute_sched_mask_get, + amdgpu_debugfs_compute_sched_mask_set, "%llx\n"); + +#endif + +void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + if (!(adev->gfx.num_compute_rings > 1)) + return; + sprintf(name, "amdgpu_compute_sched_mask"); + debugfs_create_file(name, 0600, root, adev, + &amdgpu_debugfs_compute_sched_mask_fops); +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index af9dbd760fee..7183831df0c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -584,6 +584,8 @@ void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev); void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work); void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring); void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); +void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev); +void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev); static inline const char *amdgpu_gfx_compute_mode_desc(int mode) { -- 2.51.0 From d2e3961ae37171811a3d442e601599b85711adcb Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Tue, 29 Oct 2024 10:14:35 +0800 Subject: [PATCH 07/16] drm/amdgpu: add amdgpu_sdma_sched_mask debugfs Userspace wants to run jobs on a specific sdma ring for verification purposes. This debugfs entry helps to disable or enable submitting jobs to a specific ring. This entry is populated only if there are at least two or more cores in the sdma ip. Signed-off-by: Jesse Zhang Suggested-by: Alex Deucher Reviewed-by: Tim Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 70 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 2 +- 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 6e3f657cab9c..c446bfccea59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2098,6 +2098,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_debugfs_jpeg_sched_mask_init(adev); amdgpu_debugfs_gfx_sched_mask_init(adev); amdgpu_debugfs_compute_sched_mask_init(adev); + amdgpu_debugfs_sdma_sched_mask_init(adev); amdgpu_ras_debugfs_create_all(adev); amdgpu_rap_debugfs_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 183a976ba29d..5868b4a32ea6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -343,3 +343,73 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev) return 0; } + +/* + * debugfs for to enable/disable sdma job submission to specific core. + */ +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + + mask = (1 << adev->sdma.num_instances) - 1; + if ((val & mask) == 0) + return -EINVAL; + + for (i = 0; i < adev->sdma.num_instances; ++i) { + ring = &adev->sdma.instance[i].ring; + if (val & (1 << i)) + ring->sched.ready = true; + else + ring->sched.ready = false; + } + /* publish sched.ready flag update effective immediately across smp */ + smp_rmb(); + return 0; +} + +static int amdgpu_debugfs_sdma_sched_mask_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + for (i = 0; i < adev->sdma.num_instances; ++i) { + ring = &adev->sdma.instance[i].ring; + if (ring->sched.ready) + mask |= 1 << i; + } + + *val = mask; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_sdma_sched_mask_fops, + amdgpu_debugfs_sdma_sched_mask_get, + amdgpu_debugfs_sdma_sched_mask_set, "%llx\n"); + +#endif + +void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + if (!(adev->sdma.num_instances > 1)) + return; + sprintf(name, "amdgpu_sdma_sched_mask"); + debugfs_create_file(name, 0600, root, adev, + &amdgpu_debugfs_sdma_sched_mask_fops); +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 087ce0f6fa07..a37fcd9bb981 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -175,5 +175,5 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance, void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev, bool duplicate); int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev); - +void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev); #endif -- 2.51.0 From 12e5df81bb1f006be2bc8341c732ebd966e573e4 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Thu, 17 Oct 2024 12:21:40 +0800 Subject: [PATCH 08/16] drm/amdgpu: Add nps_mode in RAS init_flag Add nps_mode in RAS init_flag. Signed-off-by: Candice Li Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 3 +++ drivers/gpu/drm/amd/amdgpu/ta_ras_if.h | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index ae24df65a3df..17cf10c0b72b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1834,6 +1834,9 @@ int psp_ras_initialize(struct psp_context *psp) ras_cmd->ras_in_message.init_flags.xcc_mask = adev->gfx.xcc_mask; ras_cmd->ras_in_message.init_flags.channel_dis_num = hweight32(adev->gmc.m_half_use) * 2; + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + ras_cmd->ras_in_message.init_flags.nps_mode = + adev->gmc.gmc_funcs->query_mem_partition_mode(adev); ret = psp_ta_load(psp, &psp->ras_context.context); diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 3ac56a9645eb..21b71a427b1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -113,6 +113,14 @@ enum ta_ras_address_type { TA_RAS_PA_TO_MCA, }; +enum ta_ras_nps_mode { + TA_RAS_UNKNOWN_MODE = 0, + TA_RAS_NPS1_MODE = 1, + TA_RAS_NPS2_MODE = 2, + TA_RAS_NPS4_MODE = 4, + TA_RAS_NPS8_MODE = 8, +}; + /* Input/output structures for RAS commands */ /**********************************************************/ @@ -139,6 +147,7 @@ struct ta_ras_init_flags { uint8_t dgpu_mode; uint16_t xcc_mask; uint8_t channel_dis_num; + uint8_t nps_mode; }; struct ta_ras_mca_addr { -- 2.51.0 From 047767ddc93666704026c79c01554597375beb50 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 29 Oct 2024 10:47:26 +0530 Subject: [PATCH 09/16] drm/amdgpu: Group gfx sysfs functions Make amdgpu_gfx_sysfs_init/fini functions as common entry points for all gfx related sysfs nodes. Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 40 +++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 -- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 5 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 +-- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 4 +-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 +-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 5 ---- 7 files changed, 40 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 63f5fa736f32..2f3f09dfb1fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1602,7 +1602,7 @@ static DEVICE_ATTR(current_compute_partition, 0644, static DEVICE_ATTR(available_compute_partition, 0444, amdgpu_gfx_get_available_compute_partition, NULL); -int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) +static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev) { struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; bool xcp_switch_supported; @@ -1629,7 +1629,7 @@ int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) return r; } -void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) +static void amdgpu_gfx_sysfs_xcp_fini(struct amdgpu_device *adev) { struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; bool xcp_switch_supported; @@ -1646,25 +1646,47 @@ void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) &dev_attr_available_compute_partition); } -int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) +static int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) { int r; r = device_create_file(adev->dev, &dev_attr_enforce_isolation); if (r) return r; + if (adev->gfx.enable_cleaner_shader) + r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader); - r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader); - if (r) + return r; +} + +static void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) +{ + device_remove_file(adev->dev, &dev_attr_enforce_isolation); + if (adev->gfx.enable_cleaner_shader) + device_remove_file(adev->dev, &dev_attr_run_cleaner_shader); +} + +int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_gfx_sysfs_xcp_init(adev); + if (r) { + dev_err(adev->dev, "failed to create xcp sysfs files"); return r; + } - return 0; + r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + if (r) + dev_err(adev->dev, "failed to create isolation sysfs files"); + + return r; } -void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) +void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) { - device_remove_file(adev->dev, &dev_attr_enforce_isolation); - device_remove_file(adev->dev, &dev_attr_run_cleaner_shader); + amdgpu_gfx_sysfs_xcp_fini(adev); + amdgpu_gfx_sysfs_isolation_shader_fini(adev); } int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 7183831df0c3..fd73e527f446 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -579,8 +579,6 @@ void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev); void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, unsigned int cleaner_shader_size, const void *cleaner_shader_ptr); -int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev); -void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev); void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work); void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring); void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 9da95b25e158..d1a18ca584dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4853,9 +4853,10 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) gfx_v10_0_alloc_ip_dump(adev); - r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + r = amdgpu_gfx_sysfs_init(adev); if (r) return r; + return 0; } @@ -4907,7 +4908,7 @@ static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block) gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev); gfx_v10_0_free_microcode(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); + amdgpu_gfx_sysfs_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 894fc04201c3..cfe17a36b8ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1708,7 +1708,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) gfx_v11_0_alloc_ip_dump(adev); - r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + r = amdgpu_gfx_sysfs_init(adev); if (r) return r; @@ -1773,7 +1773,7 @@ static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) gfx_v11_0_free_microcode(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); + amdgpu_gfx_sysfs_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 9fec28d8a5fc..1b99f90cd193 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1466,7 +1466,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) gfx_v12_0_alloc_ip_dump(adev); - r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + r = amdgpu_gfx_sysfs_init(adev); if (r) return r; @@ -1529,7 +1529,7 @@ static int gfx_v12_0_sw_fini(struct amdgpu_ip_block *ip_block) gfx_v12_0_free_microcode(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); + amdgpu_gfx_sysfs_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index e9248a855ba7..a880dce16ae2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2402,7 +2402,7 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) gfx_v9_0_alloc_ip_dump(adev); - r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + r = amdgpu_gfx_sysfs_init(adev); if (r) return r; @@ -2443,7 +2443,7 @@ static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block) } gfx_v9_0_free_microcode(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); + amdgpu_gfx_sysfs_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 016290f00592..983088805c3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1171,10 +1171,6 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block) gfx_v9_4_3_alloc_ip_dump(adev); - r = amdgpu_gfx_sysfs_isolation_shader_init(adev); - if (r) - return r; - return 0; } @@ -1199,7 +1195,6 @@ static int gfx_v9_4_3_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); gfx_v9_4_3_free_microcode(adev); amdgpu_gfx_sysfs_fini(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); -- 2.51.0 From 81db4eab2847094137a266616954e5f1c6e33575 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 4 Nov 2024 10:03:15 +0530 Subject: [PATCH 10/16] drm/amdgpu: Skip IP coredump for RAS errors For RAS errors, source of error is known. Skip the core dump of IP states. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 1d9eda883bb8..b772299e1067 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2605,6 +2605,7 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; reset_context.src = AMDGPU_RESET_SRC_RAS; + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); /* Perform full reset in fatal error mode */ if (!amdgpu_ras_is_poison_mode_supported(ras->adev)) -- 2.51.0 From e5ad71779df6f448d6edb910bc635680b9419ec0 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 30 Oct 2024 13:54:49 +0530 Subject: [PATCH 11/16] drm/amdgpu: Add compatible NPS mode info Populate the compatible NPS modes also for providing partition configuration details through sysfs. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h | 1 + drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h index 7ac89d78a5bf..b63f53242c57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -77,6 +77,7 @@ struct amdgpu_xcp_cfg { u8 num_res; struct amdgpu_xcp_mgr *xcp_mgr; struct kobject kobj; + u16 compatible_nps_modes; }; struct amdgpu_xcp_ip_funcs { diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 890976b7ce77..fccccea0d2d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -455,6 +455,7 @@ static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr, int max_res[AMDGPU_XCP_RES_MAX] = {}; bool res_lt_xcp; int num_xcp, i; + u16 nps_modes; if (!(xcp_mgr->supp_xcp_modes & BIT(mode))) return -EINVAL; @@ -467,23 +468,33 @@ static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr, switch (mode) { case AMDGPU_SPX_PARTITION_MODE: num_xcp = 1; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE); break; case AMDGPU_DPX_PARTITION_MODE: num_xcp = 2; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE); break; case AMDGPU_TPX_PARTITION_MODE: num_xcp = 3; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); break; case AMDGPU_QPX_PARTITION_MODE: num_xcp = 4; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); break; case AMDGPU_CPX_PARTITION_MODE: num_xcp = NUM_XCC(adev->gfx.xcc_mask); + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); break; default: return -EINVAL; } + xcp_cfg->compatible_nps_modes = + (adev->gmc.supported_nps_modes & nps_modes); xcp_cfg->num_res = ARRAY_SIZE(max_res); for (i = 0; i < xcp_cfg->num_res; i++) { -- 2.51.0 From 8cc438be5d49b8326b2fcade0bdb7e6a97df9e0b Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Wed, 30 Oct 2024 13:22:44 +0800 Subject: [PATCH 12/16] drm/amd/pm: correct the workload setting Correct the workload setting in order not to mix the setting with the end user. Update the workload mask accordingly. v2: changes as below: 1. the end user can not erase the workload from driver except default workload. 2. always shows the real highest priority workoad to the end user. 3. the real workload mask is combined with driver workload mask and end user workload mask. v3: apply this to the other ASICs as well. v4: simplify the code v5: refine the code based on the review comments. Signed-off-by: Kenneth Feng Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 49 +++++++++++++------ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 4 +- .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 5 +- .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 5 +- .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 5 +- .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 4 +- .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 4 +- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 20 ++++++-- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 5 +- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 9 ++-- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 8 +++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 2 + 12 files changed, 84 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index ba97070acf9f..aa7c57b58765 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1261,26 +1261,33 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) smu->watermarks_bitmap = 0; smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + smu->user_dpm_profile.user_workload_mask = 0; atomic_set(&smu->smu_power.power_gate.vcn_gated, 1); atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); - smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; - smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; - smu->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; - smu->workload_prority[PP_SMC_POWER_PROFILE_VIDEO] = 3; - smu->workload_prority[PP_SMC_POWER_PROFILE_VR] = 4; - smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; - smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; + smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; + smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; + smu->workload_priority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; + smu->workload_priority[PP_SMC_POWER_PROFILE_VIDEO] = 3; + smu->workload_priority[PP_SMC_POWER_PROFILE_VR] = 4; + smu->workload_priority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; + smu->workload_priority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; if (smu->is_apu || - !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) - smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; - else - smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; + !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) { + smu->driver_workload_mask = + 1 << smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; + } else { + smu->driver_workload_mask = + 1 << smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; + smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; + } + smu->workload_mask = smu->driver_workload_mask | + smu->user_dpm_profile.user_workload_mask; smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; smu->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING; @@ -2355,17 +2362,20 @@ static int smu_switch_power_profile(void *handle, return -EINVAL; if (!en) { - smu->workload_mask &= ~(1 << smu->workload_prority[type]); + smu->driver_workload_mask &= ~(1 << smu->workload_priority[type]); index = fls(smu->workload_mask); index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; workload[0] = smu->workload_setting[index]; } else { - smu->workload_mask |= (1 << smu->workload_prority[type]); + smu->driver_workload_mask |= (1 << smu->workload_priority[type]); index = fls(smu->workload_mask); index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; workload[0] = smu->workload_setting[index]; } + smu->workload_mask = smu->driver_workload_mask | + smu->user_dpm_profile.user_workload_mask; + if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) smu_bump_power_profile_mode(smu, workload, 0); @@ -3056,12 +3066,23 @@ static int smu_set_power_profile_mode(void *handle, uint32_t param_size) { struct smu_context *smu = handle; + int ret; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || !smu->ppt_funcs->set_power_profile_mode) return -EOPNOTSUPP; - return smu_bump_power_profile_mode(smu, param, param_size); + if (smu->user_dpm_profile.user_workload_mask & + (1 << smu->workload_priority[param[param_size]])) + return 0; + + smu->user_dpm_profile.user_workload_mask = + (1 << smu->workload_priority[param[param_size]]); + smu->workload_mask = smu->user_dpm_profile.user_workload_mask | + smu->driver_workload_mask; + ret = smu_bump_power_profile_mode(smu, param, param_size); + + return ret; } static int smu_get_fan_control_mode(void *handle, u32 *fan_mode) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 4ebcc1e53ea2..d665c47f19b7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -240,6 +240,7 @@ struct smu_user_dpm_profile { /* user clock state information */ uint32_t clk_mask[SMU_CLK_COUNT]; uint32_t clk_dependency; + uint32_t user_workload_mask; }; #define SMU_TABLE_INIT(tables, table_id, s, a, d) \ @@ -557,7 +558,8 @@ struct smu_context { bool disable_uclk_switch; uint32_t workload_mask; - uint32_t workload_prority[WORKLOAD_POLICY_MAX]; + uint32_t driver_workload_mask; + uint32_t workload_priority[WORKLOAD_POLICY_MAX]; uint32_t workload_setting[WORKLOAD_POLICY_MAX]; uint32_t power_profile_mode; uint32_t default_power_profile_mode; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 6c8e80f6b592..4b36c230e43a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1455,7 +1455,6 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, return -EINVAL; } - if ((profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) && (smu->smc_fw_version >= 0x360d00)) { if (size != 10) @@ -1523,14 +1522,14 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type, + smu->workload_mask, NULL); if (ret) { dev_err(smu->adev->dev, "Fail to set workload type %d\n", workload_type); return ret; } - smu->power_profile_mode = profile_mode; + smu_cmn_assign_power_profile(smu); return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index faa8e7d9c3c6..211635dabed8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2083,10 +2083,13 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u smu->power_profile_mode); if (workload_type < 0) return -EINVAL; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type, NULL); + smu->workload_mask, NULL); if (ret) dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); + else + smu_cmn_assign_power_profile(smu); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index a9cb28ce2133..d0ed0d060a8a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1788,10 +1788,13 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long * smu->power_profile_mode); if (workload_type < 0) return -EINVAL; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type, NULL); + smu->workload_mask, NULL); if (ret) dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); + else + smu_cmn_assign_power_profile(smu); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index cd3e9ba3eff4..f89c487dce72 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -1081,7 +1081,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, } ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, - 1 << workload_type, + smu->workload_mask, NULL); if (ret) { dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", @@ -1089,7 +1089,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, return ret; } - smu->power_profile_mode = profile_mode; + smu_cmn_assign_power_profile(smu); return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index a34797f3576b..75a9ea87f419 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -892,14 +892,14 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u } ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, - 1 << workload_type, + smu->workload_mask, NULL); if (ret) { dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", workload_type); return ret; } - smu->power_profile_mode = profile_mode; + smu_cmn_assign_power_profile(smu); return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 8d25cc1f218f..1d29e99d7955 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2473,7 +2473,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, DpmActivityMonitorCoeffInt_t *activity_monitor = &(activity_monitor_external.DpmActivityMonitorCoeffInt); int workload_type, ret = 0; - u32 workload_mask, selected_workload_mask; + u32 workload_mask; smu->power_profile_mode = input[size]; @@ -2540,7 +2540,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, if (workload_type < 0) return -EINVAL; - selected_workload_mask = workload_mask = 1 << workload_type; + workload_mask = 1 << workload_type; /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && @@ -2555,12 +2555,22 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, workload_mask |= 1 << workload_type; } + smu->workload_mask |= workload_mask; ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - workload_mask, + smu->workload_mask, NULL); - if (!ret) - smu->workload_mask = selected_workload_mask; + if (!ret) { + smu_cmn_assign_power_profile(smu); + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) { + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, + PP_SMC_POWER_PROFILE_FULLSCREEN3D); + smu->power_profile_mode = smu->workload_mask & (1 << workload_type) + ? PP_SMC_POWER_PROFILE_FULLSCREEN3D + : PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + } + } return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 23f13388455f..12d622d4aceb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2487,13 +2487,14 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp smu->power_profile_mode); if (workload_type < 0) return -EINVAL; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type, NULL); + smu->workload_mask, NULL); if (ret) dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); else - smu->workload_mask = (1 << workload_type); + smu_cmn_assign_power_profile(smu); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 884938d69fca..59b369eff30f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1795,12 +1795,11 @@ static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, if (workload_type < 0) return -EINVAL; - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetWorkloadMask, - 1 << workload_type, - NULL); + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, + smu->workload_mask, NULL); + if (!ret) - smu->workload_mask = 1 << workload_type; + smu_cmn_assign_power_profile(smu); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 843f00c9e407..f1ab1a6bb467 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -1141,6 +1141,14 @@ int smu_cmn_set_mp1_state(struct smu_context *smu, return ret; } +void smu_cmn_assign_power_profile(struct smu_context *smu) +{ + uint32_t index; + index = fls(smu->workload_mask); + index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; + smu->power_profile_mode = smu->workload_setting[index]; +} + bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev) { struct pci_dev *p = NULL; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index 1de685defe85..8a801e389659 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -130,6 +130,8 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev); int smu_cmn_set_mp1_state(struct smu_context *smu, enum pp_mp1_state mp1_state); +void smu_cmn_assign_power_profile(struct smu_context *smu); + /* * Helper function to make sysfs_emit_at() happy. Align buf to * the current page boundary and record the offset. -- 2.51.0 From cfffd980bf21b5a84fd364861d482d5a2ec21c49 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Wolfgang=20M=C3=BCller?= Date: Tue, 29 Oct 2024 12:17:52 +0100 Subject: [PATCH 13/16] drm/amd/pm: add zero RPM OD setting support for SMU13 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Whilst we have support for setting fan curves there is no support for disabling the zero RPM feature. Since the relevant bits are already present in the OverDriveTable, hook them up to a sysctl setting so users can influence this behaviour. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3489 Reviewed-by: Kenneth Feng Signed-off-by: Wolfgang Müller Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/thermal.rst | 6 ++ .../gpu/drm/amd/include/kgd_pp_interface.h | 2 + drivers/gpu/drm/amd/pm/amdgpu_pm.c | 62 +++++++++++++++++++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 2 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 2 + drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 1 + .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 55 +++++++++++++++- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 55 +++++++++++++++- 8 files changed, 183 insertions(+), 2 deletions(-) diff --git a/Documentation/gpu/amdgpu/thermal.rst b/Documentation/gpu/amdgpu/thermal.rst index 6d942b5c58f0..ec6c1f1d5a4a 100644 --- a/Documentation/gpu/amdgpu/thermal.rst +++ b/Documentation/gpu/amdgpu/thermal.rst @@ -100,6 +100,12 @@ fan_minimum_pwm .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c :doc: fan_minimum_pwm +fan_zero_rpm_enable +---------------------- + +.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c + :doc: fan_zero_rpm_enable + GFXOFF ====== diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 2fa71f68205e..80e4b5a7df23 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -119,6 +119,7 @@ enum pp_clock_type { OD_ACOUSTIC_TARGET, OD_FAN_TARGET_TEMPERATURE, OD_FAN_MINIMUM_PWM, + OD_FAN_ZERO_RPM_ENABLE, }; enum amd_pp_sensors { @@ -199,6 +200,7 @@ enum PP_OD_DPM_TABLE_COMMAND { PP_OD_EDIT_ACOUSTIC_TARGET, PP_OD_EDIT_FAN_TARGET_TEMPERATURE, PP_OD_EDIT_FAN_MINIMUM_PWM, + PP_OD_EDIT_FAN_ZERO_RPM_ENABLE, }; struct pp_states_info { diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index ea940773353c..cb96f1f8ca8d 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -4109,6 +4109,60 @@ static umode_t fan_minimum_pwm_visible(struct amdgpu_device *adev) return umode; } +/** + * DOC: fan_zero_rpm_enable + * + * The amdgpu driver provides a sysfs API for checking and adjusting the + * zero RPM feature. + * + * Reading back the file shows you the current setting and the permitted + * ranges if changable. + * + * Writing an integer to the file, change the setting accordingly. + * + * When you have finished the editing, write "c" (commit) to the file to commit + * your changes. + * + * If you want to reset to the default value, write "r" (reset) to the file to + * reset them. + */ +static ssize_t fan_zero_rpm_enable_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); + struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; + + return (ssize_t)amdgpu_retrieve_od_settings(adev, OD_FAN_ZERO_RPM_ENABLE, buf); +} + +static ssize_t fan_zero_rpm_enable_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, + size_t count) +{ + struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); + struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; + + return (ssize_t)amdgpu_distribute_custom_od_settings(adev, + PP_OD_EDIT_FAN_ZERO_RPM_ENABLE, + buf, + count); +} + +static umode_t fan_zero_rpm_enable_visible(struct amdgpu_device *adev) +{ + umode_t umode = 0000; + + if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE) + umode |= S_IRUSR | S_IRGRP | S_IROTH; + + if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET) + umode |= S_IWUSR; + + return umode; +} + static struct od_feature_set amdgpu_od_set = { .containers = { [0] = { @@ -4154,6 +4208,14 @@ static struct od_feature_set amdgpu_od_set = { .store = fan_minimum_pwm_store, }, }, + [5] = { + .name = "fan_zero_rpm_enable", + .ops = { + .is_visible = fan_zero_rpm_enable_visible, + .show = fan_zero_rpm_enable_show, + .store = fan_zero_rpm_enable_store, + }, + }, }, }, }, diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index f5bf41f21c41..b5daa12c0864 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -328,6 +328,8 @@ struct config_table_setting #define OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET BIT(7) #define OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE BIT(8) #define OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET BIT(9) +#define OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE BIT(10) +#define OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET BIT(11) struct amdgpu_pm { struct mutex mutex; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index aa7c57b58765..3458674fe864 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2895,6 +2895,8 @@ static enum smu_clk_type smu_convert_to_smuclk(enum pp_clock_type type) clk_type = SMU_OD_FAN_TARGET_TEMPERATURE; break; case OD_FAN_MINIMUM_PWM: clk_type = SMU_OD_FAN_MINIMUM_PWM; break; + case OD_FAN_ZERO_RPM_ENABLE: + clk_type = SMU_OD_FAN_ZERO_RPM_ENABLE; break; default: clk_type = SMU_CLK_COUNT; break; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index e71a721c12b9..e0abb449a3a1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -313,6 +313,7 @@ enum smu_clk_type { SMU_OD_ACOUSTIC_TARGET, SMU_OD_FAN_TARGET_TEMPERATURE, SMU_OD_FAN_MINIMUM_PWM, + SMU_OD_FAN_ZERO_RPM_ENABLE, SMU_CLK_COUNT, }; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 1d29e99d7955..a7c62bb09aa0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -107,6 +107,7 @@ #define PP_OD_FEATURE_FAN_ACOUSTIC_TARGET 8 #define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 #define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 +#define PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE 11 #define LINK_SPEED_MAX 3 @@ -1130,6 +1131,10 @@ static void smu_v13_0_0_get_od_setting_limits(struct smu_context *smu, od_min_setting = overdrive_lowerlimits->FanMinimumPwm; od_max_setting = overdrive_upperlimits->FanMinimumPwm; break; + case PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE: + od_min_setting = overdrive_lowerlimits->FanZeroRpmEnable; + od_max_setting = overdrive_upperlimits->FanZeroRpmEnable; + break; default: od_min_setting = od_max_setting = INT_MAX; break; @@ -1450,6 +1455,24 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, min_value, max_value); break; + case SMU_OD_FAN_ZERO_RPM_ENABLE: + if (!smu_v13_0_0_is_od_feature_supported(smu, + PP_OD_FEATURE_ZERO_FAN_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_ENABLE:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanZeroRpmEnable); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v13_0_0_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ZERO_RPM_ENABLE: %u %u\n", + min_value, max_value); + break; + case SMU_OD_RANGE: if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && !smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && @@ -1547,6 +1570,11 @@ static int smu_v13_0_0_od_restore_table_single(struct smu_context *smu, long inp od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: + od_table->OverDriveTable.FanZeroRpmEnable = + boot_overdrive_table->OverDriveTable.FanZeroRpmEnable; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; default: dev_info(adev->dev, "Invalid table index: %ld\n", input); return -EINVAL; @@ -1840,6 +1868,27 @@ static int smu_v13_0_0_od_edit_dpm_table(struct smu_context *smu, od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: + if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { + dev_warn(adev->dev, "Zero RPM setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v13_0_0_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "zero RPM enable setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanZeroRpmEnable = input[0]; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; + case PP_OD_RESTORE_DEFAULT_TABLE: if (size == 1) { ret = smu_v13_0_0_od_restore_table_single(smu, input[0]); @@ -2110,7 +2159,9 @@ static void smu_v13_0_0_set_supported_od_feature_mask(struct smu_context *smu) OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_RETRIEVE | OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET | OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | - OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET; + OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET | + OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE | + OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET; } static int smu_v13_0_0_set_default_od_settings(struct smu_context *smu) @@ -2176,6 +2227,8 @@ static int smu_v13_0_0_set_default_od_settings(struct smu_context *smu) user_od_table_bak.OverDriveTable.FanTargetTemperature; user_od_table->OverDriveTable.FanMinimumPwm = user_od_table_bak.OverDriveTable.FanMinimumPwm; + user_od_table->OverDriveTable.FanZeroRpmEnable = + user_od_table_bak.OverDriveTable.FanZeroRpmEnable; } smu_v13_0_0_set_supported_od_feature_mask(smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 12d622d4aceb..1ba2089d8650 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -83,6 +83,7 @@ #define PP_OD_FEATURE_FAN_ACOUSTIC_TARGET 8 #define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 #define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 +#define PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE 11 #define LINK_SPEED_MAX 3 @@ -1119,6 +1120,10 @@ static void smu_v13_0_7_get_od_setting_limits(struct smu_context *smu, od_min_setting = overdrive_lowerlimits->FanMinimumPwm; od_max_setting = overdrive_upperlimits->FanMinimumPwm; break; + case PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE: + od_min_setting = overdrive_lowerlimits->FanZeroRpmEnable; + od_max_setting = overdrive_upperlimits->FanZeroRpmEnable; + break; default: od_min_setting = od_max_setting = INT_MAX; break; @@ -1439,6 +1444,24 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu, min_value, max_value); break; + case SMU_OD_FAN_ZERO_RPM_ENABLE: + if (!smu_v13_0_7_is_od_feature_supported(smu, + PP_OD_FEATURE_ZERO_FAN_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_ENABLE:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanZeroRpmEnable); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v13_0_7_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ZERO_RPM_ENABLE: %u %u\n", + min_value, max_value); + break; + case SMU_OD_RANGE: if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && !smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && @@ -1535,6 +1558,11 @@ static int smu_v13_0_7_od_restore_table_single(struct smu_context *smu, long inp od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: + od_table->OverDriveTable.FanZeroRpmEnable = + boot_overdrive_table->OverDriveTable.FanZeroRpmEnable; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; default: dev_info(adev->dev, "Invalid table index: %ld\n", input); return -EINVAL; @@ -1828,6 +1856,27 @@ static int smu_v13_0_7_od_edit_dpm_table(struct smu_context *smu, od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: + if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { + dev_warn(adev->dev, "Zero RPM setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v13_0_7_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "zero RPM enable setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanZeroRpmEnable = input[0]; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; + case PP_OD_RESTORE_DEFAULT_TABLE: if (size == 1) { ret = smu_v13_0_7_od_restore_table_single(smu, input[0]); @@ -2094,7 +2143,9 @@ static void smu_v13_0_7_set_supported_od_feature_mask(struct smu_context *smu) OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_RETRIEVE | OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET | OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | - OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET; + OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET | + OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE | + OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET; } static int smu_v13_0_7_set_default_od_settings(struct smu_context *smu) @@ -2160,6 +2211,8 @@ static int smu_v13_0_7_set_default_od_settings(struct smu_context *smu) user_od_table_bak.OverDriveTable.FanTargetTemperature; user_od_table->OverDriveTable.FanMinimumPwm = user_od_table_bak.OverDriveTable.FanMinimumPwm; + user_od_table->OverDriveTable.FanZeroRpmEnable = + user_od_table_bak.OverDriveTable.FanZeroRpmEnable; } smu_v13_0_7_set_supported_od_feature_mask(smu); -- 2.51.0 From e89bd3615bc0883adc90209c1aac6d4bac7d221f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Oct 2024 22:54:38 -0400 Subject: [PATCH 14/16] drm/amdgpu/mes: fetch fw version from firmware header We need this prior to the firmware being loaded so fetch from the header. v2: fetch directly from the firmware v3: store both fw versions Reviewed-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 6909af56fcad..b10383f83d73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1594,6 +1594,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) char ucode_prefix[30]; char fw_name[50]; bool need_retry = false; + u32 *ucode_ptr; int r; amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, @@ -1631,6 +1632,10 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) adev->mes.data_start_addr[pipe] = le32_to_cpu(mes_hdr->mes_data_start_addr_lo) | ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32); + ucode_ptr = (u32 *)(adev->mes.fw[pipe]->data + + sizeof(union amdgpu_firmware_header)); + adev->mes.fw_version[pipe] = + le32_to_cpu(ucode_ptr[24]) & AMDGPU_MES_VERSION_MASK; if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { int ucode, ucode_data; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 96788c0f42f1..0684e482a204 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -75,6 +75,7 @@ struct amdgpu_mes { uint32_t sched_version; uint32_t kiq_version; + uint32_t fw_version[AMDGPU_MAX_MES_PIPES]; bool enable_legacy_queue_map; uint32_t total_max_queue; -- 2.51.0 From 6bfe777e9267ee6d1c4712b52bb5d32e59508a3d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Wolfgang=20M=C3=BCller?= Date: Tue, 29 Oct 2024 12:17:53 +0100 Subject: [PATCH 15/16] drm/amd/pm: add zero RPM stop temperature OD setting support for SMU13 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Together with the feature to enable or disable zero RPM in the last commit, it also makes sense to expose the OD setting determining under which temperature the fan should stop if zero RPM is enabled. Reviewed-by: Kenneth Feng Signed-off-by: Wolfgang Müller Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/thermal.rst | 6 ++ .../gpu/drm/amd/include/kgd_pp_interface.h | 2 + drivers/gpu/drm/amd/pm/amdgpu_pm.c | 65 +++++++++++++++++++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 2 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 2 + drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 1 + .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 55 +++++++++++++++- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 55 +++++++++++++++- 8 files changed, 186 insertions(+), 2 deletions(-) diff --git a/Documentation/gpu/amdgpu/thermal.rst b/Documentation/gpu/amdgpu/thermal.rst index ec6c1f1d5a4a..1768a106aab1 100644 --- a/Documentation/gpu/amdgpu/thermal.rst +++ b/Documentation/gpu/amdgpu/thermal.rst @@ -106,6 +106,12 @@ fan_zero_rpm_enable .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c :doc: fan_zero_rpm_enable +fan_zero_rpm_stop_temperature +----------------------------- + +.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c + :doc: fan_zero_rpm_stop_temperature + GFXOFF ====== diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 80e4b5a7df23..bb27c0d2a9ae 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -120,6 +120,7 @@ enum pp_clock_type { OD_FAN_TARGET_TEMPERATURE, OD_FAN_MINIMUM_PWM, OD_FAN_ZERO_RPM_ENABLE, + OD_FAN_ZERO_RPM_STOP_TEMP, }; enum amd_pp_sensors { @@ -201,6 +202,7 @@ enum PP_OD_DPM_TABLE_COMMAND { PP_OD_EDIT_FAN_TARGET_TEMPERATURE, PP_OD_EDIT_FAN_MINIMUM_PWM, PP_OD_EDIT_FAN_ZERO_RPM_ENABLE, + PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP, }; struct pp_states_info { diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index cb96f1f8ca8d..136e8193867c 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -4163,6 +4163,63 @@ static umode_t fan_zero_rpm_enable_visible(struct amdgpu_device *adev) return umode; } +/** + * DOC: fan_zero_rpm_stop_temperature + * + * The amdgpu driver provides a sysfs API for checking and adjusting the + * zero RPM stop temperature feature. + * + * Reading back the file shows you the current setting and the permitted + * ranges if changable. + * + * Writing an integer to the file, change the setting accordingly. + * + * When you have finished the editing, write "c" (commit) to the file to commit + * your changes. + * + * If you want to reset to the default value, write "r" (reset) to the file to + * reset them. + * + * This setting works only if the Zero RPM setting is enabled. It adjusts the + * temperature below which the fan can stop. + */ +static ssize_t fan_zero_rpm_stop_temp_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); + struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; + + return (ssize_t)amdgpu_retrieve_od_settings(adev, OD_FAN_ZERO_RPM_STOP_TEMP, buf); +} + +static ssize_t fan_zero_rpm_stop_temp_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, + size_t count) +{ + struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); + struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; + + return (ssize_t)amdgpu_distribute_custom_od_settings(adev, + PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP, + buf, + count); +} + +static umode_t fan_zero_rpm_stop_temp_visible(struct amdgpu_device *adev) +{ + umode_t umode = 0000; + + if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE) + umode |= S_IRUSR | S_IRGRP | S_IROTH; + + if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET) + umode |= S_IWUSR; + + return umode; +} + static struct od_feature_set amdgpu_od_set = { .containers = { [0] = { @@ -4216,6 +4273,14 @@ static struct od_feature_set amdgpu_od_set = { .store = fan_zero_rpm_enable_store, }, }, + [6] = { + .name = "fan_zero_rpm_stop_temperature", + .ops = { + .is_visible = fan_zero_rpm_stop_temp_visible, + .show = fan_zero_rpm_stop_temp_show, + .store = fan_zero_rpm_stop_temp_store, + }, + }, }, }, }, diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index b5daa12c0864..363af8990aa2 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -330,6 +330,8 @@ struct config_table_setting #define OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET BIT(9) #define OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE BIT(10) #define OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET BIT(11) +#define OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE BIT(12) +#define OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET BIT(13) struct amdgpu_pm { struct mutex mutex; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 3458674fe864..64f917959576 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2897,6 +2897,8 @@ static enum smu_clk_type smu_convert_to_smuclk(enum pp_clock_type type) clk_type = SMU_OD_FAN_MINIMUM_PWM; break; case OD_FAN_ZERO_RPM_ENABLE: clk_type = SMU_OD_FAN_ZERO_RPM_ENABLE; break; + case OD_FAN_ZERO_RPM_STOP_TEMP: + clk_type = SMU_OD_FAN_ZERO_RPM_STOP_TEMP; break; default: clk_type = SMU_CLK_COUNT; break; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index e0abb449a3a1..a299dc4a8071 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -314,6 +314,7 @@ enum smu_clk_type { SMU_OD_FAN_TARGET_TEMPERATURE, SMU_OD_FAN_MINIMUM_PWM, SMU_OD_FAN_ZERO_RPM_ENABLE, + SMU_OD_FAN_ZERO_RPM_STOP_TEMP, SMU_CLK_COUNT, }; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a7c62bb09aa0..80c6b1e523aa 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -108,6 +108,7 @@ #define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 #define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 #define PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE 11 +#define PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP 12 #define LINK_SPEED_MAX 3 @@ -1135,6 +1136,10 @@ static void smu_v13_0_0_get_od_setting_limits(struct smu_context *smu, od_min_setting = overdrive_lowerlimits->FanZeroRpmEnable; od_max_setting = overdrive_upperlimits->FanZeroRpmEnable; break; + case PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP: + od_min_setting = overdrive_lowerlimits->FanZeroRpmStopTemp; + od_max_setting = overdrive_upperlimits->FanZeroRpmStopTemp; + break; default: od_min_setting = od_max_setting = INT_MAX; break; @@ -1473,6 +1478,24 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, min_value, max_value); break; + case SMU_OD_FAN_ZERO_RPM_STOP_TEMP: + if (!smu_v13_0_0_is_od_feature_supported(smu, + PP_OD_FEATURE_ZERO_FAN_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_STOP_TEMPERATURE:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanZeroRpmStopTemp); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v13_0_0_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ZERO_RPM_STOP_TEMPERATURE: %u %u\n", + min_value, max_value); + break; + case SMU_OD_RANGE: if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && !smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && @@ -1575,6 +1598,11 @@ static int smu_v13_0_0_od_restore_table_single(struct smu_context *smu, long inp boot_overdrive_table->OverDriveTable.FanZeroRpmEnable; od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: + od_table->OverDriveTable.FanZeroRpmStopTemp = + boot_overdrive_table->OverDriveTable.FanZeroRpmStopTemp; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; default: dev_info(adev->dev, "Invalid table index: %ld\n", input); return -EINVAL; @@ -1889,6 +1917,27 @@ static int smu_v13_0_0_od_edit_dpm_table(struct smu_context *smu, od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: + if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { + dev_warn(adev->dev, "Zero RPM setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v13_0_0_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "zero RPM stop temperature setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanZeroRpmStopTemp = input[0]; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; + case PP_OD_RESTORE_DEFAULT_TABLE: if (size == 1) { ret = smu_v13_0_0_od_restore_table_single(smu, input[0]); @@ -2161,7 +2210,9 @@ static void smu_v13_0_0_set_supported_od_feature_mask(struct smu_context *smu) OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET | OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE | - OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET; + OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET | + OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE | + OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET; } static int smu_v13_0_0_set_default_od_settings(struct smu_context *smu) @@ -2229,6 +2280,8 @@ static int smu_v13_0_0_set_default_od_settings(struct smu_context *smu) user_od_table_bak.OverDriveTable.FanMinimumPwm; user_od_table->OverDriveTable.FanZeroRpmEnable = user_od_table_bak.OverDriveTable.FanZeroRpmEnable; + user_od_table->OverDriveTable.FanZeroRpmStopTemp = + user_od_table_bak.OverDriveTable.FanZeroRpmStopTemp; } smu_v13_0_0_set_supported_od_feature_mask(smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 1ba2089d8650..c5d3e25cc967 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -84,6 +84,7 @@ #define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 #define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 #define PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE 11 +#define PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP 12 #define LINK_SPEED_MAX 3 @@ -1124,6 +1125,10 @@ static void smu_v13_0_7_get_od_setting_limits(struct smu_context *smu, od_min_setting = overdrive_lowerlimits->FanZeroRpmEnable; od_max_setting = overdrive_upperlimits->FanZeroRpmEnable; break; + case PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP: + od_min_setting = overdrive_lowerlimits->FanZeroRpmStopTemp; + od_max_setting = overdrive_upperlimits->FanZeroRpmStopTemp; + break; default: od_min_setting = od_max_setting = INT_MAX; break; @@ -1462,6 +1467,24 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu, min_value, max_value); break; + case SMU_OD_FAN_ZERO_RPM_STOP_TEMP: + if (!smu_v13_0_7_is_od_feature_supported(smu, + PP_OD_FEATURE_ZERO_FAN_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_STOP_TEMPERATURE:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanZeroRpmStopTemp); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v13_0_7_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ZERO_RPM_STOP_TEMPERATURE: %u %u\n", + min_value, max_value); + break; + case SMU_OD_RANGE: if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && !smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && @@ -1563,6 +1586,11 @@ static int smu_v13_0_7_od_restore_table_single(struct smu_context *smu, long inp boot_overdrive_table->OverDriveTable.FanZeroRpmEnable; od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: + od_table->OverDriveTable.FanZeroRpmStopTemp = + boot_overdrive_table->OverDriveTable.FanZeroRpmStopTemp; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; default: dev_info(adev->dev, "Invalid table index: %ld\n", input); return -EINVAL; @@ -1877,6 +1905,27 @@ static int smu_v13_0_7_od_edit_dpm_table(struct smu_context *smu, od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: + if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { + dev_warn(adev->dev, "Zero RPM setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v13_0_7_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "zero RPM stop temperature setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanZeroRpmStopTemp = input[0]; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; + case PP_OD_RESTORE_DEFAULT_TABLE: if (size == 1) { ret = smu_v13_0_7_od_restore_table_single(smu, input[0]); @@ -2145,7 +2194,9 @@ static void smu_v13_0_7_set_supported_od_feature_mask(struct smu_context *smu) OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET | OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE | - OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET; + OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET | + OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE | + OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET; } static int smu_v13_0_7_set_default_od_settings(struct smu_context *smu) @@ -2213,6 +2264,8 @@ static int smu_v13_0_7_set_default_od_settings(struct smu_context *smu) user_od_table_bak.OverDriveTable.FanMinimumPwm; user_od_table->OverDriveTable.FanZeroRpmEnable = user_od_table_bak.OverDriveTable.FanZeroRpmEnable; + user_od_table->OverDriveTable.FanZeroRpmStopTemp = + user_od_table_bak.OverDriveTable.FanZeroRpmStopTemp; } smu_v13_0_7_set_supported_od_feature_mask(smu); -- 2.51.0 From 949d817c78cc6416d6e22f3f72a6960cd7412755 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 30 Oct 2024 14:41:41 +0530 Subject: [PATCH 16/16] drm/amdgpu/gfx11: Add cleaner shader for GFX11.0.3 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This commit adds the cleaner shader microcode for GFX11.0.3 GPUs. The cleaner shader is a piece of GPU code that is used to clear or initialize certain GPU resources, such as Local Data Share (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs). Clearing these resources is important for ensuring data isolation between different workloads running on the GPU. Without the cleaner shader, residual data from a previous workload could potentially be accessed by a subsequent workload, leading to data leaks and incorrect computation results. The cleaner shader microcode is represented as an array of 32-bit words (`gfx_11_0_3_cleaner_shader_hex`). This array is the binary representation of the cleaner shader code, which is written in a low-level GPU instruction set. When the cleaner shader feature is enabled, the AMDGPU driver loads this array into a specific location in the GPU memory. The GPU then reads this memory location to fetch and execute the cleaner shader instructions. The cleaner shader is executed automatically by the GPU at the end of each workload, before the next workload starts. This ensures that all GPU resources are in a clean state before the start of each workload. This addition is part of the cleaner shader feature implementation. The cleaner shader feature helps resource utilization by cleaning up GPU resources after they are used. It also enhances security and reliability by preventing data leaks between workloads. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Alex Deucher Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 17 +++ .../amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm | 118 ++++++++++++++++++ .../drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h | 56 +++++++++ 3 files changed, 191 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index cfe17a36b8ee..62e4c446793d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -46,6 +46,7 @@ #include "clearstate_gfx11.h" #include "v11_structs.h" #include "gfx_v11_0.h" +#include "gfx_v11_0_cleaner_shader.h" #include "gfx_v11_0_3.h" #include "nbio_v4_3.h" #include "mes_v11_0.h" @@ -1579,8 +1580,24 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 0, 3): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 2280 && + adev->gfx.pfp_fw_version >= 2370 && + adev->gfx.mec_fw_version >= 2450 && + adev->mes.fw_version[0] >= 99) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; + break; } /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm new file mode 100644 index 000000000000..9b90b66368c7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader. +//To turn this shader program on for complitaion change this to main and lower shader main to main_1 + +// Navi3 : Clear SGPRs, VGPRs and LDS +// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot +// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD +// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS) +// It takes 2 workgroups to use all of LDS: one on each CU of the WGP +// Each wave clears SGPRs 0 - 107 +// Each wave clears VGPRs 0 - 63 +// The first wave of the workgroup clears its 64KB of LDS +// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup +// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared. + +shader main + asic(GFX11) + type(CS) + wave_size(32) +// Note: original source code from SQ team + +// Takes about 2500 clocks to run. +// (theorhetical fastest = 1024clks vgpr + 640lds = 1660 clks) +// + S_BARRIER + + // + // CLEAR VGPRs + // + s_mov_b32 m0, 0x00000058 // Loop 96/8=12 times (loop unrolled for performance) + +label_0005: + v_movreld_b32 v0, 0 + v_movreld_b32 v1, 0 + v_movreld_b32 v2, 0 + v_movreld_b32 v3, 0 + v_movreld_b32 v4, 0 + v_movreld_b32 v5, 0 + v_movreld_b32 v6, 0 + v_movreld_b32 v7, 0 + s_sub_u32 m0, m0, 8 + s_cbranch_scc0 label_0005 + // + // + + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup + // CLEAR LDS + // + s_mov_b32 exec_lo, 0xffffffff + s_mov_b32 exec_hi, 0xffffffff + v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63) + v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63) + v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte) + s_mov_b32 s2, 0x00000003f // 64 loop iterations + s_mov_b32 m0, 0xffffffff + // Clear all of LDS space + // Each FirstWave of WorkGroup clears 64kbyte block + +label_001F: + ds_write2_b64 v1, v[2:3], v[2:3] offset1:32 + ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96 + v_add_co_u32 v1, vcc, 0x00000400, v1 + s_sub_u32 s2, s2, 1 + s_cbranch_scc0 label_001F + // + // CLEAR SGPRs + // +label_0023: + s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance) +label_sgpr_loop: + s_movreld_b32 s0, 0 + s_movreld_b32 s1, 0 + s_movreld_b32 s2, 0 + s_movreld_b32 s3, 0 + s_sub_u32 m0, m0, 4 + s_cbranch_scc0 label_sgpr_loop + + //clear vcc + s_mov_b64 vcc, 0 //clear vcc + s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR + s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR + s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1 + s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3 + s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5 + s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7 + s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9 + s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11 + s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13 + s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15 + + s_endpgm + +end + diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h new file mode 100644 index 000000000000..3218cc04f543 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* Define the cleaner shader gfx_11_0_3 */ +static const u32 gfx_11_0_3_cleaner_shader_hex[] = { + 0xb0804006, 0xbe8200ff, + 0x00000058, 0xbefd0080, + 0x7e008480, 0x7e028480, + 0x7e048480, 0x7e068480, + 0x7e088480, 0x7e0a8480, + 0x7e0c8480, 0x7e0e8480, + 0xbefd0002, 0x80828802, + 0xbfa1fff5, 0xbe8200ff, + 0x80000000, 0x8b020002, + 0xbfa10012, 0xbefe00c1, + 0xbeff00c1, 0xd71f0001, + 0x0001007f, 0xd7200001, + 0x0002027e, 0x16020288, + 0xbe8200bf, 0xbefd00c1, + 0xd9382000, 0x00020201, + 0xd9386040, 0x00040401, + 0xd7006a01, 0x000202ff, + 0x00000400, 0x80828102, + 0xbfa1fff7, 0xbefd00ff, + 0x00000068, 0xbe804280, + 0xbe814280, 0xbe824280, + 0xbe834280, 0x80fd847d, + 0xbfa1fffa, 0xbeea0180, + 0xbeec0180, 0xbeee0180, + 0xbef00180, 0xbef20180, + 0xbef40180, 0xbef60180, + 0xbef80180, 0xbefa0180, + 0xbfb00000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, +}; -- 2.51.0