From e90711946b53590371ecce32e8fcc381a99d6333 Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Tue, 25 Feb 2025 18:08:02 -0500 Subject: [PATCH 01/16] drm/amdkfd: clamp queue size to minimum If queue size is less than minimum, clamp it to minimum to prevent underflow when writing queue mqd. Signed-off-by: David Yat Sin Reviewed-by: Jay Cornwall Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 10 ++++++++++ include/uapi/linux/kfd_ioctl.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 433de9e9a77e..8c2e92378b49 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -212,6 +212,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, return -EINVAL; } + if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) { + args->ring_size = KFD_MIN_QUEUE_RING_SIZE; + pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE"); + } + if (!access_ok((const void __user *) args->read_pointer_address, sizeof(uint32_t))) { pr_err("Can't access read pointer\n"); @@ -461,6 +466,11 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, return -EINVAL; } + if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) { + args->ring_size = KFD_MIN_QUEUE_RING_SIZE; + pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE"); + } + properties.queue_address = args->ring_base_address; properties.queue_size = args->ring_size; properties.queue_percent = args->queue_percentage & 0xFF; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index fa9f9846b88e..b0160b09987c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -62,6 +62,8 @@ struct kfd_ioctl_get_version_args { #define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PRIORITY 15 +#define KFD_MIN_QUEUE_RING_SIZE 1024 + struct kfd_ioctl_create_queue_args { __u64 ring_base_address; /* to KFD */ __u64 write_pointer_address; /* from KFD */ -- 2.51.0 From 509d662a57ef5b1b460ec000913e9553423e7bd3 Mon Sep 17 00:00:00 2001 From: Xiaogang Chen Date: Mon, 24 Feb 2025 16:50:50 -0600 Subject: [PATCH 02/16] drm/amdkfd: remove kfd_pasid.c from amdgpu driver build Since kfd uses pasid values from graphic driver now do not need use kfd pasid fucntions. Signed-off-by: Xiaogang Chen Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/Makefile | 1 - drivers/gpu/drm/amd/amdkfd/kfd_pasid.c | 46 -------------------------- 2 files changed, 47 deletions(-) delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_pasid.c diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 0d3d8972240d..0ce08113c9f0 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -27,7 +27,6 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_device.o \ $(AMDKFD_PATH)/kfd_chardev.o \ $(AMDKFD_PATH)/kfd_topology.o \ - $(AMDKFD_PATH)/kfd_pasid.o \ $(AMDKFD_PATH)/kfd_doorbell.o \ $(AMDKFD_PATH)/kfd_flat_memory.o \ $(AMDKFD_PATH)/kfd_process.o \ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c deleted file mode 100644 index 8896426e0556..000000000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright 2014-2022 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include "kfd_priv.h" -#include "amdgpu_ids.h" - -static unsigned int pasid_bits = 16; -static bool pasids_allocated; /* = false */ - -u32 kfd_pasid_alloc(void) -{ - int r = amdgpu_pasid_alloc(pasid_bits); - - if (r > 0) { - pasids_allocated = true; - return r; - } - - return 0; -} - -void kfd_pasid_free(u32 pasid) -{ - amdgpu_pasid_free(pasid); -} -- 2.51.0 From 0107c595c5d0521c6397836be0767532121ef16c Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Thu, 27 Feb 2025 10:13:53 +0800 Subject: [PATCH 03/16] drm/amd/pm: add fan abnormal detection add fan abnormal detection on smu v14.0.2&smu v14.0.3 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index adbb6332376e..14a5760082b8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -950,6 +950,14 @@ static int smu_v14_0_irq_process(struct amdgpu_device *adev, uint32_t client_id = entry->client_id; uint32_t src_id = entry->src_id; + /* + * ctxid is used to distinguish different + * events for SMCToHost interrupt. + */ + uint32_t ctxid = entry->src_data[0]; + uint32_t data; + uint32_t high; + if (client_id == SOC15_IH_CLIENTID_THM) { switch (src_id) { case THM_11_0__SRCID__THM_DIG_THERM_L2H: @@ -964,6 +972,50 @@ static int smu_v14_0_irq_process(struct amdgpu_device *adev, src_id); break; } + } else if (client_id == SOC15_IH_CLIENTID_MP1) { + if (src_id == SMU_IH_INTERRUPT_ID_TO_DRIVER) { + /* ACK SMUToHost interrupt */ + data = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL); + data = REG_SET_FIELD(data, MP1_SMN_IH_SW_INT_CTRL, INT_ACK, 1); + WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, data); + + switch (ctxid) { + case SMU_IH_INTERRUPT_CONTEXT_ID_FAN_ABNORMAL: + high = smu->thermal_range.software_shutdown_temp + + smu->thermal_range.software_shutdown_temp_offset; + high = min_t(typeof(high), + SMU_THERMAL_MAXIMUM_ALERT_TEMP, + high); + dev_emerg(adev->dev, "Reduce soft CTF limit to %d (by an offset %d)\n", + high, + smu->thermal_range.software_shutdown_temp_offset); + + data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL); + data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL, + DIG_THERM_INTH, + (high & 0xff)); + data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data); + break; + case SMU_IH_INTERRUPT_CONTEXT_ID_FAN_RECOVERY: + high = min_t(typeof(high), + SMU_THERMAL_MAXIMUM_ALERT_TEMP, + smu->thermal_range.software_shutdown_temp); + dev_emerg(adev->dev, "Recover soft CTF limit to %d\n", high); + + data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL); + data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL, + DIG_THERM_INTH, + (high & 0xff)); + data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data); + break; + default: + dev_dbg(adev->dev, "Unhandled context id %d from client:%d!\n", + ctxid, client_id); + break; + } + } } return 0; -- 2.51.0 From 6f16d101dab5ec6e8fc9567769f73ae8baa3fe38 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 6 Feb 2025 16:16:43 +0530 Subject: [PATCH 04/16] drm/amdgpu: Move xgmi definitions to xgmi header Move definitions related to xgmi to amdgpu_xgmi header Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 23 +--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 8 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 35 +++++++++++++++++------- 3 files changed, 34 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 459a30fe239f..bd7fc123b8f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -29,6 +29,7 @@ #include #include "amdgpu_irq.h" +#include "amdgpu_xgmi.h" #include "amdgpu_ras.h" /* VA hole for 48bit addresses on Vega10 */ @@ -174,28 +175,6 @@ struct amdgpu_gmc_funcs { bool (*need_reset_on_init)(struct amdgpu_device *adev); }; -struct amdgpu_xgmi_ras { - struct amdgpu_ras_block_object ras_block; -}; - -struct amdgpu_xgmi { - /* from psp */ - u64 node_id; - u64 hive_id; - /* fixed per family */ - u64 node_segment_size; - /* physical node (0-3) */ - unsigned physical_node_id; - /* number of nodes (0-4) */ - unsigned num_physical_nodes; - /* gpu list in the same hive */ - struct list_head head; - bool supported; - struct ras_common_if *ras_if; - bool connected_to_cpu; - struct amdgpu_xgmi_ras *ras; -}; - struct amdgpu_mem_partition_info { union { struct { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index c313c2cf6969..59e44d20124f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1714,3 +1714,11 @@ int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev, return r; } + +bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, + struct amdgpu_device *bo_adev) +{ + return (amdgpu_use_xgmi_p2p && adev != bo_adev && + adev->gmc.xgmi.hive_id && + adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 924da0bec509..1d4e822652a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -23,7 +23,6 @@ #define __AMDGPU_XGMI_H__ #include -#include "amdgpu_psp.h" #include "amdgpu_ras.h" struct amdgpu_hive_info { @@ -71,7 +70,29 @@ enum amdgpu_xgmi_bw_unit { AMDGPU_XGMI_BW_UNIT_MBYTES }; -extern struct amdgpu_xgmi_ras xgmi_ras; +struct amdgpu_xgmi_ras { + struct amdgpu_ras_block_object ras_block; +}; +extern struct amdgpu_xgmi_ras xgmi_ras; + +struct amdgpu_xgmi { + /* from psp */ + u64 node_id; + u64 hive_id; + /* fixed per family */ + u64 node_segment_size; + /* physical node (0-3) */ + unsigned physical_node_id; + /* number of nodes (0-4) */ + unsigned num_physical_nodes; + /* gpu list in the same hive */ + struct list_head head; + bool supported; + struct ras_common_if *ras_if; + bool connected_to_cpu; + struct amdgpu_xgmi_ras *ras; +}; + struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive); int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); @@ -86,14 +107,8 @@ bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr); -static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, - struct amdgpu_device *bo_adev) -{ - return (amdgpu_use_xgmi_p2p && - adev != bo_adev && - adev->gmc.xgmi.hive_id && - adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id); -} +bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, + struct amdgpu_device *bo_adev); int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev); int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev); -- 2.51.0 From 485993e2f1a6bf3d1e8fb5a38c82edd3ae697ad9 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 6 Feb 2025 17:24:51 +0530 Subject: [PATCH 05/16] drm/amdgpu: Add xgmi speed/width related info Add APIs to initialize XGMI speed, width details and get to max bandwidth supported. It is assumed that a device only supports same generation of XGMI links with uniform width. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 11 +++++++++++ 2 files changed, 34 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 59e44d20124f..9c539f26e5fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1722,3 +1722,26 @@ bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, adev->gmc.xgmi.hive_id && adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id); } + +void amdgpu_xgmi_early_init(struct amdgpu_device *adev) +{ + if (!adev->gmc.xgmi.supported) + return; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 4, 0): + case IP_VERSION(9, 4, 1): + case IP_VERSION(9, 4, 2): + adev->gmc.xgmi.max_speed = XGMI_SPEED_25GT; + adev->gmc.xgmi.max_width = 16; + break; + case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): + adev->gmc.xgmi.max_speed = XGMI_SPEED_32GT; + adev->gmc.xgmi.max_width = 16; + break; + default: + break; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 1d4e822652a1..32dabba4062f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -25,6 +25,12 @@ #include #include "amdgpu_ras.h" +enum amdgpu_xgmi_link_speed { + XGMI_SPEED_16GT = 16, + XGMI_SPEED_25GT = 25, + XGMI_SPEED_32GT = 32 +}; + struct amdgpu_hive_info { struct kobject kobj; uint64_t hive_id; @@ -91,6 +97,8 @@ struct amdgpu_xgmi { struct ras_common_if *ras_if; bool connected_to_cpu; struct amdgpu_xgmi_ras *ras; + enum amdgpu_xgmi_link_speed max_speed; + uint8_t max_width; }; struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); @@ -118,4 +126,7 @@ int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev, int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num); +void amdgpu_xgmi_early_init(struct amdgpu_device *adev); +uint32_t amdgpu_xgmi_get_max_bandwidth(struct amdgpu_device *adev); + #endif -- 2.51.0 From f9234217d0167f43cc0cdccc39353c37de1d028d Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Wed, 26 Feb 2025 12:52:50 +0800 Subject: [PATCH 06/16] drm/amd/amdgpu: Add support for xgmi_v6_4_1 Add support for xgmi_v6_4_1 and use it appropriate places Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 9c539f26e5fd..7fdf30f1161c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -315,6 +315,7 @@ int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num) switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { case IP_VERSION(6, 4, 0): + case IP_VERSION(6, 4, 1): xgmi_state_reg_val = xgmi_v6_4_get_link_status(adev, global_link_num); break; default: @@ -1205,6 +1206,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_comm switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { case IP_VERSION(6, 4, 0): + case IP_VERSION(6, 4, 1): r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL, &xgmi_v6_4_0_aca_info, NULL); if (r) @@ -1264,6 +1266,7 @@ static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { case IP_VERSION(6, 4, 0): + case IP_VERSION(6, 4, 1): for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) pcs_clear_status(adev, xgmi3x16_pcs_err_status_reg_v6_4[i]); @@ -1298,6 +1301,7 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { case IP_VERSION(6, 4, 0): + case IP_VERSION(6, 4, 1): xgmi_v6_4_0_reset_ras_error_count(adev); break; default: @@ -1323,7 +1327,9 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0) || amdgpu_ip_version(adev, XGMI_HWIP, 0) == - IP_VERSION(6, 4, 0)) { + IP_VERSION(6, 4, 0) || + amdgpu_ip_version(adev, XGMI_HWIP, 0) == + IP_VERSION(6, 4, 1)) { pcs_ras_fields = &xgmi3x16_pcs_ras_fields[0]; field_array_size = ARRAY_SIZE(xgmi3x16_pcs_ras_fields); } else { @@ -1431,6 +1437,7 @@ static void amdgpu_xgmi_legacy_query_ras_error_count(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { case IP_VERSION(6, 4, 0): + case IP_VERSION(6, 4, 1): /* check xgmi3x16 pcs error */ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) { data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_v6_4[i]); @@ -1527,6 +1534,7 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, { switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { case IP_VERSION(6, 4, 0): + case IP_VERSION(6, 4, 1): xgmi_v6_4_0_query_ras_error_count(adev, ras_error_status); break; default: -- 2.51.0 From ee3ed100663d2ae8280bd5fc63b3e2c13c689cce Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Tue, 25 Feb 2025 10:04:06 -0500 Subject: [PATCH 07/16] drm/amdkfd: Remove kfd_process_hw_exception worker With GPU reset-domain worker implemented, KFD hw_exception worker is not needed any more, just call amdgpu_amdkfd_gpu_reset directly from kfd_hws_hang. Suggested-by: Felix Kuehling Signed-off-by: Philip Yang Reviewed-by: Lijo Lazar Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 11 +---------- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 1 - 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 94b1ac8a4735..91e4988dc1e3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -66,7 +66,6 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm, static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q); static int allocate_sdma_queue(struct device_queue_manager *dqm, struct queue *q, const uint32_t *restore_sdma_id); -static void kfd_process_hw_exception(struct work_struct *work); static inline enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) @@ -170,7 +169,7 @@ static void kfd_hws_hang(struct device_queue_manager *dqm) /* * Issue a GPU reset if HWS is unresponsive */ - schedule_work(&dqm->hw_exception_work); + amdgpu_amdkfd_gpu_reset(dqm->dev->adev); } static int convert_to_mes_queue_type(int queue_type) @@ -1740,7 +1739,6 @@ static int initialize_cpsch(struct device_queue_manager *dqm) dqm->active_cp_queue_count = 0; dqm->gws_queue_count = 0; dqm->active_runlist = false; - INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); dqm->trap_debug_vmid = 0; init_sdma_bitmaps(dqm); @@ -3080,13 +3078,6 @@ int kfd_evict_process_device(struct kfd_process_device *pdd) return ret; } -static void kfd_process_hw_exception(struct work_struct *work) -{ - struct device_queue_manager *dqm = container_of(work, - struct device_queue_manager, hw_exception_work); - amdgpu_amdkfd_gpu_reset(dqm->dev->adev); -} - int reserve_debug_trap_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 09ab36f8e8c6..7146e227e2c1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -269,7 +269,6 @@ struct device_queue_manager { /* hw exception */ bool is_hws_hang; bool is_resetting; - struct work_struct hw_exception_work; struct kfd_mem_obj hiq_sdma_mqd; bool sched_running; bool sched_halt; -- 2.51.0 From 1b9366c601039d60546794c63fbb83ce8e53b978 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 17 Feb 2025 20:08:29 -0500 Subject: [PATCH 08/16] drm/amdkfd: KFD release_work possible circular locking If waiting for gpu reset done in KFD release_work, thers is WARNING: possible circular locking dependency detected #2 kfd_create_process kfd_process_mutex flush kfd release work #1 kfd release work wait for amdgpu reset work #0 amdgpu_device_gpu_reset kgd2kfd_pre_reset kfd_process_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock((work_completion)(&p->release_work)); lock((wq_completion)kfd_process_wq); lock((work_completion)(&p->release_work)); lock((wq_completion)amdgpu-reset-dev); To fix this, KFD create process move flush release work outside kfd_process_mutex. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 1067afdb456e..2715ca53e9da 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -838,6 +838,14 @@ struct kfd_process *kfd_create_process(struct task_struct *thread) return ERR_PTR(-EINVAL); } + /* If the process just called exec(3), it is possible that the + * cleanup of the kfd_process (following the release of the mm + * of the old process image) is still in the cleanup work queue. + * Make sure to drain any job before trying to recreate any + * resource for this process. + */ + flush_workqueue(kfd_process_wq); + /* * take kfd processes mutex before starting of process creation * so there won't be a case where two threads of the same process @@ -858,14 +866,6 @@ struct kfd_process *kfd_create_process(struct task_struct *thread) if (process) { pr_debug("Process already found\n"); } else { - /* If the process just called exec(3), it is possible that the - * cleanup of the kfd_process (following the release of the mm - * of the old process image) is still in the cleanup work queue. - * Make sure to drain any job before trying to recreate any - * resource for this process. - */ - flush_workqueue(kfd_process_wq); - process = create_process(thread); if (IS_ERR(process)) goto out; -- 2.51.0 From f0b4440cdc1807bb6ec3dce0d6de81170803569b Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 6 Feb 2025 17:50:13 -0500 Subject: [PATCH 09/16] drm/amdkfd: Fix mode1 reset crash issue If HW scheduler hangs and mode1 reset is used to recover GPU, KFD signal user space to abort the processes. After process abort exit, user queues still use the GPU to access system memory before h/w is reset while KFD cleanup worker free system memory and free VRAM. There is use-after-free race bug that KFD allocate and reuse the freed system memory, and user queue write to the same system memory to corrupt the data structure and cause driver crash. To fix this race, KFD cleanup worker terminate user queues, then flush reset_domain wq to wait for any GPU ongoing reset complete, and then free outstanding BOs. Signed-off-by: Philip Yang Reviewed-by: Lijo Lazar Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 2715ca53e9da..7c0c24732481 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -35,6 +35,7 @@ #include #include "amdgpu_amdkfd.h" #include "amdgpu.h" +#include "amdgpu_reset.h" struct mm_struct; @@ -1133,6 +1134,17 @@ static void kfd_process_remove_sysfs(struct kfd_process *p) p->kobj = NULL; } +/* + * If any GPU is ongoing reset, wait for reset complete. + */ +static void kfd_process_wait_gpu_reset_complete(struct kfd_process *p) +{ + int i; + + for (i = 0; i < p->n_pdds; i++) + flush_workqueue(p->pdds[i]->dev->adev->reset_domain->wq); +} + /* No process locking is needed in this function, because the process * is not findable any more. We must assume that no other thread is * using it any more, otherwise we couldn't safely free the process @@ -1147,6 +1159,11 @@ static void kfd_process_wq_release(struct work_struct *work) kfd_process_dequeue_from_all_devices(p); pqm_uninit(&p->pqm); + /* + * If GPU in reset, user queues may still running, wait for reset complete. + */ + kfd_process_wait_gpu_reset_complete(p); + /* Signal the eviction fence after user mode queues are * destroyed. This allows any BOs to be freed without * triggering pointless evictions or waiting for fences. -- 2.51.0 From 0f3fda3117507e22e0c8bfe1849ea483a6e1d793 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 26 Feb 2025 07:10:38 +0530 Subject: [PATCH 10/16] drm/amdgpu: Fix parameter annotations for VCN clock gating functions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The previous references to a non-existent `adev` parameter have been removed & corrected to reflect the use of the `vinst` pointer, which points to the VCN instance structure, in the below files: - vcn_v1_0.c - vcn_v2_0.c - vcn_v3_0.c Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c:624: warning: Function parameter or struct member 'vinst' not described in 'vcn_v1_0_enable_clock_gating' drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c:624: warning: Excess function parameter 'adev' description in 'vcn_v1_0_enable_clock_gating' drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c:376: warning: Function parameter or struct member 'vinst' not described in 'vcn_v2_0_mc_resume' drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c:376: warning: Excess function parameter 'adev' description in 'vcn_v2_0_mc_resume' drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c:776: warning: Function parameter or struct member 'vinst' not described in 'vcn_v3_0_disable_clock_gating' drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c:776: warning: Excess function parameter 'adev' description in 'vcn_v3_0_disable_clock_gating' drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c:776: warning: Excess function parameter 'inst' description in 'vcn_v3_0_disable_clock_gating' drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c:965: warning: Function parameter or struct member 'vinst' not described in 'vcn_v3_0_enable_clock_gating' drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c:965: warning: Excess function parameter 'adev' description in 'vcn_v3_0_enable_clock_gating' drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c:965: warning: Excess function parameter 'inst' description in 'vcn_v3_0_enable_clock_gating' Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 6 ++---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 8bad63282de4..21b57c29bf7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -616,7 +616,7 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst) /** * vcn_v1_0_enable_clock_gating - enable VCN clock gating * - * @adev: amdgpu_device pointer + * @vinst: Pointer to the VCN instance structure * * Enable clock gating for VCN block */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index f53feb60772e..8e7a36f26e9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -368,7 +368,7 @@ static int vcn_v2_0_resume(struct amdgpu_ip_block *ip_block) /** * vcn_v2_0_mc_resume - memory controller programming * - * @adev: amdgpu_device pointer + * @vinst: Pointer to the VCN instance structure * * Let the VCN memory controller know it's offsets */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index a3f16fd69927..22ae1939476f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -767,8 +767,7 @@ static void vcn_v3_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst) /** * vcn_v3_0_disable_clock_gating - disable VCN clock gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: Pointer to the VCN instance structure * * Disable clock gating for VCN block */ @@ -956,8 +955,7 @@ static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst, /** * vcn_v3_0_enable_clock_gating - enable VCN clock gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: Pointer to the VCN instance structure * * Enable clock gating for VCN block */ -- 2.51.0 From 7919b4cad5545ed93778f11881ceee72e4dbed66 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 20 Feb 2025 16:02:13 -0500 Subject: [PATCH 11/16] drm/amdkfd: Fix pqm_destroy_queue race with GPU reset If GPU in reset, destroy_queue return -EIO, pqm_destroy_queue should delete the queue from process_queue_list and free the resource. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 93647ee27325..662c595ce783 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -548,7 +548,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n", pdd->pasid, pqn->q->properties.queue_id, retval); - if (retval != -ETIME) + if (retval != -ETIME && retval != -EIO) goto err_destroy_queue; } kfd_procfs_del_queue(pqn->q); -- 2.51.0 From fe9d0061c413f8fb8c529b18b592b04170850ded Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 10 Feb 2025 09:42:31 -0500 Subject: [PATCH 12/16] drm/amdkfd: debugfs hang_hws skip GPU with MES debugfs hang_hws is used by GPU reset test with HWS, for MES this crash the kernel with NULL pointer access because dqm->packet_mgr is not setup for MES path. Skip GPU with MES for now, MES hang_hws debugfs interface will be supported later. Signed-off-by: Philip Yang Reviewed-by: Kent Russell Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 095e73790dc4..b9c82be6ce13 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1604,6 +1604,11 @@ int kfd_debugfs_hang_hws(struct kfd_node *dev) return -EINVAL; } + if (dev->kfd->shared_resources.enable_mes) { + dev_err(dev->adev->dev, "Inducing MES hang is not supported\n"); + return -EINVAL; + } + return dqm_debugfs_hang_hws(dev->dqm); } -- 2.51.0 From 7d83c129a8d7df23334d4a35bca9090a26b0a118 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 26 Feb 2025 07:20:51 +0530 Subject: [PATCH 13/16] drm/amdgpu: Fix parameter annotation in vcn_v5_0_0_is_idle MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Update parameter description in the vcn_v5_0_0_is_idle function Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c:1231: warning: Function parameter or struct member 'ip_block' not described in 'vcn_v5_0_0_is_idle' drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c:1231: warning: Excess function parameter 'handle' description in 'vcn_v5_0_0_is_idle' Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index e07b500235b5..d99d05f42f1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -1223,7 +1223,7 @@ static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev) /** * vcn_v5_0_0_is_idle - check VCN block is idle * - * @handle: amdgpu_device pointer + * @ip_block: Pointer to the amdgpu_ip_block structure * * Check whether VCN block is idle */ -- 2.51.0 From ce43abd7ec9464cf954f90e1c69e11768b02fa0a Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Thu, 27 Feb 2025 00:05:04 -0500 Subject: [PATCH 14/16] drm/amdgpu: fix spelling typos Found some typos while exploring amdgpu code. Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 3 ++- drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c6e5c50a3322..4eefa17fa39b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -269,7 +269,7 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc * @mc: memory controller structure holding memory information * @gart_placement: GART placement policy with respect to VRAM * - * Function will place try to place GART before or after VRAM. + * Function will try to place GART before or after VRAM. * If GART size is bigger than space left then we ajust GART size. * Thus function will never fails. */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 6954848d34d4..5dbaebb592b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -98,7 +98,7 @@ static void uvd_v3_1_ring_emit_ib(struct amdgpu_ring *ring, } /** - * uvd_v3_1_ring_emit_fence - emit an fence & trap command + * uvd_v3_1_ring_emit_fence - emit a fence & trap command * * @ring: amdgpu_ring pointer * @addr: address @@ -242,7 +242,7 @@ static void uvd_v3_1_mc_resume(struct amdgpu_device *adev) uint64_t addr; uint32_t size; - /* programm the VCPU memory controller bits 0-27 */ + /* program the VCPU memory controller bits 0-27 */ addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3; WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr); @@ -416,7 +416,7 @@ static int uvd_v3_1_start(struct amdgpu_device *adev) /* Set the write pointer delay */ WREG32(mmUVD_RBC_RB_WPTR_CNTL, 0); - /* programm the 4GB memory segment for rptr and ring buffer */ + /* Program the 4GB memory segment for rptr and ring buffer */ WREG32(mmUVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) | (0x7 << 16) | (0x1 << 31)); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index db6644e3907c..4b96fd583772 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -302,7 +302,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev) /* enable VCPU clock */ WREG32(mmUVD_VCPU_CNTL, 1 << 9); - /* disable interupt */ + /* disable interrupt */ WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1)); #ifdef __BIG_ENDIAN @@ -312,6 +312,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev) #endif WREG32(mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); WREG32(mmUVD_MP_SWAP_CNTL, mp_swap_cntl); + /* initialize UVD memory controller */ WREG32(mmUVD_LMI_CTRL, 0x203108); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index a68fef65219d..8c8c02606d25 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -280,7 +280,7 @@ static int vce_v2_0_stop(struct amdgpu_device *adev) if (vce_v2_0_lmi_clean(adev)) { - DRM_INFO("vce is not idle \n"); + DRM_INFO("VCE is not idle \n"); return 0; } -- 2.51.0 From edbf0f302dce5882424d37e460ad8c0f76126115 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Thu, 27 Feb 2025 00:05:05 -0500 Subject: [PATCH 15/16] drm/radeon: fix spelling typos Found some typos while exploring radeon code. Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 6 +++--- drivers/gpu/drm/radeon/radeon_fence.c | 2 +- drivers/gpu/drm/radeon/si.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 6f071e61f764..bbd39348a7ab 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -530,7 +530,7 @@ int radeon_wb_init(struct radeon_device *rdev) * @mc: memory controller structure holding memory informations * @base: base address at which to put VRAM * - * Function will place try to place VRAM at base address provided + * Function will try to place VRAM at base address provided * as parameter (which is so far either PCI aperture address or * for IGP TOM base address). * @@ -557,7 +557,7 @@ int radeon_wb_init(struct radeon_device *rdev) * * Note 3: when limiting vram it's safe to overwritte real_vram_size because * we are not in case where real_vram_size is inferior to mc_vram_size (ie - * note afected by bogus hw of Novell bug 204882 + along with lots of ubuntu + * not affected by bogus hw of Novell bug 204882 + along with lots of ubuntu * ones) * * Note 4: IGP TOM addr should be the same as the aperture addr, we don't @@ -594,7 +594,7 @@ void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 * @rdev: radeon device structure holding all necessary informations * @mc: memory controller structure holding memory informations * - * Function will place try to place GTT before or after VRAM. + * Function will try to place GTT before or after VRAM. * * If GTT size is bigger than space left then we ajust GTT size. * Thus function will never fails. diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index daff61586be5..8ff4f18b51a9 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -840,7 +840,7 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) } radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring); rdev->fence_drv[ring].initialized = true; - dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx\n", + dev_info(rdev->dev, "fence driver on ring %d uses gpu addr 0x%016llx\n", ring, rdev->fence_drv[ring].gpu_addr); return 0; } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 6c95575ce109..26197aceb001 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6198,7 +6198,7 @@ static inline u32 si_get_ih_wptr(struct radeon_device *rdev) if (wptr & RB_OVERFLOW) { wptr &= ~RB_OVERFLOW; - /* When a ring buffer overflow happen start parsing interrupt + /* When a ring buffer overflow happens, start parsing interrupts * from the last not overwritten vector (wptr + 16). Hopefully * this should allow us to catchup. */ -- 2.51.0 From 899634a57abcbdd62367db6194623b13372d9da8 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Thu, 27 Feb 2025 00:05:06 -0500 Subject: [PATCH 16/16] drm/amdgpu: fix spelling typos in SI Fix typos Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 026e8376e2c0..f90e07375396 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -909,7 +909,7 @@ static const u32 hainan_mgcg_cgcg_init[] = /* XXX: update when we support VCE */ #if 0 -/* tahiti, pitcarin, verde */ +/* tahiti, pitcairn, verde */ static const struct amdgpu_video_codec_info tahiti_video_codecs_encode_array[] = { { @@ -940,7 +940,7 @@ static const struct amdgpu_video_codecs hainan_video_codecs_encode = .codec_array = NULL, }; -/* tahiti, pitcarin, verde, oland */ +/* tahiti, pitcairn, verde, oland */ static const struct amdgpu_video_codec_info tahiti_video_codecs_decode_array[] = { { @@ -1888,7 +1888,7 @@ static int si_vce_send_vcepll_ctlreq(struct amdgpu_device *adev) WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK); if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) { - DRM_ERROR("Timeout setting UVD clocks!\n"); + DRM_ERROR("Timeout setting VCE clocks!\n"); return -ETIMEDOUT; } -- 2.51.0