www.infradead.org Git - users/hch/misc.git/commitdiff
drm/amdgpu: add command to check address validity
author: YiPeng Chai <YiPeng.Chai@amd.com>
Wed, 16 Jul 2025 03:16:20 +0000 (11:16 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 28 Jul 2025 20:40:06 +0000 (16:40 -0400)
Add command to check address validity and remove
unused command codes.

v2:
 The command interface adds new parameters to support
 multiple address-check strategies.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h

index a0a14370745e4ed410a01bc3a062b277dfd072b8..527ffad475638bbe982f96382588f5c98c93d2bd 100644 (file)
@@ -128,6 +128,9 @@ const char *get_ras_block_str(struct ras_common_if *ras_block)
 
 #define MAX_FLUSH_RETIRE_DWORK_TIMES  100
 
+#define BYPASS_ALLOCATED_ADDRESS        0x0 /* flags value: reject blocks not owned by the calling task */
+#define BYPASS_INITIALIZATION_ADDRESS   0x1 /* flags value: reject blocks owned by the RAS init task */
+
 enum amdgpu_ras_retire_page_reservation {
        AMDGPU_RAS_RETIRE_PAGE_RESERVED,
        AMDGPU_RAS_RETIRE_PAGE_PENDING,
@@ -207,6 +210,49 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
        return 0;
 }
 
+static int amdgpu_check_address_validity(struct amdgpu_device *adev,
+                       uint64_t address, uint64_t flags)
+{
+       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+       struct amdgpu_vram_block_info blk_info;
+       uint64_t page_pfns[32] = {0};
+       int i, ret, count;
+
+       if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0))
+               return 0; /* no check is performed for UMC IP versions below 12.0.0 */
+
+       if ((address >= adev->gmc.mc_vram_size) ||
+           (address >= RAS_UMC_INJECT_ADDR_LIMIT))
+               return -EFAULT; /* address is past the VRAM size or the inject limit */
+
+       count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
+                               address, page_pfns, ARRAY_SIZE(page_pfns));
+       if (count <= 0)
+               return -EPERM; /* lookup returned an error or no pages */
+
+       for (i = 0; i < count; i++) {
+               memset(&blk_info, 0, sizeof(blk_info));
+               ret = amdgpu_vram_mgr_query_address_block_info(&adev->mman.vram_mgr,
+                                       page_pfns[i] << AMDGPU_GPU_PAGE_SHIFT, &blk_info);
+               if (!ret) {
+                       /* BYPASS_ALLOCATED_ADDRESS: the address under test is expected
+                        * to belong to the calling task, so reject it only when the
+                        * block is recorded against a different pid or comm.
+                        */
+                       if ((flags == BYPASS_ALLOCATED_ADDRESS) &&
+                           ((blk_info.task.pid != task_pid_nr(current)) ||
+                               strncmp(blk_info.task.comm, current->comm, TASK_COMM_LEN)))
+                               return -EACCES;
+                       else if ((flags == BYPASS_INITIALIZATION_ADDRESS) &&
+                               (blk_info.task.pid == con->init_task_pid) &&
+                               !strncmp(blk_info.task.comm, con->init_task_comm, TASK_COMM_LEN))
+                               return -EACCES; /* block matches the pid and comm saved in con->init_task_* */
+               }
+       }
+
+       return 0;
+}
+
 static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
                                        size_t size, loff_t *pos)
 {
@@ -297,6 +343,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
                op = 2;
        else if (strstr(str, "retire_page") != NULL)
                op = 3;
+       else if (strstr(str, "check_address") != NULL)
+               op = 4;
        else if (str[0] && str[1] && str[2] && str[3])
                /* ascii string, but commands are not matched. */
                return -EINVAL;
@@ -310,6 +358,15 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
                        data->op = op;
                        data->inject.address = address;
 
+                       return 0;
+               } else if (op == 4) {
+                       if (sscanf(str, "%*s 0x%llx 0x%llx", &address, &value) != 2 &&
+                           sscanf(str, "%*s %llu %llu", &address, &value) != 2)
+                               return -EINVAL;
+
+                       data->op = op;
+                       data->inject.address = address;
+                       data->inject.value = value;
                        return 0;
                }
 
@@ -500,6 +557,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
                        return size;
                else
                        return ret;
+       } else if (data.op == 4) {
+               ret = amdgpu_check_address_validity(adev, data.inject.address, data.inject.value);
+               return ret ? ret : size;
        }
 
        if (!amdgpu_ras_is_supported(adev, data.head.block))
@@ -4087,6 +4147,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
                        goto release_con;
        }
 
+       con->init_task_pid = task_pid_nr(current);
+       get_task_comm(con->init_task_comm, current);
+
        dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
                 "hardware ability[%x] ras_mask[%x]\n",
                 adev->ras_hw_enabled, adev->ras_enabled);
index 927d6bff734ae9160074cb5e2b2a182a7337e7e7..7f10a740216009e0d383fbf968576f06fcbe1b5a 100644 (file)
@@ -570,6 +570,9 @@ struct amdgpu_ras {
        struct ras_event_manager *event_mgr;
 
        uint64_t reserved_pages_in_bytes;
+
+       pid_t init_task_pid;
+       char init_task_comm[TASK_COMM_LEN];
 };
 
 struct ras_fs_data {