www.infradead.org Git - users/dwmw2/linux.git/commitdiff
drm/amdgpu: add return value for convert_ras_err_addr
author: Tao Zhou <tao.zhou1@amd.com>
Fri, 18 Oct 2024 08:43:44 +0000 (16:43 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Tue, 10 Dec 2024 15:26:46 +0000 (10:26 -0500)
So the upper layer can return failure directly if address conversion fails.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
drivers/gpu/drm/amd/amdgpu/umc_v12_0.c

index 5dd011a73ace05333b51aa9171199abfda17d3da..64e1a3406c99e77e001ecb6f1468ee34deb947f7 100644 (file)
@@ -461,11 +461,14 @@ int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
 
        addr_out.pa.pa = pa_addr;
 
-       if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
-               adev->umc.ras->convert_ras_err_addr(adev, &err_data, NULL,
+       if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+               ret = adev->umc.ras->convert_ras_err_addr(adev, &err_data, NULL,
                                &addr_out, false);
-       else
+               if (ret)
+                       goto out;
+       } else {
                goto out;
+       }
 
        for (i = 0; i < adev->umc.retire_unit; i++) {
                if (pos >= len)
@@ -488,6 +491,7 @@ int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
 {
        struct ta_ras_query_address_input addr_in;
        struct ta_ras_query_address_output addr_out;
+       int ret;
 
        memset(&addr_in, 0, sizeof(addr_in));
        addr_in.ma.err_addr = err_addr;
@@ -496,11 +500,14 @@ int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
        addr_in.ma.node_inst = node;
        addr_in.ma.socket_id = socket;
 
-       if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr)
-               adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in,
+       if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) {
+               ret = adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in,
                                &addr_out, dump_addr);
-       else
+               if (ret)
+                       return ret;
+       } else {
                return 0;
+       }
 
        *addr = addr_out.pa.pa;
 
index abde7597bda8972835c316fbfbc7363a714f80d2..f45408a6ff03fd52d09de16a32367e1511a1ef50 100644 (file)
@@ -70,7 +70,7 @@ struct amdgpu_umc_ras {
                        enum amdgpu_mca_error_type type, void *ras_error_status);
        int (*update_ecc_status)(struct amdgpu_device *adev,
                        uint64_t status, uint64_t ipid, uint64_t addr);
-       void (*convert_ras_err_addr)(struct amdgpu_device *adev,
+       int (*convert_ras_err_addr)(struct amdgpu_device *adev,
                        struct ras_err_data *err_data,
                        struct ta_ras_query_address_input *addr_in,
                        struct ta_ras_query_address_output *addr_out,
index 9b93ff769b86d34d3c3aa6567ae4ae7a97125d38..ce60fd6675cedb4ddb58241218a71088e8a26e72 100644 (file)
@@ -173,7 +173,7 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev,
        umc_v12_0_reset_error_count(adev);
 }
 
-static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
+static int umc_v12_0_convert_error_address(struct amdgpu_device *adev,
                                        struct ras_err_data *err_data,
                                        struct ta_ras_query_address_input *addr_in,
                                        struct ta_ras_query_address_output *addr_out,
@@ -183,6 +183,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
        uint64_t soc_pa, retired_page, column, err_addr;
        struct ta_ras_query_address_output addr_out_tmp;
        struct ta_ras_query_address_output *paddr_out;
+       int ret = 0;
 
        if (!addr_out)
                paddr_out = &addr_out_tmp;
@@ -193,11 +194,12 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
        if (addr_in) {
                err_addr = addr_in->ma.err_addr;
                addr_in->addr_type = TA_RAS_MCA_TO_PA;
-               if (psp_ras_query_address(&adev->psp, addr_in, paddr_out)) {
+               ret = psp_ras_query_address(&adev->psp, addr_in, paddr_out);
+               if (ret) {
                        dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx",
                                err_addr);
 
-                       return;
+                       return ret;
                }
 
                bank = paddr_out->pa.bank;
@@ -209,7 +211,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
        soc_pa = paddr_out->pa.pa;
 
        if (!err_data && !dump_addr)
-               return;
+               return ret;
 
        col = (err_addr >> 1) & 0x1fULL;
        /* clear [C3 C2] in soc physical address */
@@ -241,6 +243,8 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
                        amdgpu_umc_fill_error_record(err_data, err_addr,
                                retired_page, channel_index, umc_inst);
        }
+
+       return ret;
 }
 
 static int umc_v12_0_query_error_address(struct amdgpu_device *adev,