www.infradead.org Git - users/dwmw2/linux.git/commitdiff
drm/amdgpu: Add msg handlers for SRIOV RAS Telemetry
author: Victor Skvortsov <victor.skvortsov@amd.com>
Wed, 30 Oct 2024 13:45:27 +0000 (09:45 -0400)
committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 11 Nov 2024 16:55:08 +0000 (11:55 -0500)
Add message handlers for RAS telemetry.

Signed-off-by: Victor Skvortsov <victor.skvortsov@amd.com>
Reviewed-by: Zhigang Luo <zhigang.luo@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c

index b650a2032c42bd8ebbea3522056f8c1568512811..f6eee57338df82e3ad227e76a096436e76917bc9 100644 (file)
@@ -95,6 +95,7 @@ struct amdgpu_virt_ops {
        void (*ras_poison_handler)(struct amdgpu_device *adev,
                                        enum amdgpu_ras_block block);
        bool (*rcvd_ras_intr)(struct amdgpu_device *adev);
+       int (*req_ras_err_count)(struct amdgpu_device *adev);
 };
 
 /*
index f47bd7ada4d79cd7b392583c03e056f5e570196f..4dcb72d1bdda21685e3b1488fccb203b9794f8ba 100644 (file)
@@ -61,15 +61,18 @@ static enum idh_event xgpu_nv_mailbox_peek_msg(struct amdgpu_device *adev)
 static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev,
                                   enum idh_event event)
 {
+       int r = 0;
        u32 reg;
 
        reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0);
-       if (reg != event)
+       if (reg == IDH_FAIL)
+               r = -EINVAL;
+       else if (reg != event)
                return -ENOENT;
 
        xgpu_nv_mailbox_send_ack(adev);
 
-       return 0;
+       return r;
 }
 
 static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev)
@@ -178,6 +181,9 @@ send_request:
                if (data1 != 0)
                        event = IDH_RAS_POISON_READY;
                break;
+       case IDH_REQ_RAS_ERROR_COUNT:
+               event = IDH_RAS_ERROR_COUNT_READY;
+               break;
        default:
                break;
        }
@@ -456,6 +462,11 @@ static bool xgpu_nv_rcvd_ras_intr(struct amdgpu_device *adev)
        return (msg == IDH_RAS_ERROR_DETECTED || msg == 0xFFFFFFFF);
 }
 
+static int xgpu_nv_req_ras_err_count(struct amdgpu_device *adev)
+{
+       return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_ERROR_COUNT);
+}
+
 const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
        .req_full_gpu   = xgpu_nv_request_full_gpu_access,
        .rel_full_gpu   = xgpu_nv_release_full_gpu_access,
@@ -466,4 +477,5 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
        .trans_msg = xgpu_nv_mailbox_trans_msg,
        .ras_poison_handler = xgpu_nv_ras_poison_handler,
        .rcvd_ras_intr = xgpu_nv_rcvd_ras_intr,
+       .req_ras_err_count = xgpu_nv_req_ras_err_count,
 };