www.infradead.org Git - users/dwmw2/linux.git/commitdiff
drm/amdgpu: bypass RAS error reset in some conditions
author Tao Zhou <tao.zhou1@amd.com>
Thu, 12 Oct 2023 06:33:37 +0000 (14:33 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 26 Oct 2023 22:41:22 +0000 (18:41 -0400)
PMFW is responsible for RAS error reset in some conditions, so the driver
can skip the operation.

v2: add a check for ras->in_recovery, which is set earlier than
amdgpu_in_reset.

v3: fix error in gpu reset check.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

index f4c2c737b12f5448d56dfacec9a315fcf6bac59d..303fbb6a48b66c073dbb4f3a9b0f911c4a939208 100644 (file)
@@ -1220,6 +1220,8 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
                enum amdgpu_ras_block block)
 {
        struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0);
+       struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+       const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
 
        if (!block_obj || !block_obj->hw_ops) {
                dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
@@ -1227,7 +1229,13 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev,
                return -EOPNOTSUPP;
        }
 
-       if (!amdgpu_ras_is_supported(adev, block))
+       /* skip ras error reset in gpu reset */
+       if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery)) &&
+           mca_funcs && mca_funcs->mca_set_debug_mode)
+               return -EOPNOTSUPP;
+
+       if (!amdgpu_ras_is_supported(adev, block) ||
+           !amdgpu_ras_get_mca_debug_mode(adev))
                return -EOPNOTSUPP;
 
        if (block_obj->hw_ops->reset_ras_error_count)