www.infradead.org Git - users/dwmw2/linux.git/commitdiff
drm/amdgpu: Workaround to skip kiq ring test during ras gpu recovery
author Stanley.Yang <Stanley.Yang@amd.com>
Tue, 17 Oct 2023 13:49:09 +0000 (21:49 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 19 Oct 2023 22:26:52 +0000 (18:26 -0400)
This is a workaround: the kiq ring test fails in the suspend stage when
doing ras recovery.

Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

index 9a158018ae165230e5bd54248599460e55693b9e..c92e0aba69e157a4c903d40aa4e35374e4283b5c 100644 (file)
@@ -29,6 +29,7 @@
 #include "amdgpu_rlc.h"
 #include "amdgpu_ras.h"
 #include "amdgpu_xcp.h"
+#include "amdgpu_xgmi.h"
 
 /* delay 0.1 second to enable gfx off feature */
 #define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)
@@ -501,6 +502,9 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
 {
        struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        struct amdgpu_ring *kiq_ring = &kiq->ring;
+       struct amdgpu_hive_info *hive;
+       struct amdgpu_ras *ras;
+       int hive_ras_recovery = 0;
        int i, r = 0;
        int j;
 
@@ -521,6 +525,23 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
                                           RESET_QUEUES, 0, 0);
        }
 
+       /**
+        * This is workaround: only skip kiq_ring test
+        * during ras recovery in suspend stage for gfx9.4.3
+        */
+       hive = amdgpu_get_xgmi_hive(adev);
+       if (hive) {
+               hive_ras_recovery = atomic_read(&hive->ras_recovery);
+               amdgpu_put_xgmi_hive(hive);
+       }
+
+       ras = amdgpu_ras_get_context(adev);
+       if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
+               ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) {
+               spin_unlock(&kiq->ring_lock);
+               return 0;
+       }
+
        if (kiq_ring->sched.ready && !adev->job_hang)
                r = amdgpu_ring_test_helper(kiq_ring);
        spin_unlock(&kiq->ring_lock);