www.infradead.org Git - users/hch/misc.git/commitdiff
drm/amdgpu: Fix for GPU reset being blocked by KIQ I/O.
author Heng Zhou <Heng.Zhou@amd.com>
Fri, 26 Sep 2025 03:07:44 +0000 (11:07 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 7 Oct 2025 18:09:06 +0000 (14:09 -0400)
After a GPU hang, the reset workqueue can occasionally be blocked by KIQ I/O for more than 10 seconds.
So we need to add an in_reset check to each iteration of the KIQ register poll.

Signed-off-by: Heng Zhou <Heng.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

index a09ccf7d8aa2fe7eb51a988a610fc3b7482ec32d..ebe2b4c68b0f363348b8149a6c95b4fa477c6423 100644 (file)
@@ -1102,6 +1102,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_
 
        might_sleep();
        while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+               if (amdgpu_in_reset(adev))
+                       goto failed_kiq_read;
+
                msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
                r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
        }
@@ -1171,6 +1174,8 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3
 
        might_sleep();
        while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+               if (amdgpu_in_reset(adev))
+                       goto failed_kiq_write;
 
                msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
                r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);