From: Jiadong Zhu Date: Tue, 2 Jul 2024 01:03:49 +0000 (+0800) Subject: drm/amdgpu/gfx9: wait for reset done before remap X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=fdbd69486b468e4963b4ef9f76901d3788252dd5;p=users%2Fjedix%2Flinux-maple.git drm/amdgpu/gfx9: wait for reset done before remap There is a race condition in which the CP firmware modifies the MQD during the reset sequence after the driver has updated it for remapping. We have to wait until CP_HQD_ACTIVE becomes false before remapping the queue. v2: fix KIQ locking (Alex) v3: fix KIQ locking harder Acked-by: Vitaly Prosyak Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index cd18c10a290d..f87e6e9c7d6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7125,7 +7125,7 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; - int r; + int i, r; if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -7147,9 +7147,28 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, if (r) return r; + /* make sure dequeue is complete*/ + gfx_v9_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + soc15_grbm_select(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + gfx_v9_0_unset_safe_mode(adev, 0); + if (r) { + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + return r; + } + r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)){ - DRM_ERROR("fail to resv mqd_obj\n"); + dev_err(adev->dev, "fail to resv mqd_obj\n"); return r; } r = amdgpu_bo_kmap(ring->mqd_obj, (void 
**)&ring->mqd_ptr); @@ -7159,14 +7178,21 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, ring->mqd_ptr = NULL; } amdgpu_bo_unreserve(ring->mqd_obj); - if (r){ - DRM_ERROR("fail to unresv mqd_obj\n"); + if (r) { + dev_err(adev->dev, "fail to unresv mqd_obj\n"); return r; } + spin_lock_irqsave(&kiq->ring_lock, flags); r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); + if (r) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_ring_test_ring(kiq_ring); - if (r){ + if (r) { DRM_ERROR("fail to remap queue\n"); return r; }