www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
drm/amdkfd: fix missed queue reset on queue destroy
author: Jonathan Kim <Jonathan.Kim@amd.com>
Thu, 22 Aug 2024 14:44:39 +0000 (10:44 -0400)
committer: Alex Deucher <alexander.deucher@amd.com>
Fri, 6 Sep 2024 21:55:05 +0000 (17:55 -0400)
If a queue that is being destroyed causes a HWS hang on removal, the KFD
may issue an unnecessary GPU reset even when the hang could have been
fixed by resetting just the destroyed queue.

This is because the queue has already been removed from the KFD's queue
list prior to the preemption action on destroy, so the reset call fails
to match the HQD PQ reset information against the KFD's queue record and
cannot do the actual queue reset.

To fix this, deactivate the queue prior to preemption, since it is being
destroyed anyway, and remove the queue from the KFD's queue list only
after preemption.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

index 577d121cc6d1a1a8af76aaafcde806e09fb04e44..71b465f8d83ee64bed6e1739dd4682117acef329 100644 (file)
@@ -2407,10 +2407,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
                pdd->sdma_past_activity_counter += sdma_val;
        }
 
-       list_del(&q->list);
-       qpd->queue_count--;
        if (q->properties.is_active) {
                decrement_queue_count(dqm, qpd, q);
+               q->properties.is_active = false;
                if (!dqm->dev->kfd->shared_resources.enable_mes) {
                        retval = execute_queues_cpsch(dqm,
                                                      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
@@ -2421,6 +2420,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
                        retval = remove_queue_mes(dqm, q, qpd);
                }
        }
+       list_del(&q->list);
+       qpd->queue_count--;
 
        /*
         * Unconditionally decrement this counter, regardless of the queue's