]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
drm/amd/amdkfd: add/remove kfd queues on start/stop KFD scheduling
authorShaoyun Liu <shaoyun.liu@amd.com>
Fri, 4 Oct 2024 16:00:36 +0000 (12:00 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Mon, 11 Nov 2024 17:22:39 +0000 (12:22 -0500)
Add back kfd queues in start scheduling that originally been
removed on stop scheduling.

Signed-off-by: Shaoyun Liu <shaoyun.liu@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

index 38c19dc8311dcf74e13e98f75ba33e30da12ed54..c79fe9069e220e46e9508785972458c8499393b7 100644 (file)
@@ -202,6 +202,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
        int r, queue_type;
        uint64_t wptr_addr_off;
 
+       if (!dqm->sched_running || dqm->sched_halt)
+               return 0;
        if (!down_read_trylock(&adev->reset_domain->sem))
                return -EIO;
 
@@ -270,6 +272,8 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
        int r;
        struct mes_remove_queue_input queue_input;
 
+       if (!dqm->sched_running || dqm->sched_halt)
+               return 0;
        if (!down_read_trylock(&adev->reset_domain->sem))
                return -EIO;
 
@@ -292,7 +296,7 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
        return r;
 }
 
-static int remove_all_queues_mes(struct device_queue_manager *dqm)
+static int remove_all_kfd_queues_mes(struct device_queue_manager *dqm)
 {
        struct device_process_node *cur;
        struct device *dev = dqm->dev->adev->dev;
@@ -319,6 +323,33 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm)
        return retval;
 }
 
+static int add_all_kfd_queues_mes(struct device_queue_manager *dqm)
+{
+       struct device_process_node *cur;
+       struct device *dev = dqm->dev->adev->dev;
+       struct qcm_process_device *qpd;
+       struct queue *q;
+       int retval = 0;
+
+       list_for_each_entry(cur, &dqm->queues, list) {
+               qpd = cur->qpd;
+               list_for_each_entry(q, &qpd->queues_list, list) {
+                       if (!q->properties.is_active)
+                               continue;
+                       retval = add_queue_mes(dqm, q, qpd);
+                       if (retval) {
+                               dev_err(dev, "%s: Failed to add queue %d for dev %d",
+                                       __func__,
+                                       q->properties.queue_id,
+                                       dqm->dev->id);
+                               return retval;
+                       }
+               }
+       }
+
+       return retval;
+}
+
 static int suspend_all_queues_mes(struct device_queue_manager *dqm)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
@@ -1742,7 +1773,7 @@ static int halt_cpsch(struct device_queue_manager *dqm)
                                                 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
                                USE_DEFAULT_GRACE_PERIOD, false);
                else
-                       ret = remove_all_queues_mes(dqm);
+                       ret = remove_all_kfd_queues_mes(dqm);
        }
        dqm->sched_halt = true;
        dqm_unlock(dqm);
@@ -1768,6 +1799,9 @@ static int unhalt_cpsch(struct device_queue_manager *dqm)
                ret = execute_queues_cpsch(dqm,
                                           KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
                        0, USE_DEFAULT_GRACE_PERIOD);
+       else
+               ret = add_all_kfd_queues_mes(dqm);
+
        dqm_unlock(dqm);
 
        return ret;
@@ -1867,7 +1901,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
        if (!dqm->dev->kfd->shared_resources.enable_mes)
                unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
        else
-               remove_all_queues_mes(dqm);
+               remove_all_kfd_queues_mes(dqm);
 
        dqm->sched_running = false;