www.infradead.org Git - users/hch/misc.git/commitdiff
drm/amdkfd: fix suspend/resume all calls in mes based eviction path
author Jonathan Kim <jonathan.kim@amd.com>
Wed, 18 Jun 2025 14:31:15 +0000 (10:31 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 13 Oct 2025 18:14:28 +0000 (14:14 -0400)
Suspend/resume of all gangs should be done while the device lock is held.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Harish Kasiviswanathan <harish.kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

index 6c5c7c1bf5eda25ec15cdae291050e265594fb20..6e7bc983fc0b681ca621a82d2848b1ba32568980 100644 (file)
@@ -1209,6 +1209,15 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
        pr_debug_ratelimited("Evicting process pid %d queues\n",
                            pdd->process->lead_thread->pid);
 
+       if (dqm->dev->kfd->shared_resources.enable_mes) {
+               pdd->last_evict_timestamp = get_jiffies_64();
+               retval = suspend_all_queues_mes(dqm);
+               if (retval) {
+                       dev_err(dev, "Suspending all queues failed");
+                       goto out;
+               }
+       }
+
        /* Mark all queues as evicted. Deactivate all active queues on
         * the qpd.
         */
@@ -1221,23 +1230,27 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
                decrement_queue_count(dqm, qpd, q);
 
                if (dqm->dev->kfd->shared_resources.enable_mes) {
-                       int err;
-
-                       err = remove_queue_mes(dqm, q, qpd);
-                       if (err) {
+                       retval = remove_queue_mes(dqm, q, qpd);
+                       if (retval) {
                                dev_err(dev, "Failed to evict queue %d\n",
                                        q->properties.queue_id);
-                               retval = err;
+                               goto out;
                        }
                }
        }
-       pdd->last_evict_timestamp = get_jiffies_64();
-       if (!dqm->dev->kfd->shared_resources.enable_mes)
+
+       if (!dqm->dev->kfd->shared_resources.enable_mes) {
+               pdd->last_evict_timestamp = get_jiffies_64();
                retval = execute_queues_cpsch(dqm,
                                              qpd->is_debug ?
                                              KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
                                              KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
                                              USE_DEFAULT_GRACE_PERIOD);
+       } else {
+               retval = resume_all_queues_mes(dqm);
+               if (retval)
+                       dev_err(dev, "Resuming all queues failed");
+       }
 
 out:
        dqm_unlock(dqm);
@@ -3098,61 +3111,17 @@ out:
        return ret;
 }
 
-static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm,
-                                  struct qcm_process_device *qpd)
-{
-       struct device *dev = dqm->dev->adev->dev;
-       int ret = 0;
-
-       /* Check if process is already evicted */
-       dqm_lock(dqm);
-       if (qpd->evicted) {
-               /* Increment the evicted count to make sure the
-                * process stays evicted before its terminated.
-                */
-               qpd->evicted++;
-               dqm_unlock(dqm);
-               goto out;
-       }
-       dqm_unlock(dqm);
-
-       ret = suspend_all_queues_mes(dqm);
-       if (ret) {
-               dev_err(dev, "Suspending all queues failed");
-               goto out;
-       }
-
-       ret = dqm->ops.evict_process_queues(dqm, qpd);
-       if (ret) {
-               dev_err(dev, "Evicting process queues failed");
-               goto out;
-       }
-
-       ret = resume_all_queues_mes(dqm);
-       if (ret)
-               dev_err(dev, "Resuming all queues failed");
-
-out:
-       return ret;
-}
-
 int kfd_evict_process_device(struct kfd_process_device *pdd)
 {
        struct device_queue_manager *dqm;
        struct kfd_process *p;
-       int ret = 0;
 
        p = pdd->process;
        dqm = pdd->dev->dqm;
 
        WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
 
-       if (dqm->dev->kfd->shared_resources.enable_mes)
-               ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd);
-       else
-               ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
-
-       return ret;
+       return dqm->ops.evict_process_queues(dqm, &pdd->qpd);
 }
 
 int reserve_debug_trap_vmid(struct device_queue_manager *dqm,