return retval;
 }
 
+/* suspend_single_queue does not take the dqm lock the way
+ * evict_process_queues_cpsch or evict_process_queues_nocpsch do. The caller
+ * must hold the dqm lock before calling and release it afterwards.
+ *
+ * The lock is left to the caller because this function may be called on
+ * multiple queues in a loop; keeping the dqm locked across all of the calls
+ * avoids repeated locking/unlocking.
+ */
+static int suspend_single_queue(struct device_queue_manager *dqm,
+                                     struct kfd_process_device *pdd,
+                                     struct queue *q)
+{
+       bool is_new;
+
+       if (q->properties.is_suspended)
+               return 0;
+
+       pr_debug("Suspending PASID %u queue [%i]\n",
+                       pdd->process->pasid,
+                       q->properties.queue_id);
+
+       is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW);
+
+       if (is_new || q->properties.is_being_destroyed) {
+               pr_debug("Suspend: skip %s queue id %i\n",
+                               is_new ? "new" : "destroyed",
+                               q->properties.queue_id);
+               return -EBUSY;
+       }
+
+       q->properties.is_suspended = true;
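+       /* For MES, unmap the queue from the HW scheduler here; for HWS the
+        * caller batches the unmap via execute_queues_cpsch().
+        */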
+       if (q->properties.is_active) {
+               if (dqm->dev->kfd->shared_resources.enable_mes) {
+                       int r = remove_queue_mes(dqm, q, &pdd->qpd);
+
+                       if (r)
+                               return r;
+               }
+
+               decrement_queue_count(dqm, &pdd->qpd, q);
+               q->properties.is_active = false;
+       }
+
+       return 0;
+}
+
+/* resume_single_queue does not take the dqm lock the way
+ * restore_process_queues_cpsch or restore_process_queues_nocpsch do. The
+ * caller must hold the dqm lock before calling and release it afterwards.
+ *
+ * The lock is left to the caller because this function may be called on
+ * multiple queues in a loop; keeping the dqm locked across all of the calls
+ * avoids repeated locking/unlocking.
+ */
+static int resume_single_queue(struct device_queue_manager *dqm,
+                                     struct qcm_process_device *qpd,
+                                     struct queue *q)
+{
+       struct kfd_process_device *pdd;
+
+       if (!q->properties.is_suspended)
+               return 0;
+
+       pdd = qpd_to_pdd(qpd);
+
+       pr_debug("Restoring from suspend PASID %u queue [%i]\n",
+                           pdd->process->pasid,
+                           q->properties.queue_id);
+
+       q->properties.is_suspended = false;
+
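+       /* For MES, add the queue back to the HW scheduler here; for HWS the
+        * caller remaps it via execute_queues_cpsch().
+        */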
+       if (QUEUE_IS_ACTIVE(q->properties)) {
+               if (dqm->dev->kfd->shared_resources.enable_mes) {
+                       int r = add_queue_mes(dqm, q, &pdd->qpd);
+
+                       if (r)
+                               return r;
+               }
+
+               q->properties.is_active = true;
+               increment_queue_count(dqm, qpd, q);
+       }
+
+       return 0;
+}
+
 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
                                        struct qcm_process_device *qpd)
 {
        return map_queues_cpsch(dqm);
 }
 
+static int wait_on_destroy_queue(struct device_queue_manager *dqm,
+                                struct queue *q)
+{
+       struct kfd_process_device *pdd = kfd_get_process_device_data(q->device,
+                                                               q->process);
+       int ret = 0;
+
+       if (pdd->qpd.is_debug)
+               return ret;
+
+       q->properties.is_being_destroyed = true;
+
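+       /* If the debugger has this queue suspended, it cannot be destroyed
+        * yet.  Drop the dqm lock and the process mutex and wait until
+        * resume_queues() clears is_suspended and wakes destroy_wait.
+        */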
+       if (pdd->process->debug_trap_enabled && q->properties.is_suspended) {
+               dqm_unlock(dqm);
+               mutex_unlock(&q->process->mutex);
+               ret = wait_event_interruptible(dqm->destroy_wait,
+                                               !q->properties.is_suspended);
+
+               mutex_lock(&q->process->mutex);
+               dqm_lock(dqm);
+       }
+
+       return ret;
+}
+
 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
                                struct qcm_process_device *qpd,
                                struct queue *q)
                                q->properties.queue_id);
        }
 
-       retval = 0;
-
        /* remove queue from list to prevent rescheduling after preemption */
        dqm_lock(dqm);
 
+       retval = wait_on_destroy_queue(dqm, q);
+
+       if (retval) {
+               dqm_unlock(dqm);
+               return retval;
+       }
+
        if (qpd->is_debug) {
                /*
                 * error, currently we do not allow to destroy a queue
 
        dqm_unlock(dqm);
 
-       /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
+       /*
+        * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid
+        * circular locking
+        */
+       kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE),
+                               qpd->pqm->process, q->device,
+                               -1, false, NULL, 0);
+
        mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
 
        return retval;
                goto out_free;
        }
 
-       if (!dqm->ops.initialize(dqm))
+       if (!dqm->ops.initialize(dqm)) {
+               init_waitqueue_head(&dqm->destroy_wait);
                return dqm;
+       }
 
 out_free:
        kfree(dqm);
        return r;
 }
 
+#define QUEUE_NOT_FOUND                -1
+/* mark all queue ids in the array as invalid */
+static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids)
+{
+       int i;
+
+       for (i = 0; i < num_queues; i++)
+               queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK;
+}
+
+/* find queue index in array */
+static int q_array_get_index(unsigned int queue_id,
+               uint32_t num_queues,
+               uint32_t *queue_ids)
+{
+       int i;
+
+       for (i = 0; i < num_queues; i++)
+               if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK))
+                       return i;
+
+       return QUEUE_NOT_FOUND;
+}
+
+struct copy_context_work_handler_workarea {
+       struct work_struct copy_context_work;
+       struct kfd_process *p;
+};
+
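+/* Runs from a workqueue worker: attach the target process' mm with
+ * kthread_use_mm() so get_wave_state() can copy the wave state into the
+ * context save/restore area in the process' address space.
+ */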
+static void copy_context_work_handler(struct work_struct *work)
+{
+       struct copy_context_work_handler_workarea *workarea;
+       struct mqd_manager *mqd_mgr;
+       struct queue *q;
+       struct mm_struct *mm;
+       struct kfd_process *p;
+       uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size;
+       int i;
+
+       workarea = container_of(work,
+                       struct copy_context_work_handler_workarea,
+                       copy_context_work);
+
+       p = workarea->p;
+       mm = get_task_mm(p->lead_thread);
+
+       if (!mm)
+               return;
+
+       kthread_use_mm(mm);
+       for (i = 0; i < p->n_pdds; i++) {
+               struct kfd_process_device *pdd = p->pdds[i];
+               struct device_queue_manager *dqm = pdd->dev->dqm;
+               struct qcm_process_device *qpd = &pdd->qpd;
+
+               list_for_each_entry(q, &qpd->queues_list, list) {
+                       mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
+
+                       /* We ignore the return value from get_wave_state
+                        * because
+                        * i) right now, it always returns 0, and
+                        * ii) if we hit an error, we would continue to the
+                        *      next queue anyway.
+                        */
+                       mqd_mgr->get_wave_state(mqd_mgr,
+                                       q->mqd,
+                                       &q->properties,
+                                       (void __user *) q->properties.ctx_save_restore_area_address,
+                                       &tmp_ctl_stack_used_size,
+                                       &tmp_save_area_used_size);
+               }
+       }
+       kthread_unuse_mm(mm);
+       mmput(mm);
+}
+
+static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array)
+{
+       size_t array_size = num_queues * sizeof(uint32_t);
+       uint32_t *queue_ids = NULL;
+
+       if (!usr_queue_id_array)
+               return NULL;
+
+       queue_ids = kzalloc(array_size, GFP_KERNEL);
+       if (!queue_ids)
+               return ERR_PTR(-ENOMEM);
+
+       if (copy_from_user(queue_ids, usr_queue_id_array, array_size)) {
+               kfree(queue_ids);
+               return ERR_PTR(-EFAULT);
+       }
+
+       return queue_ids;
+}
+
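+/* Resume the given queues (or all of the process' queues when
+ * usr_queue_id_array is NULL).  Returns the number of queues resumed;
+ * per-queue error/invalid flags are reported back through
+ * usr_queue_id_array when one is provided.
+ */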
+int resume_queues(struct kfd_process *p,
+               uint32_t num_queues,
+               uint32_t *usr_queue_id_array)
+{
+       uint32_t *queue_ids = NULL;
+       int total_resumed = 0;
+       int i;
+
+       if (usr_queue_id_array) {
+               queue_ids = get_queue_ids(num_queues, usr_queue_id_array);
+
+               if (IS_ERR(queue_ids))
+                       return PTR_ERR(queue_ids);
+
+               /* mask all queues as invalid.  unmask per successful request */
+               q_array_invalidate(num_queues, queue_ids);
+       }
+
+       for (i = 0; i < p->n_pdds; i++) {
+               struct kfd_process_device *pdd = p->pdds[i];
+               struct device_queue_manager *dqm = pdd->dev->dqm;
+               struct qcm_process_device *qpd = &pdd->qpd;
+               struct queue *q;
+               int r, per_device_resumed = 0;
+
+               dqm_lock(dqm);
+
+               /* unmask queues that resume or already resumed as valid */
+               list_for_each_entry(q, &qpd->queues_list, list) {
+                       int q_idx = QUEUE_NOT_FOUND;
+
+                       if (queue_ids)
+                               q_idx = q_array_get_index(
+                                               q->properties.queue_id,
+                                               num_queues,
+                                               queue_ids);
+
+                       if (!queue_ids || q_idx != QUEUE_NOT_FOUND) {
+                               int err = resume_single_queue(dqm, &pdd->qpd, q);
+
+                               if (queue_ids) {
+                                       if (!err) {
+                                               queue_ids[q_idx] &=
+                                                       ~KFD_DBG_QUEUE_INVALID_MASK;
+                                       } else {
+                                               queue_ids[q_idx] |=
+                                                       KFD_DBG_QUEUE_ERROR_MASK;
+                                               break;
+                                       }
+                               }
+
+                               if (dqm->dev->kfd->shared_resources.enable_mes) {
+                                       wake_up_all(&dqm->destroy_wait);
+                                       if (!err)
+                                               total_resumed++;
+                               } else {
+                                       per_device_resumed++;
+                               }
+                       }
+               }
+
+               if (!per_device_resumed) {
+                       dqm_unlock(dqm);
+                       continue;
+               }
+
+               r = execute_queues_cpsch(dqm,
+                                       KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
+                                       0,
+                                       USE_DEFAULT_GRACE_PERIOD);
+               if (r) {
+                       pr_err("Failed to resume process queues\n");
+                       if (queue_ids) {
+                               list_for_each_entry(q, &qpd->queues_list, list) {
+                                       int q_idx = q_array_get_index(
+                                                       q->properties.queue_id,
+                                                       num_queues,
+                                                       queue_ids);
+
+                                       /* mask queue as error on resume fail */
+                                       if (q_idx != QUEUE_NOT_FOUND)
+                                               queue_ids[q_idx] |=
+                                                       KFD_DBG_QUEUE_ERROR_MASK;
+                               }
+                       }
+               } else {
+                       wake_up_all(&dqm->destroy_wait);
+                       total_resumed += per_device_resumed;
+               }
+
+               dqm_unlock(dqm);
+       }
+
+       if (queue_ids) {
+               if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
+                               num_queues * sizeof(uint32_t)))
+                       pr_err("copy_to_user failed on queue resume\n");
+
+               kfree(queue_ids);
+       }
+
+       return total_resumed;
+}
+
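+/* Suspend the requested queues, optionally clear exception_clear_mask bits
+ * from their exception status, and copy their wave state to the context
+ * save/restore areas.  Returns the number of queues suspended; per-queue
+ * error/invalid flags are reported back through usr_queue_id_array.
+ */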
+int suspend_queues(struct kfd_process *p,
+                       uint32_t num_queues,
+                       uint32_t grace_period,
+                       uint64_t exception_clear_mask,
+                       uint32_t *usr_queue_id_array)
+{
+       uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array);
+       int total_suspended = 0;
+       int i;
+
+       if (IS_ERR(queue_ids))
+               return PTR_ERR(queue_ids);
+
+       /* mask all queues as invalid.  unmask on successful request */
+       q_array_invalidate(num_queues, queue_ids);
+
+       for (i = 0; i < p->n_pdds; i++) {
+               struct kfd_process_device *pdd = p->pdds[i];
+               struct device_queue_manager *dqm = pdd->dev->dqm;
+               struct qcm_process_device *qpd = &pdd->qpd;
+               struct queue *q;
+               int r, per_device_suspended = 0;
+
+               mutex_lock(&p->event_mutex);
+               dqm_lock(dqm);
+
+               /* unmask queues that suspend or already suspended */
+               list_for_each_entry(q, &qpd->queues_list, list) {
+                       int q_idx = q_array_get_index(q->properties.queue_id,
+                                                       num_queues,
+                                                       queue_ids);
+
+                       if (q_idx != QUEUE_NOT_FOUND) {
+                               int err = suspend_single_queue(dqm, pdd, q);
+                               bool is_mes = dqm->dev->kfd->shared_resources.enable_mes;
+
+                               if (!err) {
+                                       queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK;
+                                       if (exception_clear_mask && is_mes)
+                                               q->properties.exception_status &=
+                                                       ~exception_clear_mask;
+
+                                       if (is_mes)
+                                               total_suspended++;
+                                       else
+                                               per_device_suspended++;
+                               } else if (err != -EBUSY) {
+                                       r = err;
+                                       queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
+                                       break;
+                               }
+                       }
+               }
+
+               if (!per_device_suspended) {
+                       dqm_unlock(dqm);
+                       mutex_unlock(&p->event_mutex);
+                       if (total_suspended)
+                               amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev);
+                       continue;
+               }
+
+               r = execute_queues_cpsch(dqm,
+                       KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+                       grace_period);
+
+               if (r)
+                       pr_err("Failed to suspend process queues.\n");
+               else
+                       total_suspended += per_device_suspended;
+
+               list_for_each_entry(q, &qpd->queues_list, list) {
+                       int q_idx = q_array_get_index(q->properties.queue_id,
+                                               num_queues, queue_ids);
+
+                       if (q_idx == QUEUE_NOT_FOUND)
+                               continue;
+
+                       /* mask queue as error on suspend fail */
+                       if (r)
+                               queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
+                       else if (exception_clear_mask)
+                               q->properties.exception_status &=
+                                                       ~exception_clear_mask;
+               }
+
+               dqm_unlock(dqm);
+               mutex_unlock(&p->event_mutex);
+               amdgpu_device_flush_hdp(dqm->dev->adev, NULL);
+       }
+
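+       /* Once any queues have been suspended, copy the process' wave state
+        * into the context save/restore areas via an on-stack work item.
+        */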
+       if (total_suspended) {
+               struct copy_context_work_handler_workarea copy_context_worker;
+
+               INIT_WORK_ONSTACK(
+                               &copy_context_worker.copy_context_work,
+                               copy_context_work_handler);
+
+               copy_context_worker.p = p;
+
+               schedule_work(&copy_context_worker.copy_context_work);
+
+               flush_work(&copy_context_worker.copy_context_work);
+               destroy_work_on_stack(&copy_context_worker.copy_context_work);
+       }
+
+       if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
+                       num_queues * sizeof(uint32_t)))
+               pr_err("copy_to_user failed on queue suspend\n");
+
+       kfree(queue_ids);
+
+       return total_suspended;
+}
+
 int debug_lock_and_unmap(struct device_queue_manager *dqm)
 {
        int r;