]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
drm/amdkfd: add debug suspend and resume process queues operation
authorJonathan Kim <jonathan.kim@amd.com>
Thu, 5 May 2022 20:15:37 +0000 (16:15 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Fri, 9 Jun 2023 16:36:43 +0000 (12:36 -0400)
In order to inspect waves from the saved context at any point during a
debug session, the debugger must be able to preempt queues to trigger
context save by suspending them.

On queue suspend, the KFD will copy the context save header information
so that the debugger can correctly crawl the appropriate size of the saved
context. The debugger must then also be allowed to resume suspended queues.

A queue that is newly created cannot be suspended because queue ids are
recycled after destruction, so the debugger needs to know that this has
occurred.  Query functions will be added later that will clear a given
queue of its new queue status.

A queue cannot be destroyed while it is suspended, in order to preserve its
saved context during debugger inspection.  Have queue destruction block
while a queue is suspended and unblock when it is resumed.  Likewise, if a
queue is about to be destroyed, it cannot be suspended.

Return the number of queues successfully suspended or resumed along with
a per queue status array where the upper bits per queue status show that
the request was invalid (new/destroyed queue suspend request, missing
queue) or an error occurred (HWS in a fatal state so it can't suspend or
resume queues).

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_debug.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c

index 98cd52bb005fdfcc3c7e0e5ed8c71bfdb01cbdd2..b4fcad0e62f7ec23b22a1f27b3a8b352a3e0b1aa 100644 (file)
@@ -772,6 +772,11 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
        return adev->have_atomics_support;
 }
 
+void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev)
+{
+       amdgpu_device_flush_hdp(adev, NULL);
+}
+
 void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)
 {
        amdgpu_umc_poison_handler(adev, reset);
index dd740e64e6e15ef86cffc89d2e2c85b09aa0cb85..2d0406bff84ecbeca3651d7e74fa3d588cfd161c 100644 (file)
@@ -322,6 +322,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
                                      uint64_t *mmap_offset);
 int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem,
                                      struct dma_buf **dmabuf);
+void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
 int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
                                struct tile_config *config);
 void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
index a6570b124b2b8bfd6cda5005b2a06ee4569833a1..1fae97df7a1efed73e38b5ea3cb92f4286cdbdbb 100644 (file)
@@ -410,6 +410,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
        pr_debug("Write ptr address   == 0x%016llX\n",
                        args->write_pointer_address);
 
+       kfd_dbg_ev_raise(KFD_EC_MASK(EC_QUEUE_NEW), p, dev, queue_id, false, NULL, 0);
        return 0;
 
 err_create_queue:
@@ -2996,7 +2997,17 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
                                args->launch_mode.launch_mode);
                break;
        case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES:
+               r = suspend_queues(target,
+                               args->suspend_queues.num_queues,
+                               args->suspend_queues.grace_period,
+                               args->suspend_queues.exception_mask,
+                               (uint32_t *)args->suspend_queues.queue_array_ptr);
+
+               break;
        case KFD_IOC_DBG_TRAP_RESUME_QUEUES:
+               r = resume_queues(target, args->resume_queues.num_queues,
+                               (uint32_t *)args->resume_queues.queue_array_ptr);
+               break;
        case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH:
        case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH:
        case KFD_IOC_DBG_TRAP_SET_FLAGS:
index 53c3418562d4c7674512cc6beb0c9355eab1282a..f4d3dfb35cb393b031b3c2829ae8cff5c28afd6e 100644 (file)
@@ -339,6 +339,13 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
        }
 
        kfd_dbg_set_workaround(target, false);
+
+       if (!unwind) {
+               int resume_count = resume_queues(target, 0, NULL);
+
+               if (resume_count)
+                       pr_debug("Resumed %d queues\n", resume_count);
+       }
 }
 
 static void kfd_dbg_clean_exception_status(struct kfd_process *target)
index 44d87943e40ac64affc69d7f90cc3e66c77c81eb..bc9e81293165548481e435c9a54a6f27c5b331b7 100644 (file)
@@ -952,6 +952,92 @@ out_unlock:
        return retval;
 }
 
+/* suspend_single_queue does not lock the dqm like the
+ * evict_process_queues_cpsch or evict_process_queues_nocpsch. You should
+ * lock the dqm before calling, and unlock after calling.
+ *
+ * The reason we don't lock the dqm is because this function may be
+ * called on multiple queues in a loop, so rather than locking/unlocking
+ * multiple times, we will just keep the dqm locked for all of the calls.
+ */
+static int suspend_single_queue(struct device_queue_manager *dqm,
+                                     struct kfd_process_device *pdd,
+                                     struct queue *q)
+{
+       bool is_new;
+
+       if (q->properties.is_suspended)
+               return 0;
+
+       pr_debug("Suspending PASID %u queue [%i]\n",
+                       pdd->process->pasid,
+                       q->properties.queue_id);
+
+       is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW);
+
+       if (is_new || q->properties.is_being_destroyed) {
+               pr_debug("Suspend: skip %s queue id %i\n",
+                               is_new ? "new" : "destroyed",
+                               q->properties.queue_id);
+               return -EBUSY;
+       }
+
+       q->properties.is_suspended = true;
+       if (q->properties.is_active) {
+               if (dqm->dev->kfd->shared_resources.enable_mes) {
+                       int r = remove_queue_mes(dqm, q, &pdd->qpd);
+
+                       if (r)
+                               return r;
+               }
+
+               decrement_queue_count(dqm, &pdd->qpd, q);
+               q->properties.is_active = false;
+       }
+
+       return 0;
+}
+
+/* resume_single_queue does not lock the dqm like the functions
+ * restore_process_queues_cpsch or restore_process_queues_nocpsch. You should
+ * lock the dqm before calling, and unlock after calling.
+ *
+ * The reason we don't lock the dqm is because this function may be
+ * called on multiple queues in a loop, so rather than locking/unlocking
+ * multiple times, we will just keep the dqm locked for all of the calls.
+ */
+static int resume_single_queue(struct device_queue_manager *dqm,
+                                     struct qcm_process_device *qpd,
+                                     struct queue *q)
+{
+       struct kfd_process_device *pdd;
+
+       if (!q->properties.is_suspended)
+               return 0;
+
+       pdd = qpd_to_pdd(qpd);
+
+       pr_debug("Restoring from suspend PASID %u queue [%i]\n",
+                           pdd->process->pasid,
+                           q->properties.queue_id);
+
+       q->properties.is_suspended = false;
+
+       if (QUEUE_IS_ACTIVE(q->properties)) {
+               if (dqm->dev->kfd->shared_resources.enable_mes) {
+                       int r = add_queue_mes(dqm, q, &pdd->qpd);
+
+                       if (r)
+                               return r;
+               }
+
+               q->properties.is_active = true;
+               increment_queue_count(dqm, qpd, q);
+       }
+
+       return 0;
+}
+
 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
                                        struct qcm_process_device *qpd)
 {
@@ -1926,6 +2012,31 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,
        return map_queues_cpsch(dqm);
 }
 
+static int wait_on_destroy_queue(struct device_queue_manager *dqm,
+                                struct queue *q)
+{
+       struct kfd_process_device *pdd = kfd_get_process_device_data(q->device,
+                                                               q->process);
+       int ret = 0;
+
+       if (pdd->qpd.is_debug)
+               return ret;
+
+       q->properties.is_being_destroyed = true;
+
+       if (pdd->process->debug_trap_enabled && q->properties.is_suspended) {
+               dqm_unlock(dqm);
+               mutex_unlock(&q->process->mutex);
+               ret = wait_event_interruptible(dqm->destroy_wait,
+                                               !q->properties.is_suspended);
+
+               mutex_lock(&q->process->mutex);
+               dqm_lock(dqm);
+       }
+
+       return ret;
+}
+
 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
                                struct qcm_process_device *qpd,
                                struct queue *q)
@@ -1945,11 +2056,16 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
                                q->properties.queue_id);
        }
 
-       retval = 0;
-
        /* remove queue from list to prevent rescheduling after preemption */
        dqm_lock(dqm);
 
+       retval = wait_on_destroy_queue(dqm, q);
+
+       if (retval) {
+               dqm_unlock(dqm);
+               return retval;
+       }
+
        if (qpd->is_debug) {
                /*
                 * error, currently we do not allow to destroy a queue
@@ -1996,7 +2112,14 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 
        dqm_unlock(dqm);
 
-       /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
+       /*
+        * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid
+        * circular locking
+        */
+       kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE),
+                               qpd->pqm->process, q->device,
+                               -1, false, NULL, 0);
+
        mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
 
        return retval;
@@ -2461,8 +2584,10 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
                goto out_free;
        }
 
-       if (!dqm->ops.initialize(dqm))
+       if (!dqm->ops.initialize(dqm)) {
+               init_waitqueue_head(&dqm->destroy_wait);
                return dqm;
+       }
 
 out_free:
        kfree(dqm);
@@ -2602,6 +2727,320 @@ out_unlock:
        return r;
 }
 
+#define QUEUE_NOT_FOUND                -1
+/* invalidate queue operation in array */
+static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids)
+{
+       int i;
+
+       for (i = 0; i < num_queues; i++)
+               queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK;
+}
+
+/* find queue index in array */
+static int q_array_get_index(unsigned int queue_id,
+               uint32_t num_queues,
+               uint32_t *queue_ids)
+{
+       int i;
+
+       for (i = 0; i < num_queues; i++)
+               if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK))
+                       return i;
+
+       return QUEUE_NOT_FOUND;
+}
+
+struct copy_context_work_handler_workarea {
+       struct work_struct copy_context_work;
+       struct kfd_process *p;
+};
+
+static void copy_context_work_handler (struct work_struct *work)
+{
+       struct copy_context_work_handler_workarea *workarea;
+       struct mqd_manager *mqd_mgr;
+       struct queue *q;
+       struct mm_struct *mm;
+       struct kfd_process *p;
+       uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size;
+       int i;
+
+       workarea = container_of(work,
+                       struct copy_context_work_handler_workarea,
+                       copy_context_work);
+
+       p = workarea->p;
+       mm = get_task_mm(p->lead_thread);
+
+       if (!mm)
+               return;
+
+       kthread_use_mm(mm);
+       for (i = 0; i < p->n_pdds; i++) {
+               struct kfd_process_device *pdd = p->pdds[i];
+               struct device_queue_manager *dqm = pdd->dev->dqm;
+               struct qcm_process_device *qpd = &pdd->qpd;
+
+               list_for_each_entry(q, &qpd->queues_list, list) {
+                       mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
+
+                       /* We ignore the return value from get_wave_state
+                        * because
+                        * i) right now, it always returns 0, and
+                        * ii) if we hit an error, we would continue to the
+                        *      next queue anyway.
+                        */
+                       mqd_mgr->get_wave_state(mqd_mgr,
+                                       q->mqd,
+                                       &q->properties,
+                                       (void __user *) q->properties.ctx_save_restore_area_address,
+                                       &tmp_ctl_stack_used_size,
+                                       &tmp_save_area_used_size);
+               }
+       }
+       kthread_unuse_mm(mm);
+       mmput(mm);
+}
+
+static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array)
+{
+       size_t array_size = num_queues * sizeof(uint32_t);
+       uint32_t *queue_ids = NULL;
+
+       if (!usr_queue_id_array)
+               return NULL;
+
+       queue_ids = kzalloc(array_size, GFP_KERNEL);
+       if (!queue_ids)
+               return ERR_PTR(-ENOMEM);
+
+       if (copy_from_user(queue_ids, usr_queue_id_array, array_size))
+               return ERR_PTR(-EFAULT);
+
+       return queue_ids;
+}
+
+int resume_queues(struct kfd_process *p,
+               uint32_t num_queues,
+               uint32_t *usr_queue_id_array)
+{
+       uint32_t *queue_ids = NULL;
+       int total_resumed = 0;
+       int i;
+
+       if (usr_queue_id_array) {
+               queue_ids = get_queue_ids(num_queues, usr_queue_id_array);
+
+               if (IS_ERR(queue_ids))
+                       return PTR_ERR(queue_ids);
+
+               /* mask all queues as invalid.  unmask per successful request */
+               q_array_invalidate(num_queues, queue_ids);
+       }
+
+       for (i = 0; i < p->n_pdds; i++) {
+               struct kfd_process_device *pdd = p->pdds[i];
+               struct device_queue_manager *dqm = pdd->dev->dqm;
+               struct qcm_process_device *qpd = &pdd->qpd;
+               struct queue *q;
+               int r, per_device_resumed = 0;
+
+               dqm_lock(dqm);
+
+               /* unmask queues that resume or already resumed as valid */
+               list_for_each_entry(q, &qpd->queues_list, list) {
+                       int q_idx = QUEUE_NOT_FOUND;
+
+                       if (queue_ids)
+                               q_idx = q_array_get_index(
+                                               q->properties.queue_id,
+                                               num_queues,
+                                               queue_ids);
+
+                       if (!queue_ids || q_idx != QUEUE_NOT_FOUND) {
+                               int err = resume_single_queue(dqm, &pdd->qpd, q);
+
+                               if (queue_ids) {
+                                       if (!err) {
+                                               queue_ids[q_idx] &=
+                                                       ~KFD_DBG_QUEUE_INVALID_MASK;
+                                       } else {
+                                               queue_ids[q_idx] |=
+                                                       KFD_DBG_QUEUE_ERROR_MASK;
+                                               break;
+                                       }
+                               }
+
+                               if (dqm->dev->kfd->shared_resources.enable_mes) {
+                                       wake_up_all(&dqm->destroy_wait);
+                                       if (!err)
+                                               total_resumed++;
+                               } else {
+                                       per_device_resumed++;
+                               }
+                       }
+               }
+
+               if (!per_device_resumed) {
+                       dqm_unlock(dqm);
+                       continue;
+               }
+
+               r = execute_queues_cpsch(dqm,
+                                       KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
+                                       0,
+                                       USE_DEFAULT_GRACE_PERIOD);
+               if (r) {
+                       pr_err("Failed to resume process queues\n");
+                       if (queue_ids) {
+                               list_for_each_entry(q, &qpd->queues_list, list) {
+                                       int q_idx = q_array_get_index(
+                                                       q->properties.queue_id,
+                                                       num_queues,
+                                                       queue_ids);
+
+                                       /* mask queue as error on resume fail */
+                                       if (q_idx != QUEUE_NOT_FOUND)
+                                               queue_ids[q_idx] |=
+                                                       KFD_DBG_QUEUE_ERROR_MASK;
+                               }
+                       }
+               } else {
+                       wake_up_all(&dqm->destroy_wait);
+                       total_resumed += per_device_resumed;
+               }
+
+               dqm_unlock(dqm);
+       }
+
+       if (queue_ids) {
+               if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
+                               num_queues * sizeof(uint32_t)))
+                       pr_err("copy_to_user failed on queue resume\n");
+
+               kfree(queue_ids);
+       }
+
+       return total_resumed;
+}
+
+int suspend_queues(struct kfd_process *p,
+                       uint32_t num_queues,
+                       uint32_t grace_period,
+                       uint64_t exception_clear_mask,
+                       uint32_t *usr_queue_id_array)
+{
+       uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array);
+       int total_suspended = 0;
+       int i;
+
+       if (IS_ERR(queue_ids))
+               return PTR_ERR(queue_ids);
+
+       /* mask all queues as invalid.  umask on successful request */
+       q_array_invalidate(num_queues, queue_ids);
+
+       for (i = 0; i < p->n_pdds; i++) {
+               struct kfd_process_device *pdd = p->pdds[i];
+               struct device_queue_manager *dqm = pdd->dev->dqm;
+               struct qcm_process_device *qpd = &pdd->qpd;
+               struct queue *q;
+               int r, per_device_suspended = 0;
+
+               mutex_lock(&p->event_mutex);
+               dqm_lock(dqm);
+
+               /* unmask queues that suspend or already suspended */
+               list_for_each_entry(q, &qpd->queues_list, list) {
+                       int q_idx = q_array_get_index(q->properties.queue_id,
+                                                       num_queues,
+                                                       queue_ids);
+
+                       if (q_idx != QUEUE_NOT_FOUND) {
+                               int err = suspend_single_queue(dqm, pdd, q);
+                               bool is_mes = dqm->dev->kfd->shared_resources.enable_mes;
+
+                               if (!err) {
+                                       queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK;
+                                       if (exception_clear_mask && is_mes)
+                                               q->properties.exception_status &=
+                                                       ~exception_clear_mask;
+
+                                       if (is_mes)
+                                               total_suspended++;
+                                       else
+                                               per_device_suspended++;
+                               } else if (err != -EBUSY) {
+                                       r = err;
+                                       queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
+                                       break;
+                               }
+                       }
+               }
+
+               if (!per_device_suspended) {
+                       dqm_unlock(dqm);
+                       mutex_unlock(&p->event_mutex);
+                       if (total_suspended)
+                               amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev);
+                       continue;
+               }
+
+               r = execute_queues_cpsch(dqm,
+                       KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+                       grace_period);
+
+               if (r)
+                       pr_err("Failed to suspend process queues.\n");
+               else
+                       total_suspended += per_device_suspended;
+
+               list_for_each_entry(q, &qpd->queues_list, list) {
+                       int q_idx = q_array_get_index(q->properties.queue_id,
+                                               num_queues, queue_ids);
+
+                       if (q_idx == QUEUE_NOT_FOUND)
+                               continue;
+
+                       /* mask queue as error on suspend fail */
+                       if (r)
+                               queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
+                       else if (exception_clear_mask)
+                               q->properties.exception_status &=
+                                                       ~exception_clear_mask;
+               }
+
+               dqm_unlock(dqm);
+               mutex_unlock(&p->event_mutex);
+               amdgpu_device_flush_hdp(dqm->dev->adev, NULL);
+       }
+
+       if (total_suspended) {
+               struct copy_context_work_handler_workarea copy_context_worker;
+
+               INIT_WORK_ONSTACK(
+                               &copy_context_worker.copy_context_work,
+                               copy_context_work_handler);
+
+               copy_context_worker.p = p;
+
+               schedule_work(&copy_context_worker.copy_context_work);
+
+
+               flush_work(&copy_context_worker.copy_context_work);
+               destroy_work_on_stack(&copy_context_worker.copy_context_work);
+       }
+
+       if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
+                       num_queues * sizeof(uint32_t)))
+               pr_err("copy_to_user failed on queue suspend\n");
+
+       kfree(queue_ids);
+
+       return total_suspended;
+}
+
 int debug_lock_and_unmap(struct device_queue_manager *dqm)
 {
        int r;
index bb75d93712eb24f20ce27f37047f1af476af3523..d4e6dbffe8c215eba49d23582d1002e85031563d 100644 (file)
@@ -263,6 +263,8 @@ struct device_queue_manager {
        uint32_t                current_logical_xcc_start;
 
        uint32_t                wait_times;
+
+       wait_queue_head_t       destroy_wait;
 };
 
 void device_queue_manager_init_cik(
@@ -290,6 +292,14 @@ int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
                        struct qcm_process_device *qpd);
 int release_debug_trap_vmid(struct device_queue_manager *dqm,
                        struct qcm_process_device *qpd);
+int suspend_queues(struct kfd_process *p,
+                       uint32_t num_queues,
+                       uint32_t grace_period,
+                       uint64_t exception_clear_mask,
+                       uint32_t *usr_queue_id_array);
+int resume_queues(struct kfd_process *p,
+               uint32_t num_queues,
+               uint32_t *usr_queue_id_array);
 int debug_lock_and_unmap(struct device_queue_manager *dqm);
 int debug_map_and_unlock(struct device_queue_manager *dqm);
 int debug_refresh_runlist(struct device_queue_manager *dqm);
index a0ac4f2fe6b58260855f5b8458e4a0830b47c99a..94c0fc2e57b7f8e6afafe438e1d75ee4f05eae75 100644 (file)
@@ -237,6 +237,7 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
                          u32 *save_area_used_size)
 {
        struct v10_compute_mqd *m;
+       struct kfd_context_save_area_header header;
 
        m = get_mqd(mqd);
 
@@ -255,6 +256,15 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
         * accessible to user mode
         */
 
+       header.wave_state.control_stack_size = *ctl_stack_used_size;
+       header.wave_state.wave_state_size = *save_area_used_size;
+
+       header.wave_state.wave_state_offset = m->cp_hqd_wg_state_offset;
+       header.wave_state.control_stack_offset = m->cp_hqd_cntl_stack_offset;
+
+       if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state)))
+               return -EFAULT;
+
        return 0;
 }
 
index 9a9b4e85351626371be0084e9ae160af85769acb..31fec5e70d13523a36781a8f78b0d32df5b2b537 100644 (file)
@@ -291,7 +291,7 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
                          u32 *save_area_used_size)
 {
        struct v11_compute_mqd *m;
-       /*struct mqd_user_context_save_area_header header;*/
+       struct kfd_context_save_area_header header;
 
        m = get_mqd(mqd);
 
@@ -309,16 +309,15 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
         * it's part of the context save area that is already
         * accessible to user mode
         */
-/*
-       header.control_stack_size = *ctl_stack_used_size;
-       header.wave_state_size = *save_area_used_size;
+       header.wave_state.control_stack_size = *ctl_stack_used_size;
+       header.wave_state.wave_state_size = *save_area_used_size;
 
-       header.wave_state_offset = m->cp_hqd_wg_state_offset;
-       header.control_stack_offset = m->cp_hqd_cntl_stack_offset;
+       header.wave_state.wave_state_offset = m->cp_hqd_wg_state_offset;
+       header.wave_state.control_stack_offset = m->cp_hqd_cntl_stack_offset;
 
-       if (copy_to_user(ctl_stack, &header, sizeof(header)))
+       if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state)))
                return -EFAULT;
-*/
+
        return 0;
 }
 
index 5b87c244e909f150bd63a3afc5f2f158c251c2f5..601bb9f68048ccfc13f7cc5dcab641e7ca727e73 100644 (file)
@@ -311,6 +311,7 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
                          u32 *save_area_used_size)
 {
        struct v9_mqd *m;
+       struct kfd_context_save_area_header header;
 
        /* Control stack is located one page after MQD. */
        void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
@@ -322,7 +323,18 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
        *save_area_used_size = m->cp_hqd_wg_state_offset -
                m->cp_hqd_cntl_stack_size;
 
-       if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
+       header.wave_state.control_stack_size = *ctl_stack_used_size;
+       header.wave_state.wave_state_size = *save_area_used_size;
+
+       header.wave_state.wave_state_offset = m->cp_hqd_wg_state_offset;
+       header.wave_state.control_stack_offset = m->cp_hqd_cntl_stack_offset;
+
+       if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state)))
+               return -EFAULT;
+
+       if (copy_to_user(ctl_stack + m->cp_hqd_cntl_stack_offset,
+                               mqd_ctl_stack + m->cp_hqd_cntl_stack_offset,
+                               *ctl_stack_used_size))
                return -EFAULT;
 
        return 0;
index cd2d56e5cdf059c7d05e331dabeff0ba07a17072..05da43bf233a5dfa543336124114a6f5c1289073 100644 (file)
@@ -510,6 +510,8 @@ struct queue_properties {
        uint32_t doorbell_off;
        bool is_interop;
        bool is_evicted;
+       bool is_suspended;
+       bool is_being_destroyed;
        bool is_active;
        bool is_gws;
        uint32_t pm4_target_xcc;
@@ -535,7 +537,8 @@ struct queue_properties {
 #define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 &&      \
                            (q).queue_address != 0 &&   \
                            (q).queue_percent > 0 &&    \
-                           !(q).is_evicted)
+                           !(q).is_evicted &&          \
+                           !(q).is_suspended)
 
 enum mqd_update_flag {
        UPDATE_FLAG_DBG_WA_ENABLE = 1,
index 70852a200d8f765f61425f00ff6d65b2fb7702b9..01ccab607a6998ff03c9e76431d476ed0ef499e5 100644 (file)
@@ -187,6 +187,7 @@ static int init_user_queue(struct process_queue_manager *pqm,
 
        /* Doorbell initialized in user space*/
        q_properties->doorbell_ptr = NULL;
+       q_properties->exception_status = KFD_EC_MASK(EC_QUEUE_NEW);
 
        /* let DQM handle it*/
        q_properties->vmid = 0;