drm/amdkfd: add debug queue snapshot operation
author    Jonathan Kim <jonathan.kim@amd.com>
          Tue, 10 May 2022 15:15:29 +0000 (11:15 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
          Fri, 9 Jun 2023 16:36:57 +0000 (12:36 -0400)
Allow the debugger to get a snapshot of a specified number of queues.
Each snapshot entry contains various queue property information and is
copied to a debugger-supplied buffer.

Since the debugger doesn't know how many queues exist at any given time,
allow it to pass 0 as the requested number of snapshots; the KFD then
returns the actual number of queues, which the debugger can use to size
a subsequent request for the real snapshot data.

To prevent future ABI breakage, pass in the requested entry_size.
The KFD will return its own entry_size in case the debugger still wants
to log the information in a core dump on sizing failure.

Also allow the debugger to clear exceptions when doing a snapshot.
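
For illustration, a minimal debugger-side sketch of this two-pass flow
could look as follows.  This is not part of the patch: the ioctl number,
the struct kfd_ioctl_dbg_trap_args layout and the
struct kfd_queue_snapshot_entry fields are assumptions based on the uapi
header (kfd_ioctl.h) introduced alongside this series, and
snapshot_queues() is a hypothetical helper.

/*
 * Illustrative only: query the queue count with num_queues == 0, then
 * fetch that many snapshot entries.  Ioctl and structure names are
 * assumptions based on the uapi added in this series (kfd_ioctl.h).
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include "kfd_ioctl.h"	/* assumed uapi header for the names used below */

static int snapshot_queues(int kfd_fd, pid_t target_pid)
{
	struct kfd_ioctl_dbg_trap_args args;
	struct kfd_queue_snapshot_entry *entries;
	uint32_t i, n;

	/* Pass 1: num_queues == 0 only asks how many queues exist. */
	memset(&args, 0, sizeof(args));
	args.pid = target_pid;
	args.op = KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT;
	args.queue_snapshot.exception_mask = 0;	/* nonzero clears those exceptions */
	args.queue_snapshot.num_queues = 0;
	args.queue_snapshot.entry_size = sizeof(*entries);	/* must be nonzero */
	if (ioctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args))
		return -1;

	n = args.queue_snapshot.num_queues;
	if (!n)
		return 0;

	entries = calloc(n, sizeof(*entries));
	if (!entries)
		return -1;

	/* Pass 2: fetch up to n entries into our buffer. */
	args.queue_snapshot.snapshot_buf_ptr = (uintptr_t)entries;
	args.queue_snapshot.num_queues = n;
	args.queue_snapshot.entry_size = sizeof(*entries);
	if (ioctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args)) {
		free(entries);
		return -1;
	}

	/* KFD reports the total queue count; only min(n, total) were copied. */
	for (i = 0; i < n && i < args.queue_snapshot.num_queues; i++)
		printf("queue %u on gpu 0x%x\n",
		       entries[i].queue_id, entries[i].gpu_id);

	free(entries);
	return 0;
}

The entry_size returned by the KFD could also be compared against
sizeof(struct kfd_queue_snapshot_entry) to detect an ABI mismatch before
dumping the entries into a core file.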

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index b7ee79b5220a8611e681ee233e6a840700ecdb1f..24066756e478a23a1db723018e70d5710d3a12cd 100644
@@ -3053,6 +3053,12 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
                                &args->query_exception_info.info_size);
                break;
        case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT:
+               r = pqm_get_queue_snapshot(&target->pqm,
+                               args->queue_snapshot.exception_mask,
+                               (void __user *)args->queue_snapshot.snapshot_buf_ptr,
+                               &args->queue_snapshot.num_queues,
+                               &args->queue_snapshot.entry_size);
+               break;
        case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT:
                pr_warn("Debug op %i not supported yet\n", args->op);
                r = -EACCES;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index bc9e81293165548481e435c9a54a6f27c5b331b7..0c1be91a87c69179510dae407aec78b248d303e6 100644
@@ -3041,6 +3041,42 @@ int suspend_queues(struct kfd_process *p,
        return total_suspended;
 }
 
+static uint32_t set_queue_type_for_user(struct queue_properties *q_props)
+{
+       switch (q_props->type) {
+       case KFD_QUEUE_TYPE_COMPUTE:
+               return q_props->format == KFD_QUEUE_FORMAT_PM4
+                                       ? KFD_IOC_QUEUE_TYPE_COMPUTE
+                                       : KFD_IOC_QUEUE_TYPE_COMPUTE_AQL;
+       case KFD_QUEUE_TYPE_SDMA:
+               return KFD_IOC_QUEUE_TYPE_SDMA;
+       case KFD_QUEUE_TYPE_SDMA_XGMI:
+               return KFD_IOC_QUEUE_TYPE_SDMA_XGMI;
+       default:
+               WARN_ONCE(true, "queue type not recognized!");
+               return 0xffffffff;
+       };
+}
+
+void set_queue_snapshot_entry(struct queue *q,
+                             uint64_t exception_clear_mask,
+                             struct kfd_queue_snapshot_entry *qss_entry)
+{
+       qss_entry->ring_base_address = q->properties.queue_address;
+       qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr;
+       qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr;
+       qss_entry->ctx_save_restore_address =
+                               q->properties.ctx_save_restore_area_address;
+       qss_entry->ctx_save_restore_area_size =
+                               q->properties.ctx_save_restore_area_size;
+       qss_entry->exception_status = q->properties.exception_status;
+       qss_entry->queue_id = q->properties.queue_id;
+       qss_entry->gpu_id = q->device->id;
+       qss_entry->ring_size = (uint32_t)q->properties.queue_size;
+       qss_entry->queue_type = set_queue_type_for_user(&q->properties);
+       q->properties.exception_status &= ~exception_clear_mask;
+}
+
 int debug_lock_and_unmap(struct device_queue_manager *dqm)
 {
        int r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index d4e6dbffe8c215eba49d23582d1002e85031563d..7dd4b177219dec5c36931c1a65c3f36f6d4609b4 100644
@@ -300,6 +300,9 @@ int suspend_queues(struct kfd_process *p,
 int resume_queues(struct kfd_process *p,
                uint32_t num_queues,
                uint32_t *usr_queue_id_array);
+void set_queue_snapshot_entry(struct queue *q,
+                             uint64_t exception_clear_mask,
+                             struct kfd_queue_snapshot_entry *qss_entry);
 int debug_lock_and_unmap(struct device_queue_manager *dqm);
 int debug_map_and_unlock(struct device_queue_manager *dqm);
 int debug_refresh_runlist(struct device_queue_manager *dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 8ec87bc8ba823575f0ca51dbe0fa6fb1e2b43695..023b17e0116be6f651afeb70f017c502d4a66a0e 100644
@@ -1355,6 +1355,11 @@ int pqm_get_wave_state(struct process_queue_manager *pqm,
                       void __user *ctl_stack,
                       u32 *ctl_stack_used_size,
                       u32 *save_area_used_size);
+int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
+                          uint64_t exception_clear_mask,
+                          void __user *buf,
+                          int *num_qss_entries,
+                          uint32_t *entry_size);
 
 int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
                              uint64_t fence_value,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 01ccab607a6998ff03c9e76431d476ed0ef499e5..9ad1a2186a245ef74c07681d9324648d2dfb9628 100644
@@ -585,6 +585,46 @@ int pqm_get_wave_state(struct process_queue_manager *pqm,
                                                       save_area_used_size);
 }
 
+int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
+                          uint64_t exception_clear_mask,
+                          void __user *buf,
+                          int *num_qss_entries,
+                          uint32_t *entry_size)
+{
+       struct process_queue_node *pqn;
+       struct kfd_queue_snapshot_entry src;
+       uint32_t tmp_entry_size = *entry_size, tmp_qss_entries = *num_qss_entries;
+       int r = 0;
+
+       *num_qss_entries = 0;
+       if (!(*entry_size))
+               return -EINVAL;
+
+       *entry_size = min_t(size_t, *entry_size, sizeof(struct kfd_queue_snapshot_entry));
+       mutex_lock(&pqm->process->event_mutex);
+
+       memset(&src, 0, sizeof(src));
+
+       list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+               if (!pqn->q)
+                       continue;
+
+               if (*num_qss_entries < tmp_qss_entries) {
+                       set_queue_snapshot_entry(pqn->q, exception_clear_mask, &src);
+
+                       if (copy_to_user(buf, &src, *entry_size)) {
+                               r = -EFAULT;
+                               break;
+                       }
+                       buf += tmp_entry_size;
+               }
+               *num_qss_entries += 1;
+       }
+
+       mutex_unlock(&pqm->process->event_mutex);
+       return r;
+}
+
 static int get_queue_data_sizes(struct kfd_process_device *pdd,
                                struct queue *q,
                                uint32_t *mqd_size,