idr_init(&adev->mes.queue_id_idr);
ida_init(&adev->mes.doorbell_ida);
spin_lock_init(&adev->mes.queue_id_lock);
- mutex_init(&adev->mes.mutex);
+ mutex_init(&adev->mes.mutex_hidden);
adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
adev->mes.vmid_mask_mmhub = 0xffffff00;
for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;
- for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++)
- adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
+ for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
+ if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0))
+ adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
+ else
+ adev->mes.sdma_hqd_mask[i] = 0xfc;
+ }
for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++)
adev->mes.agreegated_doorbells[i] = 0xffffffff;
idr_destroy(&adev->mes.gang_id_idr);
idr_destroy(&adev->mes.queue_id_idr);
ida_destroy(&adev->mes.doorbell_ida);
- mutex_destroy(&adev->mes.mutex);
+ mutex_destroy(&adev->mes.mutex_hidden);
return r;
}
idr_destroy(&adev->mes.gang_id_idr);
idr_destroy(&adev->mes.queue_id_idr);
ida_destroy(&adev->mes.doorbell_ida);
- mutex_destroy(&adev->mes.mutex);
+ mutex_destroy(&adev->mes.mutex_hidden);
+}
+
+static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
+{
+ amdgpu_bo_free_kernel(&q->mqd_obj,
+ &q->mqd_gpu_addr,
+ &q->mqd_cpu_ptr);
}
int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
struct amdgpu_mes_process *process;
int r;
- mutex_lock(&adev->mes.mutex);
-
/* allocate the mes process buffer */
process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL);
if (!process) {
DRM_ERROR("no more memory to create mes process\n");
- mutex_unlock(&adev->mes.mutex);
return -ENOMEM;
}
if (!process->doorbell_bitmap) {
DRM_ERROR("failed to allocate doorbell bitmap\n");
kfree(process);
- mutex_unlock(&adev->mes.mutex);
return -ENOMEM;
}
- /* add the mes process to idr list */
- r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
- GFP_KERNEL);
- if (r < 0) {
- DRM_ERROR("failed to lock pasid=%d\n", pasid);
- goto clean_up_memory;
- }
-
/* allocate the process context bo and map it */
r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&process->proc_ctx_cpu_ptr);
if (r) {
DRM_ERROR("failed to allocate process context bo\n");
- goto clean_up_pasid;
+ goto clean_up_memory;
}
memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ /* add the mes process to idr list */
+ r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
+ GFP_KERNEL);
+ if (r < 0) {
+ DRM_ERROR("failed to lock pasid=%d\n", pasid);
+ goto clean_up_ctx;
+ }
+
/* allocate the starting doorbell index of the process */
r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index);
if (r < 0) {
DRM_ERROR("failed to allocate doorbell for process\n");
- goto clean_up_ctx;
+ goto clean_up_pasid;
}
DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index);
process->process_quantum = adev->mes.default_process_quantum;
process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
+clean_up_pasid:
+ idr_remove(&adev->mes.pasid_idr, pasid);
+ amdgpu_mes_unlock(&adev->mes);
clean_up_ctx:
amdgpu_bo_free_kernel(&process->proc_ctx_bo,
&process->proc_ctx_gpu_addr,
&process->proc_ctx_cpu_ptr);
-clean_up_pasid:
- idr_remove(&adev->mes.pasid_idr, pasid);
clean_up_memory:
kfree(process->doorbell_bitmap);
kfree(process);
- mutex_unlock(&adev->mes.mutex);
return r;
}
unsigned long flags;
int r;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
process = idr_find(&adev->mes.pasid_idr, pasid);
if (!process) {
DRM_WARN("pasid %d doesn't exist\n", pasid);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return;
}
- /* free all gangs in the process */
+ /* Remove all queues from hardware */
list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
- /* free all queues in the gang */
list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
&queue_input);
if (r)
DRM_WARN("failed to remove hardware queue\n");
+ }
+
+ idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+ }
+ amdgpu_mes_free_process_doorbells(adev, process->doorbell_index);
+ idr_remove(&adev->mes.pasid_idr, pasid);
+ amdgpu_mes_unlock(&adev->mes);
+
+ /* free all memory allocated by the process */
+ list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
+ /* free all queues in the gang */
+ list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
+ amdgpu_mes_queue_free_mqd(queue);
list_del(&queue->list);
kfree(queue);
}
-
- idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
&gang->gang_ctx_gpu_addr,
&gang->gang_ctx_cpu_ptr);
list_del(&gang->list);
kfree(gang);
- }
- amdgpu_mes_free_process_doorbells(adev, process->doorbell_index);
-
- idr_remove(&adev->mes.pasid_idr, pasid);
+ }
amdgpu_bo_free_kernel(&process->proc_ctx_bo,
&process->proc_ctx_gpu_addr,
&process->proc_ctx_cpu_ptr);
kfree(process->doorbell_bitmap);
kfree(process);
-
- mutex_unlock(&adev->mes.mutex);
}
int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
struct amdgpu_mes_gang *gang;
int r;
- mutex_lock(&adev->mes.mutex);
-
- process = idr_find(&adev->mes.pasid_idr, pasid);
- if (!process) {
- DRM_ERROR("pasid %d doesn't exist\n", pasid);
- mutex_unlock(&adev->mes.mutex);
- return -EINVAL;
- }
-
/* allocate the mes gang buffer */
gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL);
if (!gang) {
- mutex_unlock(&adev->mes.mutex);
return -ENOMEM;
}
- /* add the mes gang to idr list */
- r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
- GFP_KERNEL);
- if (r < 0) {
- kfree(gang);
- mutex_unlock(&adev->mes.mutex);
- return r;
- }
-
- gang->gang_id = r;
- *gang_id = r;
-
/* allocate the gang context bo and map it to cpu space */
r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&gang->gang_ctx_cpu_ptr);
if (r) {
DRM_ERROR("failed to allocate process context bo\n");
- goto clean_up;
+ goto clean_up_mem;
}
memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ process = idr_find(&adev->mes.pasid_idr, pasid);
+ if (!process) {
+ DRM_ERROR("pasid %d doesn't exist\n", pasid);
+ r = -EINVAL;
+ goto clean_up_ctx;
+ }
+
+ /* add the mes gang to idr list */
+ r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
+ GFP_KERNEL);
+ if (r < 0) {
+ DRM_ERROR("failed to allocate idr for gang\n");
+ goto clean_up_ctx;
+ }
+
+ gang->gang_id = r;
+ *gang_id = r;
+
INIT_LIST_HEAD(&gang->queue_list);
gang->process = process;
gang->priority = gprops->priority;
gang->inprocess_gang_priority = gprops->inprocess_gang_priority;
list_add_tail(&gang->list, &process->gang_list);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
-clean_up:
- idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+clean_up_ctx:
+ amdgpu_mes_unlock(&adev->mes);
+ amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
+ &gang->gang_ctx_gpu_addr,
+ &gang->gang_ctx_cpu_ptr);
+clean_up_mem:
kfree(gang);
- mutex_unlock(&adev->mes.mutex);
return r;
}
{
struct amdgpu_mes_gang *gang;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
gang = idr_find(&adev->mes.gang_id_idr, gang_id);
if (!gang) {
DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -EINVAL;
}
if (!list_empty(&gang->queue_list)) {
DRM_ERROR("queue list is not empty\n");
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -EBUSY;
}
idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+ list_del(&gang->list);
+ amdgpu_mes_unlock(&adev->mes);
+
amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
&gang->gang_ctx_gpu_addr,
&gang->gang_ctx_cpu_ptr);
- list_del(&gang->list);
+
kfree(gang);
- mutex_unlock(&adev->mes.mutex);
return 0;
}
struct mes_suspend_gang_input input;
int r, pasid;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
idp = &adev->mes.pasid_idr;
}
}
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
}
struct mes_resume_gang_input input;
int r, pasid;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
idp = &adev->mes.pasid_idr;
}
}
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
}
-static int amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
+static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
struct amdgpu_mes_queue *q,
struct amdgpu_mes_queue_properties *p)
{
struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
u32 mqd_size = mqd_mgr->mqd_size;
- struct amdgpu_mqd_prop mqd_prop = {0};
int r;
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
}
memset(q->mqd_cpu_ptr, 0, mqd_size);
+ r = amdgpu_bo_reserve(q->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto clean_up;
+
+ return 0;
+
+clean_up:
+ amdgpu_bo_free_kernel(&q->mqd_obj,
+ &q->mqd_gpu_addr,
+ &q->mqd_cpu_ptr);
+ return r;
+}
+
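+/*
+ * Note: amdgpu_mes_queue_alloc_mqd() returns with q->mqd_obj still reserved
+ * on success; amdgpu_mes_queue_init_mqd() below fills the MQD and then drops
+ * that reservation via amdgpu_bo_unreserve().
+ */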
+static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
+ struct amdgpu_mes_queue *q,
+ struct amdgpu_mes_queue_properties *p)
+{
+ struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
+ struct amdgpu_mqd_prop mqd_prop = {0};
+
mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr;
mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr;
mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr;
mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
mqd_prop.hqd_active = false;
- r = amdgpu_bo_reserve(q->mqd_obj, false);
- if (unlikely(r != 0))
- goto clean_up;
-
mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);
amdgpu_bo_unreserve(q->mqd_obj);
- return 0;
-
-clean_up:
- amdgpu_bo_free_kernel(&q->mqd_obj,
- &q->mqd_gpu_addr,
- &q->mqd_cpu_ptr);
- return r;
-}
-
-static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
-{
- amdgpu_bo_free_kernel(&q->mqd_obj,
- &q->mqd_gpu_addr,
- &q->mqd_cpu_ptr);
}
int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
unsigned long flags;
int r;
- mutex_lock(&adev->mes.mutex);
-
- gang = idr_find(&adev->mes.gang_id_idr, gang_id);
- if (!gang) {
- DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- mutex_unlock(&adev->mes.mutex);
- return -EINVAL;
- }
-
/* allocate the mes queue buffer */
queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
if (!queue) {
- mutex_unlock(&adev->mes.mutex);
+ DRM_ERROR("Failed to allocate memory for queue\n");
return -ENOMEM;
}
+ /* Allocate the queue mqd */
+ r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops);
+ if (r)
+ goto clean_up_memory;
+
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ gang = idr_find(&adev->mes.gang_id_idr, gang_id);
+ if (!gang) {
+ DRM_ERROR("gang id %d doesn't exist\n", gang_id);
+ r = -EINVAL;
+ goto clean_up_mqd;
+ }
+
/* add the mes gang to idr list */
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0,
GFP_ATOMIC);
if (r < 0) {
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
- goto clean_up_memory;
+ goto clean_up_mqd;
}
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
*queue_id = queue->queue_id = r;
goto clean_up_queue_id;
/* initialize the queue mqd */
- r = amdgpu_mes_queue_init_mqd(adev, queue, qprops);
- if (r)
- goto clean_up_doorbell;
+ amdgpu_mes_queue_init_mqd(adev, queue, qprops);
/* add hw queue to mes */
queue_input.process_id = gang->process->pasid;
- queue_input.page_table_base_addr = gang->process->pd_gpu_addr;
+
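+ /*
+ * pd_gpu_addr is an MC address; rebase it from gmc.vram_start onto
+ * vram_base_offset so MES receives the page directory's physical VRAM
+ * address (the mes_v10_1 backend correspondingly stops subtracting
+ * vram_start itself).
+ */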
+ queue_input.page_table_base_addr =
+ adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr -
+ adev->gmc.vram_start;
+
queue_input.process_va_start = 0;
queue_input.process_va_end =
(adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
if (r) {
DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n",
qprops->doorbell_off);
- goto clean_up_mqd;
+ goto clean_up_doorbell;
}
DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, "
queue->gang = gang;
list_add_tail(&queue->list, &gang->queue_list);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
-clean_up_mqd:
- amdgpu_mes_queue_free_mqd(queue);
clean_up_doorbell:
amdgpu_mes_queue_doorbell_free(adev, gang->process,
qprops->doorbell_off);
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
+clean_up_mqd:
+ amdgpu_mes_unlock(&adev->mes);
+ amdgpu_mes_queue_free_mqd(queue);
clean_up_memory:
kfree(queue);
- mutex_unlock(&adev->mes.mutex);
return r;
}
struct mes_remove_queue_input queue_input;
int r;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
/* remove the mes gang from idr list */
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
queue = idr_find(&adev->mes.queue_id_idr, queue_id);
if (!queue) {
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
DRM_ERROR("queue id %d doesn't exist\n", queue_id);
return -EINVAL;
}
DRM_ERROR("failed to remove hardware queue, queue id = %d\n",
queue_id);
- amdgpu_mes_queue_free_mqd(queue);
list_del(&queue->list);
amdgpu_mes_queue_doorbell_free(adev, gang->process,
queue->doorbell_off);
+ amdgpu_mes_unlock(&adev->mes);
+
+ amdgpu_mes_queue_free_mqd(queue);
kfree(queue);
- mutex_unlock(&adev->mes.mutex);
return 0;
}
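+
+/*
+ * Unmap or preempt a kernel-managed ("legacy") queue through MES.
+ * For PREEMPT_QUEUES_NO_UNMAP the queue is only preempted and gpu_addr/seq
+ * are handed to the firmware as the trailing fence it writes once the
+ * preemption completes; any other action removes the queue from hardware.
+ */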
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ struct mes_unmap_legacy_queue_input queue_input;
+ int r;
+
+ amdgpu_mes_lock(&adev->mes);
+
+ queue_input.action = action;
+ queue_input.queue_type = ring->funcs->type;
+ queue_input.doorbell_offset = ring->doorbell_index;
+ queue_input.pipe_id = ring->pipe;
+ queue_input.queue_id = ring->queue;
+ queue_input.trail_fence_addr = gpu_addr;
+ queue_input.trail_fence_data = seq;
+
+ r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
+ if (r)
+ DRM_ERROR("failed to unmap legacy queue\n");
+
+ amdgpu_mes_unlock(&adev->mes);
+ return r;
+}
+
static void
amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_mes_queue_properties qprops = {0};
int r, queue_id, pasid;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
gang = idr_find(&adev->mes.gang_id_idr, gang_id);
if (!gang) {
DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -EINVAL;
}
pasid = gang->process->pasid;
ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
if (!ring) {
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -ENOMEM;
}
dma_fence_wait(gang->process->vm->last_update, false);
dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
if (r)
amdgpu_ring_fini(ring);
clean_up_memory:
kfree(ring);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return r;
}
}
for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
- /* On sienna cichlid+, fw hasn't supported to map sdma queue. */
- if (adev->asic_type >= CHIP_SIENNA_CICHLID &&
- i == AMDGPU_RING_TYPE_SDMA)
+ /* On GFX v10.3, the fw doesn't yet support mapping SDMA queues. */
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) &&
+ adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) &&
+ queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
continue;
r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
struct amdgpu_mes {
struct amdgpu_device *adev;
- struct mutex mutex;
+ struct mutex mutex_hidden;
struct idr pasid_idr;
struct idr gang_id_idr;
uint32_t query_status_fence_offs;
uint64_t query_status_fence_gpu_addr;
uint64_t *query_status_fence_ptr;
+ uint32_t saved_flags;
/* initialize kiq pipe */
int (*kiq_hw_init)(struct amdgpu_device *adev);
+ int (*kiq_hw_fini)(struct amdgpu_device *adev);
/* ip specific functions */
const struct amdgpu_mes_funcs *funcs;
uint64_t wptr_addr;
uint32_t queue_type;
uint32_t paging;
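+ /* GWS allocation and trap handler (TBA/TMA) addresses for the queue */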
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint64_t tba_addr;
+ uint64_t tma_addr;
};
struct mes_remove_queue_input {
uint64_t gang_context_addr;
};
+struct mes_unmap_legacy_queue_input {
+ enum amdgpu_unmap_queues_action action;
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ uint32_t pipe_id;
+ uint32_t queue_id;
+ uint64_t trail_fence_addr;
+ uint64_t trail_fence_data;
+};
+
struct mes_suspend_gang_input {
bool suspend_all_gangs;
uint64_t gang_context_addr;
int (*remove_hw_queue)(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input);
+ int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input);
+
int (*suspend_gang)(struct amdgpu_mes *mes,
struct mes_suspend_gang_input *input);
};
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
+#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
int *queue_id);
int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq);
+
int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
int queue_type, int idx,
struct amdgpu_mes_ctx_data *ctx_data,
uint32_t doorbell_index,
unsigned int doorbell_id);
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
+
+/*
+ * MES lock can be taken in MMU notifiers.
+ *
+ * A bit more detail about why no-FS reclaim is set while holding the MES lock:
+ *
+ * The purpose of the MMU notifier is to stop GPU access to memory so
+ * that the Linux VM subsystem can move pages around safely. This is
+ * done by preempting user mode queues for the affected process. When
+ * MES is used, MES lock needs to be taken to preempt the queues.
+ *
+ * The MMU notifier callback entry point in the driver is
+ * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
+ * there is:
+ * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
+ * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
+ *
+ * The last part of the chain is a function pointer where we take the
+ * MES lock.
+ *
+ * The problem with taking locks in the MMU notifier is that MMU
+ * notifiers can be called in reclaim-FS context. That's where the
+ * kernel frees up pages to make room for new page allocations under
+ * memory pressure. While we are running in reclaim-FS context, we must
+ * not trigger another memory reclaim operation because that would
+ * recursively reenter the reclaim code and cause a deadlock. The
+ * memalloc_nofs_save/restore calls guarantee that.
+ *
+ * In addition we also need to avoid lock dependencies on other locks taken
+ * under the MES lock, for example reservation locks. Here is a possible
+ * scenario of a deadlock:
+ * Thread A: takes and holds reservation lock | triggers reclaim-FS |
+ * MMU notifier | blocks trying to take MES lock
+ * Thread B: takes and holds MES lock | blocks trying to take reservation lock
+ *
+ * In this scenario Thread B gets involved in a deadlock even without
+ * triggering a reclaim-FS operation itself.
+ * To fix this and break the lock dependency chain you'd need to either:
+ * 1. protect reservation locks with memalloc_nofs_save/restore, or
+ * 2. avoid taking reservation locks under the MES lock.
+ *
+ * Reservation locks are taken all over the kernel in different subsystems;
+ * we have no control over them or their lock dependencies. So the only
+ * workable solution is to avoid taking other locks under the MES lock.
+ * As a result, make sure no reclaim-FS happens while holding this lock anywhere
+ * to prevent deadlocks when an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
+{
+ mutex_lock(&mes->mutex_hidden);
+ mes->saved_flags = memalloc_noreclaim_save();
+}
+
+static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
+{
+ memalloc_noreclaim_restore(mes->saved_flags);
+ mutex_unlock(&mes->mutex_hidden);
+}
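+
+/*
+ * Illustrative usage of the pattern described above (a sketch, not code
+ * taken from this patch): do memory allocation and BO creation/reservation
+ * before taking the MES lock, keep only scheduler bookkeeping under it, and
+ * release resources only after dropping it.
+ *
+ *	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+ *				    AMDGPU_GEM_DOMAIN_GTT,
+ *				    &bo, &gpu_addr, &cpu_ptr);
+ *	if (r)
+ *		return r;
+ *
+ *	amdgpu_mes_lock(&adev->mes);
+ *	r = idr_alloc(&adev->mes.pasid_idr, obj, pasid, pasid + 1, GFP_KERNEL);
+ *	amdgpu_mes_unlock(&adev->mes);
+ *
+ *	if (r < 0)
+ *		amdgpu_bo_free_kernel(&bo, &gpu_addr, &cpu_ptr);
+ */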
#endif /* __AMDGPU_MES_H__ */
{
struct amdgpu_device *adev = mes->adev;
union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_add_queue_pkt.process_id = input->process_id;
- mes_add_queue_pkt.page_table_base_addr =
- input->page_table_base_addr - adev->gmc.vram_start;
+ mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
mes_add_queue_pkt.process_va_start = input->process_va_start;
mes_add_queue_pkt.process_va_end = input->process_va_end;
mes_add_queue_pkt.process_quantum = input->process_quantum;
mes_add_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
mes_add_queue_pkt.paging = input->paging;
+ mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
+ mes_add_queue_pkt.gws_base = input->gws_base;
+ mes_add_queue_pkt.gws_size = input->gws_size;
+ mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
}
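+
+/*
+ * The v10.1 backend implements unmap_legacy_queue with the existing
+ * REMOVE_QUEUE packet: PREEMPT_QUEUES_NO_UNMAP sets preempt_legacy_gfx_queue
+ * plus the trailing fence address/data, while a full unmap sets either
+ * unmap_legacy_gfx_queue or unmap_kiq_utility_queue depending on ring type.
+ */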
+static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input)
+{
+ union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+
+ memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
+
+ mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+ mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_remove_queue_pkt.gang_context_addr = 0;
+
+ mes_remove_queue_pkt.pipe_id = input->pipe_id;
+ mes_remove_queue_pkt.queue_id = input->queue_id;
+
+ if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
+ mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
+ mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
+ mes_remove_queue_pkt.tf_data =
+ lower_32_bits(input->trail_fence_data);
+ } else {
+ if (input->queue_type == AMDGPU_RING_TYPE_GFX)
+ mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1;
+ else
+ mes_remove_queue_pkt.unmap_kiq_utility_queue = 1;
+ }
+
+ mes_remove_queue_pkt.api_status.api_completion_fence_addr =
+ mes->ring.fence_drv.gpu_addr;
+ mes_remove_queue_pkt.api_status.api_completion_fence_value =
+ ++mes->ring.fence_drv.sync_seq;
+
+ return mes_v10_1_submit_pkt_and_poll_completion(mes,
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
+}
+
static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
struct mes_suspend_gang_input *input)
{
mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
- mes_set_hw_res_pkt.agreegated_doorbells[i] =
+ mes_set_hw_res_pkt.aggregated_doorbells[i] =
mes->agreegated_doorbells[i];
+ for (i = 0; i < 5; i++) {
+ mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
+ mes_set_hw_res_pkt.mmhub_base[i] =
+ adev->reg_offset[MMHUB_HWIP][0][i];
+ mes_set_hw_res_pkt.osssys_base[i] =
+ adev->reg_offset[OSSSYS_HWIP][0][i];
+ }
+
+ mes_set_hw_res_pkt.disable_reset = 1;
+ mes_set_hw_res_pkt.disable_mes_log = 1;
+ mes_set_hw_res_pkt.use_different_vmid_compute = 1;
+
mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_set_hw_res_pkt.api_status.api_completion_fence_value =
static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
.add_hw_queue = mes_v10_1_add_hw_queue,
.remove_hw_queue = mes_v10_1_remove_hw_queue,
+ .unmap_legacy_queue = mes_v10_1_unmap_legacy_queue,
.suspend_gang = mes_v10_1_suspend_gang,
.resume_gang = mes_v10_1_resume_gang,
};
goto failure;
}
+ /*
+ * Disable KIQ ring usage from the driver once MES is enabled.
+ * MES uses KIQ ring exclusively so driver cannot access KIQ ring
+ * with MES enabled.
+ */
+ adev->gfx.kiq.ring.sched.ready = false;
+
return 0;
failure:
MES_SCH_API_PROGRAM_GDS = 12,
MES_SCH_API_SET_DEBUG_VMID = 13,
MES_SCH_API_MISC = 14,
+ MES_SCH_API_UPDATE_ROOT_PAGE_TABLE = 15,
+ MES_SCH_API_AMD_LOG = 16,
MES_SCH_API_MAX = 0xFF
};
enum { MAX_VMID_MMHUB = 16 };
enum MES_LOG_OPERATION {
- MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0
+ MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0,
+ MES_LOG_OPERATION_QUEUE_NEW_WORK = 1,
+ MES_LOG_OPERATION_QUEUE_UNWAIT_SYNC_OBJECT = 2,
+ MES_LOG_OPERATION_QUEUE_NO_MORE_WORK = 3,
+ MES_LOG_OPERATION_QUEUE_WAIT_SYNC_OBJECT = 4,
+ MES_LOG_OPERATION_QUEUE_INVALID = 0xF,
};
enum MES_LOG_CONTEXT_STATE {
MES_LOG_CONTEXT_STATE_RUNNING = 1,
MES_LOG_CONTEXT_STATE_READY = 2,
MES_LOG_CONTEXT_STATE_READY_STANDBY = 3,
+ MES_LOG_CONTEXT_STATE_INVALID = 0xF,
};
struct MES_LOG_CONTEXT_STATE_CHANGE {
enum MES_LOG_CONTEXT_STATE new_context_state;
};
+struct MES_LOG_QUEUE_NEW_WORK {
+ uint64_t h_queue;
+ uint64_t reserved;
+};
+
+struct MES_LOG_QUEUE_UNWAIT_SYNC_OBJECT {
+ uint64_t h_queue;
+ uint64_t h_sync_object;
+};
+
+struct MES_LOG_QUEUE_NO_MORE_WORK {
+ uint64_t h_queue;
+ uint64_t reserved;
+};
+
+struct MES_LOG_QUEUE_WAIT_SYNC_OBJECT {
+ uint64_t h_queue;
+ uint64_t h_sync_object;
+};
+
struct MES_LOG_ENTRY_HEADER {
uint32_t first_free_entry_index;
uint32_t wraparound_count;
uint32_t operation_type; /* operation_type is of MES_LOG_OPERATION type */
uint32_t reserved_operation_type_bits;
union {
- struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change;
- uint64_t reserved_operation_data[2];
+ struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change;
+ struct MES_LOG_QUEUE_NEW_WORK queue_new_work;
+ struct MES_LOG_QUEUE_UNWAIT_SYNC_OBJECT queue_unwait_sync_object;
+ struct MES_LOG_QUEUE_NO_MORE_WORK queue_no_more_work;
+ struct MES_LOG_QUEUE_WAIT_SYNC_OBJECT queue_wait_sync_object;
+ uint64_t all[2];
};
};
struct MES_LOG_ENTRY_DATA entries[1];
};
+enum MES_SWIP_TO_HWIP_DEF {
+ MES_MAX_HWIP_SEGMENT = 6,
+};
+
union MESAPI_SET_HW_RESOURCES {
struct {
union MES_API_HEADER header;
uint32_t compute_hqd_mask[MAX_COMPUTE_PIPES];
uint32_t gfx_hqd_mask[MAX_GFX_PIPES];
uint32_t sdma_hqd_mask[MAX_SDMA_PIPES];
- uint32_t agreegated_doorbells[AMD_PRIORITY_NUM_LEVELS];
+ uint32_t aggregated_doorbells[AMD_PRIORITY_NUM_LEVELS];
uint64_t g_sch_ctx_gpu_mc_ptr;
uint64_t query_status_fence_gpu_mc_ptr;
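+ /*
+ * Register aperture bases for GC, MMHUB and OSSSYS (filled from
+ * adev->reg_offset) so the firmware can locate those register blocks.
+ */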
+ uint32_t gc_base[MES_MAX_HWIP_SEGMENT];
+ uint32_t mmhub_base[MES_MAX_HWIP_SEGMENT];
+ uint32_t osssys_base[MES_MAX_HWIP_SEGMENT];
struct MES_API_STATUS api_status;
union {
struct {
uint32_t disable_reset : 1;
- uint32_t reserved : 31;
+ uint32_t use_different_vmid_compute : 1;
+ uint32_t disable_mes_log : 1;
+ uint32_t apply_mmhub_pgvm_invalidate_ack_loss_wa : 1;
+ uint32_t apply_grbm_remote_register_dummy_read_wa : 1;
+ uint32_t second_gfx_pipe_enabled : 1;
+ uint32_t enable_level_process_quantum_check : 1;
+ uint32_t apply_cwsr_program_all_vmid_sq_shader_tba_registers_wa : 1;
+ uint32_t enable_mqd_active_poll : 1;
+ uint32_t disable_timer_int : 1;
+ uint32_t reserved : 22;
};
uint32_t uint32_t_all;
};
uint32_t doorbell_offset;
uint64_t mqd_addr;
uint64_t wptr_addr;
+ uint64_t h_context;
+ uint64_t h_queue;
enum MES_QUEUE_TYPE queue_type;
uint32_t gds_base;
uint32_t gds_size;
uint32_t gws_base;
uint32_t gws_size;
uint32_t oa_mask;
+ uint64_t trap_handler_addr;
+ uint32_t vm_context_cntl;
struct {
uint32_t paging : 1;
uint32_t program_gds : 1;
uint32_t is_gang_suspended : 1;
uint32_t is_tmz_queue : 1;
- uint32_t reserved : 24;
+ uint32_t map_kiq_utility_queue : 1;
+ uint32_t reserved : 23;
};
struct MES_API_STATUS api_status;
};
uint64_t gang_context_addr;
struct {
- uint32_t unmap_legacy_gfx_queue : 1;
- uint32_t reserved : 31;
+ uint32_t unmap_legacy_gfx_queue : 1;
+ uint32_t unmap_kiq_utility_queue : 1;
+ uint32_t preempt_legacy_gfx_queue : 1;
+ uint32_t reserved : 29;
};
- struct MES_API_STATUS api_status;
+ struct MES_API_STATUS api_status;
+
+ uint32_t pipe_id;
+ uint32_t queue_id;
+
+ uint64_t tf_addr;
+ uint32_t tf_data;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
union MESAPI__RESET {
struct {
- union MES_API_HEADER header;
+ union MES_API_HEADER header;
struct {
- uint32_t reset_queue : 1;
- uint32_t reserved : 31;
+ /* Only reset the queue given by doorbell_offset (not entire gang) */
+ uint32_t reset_queue_only : 1;
+ /* Hang detection first then reset any queues that are hung */
+ uint32_t hang_detect_then_reset : 1;
+ /* Only do hang detection (no reset) */
+ uint32_t hang_detect_only : 1;
+ /* Reset HP and LP kernel queues not managed by MES */
+ uint32_t reset_legacy_gfx : 1;
+ uint32_t reserved : 28;
};
- uint64_t gang_context_addr;
- uint32_t doorbell_offset; /* valid only if reset_queue = true */
- struct MES_API_STATUS api_status;
+ uint64_t gang_context_addr;
+
+ /* valid only if reset_queue_only = true */
+ uint32_t doorbell_offset;
+
+ /* valid only if hang_detect_then_reset = true */
+ uint64_t doorbell_offset_addr;
+ enum MES_QUEUE_TYPE queue_type;
+
+ /* valid only if reset_legacy_gfx = true */
+ uint32_t pipe_id_lp;
+ uint32_t queue_id_lp;
+ uint32_t vmid_id_lp;
+ uint64_t mqd_mc_addr_lp;
+ uint32_t doorbell_offset_lp;
+ uint64_t wptr_addr_lp;
+
+ uint32_t pipe_id_hp;
+ uint32_t queue_id_hp;
+ uint32_t vmid_id_hp;
+ uint64_t mqd_mc_addr_hp;
+ uint32_t doorbell_offset_hp;
+ uint64_t wptr_addr_hp;
+
+ struct MES_API_STATUS api_status;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
enum MESAPI_MISC_OPCODE {
MESAPI_MISC__MODIFY_REG,
+ MESAPI_MISC__INV_GART,
+ MESAPI_MISC__QUERY_STATUS,
MESAPI_MISC__MAX,
};
enum { MISC_DATA_MAX_SIZE_IN_DWORDS = 20 };
+struct MODIFY_REG {
+ enum MODIFY_REG_SUBCODE subcode;
+ uint32_t reg_offset;
+ uint32_t reg_value;
+};
+
+struct INV_GART {
+ uint64_t inv_range_va_start;
+ uint64_t inv_range_size;
+};
+
+struct QUERY_STATUS {
+ uint32_t context_id;
+};
+
union MESAPI__MISC {
struct {
union MES_API_HEADER header;
struct MES_API_STATUS api_status;
union {
- struct {
- enum MODIFY_REG_SUBCODE subcode;
- uint32_t reg_offset;
- uint32_t reg_value;
- } modify_reg;
+ struct MODIFY_REG modify_reg;
+ struct INV_GART inv_gart;
+ struct QUERY_STATUS query_status;
uint32_t data[MISC_DATA_MAX_SIZE_IN_DWORDS];
};
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
+union MESAPI__UPDATE_ROOT_PAGE_TABLE {
+ struct {
+ union MES_API_HEADER header;
+ uint64_t page_table_base_addr;
+ uint64_t process_context_addr;
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+union MESAPI_AMD_LOG {
+ struct {
+ union MES_API_HEADER header;
+ uint64_t p_buffer_memory;
+ uint64_t p_buffer_size_used;
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
#pragma pack(pop)
#endif