idr_init(&adev->mes.queue_id_idr);
ida_init(&adev->mes.doorbell_ida);
spin_lock_init(&adev->mes.queue_id_lock);
- mutex_init(&adev->mes.mutex);
+ mutex_init(&adev->mes.mutex_hidden);
adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
adev->mes.vmid_mask_mmhub = 0xffffff00;
for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;
- for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++)
- adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
+ for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
+ if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0))
+ adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
+ else
+ adev->mes.sdma_hqd_mask[i] = 0xfc;
+ }
for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++)
adev->mes.agreegated_doorbells[i] = 0xffffffff;
idr_destroy(&adev->mes.gang_id_idr);
idr_destroy(&adev->mes.queue_id_idr);
ida_destroy(&adev->mes.doorbell_ida);
- mutex_destroy(&adev->mes.mutex);
+ mutex_destroy(&adev->mes.mutex_hidden);
return r;
}
idr_destroy(&adev->mes.gang_id_idr);
idr_destroy(&adev->mes.queue_id_idr);
ida_destroy(&adev->mes.doorbell_ida);
- mutex_destroy(&adev->mes.mutex);
+ mutex_destroy(&adev->mes.mutex_hidden);
+}
+
+static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
+{
+ amdgpu_bo_free_kernel(&q->mqd_obj,
+ &q->mqd_gpu_addr,
+ &q->mqd_cpu_ptr);
}
int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
struct amdgpu_mes_process *process;
int r;
- mutex_lock(&adev->mes.mutex);
-
/* allocate the mes process buffer */
process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL);
if (!process) {
DRM_ERROR("no more memory to create mes process\n");
- mutex_unlock(&adev->mes.mutex);
return -ENOMEM;
}
if (!process->doorbell_bitmap) {
DRM_ERROR("failed to allocate doorbell bitmap\n");
kfree(process);
- mutex_unlock(&adev->mes.mutex);
return -ENOMEM;
}
- /* add the mes process to idr list */
- r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
- GFP_KERNEL);
- if (r < 0) {
- DRM_ERROR("failed to lock pasid=%d\n", pasid);
- goto clean_up_memory;
- }
-
/* allocate the process context bo and map it */
r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&process->proc_ctx_cpu_ptr);
if (r) {
DRM_ERROR("failed to allocate process context bo\n");
- goto clean_up_pasid;
+ goto clean_up_memory;
}
memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ /* add the mes process to idr list */
+ r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
+ GFP_KERNEL);
+ if (r < 0) {
+ DRM_ERROR("failed to lock pasid=%d\n", pasid);
+ goto clean_up_ctx;
+ }
+
/* allocate the starting doorbell index of the process */
r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index);
if (r < 0) {
DRM_ERROR("failed to allocate doorbell for process\n");
- goto clean_up_ctx;
+ goto clean_up_pasid;
}
DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index);
process->process_quantum = adev->mes.default_process_quantum;
process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
+clean_up_pasid:
+ idr_remove(&adev->mes.pasid_idr, pasid);
+ amdgpu_mes_unlock(&adev->mes);
clean_up_ctx:
amdgpu_bo_free_kernel(&process->proc_ctx_bo,
&process->proc_ctx_gpu_addr,
&process->proc_ctx_cpu_ptr);
-clean_up_pasid:
- idr_remove(&adev->mes.pasid_idr, pasid);
clean_up_memory:
kfree(process->doorbell_bitmap);
kfree(process);
- mutex_unlock(&adev->mes.mutex);
return r;
}
unsigned long flags;
int r;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
process = idr_find(&adev->mes.pasid_idr, pasid);
if (!process) {
DRM_WARN("pasid %d doesn't exist\n", pasid);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return;
}
- /* free all gangs in the process */
+ /* Remove all queues from hardware */
list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
- /* free all queues in the gang */
list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
&queue_input);
if (r)
DRM_WARN("failed to remove hardware queue\n");
+ }
+
+ idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+ }
+ amdgpu_mes_free_process_doorbells(adev, process->doorbell_index);
+ idr_remove(&adev->mes.pasid_idr, pasid);
+ amdgpu_mes_unlock(&adev->mes);
+
+ /* free all memory allocated by the process */
+ list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
+ /* free all queues in the gang */
+ list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
+ amdgpu_mes_queue_free_mqd(queue);
list_del(&queue->list);
kfree(queue);
}
-
- idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
&gang->gang_ctx_gpu_addr,
&gang->gang_ctx_cpu_ptr);
list_del(&gang->list);
kfree(gang);
- }
- amdgpu_mes_free_process_doorbells(adev, process->doorbell_index);
-
- idr_remove(&adev->mes.pasid_idr, pasid);
+ }
amdgpu_bo_free_kernel(&process->proc_ctx_bo,
&process->proc_ctx_gpu_addr,
&process->proc_ctx_cpu_ptr);
kfree(process->doorbell_bitmap);
kfree(process);
-
- mutex_unlock(&adev->mes.mutex);
}
int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
struct amdgpu_mes_gang *gang;
int r;
- mutex_lock(&adev->mes.mutex);
-
- process = idr_find(&adev->mes.pasid_idr, pasid);
- if (!process) {
- DRM_ERROR("pasid %d doesn't exist\n", pasid);
- mutex_unlock(&adev->mes.mutex);
- return -EINVAL;
- }
-
/* allocate the mes gang buffer */
gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL);
if (!gang) {
- mutex_unlock(&adev->mes.mutex);
return -ENOMEM;
}
- /* add the mes gang to idr list */
- r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
- GFP_KERNEL);
- if (r < 0) {
- kfree(gang);
- mutex_unlock(&adev->mes.mutex);
- return r;
- }
-
- gang->gang_id = r;
- *gang_id = r;
-
/* allocate the gang context bo and map it to cpu space */
r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&gang->gang_ctx_cpu_ptr);
if (r) {
DRM_ERROR("failed to allocate process context bo\n");
- goto clean_up;
+ goto clean_up_mem;
}
memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ process = idr_find(&adev->mes.pasid_idr, pasid);
+ if (!process) {
+ DRM_ERROR("pasid %d doesn't exist\n", pasid);
+ r = -EINVAL;
+ goto clean_up_ctx;
+ }
+
+ /* add the mes gang to idr list */
+ r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
+ GFP_KERNEL);
+ if (r < 0) {
+ DRM_ERROR("failed to allocate idr for gang\n");
+ goto clean_up_ctx;
+ }
+
+ gang->gang_id = r;
+ *gang_id = r;
+
INIT_LIST_HEAD(&gang->queue_list);
gang->process = process;
gang->priority = gprops->priority;
gang->inprocess_gang_priority = gprops->inprocess_gang_priority;
list_add_tail(&gang->list, &process->gang_list);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
-clean_up:
- idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+clean_up_ctx:
+ amdgpu_mes_unlock(&adev->mes);
+ amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
+ &gang->gang_ctx_gpu_addr,
+ &gang->gang_ctx_cpu_ptr);
+clean_up_mem:
kfree(gang);
- mutex_unlock(&adev->mes.mutex);
return r;
}
{
struct amdgpu_mes_gang *gang;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
gang = idr_find(&adev->mes.gang_id_idr, gang_id);
if (!gang) {
DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -EINVAL;
}
if (!list_empty(&gang->queue_list)) {
DRM_ERROR("queue list is not empty\n");
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -EBUSY;
}
idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+ list_del(&gang->list);
+ amdgpu_mes_unlock(&adev->mes);
+
amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
&gang->gang_ctx_gpu_addr,
&gang->gang_ctx_cpu_ptr);
- list_del(&gang->list);
+
kfree(gang);
- mutex_unlock(&adev->mes.mutex);
return 0;
}
struct mes_suspend_gang_input input;
int r, pasid;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
idp = &adev->mes.pasid_idr;
}
}
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
}
struct mes_resume_gang_input input;
int r, pasid;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
idp = &adev->mes.pasid_idr;
}
}
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
}
-static int amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
+static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
struct amdgpu_mes_queue *q,
struct amdgpu_mes_queue_properties *p)
{
struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
u32 mqd_size = mqd_mgr->mqd_size;
- struct amdgpu_mqd_prop mqd_prop = {0};
int r;
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
}
memset(q->mqd_cpu_ptr, 0, mqd_size);
+ r = amdgpu_bo_reserve(q->mqd_obj, false);
+ if (unlikely(r != 0))
+ goto clean_up;
+
+ return 0;
+
+clean_up:
+ amdgpu_bo_free_kernel(&q->mqd_obj,
+ &q->mqd_gpu_addr,
+ &q->mqd_cpu_ptr);
+ return r;
+}
+
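+/*
+ * Note: amdgpu_mes_queue_alloc_mqd() returns with q->mqd_obj still reserved
+ * on success; amdgpu_mes_queue_init_mqd() below fills the MQD and then drops
+ * that reservation via amdgpu_bo_unreserve().
+ */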
+static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
+ struct amdgpu_mes_queue *q,
+ struct amdgpu_mes_queue_properties *p)
+{
+ struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
+ struct amdgpu_mqd_prop mqd_prop = {0};
+
mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr;
mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr;
mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr;
mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
mqd_prop.hqd_active = false;
- r = amdgpu_bo_reserve(q->mqd_obj, false);
- if (unlikely(r != 0))
- goto clean_up;
-
mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);
amdgpu_bo_unreserve(q->mqd_obj);
- return 0;
-
-clean_up:
- amdgpu_bo_free_kernel(&q->mqd_obj,
- &q->mqd_gpu_addr,
- &q->mqd_cpu_ptr);
- return r;
-}
-
-static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
-{
- amdgpu_bo_free_kernel(&q->mqd_obj,
- &q->mqd_gpu_addr,
- &q->mqd_cpu_ptr);
}
int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
unsigned long flags;
int r;
- mutex_lock(&adev->mes.mutex);
-
- gang = idr_find(&adev->mes.gang_id_idr, gang_id);
- if (!gang) {
- DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- mutex_unlock(&adev->mes.mutex);
- return -EINVAL;
- }
-
/* allocate the mes queue buffer */
queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
if (!queue) {
- mutex_unlock(&adev->mes.mutex);
+ DRM_ERROR("Failed to allocate memory for queue\n");
return -ENOMEM;
}
+ /* Allocate the queue mqd */
+ r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops);
+ if (r)
+ goto clean_up_memory;
+
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
+
+ gang = idr_find(&adev->mes.gang_id_idr, gang_id);
+ if (!gang) {
+ DRM_ERROR("gang id %d doesn't exist\n", gang_id);
+ r = -EINVAL;
+ goto clean_up_mqd;
+ }
+
/* add the mes gang to idr list */
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0,
GFP_ATOMIC);
if (r < 0) {
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
- goto clean_up_memory;
+ goto clean_up_mqd;
}
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
*queue_id = queue->queue_id = r;
goto clean_up_queue_id;
/* initialize the queue mqd */
- r = amdgpu_mes_queue_init_mqd(adev, queue, qprops);
- if (r)
- goto clean_up_doorbell;
+ amdgpu_mes_queue_init_mqd(adev, queue, qprops);
/* add hw queue to mes */
queue_input.process_id = gang->process->pasid;
- queue_input.page_table_base_addr = gang->process->pd_gpu_addr;
+
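+ /*
+ * pd_gpu_addr is an MC address; rebase it from gmc.vram_start onto
+ * vram_base_offset so MES receives the page directory's physical VRAM
+ * address (the mes_v10_1 backend correspondingly stops subtracting
+ * vram_start itself).
+ */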
+ queue_input.page_table_base_addr =
+ adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr -
+ adev->gmc.vram_start;
+
queue_input.process_va_start = 0;
queue_input.process_va_end =
(adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
if (r) {
DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n",
qprops->doorbell_off);
- goto clean_up_mqd;
+ goto clean_up_doorbell;
}
DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, "
queue->gang = gang;
list_add_tail(&queue->list, &gang->queue_list);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return 0;
-clean_up_mqd:
- amdgpu_mes_queue_free_mqd(queue);
clean_up_doorbell:
amdgpu_mes_queue_doorbell_free(adev, gang->process,
qprops->doorbell_off);
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
+clean_up_mqd:
+ amdgpu_mes_unlock(&adev->mes);
+ amdgpu_mes_queue_free_mqd(queue);
clean_up_memory:
kfree(queue);
- mutex_unlock(&adev->mes.mutex);
return r;
}
struct mes_remove_queue_input queue_input;
int r;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
/* remove the mes gang from idr list */
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
queue = idr_find(&adev->mes.queue_id_idr, queue_id);
if (!queue) {
spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
DRM_ERROR("queue id %d doesn't exist\n", queue_id);
return -EINVAL;
}
DRM_ERROR("failed to remove hardware queue, queue id = %d\n",
queue_id);
- amdgpu_mes_queue_free_mqd(queue);
list_del(&queue->list);
amdgpu_mes_queue_doorbell_free(adev, gang->process,
queue->doorbell_off);
+ amdgpu_mes_unlock(&adev->mes);
+
+ amdgpu_mes_queue_free_mqd(queue);
kfree(queue);
- mutex_unlock(&adev->mes.mutex);
return 0;
}
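+
+/*
+ * Unmap or preempt a kernel-managed ("legacy") queue through MES.
+ * For PREEMPT_QUEUES_NO_UNMAP the queue is only preempted and gpu_addr/seq
+ * are handed to the firmware as the trailing fence it writes once the
+ * preemption completes; any other action removes the queue from hardware.
+ */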
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq)
+{
+ struct mes_unmap_legacy_queue_input queue_input;
+ int r;
+
+ amdgpu_mes_lock(&adev->mes);
+
+ queue_input.action = action;
+ queue_input.queue_type = ring->funcs->type;
+ queue_input.doorbell_offset = ring->doorbell_index;
+ queue_input.pipe_id = ring->pipe;
+ queue_input.queue_id = ring->queue;
+ queue_input.trail_fence_addr = gpu_addr;
+ queue_input.trail_fence_data = seq;
+
+ r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
+ if (r)
+ DRM_ERROR("failed to unmap legacy queue\n");
+
+ amdgpu_mes_unlock(&adev->mes);
+ return r;
+}
+
static void
amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_mes_queue_properties qprops = {0};
int r, queue_id, pasid;
- mutex_lock(&adev->mes.mutex);
+ /*
+ * Avoid taking any other locks under MES lock to avoid circular
+ * lock dependencies.
+ */
+ amdgpu_mes_lock(&adev->mes);
gang = idr_find(&adev->mes.gang_id_idr, gang_id);
if (!gang) {
DRM_ERROR("gang id %d doesn't exist\n", gang_id);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -EINVAL;
}
pasid = gang->process->pasid;
ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
if (!ring) {
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return -ENOMEM;
}
dma_fence_wait(gang->process->vm->last_update, false);
dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
if (r)
amdgpu_ring_fini(ring);
clean_up_memory:
kfree(ring);
- mutex_unlock(&adev->mes.mutex);
+ amdgpu_mes_unlock(&adev->mes);
return r;
}
}
for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
- /* On sienna cichlid+, fw hasn't supported to map sdma queue. */
- if (adev->asic_type >= CHIP_SIENNA_CICHLID &&
- i == AMDGPU_RING_TYPE_SDMA)
+ /* On GFX v10.3, the fw doesn't yet support mapping SDMA queues. */
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) &&
+ adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) &&
+ queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
continue;
r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
struct amdgpu_mes {
struct amdgpu_device *adev;
- struct mutex mutex;
+ struct mutex mutex_hidden;
struct idr pasid_idr;
struct idr gang_id_idr;
uint32_t query_status_fence_offs;
uint64_t query_status_fence_gpu_addr;
uint64_t *query_status_fence_ptr;
+ uint32_t saved_flags;
/* initialize kiq pipe */
int (*kiq_hw_init)(struct amdgpu_device *adev);
+ int (*kiq_hw_fini)(struct amdgpu_device *adev);
/* ip specific functions */
const struct amdgpu_mes_funcs *funcs;
uint64_t wptr_addr;
uint32_t queue_type;
uint32_t paging;
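+ /* GWS allocation and trap handler (TBA/TMA) addresses for the queue */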
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint64_t tba_addr;
+ uint64_t tma_addr;
};
struct mes_remove_queue_input {
uint64_t gang_context_addr;
};
+struct mes_unmap_legacy_queue_input {
+ enum amdgpu_unmap_queues_action action;
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ uint32_t pipe_id;
+ uint32_t queue_id;
+ uint64_t trail_fence_addr;
+ uint64_t trail_fence_data;
+};
+
struct mes_suspend_gang_input {
bool suspend_all_gangs;
uint64_t gang_context_addr;
int (*remove_hw_queue)(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input);
+ int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input);
+
int (*suspend_gang)(struct amdgpu_mes *mes,
struct mes_suspend_gang_input *input);
};
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
+#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
int *queue_id);
int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ enum amdgpu_unmap_queues_action action,
+ u64 gpu_addr, u64 seq);
+
int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
int queue_type, int idx,
struct amdgpu_mes_ctx_data *ctx_data,
uint32_t doorbell_index,
unsigned int doorbell_id);
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
+
+/*
+ * MES lock can be taken in MMU notifiers.
+ *
+ * A bit more detail about why no-FS reclaim is set while holding the MES lock:
+ *
+ * The purpose of the MMU notifier is to stop GPU access to memory so
+ * that the Linux VM subsystem can move pages around safely. This is
+ * done by preempting user mode queues for the affected process. When
+ * MES is used, MES lock needs to be taken to preempt the queues.
+ *
+ * The MMU notifier callback entry point in the driver is
+ * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
+ * there is:
+ * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
+ * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
+ *
+ * The last part of the chain is a function pointer where we take the
+ * MES lock.
+ *
+ * The problem with taking locks in the MMU notifier is that MMU
+ * notifiers can be called in reclaim-FS context. That's where the
+ * kernel frees up pages to make room for new page allocations under
+ * memory pressure. While we are running in reclaim-FS context, we must
+ * not trigger another memory reclaim operation because that would
+ * recursively reenter the reclaim code and cause a deadlock. The
+ * memalloc_nofs_save/restore calls guarantee that.
+ *
+ * In addition we also need to avoid lock dependencies on other locks taken
+ * under the MES lock, for example reservation locks. Here is a possible
+ * scenario of a deadlock:
+ * Thread A: takes and holds reservation lock | triggers reclaim-FS |
+ * MMU notifier | blocks trying to take MES lock
+ * Thread B: takes and holds MES lock | blocks trying to take reservation lock
+ *
+ * In this scenario Thread B gets involved in a deadlock even without
+ * triggering a reclaim-FS operation itself.
+ * To fix this and break the lock dependency chain you'd need to either:
+ * 1. protect reservation locks with memalloc_nofs_save/restore, or
+ * 2. avoid taking reservation locks under the MES lock.
+ *
+ * Reservation locks are taken all over the kernel in different subsystems;
+ * we have no control over them or their lock dependencies. So the only
+ * workable solution is to avoid taking other locks under the MES lock.
+ * As a result, make sure no reclaim-FS happens while holding this lock anywhere
+ * to prevent deadlocks when an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
+{
+ mutex_lock(&mes->mutex_hidden);
+ mes->saved_flags = memalloc_noreclaim_save();
+}
+
+static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
+{
+ memalloc_noreclaim_restore(mes->saved_flags);
+ mutex_unlock(&mes->mutex_hidden);
+}
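+
+/*
+ * Illustrative usage of the pattern described above (a sketch, not code
+ * taken from this patch): do memory allocation and BO creation/reservation
+ * before taking the MES lock, keep only scheduler bookkeeping under it, and
+ * release resources only after dropping it.
+ *
+ *	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+ *				    AMDGPU_GEM_DOMAIN_GTT,
+ *				    &bo, &gpu_addr, &cpu_ptr);
+ *	if (r)
+ *		return r;
+ *
+ *	amdgpu_mes_lock(&adev->mes);
+ *	r = idr_alloc(&adev->mes.pasid_idr, obj, pasid, pasid + 1, GFP_KERNEL);
+ *	amdgpu_mes_unlock(&adev->mes);
+ *
+ *	if (r < 0)
+ *		amdgpu_bo_free_kernel(&bo, &gpu_addr, &cpu_ptr);
+ */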
#endif /* __AMDGPU_MES_H__ */
{
struct amdgpu_device *adev = mes->adev;
union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_add_queue_pkt.process_id = input->process_id;
- mes_add_queue_pkt.page_table_base_addr =
- input->page_table_base_addr - adev->gmc.vram_start;
+ mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
mes_add_queue_pkt.process_va_start = input->process_va_start;
mes_add_queue_pkt.process_va_end = input->process_va_end;
mes_add_queue_pkt.process_quantum = input->process_quantum;
mes_add_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
mes_add_queue_pkt.paging = input->paging;
+ mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
+ mes_add_queue_pkt.gws_base = input->gws_base;
+ mes_add_queue_pkt.gws_size = input->gws_size;
+ mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
}
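+
+/*
+ * The v10.1 backend implements unmap_legacy_queue with the existing
+ * REMOVE_QUEUE packet: PREEMPT_QUEUES_NO_UNMAP sets preempt_legacy_gfx_queue
+ * plus the trailing fence address/data, while a full unmap sets either
+ * unmap_legacy_gfx_queue or unmap_kiq_utility_queue depending on ring type.
+ */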
+static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes,
+ struct mes_unmap_legacy_queue_input *input)
+{
+ union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt;
+
+ memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt));
+
+ mes_remove_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+ mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+ mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
+ mes_remove_queue_pkt.gang_context_addr = 0;
+
+ mes_remove_queue_pkt.pipe_id = input->pipe_id;
+ mes_remove_queue_pkt.queue_id = input->queue_id;
+
+ if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
+ mes_remove_queue_pkt.preempt_legacy_gfx_queue = 1;
+ mes_remove_queue_pkt.tf_addr = input->trail_fence_addr;
+ mes_remove_queue_pkt.tf_data =
+ lower_32_bits(input->trail_fence_data);
+ } else {
+ if (input->queue_type == AMDGPU_RING_TYPE_GFX)
+ mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1;
+ else
+ mes_remove_queue_pkt.unmap_kiq_utility_queue = 1;
+ }
+
+ mes_remove_queue_pkt.api_status.api_completion_fence_addr =
+ mes->ring.fence_drv.gpu_addr;
+ mes_remove_queue_pkt.api_status.api_completion_fence_value =
+ ++mes->ring.fence_drv.sync_seq;
+
+ return mes_v10_1_submit_pkt_and_poll_completion(mes,
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
+}
+
static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
struct mes_suspend_gang_input *input)
{
mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
- mes_set_hw_res_pkt.agreegated_doorbells[i] =
+ mes_set_hw_res_pkt.aggregated_doorbells[i] =
mes->agreegated_doorbells[i];
+ for (i = 0; i < 5; i++) {
+ mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
+ mes_set_hw_res_pkt.mmhub_base[i] =
+ adev->reg_offset[MMHUB_HWIP][0][i];
+ mes_set_hw_res_pkt.osssys_base[i] =
+ adev->reg_offset[OSSSYS_HWIP][0][i];
+ }
+
+ mes_set_hw_res_pkt.disable_reset = 1;
+ mes_set_hw_res_pkt.disable_mes_log = 1;
+ mes_set_hw_res_pkt.use_different_vmid_compute = 1;
+
mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_set_hw_res_pkt.api_status.api_completion_fence_value =
static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
.add_hw_queue = mes_v10_1_add_hw_queue,
.remove_hw_queue = mes_v10_1_remove_hw_queue,
+ .unmap_legacy_queue = mes_v10_1_unmap_legacy_queue,
.suspend_gang = mes_v10_1_suspend_gang,
.resume_gang = mes_v10_1_resume_gang,
};
goto failure;
}
+ /*
+ * Disable KIQ ring usage from the driver once MES is enabled.
+ * MES uses KIQ ring exclusively so driver cannot access KIQ ring
+ * with MES enabled.
+ */
+ adev->gfx.kiq.ring.sched.ready = false;
+
return 0;
failure:
MES_SCH_API_PROGRAM_GDS = 12,
MES_SCH_API_SET_DEBUG_VMID = 13,
MES_SCH_API_MISC = 14,
+ MES_SCH_API_UPDATE_ROOT_PAGE_TABLE = 15,
+ MES_SCH_API_AMD_LOG = 16,
MES_SCH_API_MAX = 0xFF
};
enum { MAX_VMID_MMHUB = 16 };
enum MES_LOG_OPERATION {
- MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0
+ MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0,
+ MES_LOG_OPERATION_QUEUE_NEW_WORK = 1,
+ MES_LOG_OPERATION_QUEUE_UNWAIT_SYNC_OBJECT = 2,
+ MES_LOG_OPERATION_QUEUE_NO_MORE_WORK = 3,
+ MES_LOG_OPERATION_QUEUE_WAIT_SYNC_OBJECT = 4,
+ MES_LOG_OPERATION_QUEUE_INVALID = 0xF,
};
enum MES_LOG_CONTEXT_STATE {
MES_LOG_CONTEXT_STATE_RUNNING = 1,
MES_LOG_CONTEXT_STATE_READY = 2,
MES_LOG_CONTEXT_STATE_READY_STANDBY = 3,
+ MES_LOG_CONTEXT_STATE_INVALID = 0xF,
};
struct MES_LOG_CONTEXT_STATE_CHANGE {
enum MES_LOG_CONTEXT_STATE new_context_state;
};
+struct MES_LOG_QUEUE_NEW_WORK {
+ uint64_t h_queue;
+ uint64_t reserved;
+};
+
+struct MES_LOG_QUEUE_UNWAIT_SYNC_OBJECT {
+ uint64_t h_queue;
+ uint64_t h_sync_object;
+};
+
+struct MES_LOG_QUEUE_NO_MORE_WORK {
+ uint64_t h_queue;
+ uint64_t reserved;
+};
+
+struct MES_LOG_QUEUE_WAIT_SYNC_OBJECT {
+ uint64_t h_queue;
+ uint64_t h_sync_object;
+};
+
struct MES_LOG_ENTRY_HEADER {
uint32_t first_free_entry_index;
uint32_t wraparound_count;
uint32_t operation_type; /* operation_type is of MES_LOG_OPERATION type */
uint32_t reserved_operation_type_bits;
union {
- struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change;
- uint64_t reserved_operation_data[2];
+ struct MES_LOG_CONTEXT_STATE_CHANGE context_state_change;
+ struct MES_LOG_QUEUE_NEW_WORK queue_new_work;
+ struct MES_LOG_QUEUE_UNWAIT_SYNC_OBJECT queue_unwait_sync_object;
+ struct MES_LOG_QUEUE_NO_MORE_WORK queue_no_more_work;
+ struct MES_LOG_QUEUE_WAIT_SYNC_OBJECT queue_wait_sync_object;
+ uint64_t all[2];
};
};
struct MES_LOG_ENTRY_DATA entries[1];
};
+enum MES_SWIP_TO_HWIP_DEF {
+ MES_MAX_HWIP_SEGMENT = 6,
+};
+
union MESAPI_SET_HW_RESOURCES {
struct {
union MES_API_HEADER header;
uint32_t compute_hqd_mask[MAX_COMPUTE_PIPES];
uint32_t gfx_hqd_mask[MAX_GFX_PIPES];
uint32_t sdma_hqd_mask[MAX_SDMA_PIPES];
- uint32_t agreegated_doorbells[AMD_PRIORITY_NUM_LEVELS];
+ uint32_t aggregated_doorbells[AMD_PRIORITY_NUM_LEVELS];
uint64_t g_sch_ctx_gpu_mc_ptr;
uint64_t query_status_fence_gpu_mc_ptr;
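+ /*
+ * Register aperture bases for GC, MMHUB and OSSSYS (filled from
+ * adev->reg_offset) so the firmware can locate those register blocks.
+ */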
+ uint32_t gc_base[MES_MAX_HWIP_SEGMENT];
+ uint32_t mmhub_base[MES_MAX_HWIP_SEGMENT];
+ uint32_t osssys_base[MES_MAX_HWIP_SEGMENT];
struct MES_API_STATUS api_status;
union {
struct {
uint32_t disable_reset : 1;
- uint32_t reserved : 31;
+ uint32_t use_different_vmid_compute : 1;
+ uint32_t disable_mes_log : 1;
+ uint32_t apply_mmhub_pgvm_invalidate_ack_loss_wa : 1;
+ uint32_t apply_grbm_remote_register_dummy_read_wa : 1;
+ uint32_t second_gfx_pipe_enabled : 1;
+ uint32_t enable_level_process_quantum_check : 1;
+ uint32_t apply_cwsr_program_all_vmid_sq_shader_tba_registers_wa : 1;
+ uint32_t enable_mqd_active_poll : 1;
+ uint32_t disable_timer_int : 1;
+ uint32_t reserved : 22;
};
uint32_t uint32_t_all;
};
uint32_t doorbell_offset;
uint64_t mqd_addr;
uint64_t wptr_addr;
+ uint64_t h_context;
+ uint64_t h_queue;
enum MES_QUEUE_TYPE queue_type;
uint32_t gds_base;
uint32_t gds_size;
uint32_t gws_base;
uint32_t gws_size;
uint32_t oa_mask;
+ uint64_t trap_handler_addr;
+ uint32_t vm_context_cntl;
struct {
uint32_t paging : 1;
uint32_t program_gds : 1;
uint32_t is_gang_suspended : 1;
uint32_t is_tmz_queue : 1;
- uint32_t reserved : 24;
+ uint32_t map_kiq_utility_queue : 1;
+ uint32_t reserved : 23;
};
struct MES_API_STATUS api_status;
};
uint64_t gang_context_addr;
struct {
- uint32_t unmap_legacy_gfx_queue : 1;
- uint32_t reserved : 31;
+ uint32_t unmap_legacy_gfx_queue : 1;
+ uint32_t unmap_kiq_utility_queue : 1;
+ uint32_t preempt_legacy_gfx_queue : 1;
+ uint32_t reserved : 29;
};
- struct MES_API_STATUS api_status;
+ struct MES_API_STATUS api_status;
+
+ uint32_t pipe_id;
+ uint32_t queue_id;
+
+ uint64_t tf_addr;
+ uint32_t tf_data;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
union MESAPI__RESET {
struct {
- union MES_API_HEADER header;
+ union MES_API_HEADER header;
struct {
- uint32_t reset_queue : 1;
- uint32_t reserved : 31;
+ /* Only reset the queue given by doorbell_offset (not entire gang) */
+ uint32_t reset_queue_only : 1;
+ /* Hang detection first then reset any queues that are hung */
+ uint32_t hang_detect_then_reset : 1;
+ /* Only do hang detection (no reset) */
+ uint32_t hang_detect_only : 1;
+ /* Reset HP and LP kernel queues not managed by MES */
+ uint32_t reset_legacy_gfx : 1;
+ uint32_t reserved : 28;
};
- uint64_t gang_context_addr;
- uint32_t doorbell_offset; /* valid only if reset_queue = true */
- struct MES_API_STATUS api_status;
+ uint64_t gang_context_addr;
+
+ /* valid only if reset_queue_only = true */
+ uint32_t doorbell_offset;
+
+ /* valid only if hang_detect_then_reset = true */
+ uint64_t doorbell_offset_addr;
+ enum MES_QUEUE_TYPE queue_type;
+
+ /* valid only if reset_legacy_gfx = true */
+ uint32_t pipe_id_lp;
+ uint32_t queue_id_lp;
+ uint32_t vmid_id_lp;
+ uint64_t mqd_mc_addr_lp;
+ uint32_t doorbell_offset_lp;
+ uint64_t wptr_addr_lp;
+
+ uint32_t pipe_id_hp;
+ uint32_t queue_id_hp;
+ uint32_t vmid_id_hp;
+ uint64_t mqd_mc_addr_hp;
+ uint32_t doorbell_offset_hp;
+ uint64_t wptr_addr_hp;
+
+ struct MES_API_STATUS api_status;
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
enum MESAPI_MISC_OPCODE {
MESAPI_MISC__MODIFY_REG,
+ MESAPI_MISC__INV_GART,
+ MESAPI_MISC__QUERY_STATUS,
MESAPI_MISC__MAX,
};
enum { MISC_DATA_MAX_SIZE_IN_DWORDS = 20 };
+struct MODIFY_REG {
+ enum MODIFY_REG_SUBCODE subcode;
+ uint32_t reg_offset;
+ uint32_t reg_value;
+};
+
+struct INV_GART {
+ uint64_t inv_range_va_start;
+ uint64_t inv_range_size;
+};
+
+struct QUERY_STATUS {
+ uint32_t context_id;
+};
+
union MESAPI__MISC {
struct {
union MES_API_HEADER header;
struct MES_API_STATUS api_status;
union {
- struct {
- enum MODIFY_REG_SUBCODE subcode;
- uint32_t reg_offset;
- uint32_t reg_value;
- } modify_reg;
+ struct MODIFY_REG modify_reg;
+ struct INV_GART inv_gart;
+ struct QUERY_STATUS query_status;
uint32_t data[MISC_DATA_MAX_SIZE_IN_DWORDS];
};
};
uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
};
+union MESAPI__UPDATE_ROOT_PAGE_TABLE {
+ struct {
+ union MES_API_HEADER header;
+ uint64_t page_table_base_addr;
+ uint64_t process_context_addr;
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
+union MESAPI_AMD_LOG {
+ struct {
+ union MES_API_HEADER header;
+ uint64_t p_buffer_memory;
+ uint64_t p_buffer_size_used;
+ struct MES_API_STATUS api_status;
+ };
+
+ uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
+};
+
#pragma pack(pop)
#endif