idr_init(&adev->mes.queue_id_idr);
        ida_init(&adev->mes.doorbell_ida);
        spin_lock_init(&adev->mes.queue_id_lock);
-       mutex_init(&adev->mes.mutex);
+       mutex_init(&adev->mes.mutex_hidden);
 
        adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
        adev->mes.vmid_mask_mmhub = 0xffffff00;
        for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
                adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;
 
-       for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++)
-               adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
+       for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
+               if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0))
+                       adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
+               else
+                       adev->mes.sdma_hqd_mask[i] = 0xfc;
+       }
 
        for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++)
                adev->mes.agreegated_doorbells[i] = 0xffffffff;
        idr_destroy(&adev->mes.gang_id_idr);
        idr_destroy(&adev->mes.queue_id_idr);
        ida_destroy(&adev->mes.doorbell_ida);
-       mutex_destroy(&adev->mes.mutex);
+       mutex_destroy(&adev->mes.mutex_hidden);
        return r;
 }
 
        idr_destroy(&adev->mes.gang_id_idr);
        idr_destroy(&adev->mes.queue_id_idr);
        ida_destroy(&adev->mes.doorbell_ida);
-       mutex_destroy(&adev->mes.mutex);
+       mutex_destroy(&adev->mes.mutex_hidden);
+}
+
+/*
+ * Hand the queue's MQD buffer object, its GPU address and its CPU mapping
+ * pointer to amdgpu_bo_free_kernel().
+ */
+static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
+{
+       amdgpu_bo_free_kernel(&q->mqd_obj, &q->mqd_gpu_addr, &q->mqd_cpu_ptr);
 }
 
 int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
        struct amdgpu_mes_process *process;
        int r;
 
-       mutex_lock(&adev->mes.mutex);
-
        /* allocate the mes process buffer */
        process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL);
        if (!process) {
                DRM_ERROR("no more memory to create mes process\n");
-               mutex_unlock(&adev->mes.mutex);
                return -ENOMEM;
        }
 
        if (!process->doorbell_bitmap) {
                DRM_ERROR("failed to allocate doorbell bitmap\n");
                kfree(process);
-               mutex_unlock(&adev->mes.mutex);
                return -ENOMEM;
        }
 
-       /* add the mes process to idr list */
-       r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
-                     GFP_KERNEL);
-       if (r < 0) {
-               DRM_ERROR("failed to lock pasid=%d\n", pasid);
-               goto clean_up_memory;
-       }
-
        /* allocate the process context bo and map it */
        r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
                                    AMDGPU_GEM_DOMAIN_GTT,
                                    &process->proc_ctx_cpu_ptr);
        if (r) {
                DRM_ERROR("failed to allocate process context bo\n");
-               goto clean_up_pasid;
+               goto clean_up_memory;
        }
        memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
 
+       /*
+        * Avoid taking any other locks under MES lock to avoid circular
+        * lock dependencies.
+        */
+       amdgpu_mes_lock(&adev->mes);
+
+       /* add the mes process to idr list */
+       r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
+                     GFP_KERNEL);
+       if (r < 0) {
+               DRM_ERROR("failed to lock pasid=%d\n", pasid);
+               goto clean_up_ctx;
+       }
+
        /* allocate the starting doorbell index of the process */
        r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index);
        if (r < 0) {
                DRM_ERROR("failed to allocate doorbell for process\n");
-               goto clean_up_ctx;
+               goto clean_up_pasid;
        }
 
        DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index);
        process->process_quantum = adev->mes.default_process_quantum;
        process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo);
 
-       mutex_unlock(&adev->mes.mutex);
+       amdgpu_mes_unlock(&adev->mes);
        return 0;
 
+clean_up_pasid:
+       idr_remove(&adev->mes.pasid_idr, pasid);
 clean_up_ctx:
+       /*
+        * This label is reached with the MES lock held, either directly
+        * from a failed idr_alloc() or by falling through from
+        * clean_up_pasid, so the lock must be dropped here. Unlocking only
+        * under clean_up_pasid leaked the lock on the idr_alloc() failure
+        * path. The context BO is freed after the unlock so that no other
+        * lock is taken under the MES lock.
+        */
+       amdgpu_mes_unlock(&adev->mes);
        amdgpu_bo_free_kernel(&process->proc_ctx_bo,
                              &process->proc_ctx_gpu_addr,
                              &process->proc_ctx_cpu_ptr);
-clean_up_pasid:
-       idr_remove(&adev->mes.pasid_idr, pasid);
 clean_up_memory:
        kfree(process->doorbell_bitmap);
        kfree(process);
-       mutex_unlock(&adev->mes.mutex);
        return r;
 }
 
        unsigned long flags;
        int r;
 
-       mutex_lock(&adev->mes.mutex);
+       /*
+        * Avoid taking any other locks under MES lock to avoid circular
+        * lock dependencies.
+        */
+       amdgpu_mes_lock(&adev->mes);
 
        process = idr_find(&adev->mes.pasid_idr, pasid);
        if (!process) {
                DRM_WARN("pasid %d doesn't exist\n", pasid);
-               mutex_unlock(&adev->mes.mutex);
+               amdgpu_mes_unlock(&adev->mes);
                return;
        }
 
-       /* free all gangs in the process */
+       /* Remove all queues from hardware */
        list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
-               /* free all queues in the gang */
                list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
                        spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
                        idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
                                                             &queue_input);
                        if (r)
                                DRM_WARN("failed to remove hardware queue\n");
+               }
+
+               idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+       }
 
+       amdgpu_mes_free_process_doorbells(adev, process->doorbell_index);
+       idr_remove(&adev->mes.pasid_idr, pasid);
+       amdgpu_mes_unlock(&adev->mes);
+
+       /* free all memory allocated by the process */
+       list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
+               /* free all queues in the gang */
+               list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
+                       amdgpu_mes_queue_free_mqd(queue);
                        list_del(&queue->list);
                        kfree(queue);
                }
-
-               idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
                amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
                                      &gang->gang_ctx_gpu_addr,
                                      &gang->gang_ctx_cpu_ptr);
                list_del(&gang->list);
                kfree(gang);
-       }
 
-       amdgpu_mes_free_process_doorbells(adev, process->doorbell_index);
-
-       idr_remove(&adev->mes.pasid_idr, pasid);
+       }
        amdgpu_bo_free_kernel(&process->proc_ctx_bo,
                              &process->proc_ctx_gpu_addr,
                              &process->proc_ctx_cpu_ptr);
        kfree(process->doorbell_bitmap);
        kfree(process);
-
-       mutex_unlock(&adev->mes.mutex);
 }
 
 int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
        struct amdgpu_mes_gang *gang;
        int r;
 
-       mutex_lock(&adev->mes.mutex);
-
-       process = idr_find(&adev->mes.pasid_idr, pasid);
-       if (!process) {
-               DRM_ERROR("pasid %d doesn't exist\n", pasid);
-               mutex_unlock(&adev->mes.mutex);
-               return -EINVAL;
-       }
-
        /* allocate the mes gang buffer */
        gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL);
        if (!gang) {
-               mutex_unlock(&adev->mes.mutex);
                return -ENOMEM;
        }
 
-       /* add the mes gang to idr list */
-       r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
-                     GFP_KERNEL);
-       if (r < 0) {
-               kfree(gang);
-               mutex_unlock(&adev->mes.mutex);
-               return r;
-       }
-
-       gang->gang_id = r;
-       *gang_id = r;
-
        /* allocate the gang context bo and map it to cpu space */
        r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE,
                                    AMDGPU_GEM_DOMAIN_GTT,
                                    &gang->gang_ctx_cpu_ptr);
        if (r) {
                DRM_ERROR("failed to allocate process context bo\n");
-               goto clean_up;
+               goto clean_up_mem;
        }
        memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
 
+       /*
+        * Avoid taking any other locks under MES lock to avoid circular
+        * lock dependencies.
+        */
+       amdgpu_mes_lock(&adev->mes);
+
+       process = idr_find(&adev->mes.pasid_idr, pasid);
+       if (!process) {
+               DRM_ERROR("pasid %d doesn't exist\n", pasid);
+               r = -EINVAL;
+               goto clean_up_ctx;
+       }
+
+       /* add the mes gang to idr list */
+       r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
+                     GFP_KERNEL);
+       if (r < 0) {
+               DRM_ERROR("failed to allocate idr for gang\n");
+               goto clean_up_ctx;
+       }
+
+       gang->gang_id = r;
+       *gang_id = r;
+
        INIT_LIST_HEAD(&gang->queue_list);
        gang->process = process;
        gang->priority = gprops->priority;
        gang->inprocess_gang_priority = gprops->inprocess_gang_priority;
        list_add_tail(&gang->list, &process->gang_list);
 
-       mutex_unlock(&adev->mes.mutex);
+       amdgpu_mes_unlock(&adev->mes);
        return 0;
 
-clean_up:
-       idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+clean_up_ctx:
+       amdgpu_mes_unlock(&adev->mes);
+       amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
+                             &gang->gang_ctx_gpu_addr,
+                             &gang->gang_ctx_cpu_ptr);
+clean_up_mem:
        kfree(gang);
-       mutex_unlock(&adev->mes.mutex);
        return r;
 }
 
 {
        struct amdgpu_mes_gang *gang;
 
-       mutex_lock(&adev->mes.mutex);
+       /*
+        * Avoid taking any other locks under MES lock to avoid circular
+        * lock dependencies.
+        */
+       amdgpu_mes_lock(&adev->mes);
 
        gang = idr_find(&adev->mes.gang_id_idr, gang_id);
        if (!gang) {
                DRM_ERROR("gang id %d doesn't exist\n", gang_id);
-               mutex_unlock(&adev->mes.mutex);
+               amdgpu_mes_unlock(&adev->mes);
                return -EINVAL;
        }
 
        if (!list_empty(&gang->queue_list)) {
                DRM_ERROR("queue list is not empty\n");
-               mutex_unlock(&adev->mes.mutex);
+               amdgpu_mes_unlock(&adev->mes);
                return -EBUSY;
        }
 
        idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
+       list_del(&gang->list);
+       amdgpu_mes_unlock(&adev->mes);
+
        amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
                              &gang->gang_ctx_gpu_addr,
                              &gang->gang_ctx_cpu_ptr);
-       list_del(&gang->list);
+
        kfree(gang);
 
-       mutex_unlock(&adev->mes.mutex);
        return 0;
 }
 
        struct mes_suspend_gang_input input;
        int r, pasid;
 
-       mutex_lock(&adev->mes.mutex);
+       /*
+        * Avoid taking any other locks under MES lock to avoid circular
+        * lock dependencies.
+        */
+       amdgpu_mes_lock(&adev->mes);
 
        idp = &adev->mes.pasid_idr;
 
                }
        }
 
-       mutex_unlock(&adev->mes.mutex);
+       amdgpu_mes_unlock(&adev->mes);
        return 0;
 }
 
        struct mes_resume_gang_input input;
        int r, pasid;
 
-       mutex_lock(&adev->mes.mutex);
+       /*
+        * Avoid taking any other locks under MES lock to avoid circular
+        * lock dependencies.
+        */
+       amdgpu_mes_lock(&adev->mes);
 
        idp = &adev->mes.pasid_idr;
 
                }
        }
 
-       mutex_unlock(&adev->mes.mutex);
+       amdgpu_mes_unlock(&adev->mes);
        return 0;
 }
 
-static int amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
+static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
                                     struct amdgpu_mes_queue *q,
                                     struct amdgpu_mes_queue_properties *p)
 {
        struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
        u32 mqd_size = mqd_mgr->mqd_size;
-       struct amdgpu_mqd_prop mqd_prop = {0};
        int r;
 
        r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
        }
        memset(q->mqd_cpu_ptr, 0, mqd_size);
 
+       r = amdgpu_bo_reserve(q->mqd_obj, false);
+       if (unlikely(r != 0))
+               goto clean_up;
+
+       return 0;
+
+clean_up:
+       amdgpu_bo_free_kernel(&q->mqd_obj,
+                             &q->mqd_gpu_addr,
+                             &q->mqd_cpu_ptr);
+       return r;
+}
+
+static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
+                                    struct amdgpu_mes_queue *q,
+                                    struct amdgpu_mes_queue_properties *p)
+{
+       struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
+       struct amdgpu_mqd_prop mqd_prop = {0};
+
        mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr;
        mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr;
        mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr;
        mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
        mqd_prop.hqd_active = false;
 
-       r = amdgpu_bo_reserve(q->mqd_obj, false);
-       if (unlikely(r != 0))
-               goto clean_up;
-
        mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);
 
        amdgpu_bo_unreserve(q->mqd_obj);
-       return 0;
-
-clean_up:
-       amdgpu_bo_free_kernel(&q->mqd_obj,
-                             &q->mqd_gpu_addr,
-                             &q->mqd_cpu_ptr);
-       return r;
-}
-
-static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
-{
-       amdgpu_bo_free_kernel(&q->mqd_obj,
-                             &q->mqd_gpu_addr,
-                             &q->mqd_cpu_ptr);
 }
 
 int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
        unsigned long flags;
        int r;
 
-       mutex_lock(&adev->mes.mutex);
-
-       gang = idr_find(&adev->mes.gang_id_idr, gang_id);
-       if (!gang) {
-               DRM_ERROR("gang id %d doesn't exist\n", gang_id);
-               mutex_unlock(&adev->mes.mutex);
-               return -EINVAL;
-       }
-
        /* allocate the mes queue buffer */
        queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
        if (!queue) {
-               mutex_unlock(&adev->mes.mutex);
+               DRM_ERROR("Failed to allocate memory for queue\n");
                return -ENOMEM;
        }
 
+       /* Allocate the queue mqd */
+       r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops);
+       if (r)
+               goto clean_up_memory;
+
+       /*
+        * Avoid taking any other locks under MES lock to avoid circular
+        * lock dependencies.
+        */
+       amdgpu_mes_lock(&adev->mes);
+
+       gang = idr_find(&adev->mes.gang_id_idr, gang_id);
+       if (!gang) {
+               DRM_ERROR("gang id %d doesn't exist\n", gang_id);
+               r = -EINVAL;
+               goto clean_up_mqd;
+       }
+
        /* add the mes gang to idr list */
        spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
        r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0,
                      GFP_ATOMIC);
        if (r < 0) {
                spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
-               goto clean_up_memory;
+               goto clean_up_mqd;
        }
        spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
        *queue_id = queue->queue_id = r;
                goto clean_up_queue_id;
 
        /* initialize the queue mqd */
-       r = amdgpu_mes_queue_init_mqd(adev, queue, qprops);
-       if (r)
-               goto clean_up_doorbell;
+       amdgpu_mes_queue_init_mqd(adev, queue, qprops);
 
        /* add hw queue to mes */
        queue_input.process_id = gang->process->pasid;
-       queue_input.page_table_base_addr = gang->process->pd_gpu_addr;
+
+       queue_input.page_table_base_addr =
+               adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr -
+               adev->gmc.vram_start;
+
        queue_input.process_va_start = 0;
        queue_input.process_va_end =
                (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
        if (r) {
                DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n",
                          qprops->doorbell_off);
-               goto clean_up_mqd;
+               goto clean_up_doorbell;
        }
 
        DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, "
        queue->gang = gang;
        list_add_tail(&queue->list, &gang->queue_list);
 
-       mutex_unlock(&adev->mes.mutex);
+       amdgpu_mes_unlock(&adev->mes);
        return 0;
 
-clean_up_mqd:
-       amdgpu_mes_queue_free_mqd(queue);
 clean_up_doorbell:
        amdgpu_mes_queue_doorbell_free(adev, gang->process,
                                       qprops->doorbell_off);
        spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
        idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
        spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
+clean_up_mqd:
+       amdgpu_mes_unlock(&adev->mes);
+       amdgpu_mes_queue_free_mqd(queue);
 clean_up_memory:
        kfree(queue);
-       mutex_unlock(&adev->mes.mutex);
        return r;
 }
 
        struct mes_remove_queue_input queue_input;
        int r;
 
-       mutex_lock(&adev->mes.mutex);
+       /*
+        * Avoid taking any other locks under MES lock to avoid circular
+        * lock dependencies.
+        */
+       amdgpu_mes_lock(&adev->mes);
 
        /* remove the mes gang from idr list */
        spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
        queue = idr_find(&adev->mes.queue_id_idr, queue_id);
        if (!queue) {
                spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
-               mutex_unlock(&adev->mes.mutex);
+               amdgpu_mes_unlock(&adev->mes);
                DRM_ERROR("queue id %d doesn't exist\n", queue_id);
                return -EINVAL;
        }
                DRM_ERROR("failed to remove hardware queue, queue id = %d\n",
                          queue_id);
 
-       amdgpu_mes_queue_free_mqd(queue);
        list_del(&queue->list);
        amdgpu_mes_queue_doorbell_free(adev, gang->process,
                                       queue->doorbell_off);
+       amdgpu_mes_unlock(&adev->mes);
+
+       amdgpu_mes_queue_free_mqd(queue);
        kfree(queue);
-       mutex_unlock(&adev->mes.mutex);
        return 0;
 }
 
+/*
+ * Build a mes_unmap_legacy_queue_input from @ring, @action and the trail
+ * fence (@gpu_addr, @seq) and pass it to the unmap_legacy_queue callback in
+ * adev->mes.funcs while holding the MES lock.
+ *
+ * Returns the callback's result; a non-zero result is also logged.
+ */
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+                                 struct amdgpu_ring *ring,
+                                 enum amdgpu_unmap_queues_action action,
+                                 u64 gpu_addr, u64 seq)
+{
+       struct mes_unmap_legacy_queue_input input;
+       int ret;
+
+       amdgpu_mes_lock(&adev->mes);
+
+       input = (struct mes_unmap_legacy_queue_input) {
+               .action = action,
+               .queue_type = ring->funcs->type,
+               .doorbell_offset = ring->doorbell_index,
+               .pipe_id = ring->pipe,
+               .queue_id = ring->queue,
+               .trail_fence_addr = gpu_addr,
+               .trail_fence_data = seq,
+       };
+
+       ret = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &input);
+       if (ret)
+               DRM_ERROR("failed to unmap legacy queue\n");
+
+       amdgpu_mes_unlock(&adev->mes);
+       return ret;
+}
+
 static void
 amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
                               struct amdgpu_ring *ring,
        struct amdgpu_mes_queue_properties qprops = {0};
        int r, queue_id, pasid;
 
-       mutex_lock(&adev->mes.mutex);
+       /*
+        * Avoid taking any other locks under MES lock to avoid circular
+        * lock dependencies.
+        */
+       amdgpu_mes_lock(&adev->mes);
        gang = idr_find(&adev->mes.gang_id_idr, gang_id);
        if (!gang) {
                DRM_ERROR("gang id %d doesn't exist\n", gang_id);
-               mutex_unlock(&adev->mes.mutex);
+               amdgpu_mes_unlock(&adev->mes);
                return -EINVAL;
        }
        pasid = gang->process->pasid;
 
        ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
        if (!ring) {
-               mutex_unlock(&adev->mes.mutex);
+               amdgpu_mes_unlock(&adev->mes);
                return -ENOMEM;
        }
 
 
        dma_fence_wait(gang->process->vm->last_update, false);
        dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
-       mutex_unlock(&adev->mes.mutex);
+       amdgpu_mes_unlock(&adev->mes);
 
        r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
        if (r)
        amdgpu_ring_fini(ring);
 clean_up_memory:
        kfree(ring);
-       mutex_unlock(&adev->mes.mutex);
+       amdgpu_mes_unlock(&adev->mes);
        return r;
 }
 
        }
 
        for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
-               /* On sienna cichlid+, fw hasn't supported to map sdma queue. */
-               if (adev->asic_type >= CHIP_SIENNA_CICHLID &&
-                   i == AMDGPU_RING_TYPE_SDMA)
+               /* On GFX v10.3, fw does not yet support mapping SDMA queues. */
+               if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) &&
+                   adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) &&
+                   queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
                        continue;
 
                r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
 
 struct amdgpu_mes {
        struct amdgpu_device            *adev;
 
-       struct mutex                    mutex;
+       struct mutex                    mutex_hidden;
 
        struct idr                      pasid_idr;
        struct idr                      gang_id_idr;
        uint32_t                        query_status_fence_offs;
        uint64_t                        query_status_fence_gpu_addr;
        uint64_t                        *query_status_fence_ptr;
+       uint32_t                        saved_flags;
 
        /* initialize kiq pipe */
        int                             (*kiq_hw_init)(struct amdgpu_device *adev);
+       int                             (*kiq_hw_fini)(struct amdgpu_device *adev);
 
        /* ip specific functions */
        const struct amdgpu_mes_funcs   *funcs;
        uint64_t        wptr_addr;
        uint32_t        queue_type;
        uint32_t        paging;
+       uint32_t        gws_base;
+       uint32_t        gws_size;
+       uint64_t        tba_addr;
+       uint64_t        tma_addr;
 };
 
 struct mes_remove_queue_input {
        uint64_t        gang_context_addr;
 };
 
+/*
+ * Argument block for the unmap_legacy_queue callback of amdgpu_mes_funcs.
+ *
+ * amdgpu_mes_unmap_legacy_queue() fills it from an amdgpu_ring: queue_type
+ * from ring->funcs->type, doorbell_offset from ring->doorbell_index,
+ * pipe_id/queue_id from ring->pipe/ring->queue, and
+ * trail_fence_addr/trail_fence_data from the caller's gpu_addr/seq.
+ */
+struct mes_unmap_legacy_queue_input {
+       enum amdgpu_unmap_queues_action    action;
+       uint32_t                           queue_type;
+       uint32_t                           doorbell_offset;
+       uint32_t                           pipe_id;
+       uint32_t                           queue_id;
+       uint64_t                           trail_fence_addr;
+       uint64_t                           trail_fence_data;
+};
+
 struct mes_suspend_gang_input {
        bool            suspend_all_gangs;
        uint64_t        gang_context_addr;
        int (*remove_hw_queue)(struct amdgpu_mes *mes,
                               struct mes_remove_queue_input *input);
 
+       int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
+                                 struct mes_unmap_legacy_queue_input *input);
+
        int (*suspend_gang)(struct amdgpu_mes *mes,
                            struct mes_suspend_gang_input *input);
 
 };
 
 #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
+#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
 
 int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
 
                            int *queue_id);
 int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);
 
+int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
+                                 struct amdgpu_ring *ring,
+                                 enum amdgpu_unmap_queues_action action,
+                                 u64 gpu_addr, u64 seq);
+
 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
                        int queue_type, int idx,
                        struct amdgpu_mes_ctx_data *ctx_data,
                                        uint32_t doorbell_index,
                                        unsigned int doorbell_id);
 int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
+
+/*
+ * MES lock can be taken in MMU notifiers.
+ *
+ * A bit more detail about why to set no-FS reclaim with MES lock:
+ *
+ * The purpose of the MMU notifier is to stop GPU access to memory so
+ * that the Linux VM subsystem can move pages around safely. This is
+ * done by preempting user mode queues for the affected process. When
+ * MES is used, MES lock needs to be taken to preempt the queues.
+ *
+ * The MMU notifier callback entry point in the driver is
+ * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
+ * there is:
+ * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
+ * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
+ *
+ * The last part of the chain is a function pointer where we take the
+ * MES lock.
+ *
+ * The problem with taking locks in the MMU notifier is, that MMU
+ * notifiers can be called in reclaim-FS context. That's where the
+ * kernel frees up pages to make room for new page allocations under
+ * memory pressure. While we are running in reclaim-FS context, we must
+ * not trigger another memory reclaim operation because that would
+ * recursively reenter the reclaim code and cause a deadlock. The
+ * memalloc_noreclaim_save/restore calls in amdgpu_mes_lock/unlock
+ * guarantee that.
+ *
+ * In addition we also need to avoid lock dependencies on other locks taken
+ * under the MES lock, for example reservation locks. Here is a possible
+ * scenario of a deadlock:
+ * Thread A: takes and holds reservation lock | triggers reclaim-FS |
+ * MMU notifier | blocks trying to take MES lock
+ * Thread B: takes and holds MES lock | blocks trying to take reservation lock
+ *
+ * In this scenario Thread B gets involved in a deadlock even without
+ * triggering a reclaim-FS operation itself.
+ * To fix this and break the lock dependency chain you'd need to either:
+ * 1. protect reservation locks with memalloc_nofs_save/restore, or
+ * 2. avoid taking reservation locks under the MES lock.
+ *
+ * Reservation locks are taken all over the kernel in different subsystems, we
+ * have no control over them and their lock dependencies. So the only workable
+ * solution is to avoid taking other locks under the MES lock.
+ * As a result, make sure no reclaim-FS happens while holding this lock anywhere
+ * to prevent deadlocks when an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
+{
+       mutex_lock(&mes->mutex_hidden);
+       /* saved_flags is written only after mutex_hidden has been taken */
+       mes->saved_flags = memalloc_noreclaim_save();
+}
+
+static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
+{
+       /* hand back mes->saved_flags while mutex_hidden is still held */
+       memalloc_noreclaim_restore(mes->saved_flags);
+       mutex_unlock(&mes->mutex_hidden);
+}
 #endif /* __AMDGPU_MES_H__ */
 
 {
        struct amdgpu_device *adev = mes->adev;
        union MESAPI__ADD_QUEUE mes_add_queue_pkt;
+       struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+       uint32_t vm_cntx_cntl = hub->vm_cntx_cntl;
 
        memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
 
        mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
 
        mes_add_queue_pkt.process_id = input->process_id;
-       mes_add_queue_pkt.page_table_base_addr =
-               input->page_table_base_addr - adev->gmc.vram_start;
+       mes_add_queue_pkt.page_table_base_addr = input->page_table_base_addr;
        mes_add_queue_pkt.process_va_start = input->process_va_start;
        mes_add_queue_pkt.process_va_end = input->process_va_end;
        mes_add_queue_pkt.process_quantum = input->process_quantum;
        mes_add_queue_pkt.queue_type =
                convert_to_mes_queue_type(input->queue_type);
        mes_add_queue_pkt.paging = input->paging;
+       mes_add_queue_pkt.vm_context_cntl = vm_cntx_cntl;
+       mes_add_queue_pkt.gws_base = input->gws_base;
+       mes_add_queue_pkt.gws_size = input->gws_size;
+       mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
 
        mes_add_queue_pkt.api_status.api_completion_fence_addr =
                mes->ring.fence_drv.gpu_addr;
                        &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
 }
 
+/*
+ * Fill a MESAPI__REMOVE_QUEUE scheduler packet for a legacy queue and hand
+ * it to mes_v10_1_submit_pkt_and_poll_completion().
+ *
+ * PREEMPT_QUEUES_NO_UNMAP sets preempt_legacy_gfx_queue together with the
+ * trail fence address and the low 32 bits of the trail fence data. Any other
+ * action sets unmap_legacy_gfx_queue for AMDGPU_RING_TYPE_GFX queues and
+ * unmap_kiq_utility_queue for every other queue type.
+ *
+ * Returns whatever the submit helper returns.
+ */
+static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes,
+                                struct mes_unmap_legacy_queue_input *input)
+{
+       union MESAPI__REMOVE_QUEUE pkt;
+
+       memset(&pkt, 0, sizeof(pkt));
+
+       pkt.header.type = MES_API_TYPE_SCHEDULER;
+       pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
+       pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
+       pkt.doorbell_offset = input->doorbell_offset;
+       /* legacy queues are submitted with a zero gang context address */
+       pkt.gang_context_addr = 0;
+
+       pkt.pipe_id = input->pipe_id;
+       pkt.queue_id = input->queue_id;
+
+       if (input->action == PREEMPT_QUEUES_NO_UNMAP) {
+               pkt.preempt_legacy_gfx_queue = 1;
+               pkt.tf_addr = input->trail_fence_addr;
+               pkt.tf_data = lower_32_bits(input->trail_fence_data);
+       } else if (input->queue_type == AMDGPU_RING_TYPE_GFX) {
+               pkt.unmap_legacy_gfx_queue = 1;
+       } else {
+               pkt.unmap_kiq_utility_queue = 1;
+       }
+
+       /* completion is reported through the MES ring's fence */
+       pkt.api_status.api_completion_fence_addr =
+               mes->ring.fence_drv.gpu_addr;
+       pkt.api_status.api_completion_fence_value =
+               ++mes->ring.fence_drv.sync_seq;
+
+       return mes_v10_1_submit_pkt_and_poll_completion(mes, &pkt,
+                                                       sizeof(pkt));
+}
+
 static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
                                  struct mes_suspend_gang_input *input)
 {
                mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i];
 
        for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
-               mes_set_hw_res_pkt.agreegated_doorbells[i] =
+               mes_set_hw_res_pkt.aggregated_doorbells[i] =
                        mes->agreegated_doorbells[i];
 
+       for (i = 0; i < 5; i++) {
+               mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
+               mes_set_hw_res_pkt.mmhub_base[i] =
+                       adev->reg_offset[MMHUB_HWIP][0][i];
+               mes_set_hw_res_pkt.osssys_base[i] =
+                       adev->reg_offset[OSSSYS_HWIP][0][i];
+       }
+
+       mes_set_hw_res_pkt.disable_reset = 1;
+       mes_set_hw_res_pkt.disable_mes_log = 1;
+       mes_set_hw_res_pkt.use_different_vmid_compute = 1;
+
        mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
                mes->ring.fence_drv.gpu_addr;
        mes_set_hw_res_pkt.api_status.api_completion_fence_value =
 static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
        .add_hw_queue = mes_v10_1_add_hw_queue,
        .remove_hw_queue = mes_v10_1_remove_hw_queue,
+       .unmap_legacy_queue = mes_v10_1_unmap_legacy_queue, /* v10.1 handler for legacy queue unmap requests */
        .suspend_gang = mes_v10_1_suspend_gang,
        .resume_gang = mes_v10_1_resume_gang,
 };
                goto failure;
        }
 
+       /*
+        * Disable KIQ ring usage from the driver once MES is enabled.
+        * MES uses KIQ ring exclusively so driver cannot access KIQ ring
+        * with MES enabled.
+        */
+       adev->gfx.kiq.ring.sched.ready = false;
+
        return 0;
 
 failure:
 
        MES_SCH_API_PROGRAM_GDS                 = 12,
        MES_SCH_API_SET_DEBUG_VMID              = 13,
        MES_SCH_API_MISC                        = 14,
+       MES_SCH_API_UPDATE_ROOT_PAGE_TABLE      = 15,
+       MES_SCH_API_AMD_LOG                     = 16,
        MES_SCH_API_MAX                         = 0xFF
 };
 
 enum { MAX_VMID_MMHUB = 16 };
 
 enum MES_LOG_OPERATION {
-       MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0
+       MES_LOG_OPERATION_CONTEXT_STATE_CHANGE = 0, /* values of this enum are stored in MES_LOG_ENTRY_HEADER.operation_type */
+       MES_LOG_OPERATION_QUEUE_NEW_WORK = 1, /* presumably paired with struct MES_LOG_QUEUE_NEW_WORK; pairing is by name only */
+       MES_LOG_OPERATION_QUEUE_UNWAIT_SYNC_OBJECT = 2, /* presumably struct MES_LOG_QUEUE_UNWAIT_SYNC_OBJECT */
+       MES_LOG_OPERATION_QUEUE_NO_MORE_WORK = 3, /* presumably struct MES_LOG_QUEUE_NO_MORE_WORK */
+       MES_LOG_OPERATION_QUEUE_WAIT_SYNC_OBJECT = 4, /* presumably struct MES_LOG_QUEUE_WAIT_SYNC_OBJECT */
+       MES_LOG_OPERATION_QUEUE_INVALID = 0xF, /* not contiguous with the operation values 0-4 above */
 };
 
 enum MES_LOG_CONTEXT_STATE {
        MES_LOG_CONTEXT_STATE_RUNNING           = 1,
        MES_LOG_CONTEXT_STATE_READY             = 2,
        MES_LOG_CONTEXT_STATE_READY_STANDBY     = 3,
+       MES_LOG_CONTEXT_STATE_INVALID           = 0xF, /* same 0xF value as MES_LOG_OPERATION_QUEUE_INVALID */
 };
 
 struct MES_LOG_CONTEXT_STATE_CHANGE {
        enum MES_LOG_CONTEXT_STATE      new_context_state;
 };
 
+struct MES_LOG_QUEUE_NEW_WORK { /* log payload; member queue_new_work of the MES_LOG_ENTRY_HEADER union */
+       uint64_t                   h_queue; /* queue handle -- NOTE(review): meaning/encoding not visible here, confirm */
+       uint64_t                   reserved; /* second 64-bit slot, unused; other queue payloads carry h_sync_object here */
+};
+
+struct MES_LOG_QUEUE_UNWAIT_SYNC_OBJECT { /* log payload; member queue_unwait_sync_object of the MES_LOG_ENTRY_HEADER union */
+       uint64_t                   h_queue; /* queue handle -- NOTE(review): meaning/encoding not visible here, confirm */
+       uint64_t                   h_sync_object; /* sync object handle -- presumably the object the queue stopped waiting on; confirm */
+};
+
+struct MES_LOG_QUEUE_NO_MORE_WORK { /* log payload; member queue_no_more_work of the MES_LOG_ENTRY_HEADER union */
+       uint64_t                   h_queue; /* queue handle -- NOTE(review): meaning/encoding not visible here, confirm */
+       uint64_t                   reserved; /* second 64-bit slot, unused; same field layout as MES_LOG_QUEUE_NEW_WORK */
+};
+
+struct MES_LOG_QUEUE_WAIT_SYNC_OBJECT { /* log payload; member queue_wait_sync_object of the MES_LOG_ENTRY_HEADER union */
+       uint64_t                   h_queue; /* queue handle -- NOTE(review): meaning/encoding not visible here, confirm */
+       uint64_t                   h_sync_object; /* sync object handle -- presumably the object being waited on; confirm */
+};
+
 struct MES_LOG_ENTRY_HEADER {
        uint32_t        first_free_entry_index;
        uint32_t        wraparound_count;
        uint32_t        operation_type; /* operation_type is of MES_LOG_OPERATION type */
        uint32_t        reserved_operation_type_bits;
        union {
-               struct MES_LOG_CONTEXT_STATE_CHANGE     context_state_change;
-               uint64_t                                reserved_operation_data[2];
+               struct MES_LOG_CONTEXT_STATE_CHANGE     context_state_change; /* active member is presumably selected by operation_type -- confirm */
+               struct MES_LOG_QUEUE_NEW_WORK           queue_new_work;
+               struct MES_LOG_QUEUE_UNWAIT_SYNC_OBJECT queue_unwait_sync_object;
+               struct MES_LOG_QUEUE_NO_MORE_WORK       queue_no_more_work;
+               struct MES_LOG_QUEUE_WAIT_SYNC_OBJECT   queue_wait_sync_object;
+               uint64_t                                all[2]; /* raw view of the payload as two 64-bit words */
        };
 };
 
        struct MES_LOG_ENTRY_DATA       entries[1];
 };
 
+enum MES_SWIP_TO_HWIP_DEF { /* sizing constant for the per-IP base-address arrays in MESAPI_SET_HW_RESOURCES */
+       MES_MAX_HWIP_SEGMENT = 6, /* length of gc_base[], mmhub_base[] and osssys_base[]; NOTE(review): the v10.1 code that fills mes_set_hw_res_pkt loops i < 5, so index 5 is not written by that loop -- confirm intended */
+};
+
 union MESAPI_SET_HW_RESOURCES {
        struct {
                union MES_API_HEADER    header;
                uint32_t                compute_hqd_mask[MAX_COMPUTE_PIPES];
                uint32_t                gfx_hqd_mask[MAX_GFX_PIPES];
                uint32_t                sdma_hqd_mask[MAX_SDMA_PIPES];
-               uint32_t                agreegated_doorbells[AMD_PRIORITY_NUM_LEVELS];
+               uint32_t                aggregated_doorbells[AMD_PRIORITY_NUM_LEVELS];
                uint64_t                g_sch_ctx_gpu_mc_ptr;
                uint64_t                query_status_fence_gpu_mc_ptr;
+               uint32_t                gc_base[MES_MAX_HWIP_SEGMENT];
+               uint32_t                mmhub_base[MES_MAX_HWIP_SEGMENT];
+               uint32_t                osssys_base[MES_MAX_HWIP_SEGMENT];
                struct MES_API_STATUS   api_status;
                union {
                        struct {
                                uint32_t disable_reset  : 1;
-                               uint32_t reserved       : 31;
+                               uint32_t use_different_vmid_compute : 1;
+                               uint32_t disable_mes_log   : 1;
+                               uint32_t apply_mmhub_pgvm_invalidate_ack_loss_wa : 1;
+                               uint32_t apply_grbm_remote_register_dummy_read_wa : 1;
+                               uint32_t second_gfx_pipe_enabled : 1;
+                               uint32_t enable_level_process_quantum_check : 1;
+                               uint32_t apply_cwsr_program_all_vmid_sq_shader_tba_registers_wa : 1;
+                               uint32_t enable_mqd_active_poll : 1;
+                               uint32_t disable_timer_int : 1;
+                               uint32_t reserved       : 22;
                        };
                        uint32_t        uint32_t_all;
                };
                uint32_t                        doorbell_offset;
                uint64_t                        mqd_addr;
                uint64_t                        wptr_addr;
+               uint64_t                        h_context;
+               uint64_t                        h_queue;
                enum MES_QUEUE_TYPE             queue_type;
                uint32_t                        gds_base;
                uint32_t                        gds_size;
                uint32_t                        gws_base;
                uint32_t                        gws_size;
                uint32_t                        oa_mask;
+               uint64_t                        trap_handler_addr;
+               uint32_t                        vm_context_cntl;
 
                struct {
                        uint32_t paging                 : 1;
                        uint32_t program_gds            : 1;
                        uint32_t is_gang_suspended      : 1;
                        uint32_t is_tmz_queue           : 1;
-                       uint32_t reserved               : 24;
+                       uint32_t map_kiq_utility_queue  : 1;
+                       uint32_t reserved               : 23;
                };
                struct MES_API_STATUS           api_status;
        };
                uint64_t                gang_context_addr;
 
                struct {
-                       uint32_t unmap_legacy_gfx_queue : 1;
-                       uint32_t reserved               : 31;
+                       uint32_t unmap_legacy_gfx_queue   : 1;
+                       uint32_t unmap_kiq_utility_queue  : 1;
+                       uint32_t preempt_legacy_gfx_queue : 1;
+                       uint32_t reserved                 : 29;
                };
-               struct MES_API_STATUS   api_status;
+               struct MES_API_STATUS       api_status;
+
+               uint32_t                    pipe_id;
+               uint32_t                    queue_id;
+
+               uint64_t                    tf_addr;
+               uint32_t                    tf_data;
        };
 
        uint32_t        max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
 
 union MESAPI__RESET {
        struct {
-               union MES_API_HEADER    header;
+               union MES_API_HEADER            header;
 
                struct {
-                       uint32_t reset_queue    : 1;
-                       uint32_t reserved       : 31;
+                       /* Only reset the queue given by doorbell_offset (not entire gang) */
+                       uint32_t                reset_queue_only : 1;
+                       /* Hang detection first then reset any queues that are hung */
+                       uint32_t                hang_detect_then_reset : 1;
+                       /* Only do hang detection (no reset) */
+                       uint32_t                hang_detect_only : 1;
+                       /* Reset HP and LP kernel queues not managed by MES */
+                       uint32_t                reset_legacy_gfx : 1;
+                       uint32_t                reserved : 28;
                };
 
-               uint64_t                gang_context_addr;
-               uint32_t                doorbell_offset; /* valid only if reset_queue = true */
-               struct MES_API_STATUS   api_status;
+               uint64_t                        gang_context_addr;
+
+               /* valid only if reset_queue_only = true */
+               uint32_t                        doorbell_offset;
+
+               /* valid only if hang_detect_then_reset = true */
+               uint64_t                        doorbell_offset_addr;
+               enum MES_QUEUE_TYPE             queue_type;
+
+               /* valid only if reset_legacy_gfx = true */
+               uint32_t                        pipe_id_lp;
+               uint32_t                        queue_id_lp;
+               uint32_t                        vmid_id_lp;
+               uint64_t                        mqd_mc_addr_lp;
+               uint32_t                        doorbell_offset_lp;
+               uint64_t                        wptr_addr_lp;
+
+               uint32_t                        pipe_id_hp;
+               uint32_t                        queue_id_hp;
+               uint32_t                        vmid_id_hp;
+               uint64_t                        mqd_mc_addr_hp;
+               uint32_t                        doorbell_offset_hp;
+               uint64_t                        wptr_addr_hp;
+
+               struct MES_API_STATUS           api_status;
        };
 
        uint32_t        max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
 
 enum MESAPI_MISC_OPCODE {
        MESAPI_MISC__MODIFY_REG,
+       MESAPI_MISC__INV_GART, /* presumably uses the inv_gart member (struct INV_GART) of union MESAPI__MISC; pairing is by name only */
+       MESAPI_MISC__QUERY_STATUS, /* presumably uses the query_status member (struct QUERY_STATUS) of union MESAPI__MISC */
        MESAPI_MISC__MAX,
 };
 
 
 enum { MISC_DATA_MAX_SIZE_IN_DWORDS = 20 };
 
+struct MODIFY_REG { /* payload type of the modify_reg member in union MESAPI__MISC */
+       enum MODIFY_REG_SUBCODE   subcode; /* which register modification to perform; enumerators are declared outside this view */
+       uint32_t                  reg_offset; /* register to modify -- NOTE(review): dword vs byte offset not visible here, confirm */
+       uint32_t                  reg_value; /* value operand for the modification selected by subcode */
+};
+
+struct INV_GART { /* payload type of the inv_gart member in union MESAPI__MISC */
+       uint64_t                  inv_range_va_start; /* start of the range to invalidate -- presumably a GPU virtual address; confirm */
+       uint64_t                  inv_range_size; /* size of the range -- NOTE(review): units (bytes vs pages) not visible here, confirm */
+};
+
+struct QUERY_STATUS { /* payload type of the query_status member in union MESAPI__MISC */
+       uint32_t context_id; /* NOTE(review): what this id identifies is not visible here -- confirm against the firmware interface */
+};
+
 union MESAPI__MISC {
        struct {
                union MES_API_HEADER    header;
                struct MES_API_STATUS   api_status;
 
                union {
-                       struct {
-                               enum MODIFY_REG_SUBCODE subcode;
-                               uint32_t                reg_offset;
-                               uint32_t                reg_value;
-                       } modify_reg;
+                       struct          MODIFY_REG modify_reg; /* typed views of the payload; all overlay data[] below */
+                       struct          INV_GART inv_gart; /* two uint64_t fields, well under the MISC_DATA_MAX_SIZE_IN_DWORDS dwords of data[] */
+                       struct          QUERY_STATUS query_status; /* active member presumably chosen by a MESAPI_MISC_OPCODE value -- confirm */
                        uint32_t        data[MISC_DATA_MAX_SIZE_IN_DWORDS];
                };
        };
        uint32_t        max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS];
 };
 
+union MESAPI__UPDATE_ROOT_PAGE_TABLE { /* API frame; presumably sent with opcode MES_SCH_API_UPDATE_ROOT_PAGE_TABLE (pairing is by name only) */
+       struct {
+               union MES_API_HEADER        header; /* first field, as in the other MESAPI frames in this header */
+               uint64_t                    page_table_base_addr; /* NOTE(review): address space/alignment requirements not visible here -- confirm */
+               uint64_t                    process_context_addr; /* presumably identifies the process whose root page table is updated; confirm */
+               struct MES_API_STATUS       api_status; /* carries the api_completion_fence_addr/value pair */
+       };
+
+       uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; /* overlays the fields so the union spans at least API_FRAME_SIZE_IN_DWORDS dwords */
+};
+
+union MESAPI_AMD_LOG { /* API frame; presumably sent with opcode MES_SCH_API_AMD_LOG (pairing is by name only) */
+       struct {
+               union MES_API_HEADER        header; /* first field, as in the other MESAPI frames in this header */
+               uint64_t                    p_buffer_memory; /* NOTE(review): p_ prefix suggests an address of the log buffer -- confirm */
+               uint64_t                    p_buffer_size_used; /* NOTE(review): p_ prefix suggests an address where the used size is stored, not the size itself -- confirm */
+               struct MES_API_STATUS       api_status; /* carries the api_completion_fence_addr/value pair */
+       };
+
+       uint32_t max_dwords_in_api[API_FRAME_SIZE_IN_DWORDS]; /* overlays the fields so the union spans at least API_FRAME_SIZE_IN_DWORDS dwords */
+};
+
 #pragma pack(pop)
 #endif