int r;
        u32 tmp;
        u64 gpu_addr;
+       volatile uint32_t *cpu_ptr = NULL;
 
-       r = amdgpu_device_wb_get(adev, &index);
-       if (r) {
-               dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
-               return r;
-       }
-
-       gpu_addr = adev->wb.gpu_addr + (index * 4);
        tmp = 0xCAFEDEAD;
-       adev->wb.wb[index] = cpu_to_le32(tmp);
 
-       r = amdgpu_ring_alloc(ring, 5);
+       if (ring->is_mes_queue) {
+               uint32_t offset = 0;
+               offset = amdgpu_mes_ctx_get_offs(ring,
+                                        AMDGPU_MES_CTX_PADDING_OFFS);
+               gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+               cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+		*cpu_ptr = cpu_to_le32(tmp);
+       } else {
+               r = amdgpu_device_wb_get(adev, &index);
+               if (r) {
+                       dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+                       return r;
+               }
+
+               gpu_addr = adev->wb.gpu_addr + (index * 4);
+               adev->wb.wb[index] = cpu_to_le32(tmp);
+       }
+
+       r = amdgpu_ring_alloc(ring, 20);
 	if (r) {
 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
-		amdgpu_device_wb_free(adev, index);
+		if (!ring->is_mes_queue)
+			amdgpu_device_wb_free(adev, index);
        amdgpu_ring_commit(ring);
 
        for (i = 0; i < adev->usec_timeout; i++) {
-               tmp = le32_to_cpu(adev->wb.wb[index]);
+               if (ring->is_mes_queue)
+                       tmp = le32_to_cpu(*cpu_ptr);
+               else
+                       tmp = le32_to_cpu(adev->wb.wb[index]);
                if (tmp == 0xDEADBEEF)
                        break;
                if (amdgpu_emu_mode == 1)
        if (i >= adev->usec_timeout)
                r = -ETIMEDOUT;
 
-       amdgpu_device_wb_free(adev, index);
+       if (!ring->is_mes_queue)
+               amdgpu_device_wb_free(adev, index);
 
        return r;
 }