From fb796c308767606bbd6c2e1bf4fa9446ec68ce51 Mon Sep 17 00:00:00 2001
From: Shashank Sharma <shashank.sharma@amd.com>
Date: Tue, 27 Aug 2024 15:44:43 +0530
Subject: [PATCH 01/16] drm/amdgpu: add gfx eviction fence helpers

This patch adds basic eviction fence framework for the gfx buffers.
The idea is to:
- One eviction fence is created per gfx process, at kms_open.
- This fence is attached to all the gem buffers created
  by this process.
- This fence is detached to all the gem buffers at postclose_kms.

This framework will be further used for usermode queues.

V2: Addressed review comments from Christian
    - keep fence_ctx and fence_seq directly in fpriv
    - evcition_fence should be dynamically allocated
    - do not save eviction fence instance in BO, there could be many
      such fences attached to one BO
    - use dma_resv_replace_fence() in detach

V3: Addressed review comments from Christian
    - eviction fence create and destroy functions should be called
      only once from fpriv create/destroy
    - use dma_fence_put() in eviction_fence_destroy

V4: Addressed review comments from Christian:
    - create a separate ev_fence_mgr structure
    - cleanup fence init part
    - do not add a domain for fence owner KGD

V5: Addressed review comments from Christian:
    - drop the dma_fence_is_signaled check
    - use a local variable to access evf_mgr->ev_fence under the
      spin_lock() multiple places
    - remove the vm->is_compute_ctx check to attach gfx eviction fence,
      in gem_object_open

V6: Addressed review comments from Christian:
    - drop the return value from eviction_fence_signal
    - reserve_fence should be the first thing inside the
      attach_eviction_fence function, also keep the resv_add_fence inside
      the lock
    - remove the unwanted ev_fence check inside detach function
    - fix wrong variable check in eviction_fence_init function
    - return the error value of eviction_fence_init to the caller, dont
      keep it void.
    - fail gem_object_open if attaching of eviction_fence fails
    - detach the eviction fence only when amdgpu_vm_is_bo_always_valid
      is not true.

V7: Addressed review comments from Christian:
    - Do not add a uq_mgr ptr in ev_fence, rather add evf_mgr

V8: Move eviction fence enabling into separate patch for CI

Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian Koenig <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile           |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |   6 +-
 .../drm/amd/amdgpu/amdgpu_eviction_fence.c    | 144 ++++++++++++++++++
 .../drm/amd/amdgpu/amdgpu_eviction_fence.h    |  63 ++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c       |   5 +
 6 files changed, 219 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 1cc307904089..0b0c8ec46516 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -66,7 +66,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
 	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
 	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
 	amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \
-	amdgpu_cper.o amdgpu_userq_fence.o
+	amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index be10fbe293e4..8c6c3c5451e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -114,6 +114,7 @@
 #include "amdgpu_seq64.h"
 #include "amdgpu_reg_state.h"
 #include "amdgpu_userqueue.h"
+#include "amdgpu_eviction_fence.h"
 #if defined(CONFIG_DRM_AMD_ISP)
 #include "amdgpu_isp.h"
 #endif
@@ -490,7 +491,6 @@ struct amdgpu_flip_work {
 	bool				async;
 };
 
-
 /*
  * file private structure
  */
@@ -504,6 +504,10 @@ struct amdgpu_fpriv {
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
 	struct amdgpu_userq_mgr	userq_mgr;
+
+	/* Eviction fence infra */
+	struct amdgpu_eviction_fence_mgr evf_mgr;
+
 	/** GPU partition selection */
 	uint32_t		xcp_id;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
new file mode 100644
index 000000000000..056798e2b050
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/sched.h>
+#include "amdgpu.h"
+
+static const char *
+amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "amdgpu";
+}
+
+static const char *
+amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f)
+{
+	struct amdgpu_eviction_fence *ef;
+
+	ef = container_of(f, struct amdgpu_eviction_fence, base);
+	return ef->timeline_name;
+}
+
+static const struct dma_fence_ops amdgpu_eviction_fence_ops = {
+	.use_64bit_seqno = true,
+	.get_driver_name = amdgpu_eviction_fence_get_driver_name,
+	.get_timeline_name = amdgpu_eviction_fence_get_timeline_name,
+};
+
+void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+	spin_lock(&evf_mgr->ev_fence_lock);
+	dma_fence_signal(&evf_mgr->ev_fence->base);
+	spin_unlock(&evf_mgr->ev_fence_lock);
+}
+
+struct amdgpu_eviction_fence *
+amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+	struct amdgpu_eviction_fence *ev_fence;
+
+	ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL);
+	if (!ev_fence)
+		return NULL;
+
+	ev_fence->evf_mgr = evf_mgr;
+	get_task_comm(ev_fence->timeline_name, current);
+	spin_lock_init(&ev_fence->lock);
+	dma_fence_init(&ev_fence->base, &amdgpu_eviction_fence_ops,
+		       &ev_fence->lock, evf_mgr->ev_fence_ctx,
+		       atomic_inc_return(&evf_mgr->ev_fence_seq));
+	return ev_fence;
+}
+
+void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+	struct amdgpu_eviction_fence *ev_fence;
+
+	spin_lock(&evf_mgr->ev_fence_lock);
+	ev_fence = evf_mgr->ev_fence;
+	spin_unlock(&evf_mgr->ev_fence_lock);
+
+	if (!ev_fence)
+		return;
+
+	/* Last unref of ev_fence */
+	dma_fence_put(&evf_mgr->ev_fence->base);
+}
+
+int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+				 struct amdgpu_bo *bo)
+{
+	struct dma_fence *ef;
+	struct amdgpu_eviction_fence *ev_fence;
+	struct dma_resv *resv = bo->tbo.base.resv;
+	int ret;
+
+	if (!resv)
+		return 0;
+
+	ret = dma_resv_reserve_fences(resv, 1);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Failed to resv fence space\n");
+		return ret;
+	}
+
+	spin_lock(&evf_mgr->ev_fence_lock);
+	ev_fence = evf_mgr->ev_fence;
+	if (ev_fence) {
+		ef = dma_fence_get(&ev_fence->base);
+		dma_resv_add_fence(resv, ef, DMA_RESV_USAGE_BOOKKEEP);
+	}
+	spin_unlock(&evf_mgr->ev_fence_lock);
+	return 0;
+}
+
+void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+				  struct amdgpu_bo *bo)
+{
+	struct dma_fence *stub = dma_fence_get_stub();
+
+	dma_resv_replace_fences(bo->tbo.base.resv, evf_mgr->ev_fence_ctx,
+				stub, DMA_RESV_USAGE_BOOKKEEP);
+	dma_fence_put(stub);
+}
+
+int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+	struct amdgpu_eviction_fence *ev_fence;
+
+	/* This needs to be done one time per open */
+	atomic_set(&evf_mgr->ev_fence_seq, 0);
+	evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1);
+	spin_lock_init(&evf_mgr->ev_fence_lock);
+
+	ev_fence = amdgpu_eviction_fence_create(evf_mgr);
+	if (!ev_fence) {
+		DRM_ERROR("Failed to craete eviction fence\n");
+		return -ENOMEM;
+	}
+
+	spin_lock(&evf_mgr->ev_fence_lock);
+	evf_mgr->ev_fence = ev_fence;
+	spin_unlock(&evf_mgr->ev_fence_lock);
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
new file mode 100644
index 000000000000..aba12a43f433
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDGPU_EV_FENCE_H_
+#define AMDGPU_EV_FENCE_H_
+
+struct amdgpu_eviction_fence {
+	struct dma_fence base;
+	spinlock_t	 lock;
+	char		 timeline_name[TASK_COMM_LEN];
+	struct amdgpu_eviction_fence_mgr *evf_mgr;
+};
+
+struct amdgpu_eviction_fence_mgr {
+	u64			ev_fence_ctx;
+	atomic_t		ev_fence_seq;
+	spinlock_t		ev_fence_lock;
+	struct amdgpu_eviction_fence *ev_fence;
+};
+
+/* Eviction fence helper functions */
+struct amdgpu_eviction_fence *
+amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+void
+amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+int
+amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+			     struct amdgpu_bo *bo);
+
+void
+amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
+			     struct amdgpu_bo *bo);
+
+int
+amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+void
+amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 06171eab6e92..682b76cad359 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -293,6 +293,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
 		bo_va = amdgpu_vm_bo_add(adev, vm, abo);
 	else
 		++bo_va->ref_count;
+
 	amdgpu_bo_unreserve(abo);
 
 	/* Validate and add eviction fence to DMABuf imports with dynamic
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 2e07776dd408..700442584f97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1413,6 +1413,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	mutex_init(&fpriv->bo_list_lock);
 	idr_init_base(&fpriv->bo_list_handles, 1);
 
+	r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
+	if (r)
+		goto error_vm;
+
 	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
 
 	r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, adev);
@@ -1486,6 +1490,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 		amdgpu_bo_unreserve(pd);
 	}
 
+	amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
 	amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
 	amdgpu_vm_fini(adev, &fpriv->vm);
 	amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
-- 
2.51.0


From 30e4d781385dda92fdee574b0a95094dfa143b52 Mon Sep 17 00:00:00 2001
From: Shashank Sharma <shashank.sharma@amd.com>
Date: Mon, 3 Jun 2024 11:13:03 +0200
Subject: [PATCH 02/16] drm/amdgpu: add userqueue suspend/resume functions

This patch adds userqueue suspend/resume functions at
core MES V11 IP level.

V2: use true/false for queue_active status (Christian)
    added Christian's R-B

V3: reset/set queue status in mqd.create and mqd.destroy

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Reviewed-by: Christian Koenig <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c  | 33 +++++++++++++++++++
 .../gpu/drm/amd/include/amdgpu_userqueue.h    |  5 +++
 2 files changed, 38 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
index fe4efe5ba6ac..ee0a757fcfa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
@@ -331,6 +331,7 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
 		goto free_ctx;
 	}
 
+	queue->queue_active = true;
 	return 0;
 
 free_ctx:
@@ -354,9 +355,41 @@ mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
 	amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj);
 	kfree(queue->userq_prop);
 	amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd);
+	queue->queue_active = false;
+}
+
+static int mes_v11_0_userq_suspend(struct amdgpu_userq_mgr *uq_mgr,
+				   struct amdgpu_usermode_queue *queue)
+{
+	if (queue->queue_active) {
+		mes_v11_0_userq_unmap(uq_mgr, queue);
+		queue->queue_active = false;
+	}
+
+	return 0;
+}
+
+static int mes_v11_0_userq_resume(struct amdgpu_userq_mgr *uq_mgr,
+				  struct amdgpu_usermode_queue *queue)
+{
+	int ret;
+
+	if (queue->queue_active)
+		return 0;
+
+	ret = mes_v11_0_userq_map(uq_mgr, queue, queue->userq_prop);
+	if (ret) {
+		DRM_ERROR("Failed to resume queue\n");
+		return ret;
+	}
+
+	queue->queue_active = true;
+	return 0;
 }
 
 const struct amdgpu_userq_funcs userq_mes_v11_0_funcs = {
 	.mqd_create = mes_v11_0_userq_mqd_create,
 	.mqd_destroy = mes_v11_0_userq_mqd_destroy,
+	.suspend = mes_v11_0_userq_suspend,
+	.resume = mes_v11_0_userq_resume,
 };
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
index b942f3f5ea35..ede0178e0332 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
@@ -37,6 +37,7 @@ struct amdgpu_userq_obj {
 
 struct amdgpu_usermode_queue {
 	int			queue_type;
+	uint8_t			queue_active;
 	uint64_t		doorbell_handle;
 	uint64_t		doorbell_index;
 	uint64_t		flags;
@@ -57,6 +58,10 @@ struct amdgpu_userq_funcs {
 			  struct amdgpu_usermode_queue *queue);
 	void (*mqd_destroy)(struct amdgpu_userq_mgr *uq_mgr,
 			    struct amdgpu_usermode_queue *uq);
+	int (*suspend)(struct amdgpu_userq_mgr *uq_mgr,
+		       struct amdgpu_usermode_queue *queue);
+	int (*resume)(struct amdgpu_userq_mgr *uq_mgr,
+		      struct amdgpu_usermode_queue *queue);
 };
 
 /* Usermode queues for gfx */
-- 
2.51.0


From b0328087c179f47ea6558c3b91b4487c5e10deda Mon Sep 17 00:00:00 2001
From: Shashank Sharma <shashank.sharma@amd.com>
Date: Wed, 20 Nov 2024 18:59:49 +0100
Subject: [PATCH 03/16] drm/amdgpu: suspend gfx userqueues

This patch adds suspend support for gfx userqueues. It typically does
the following:
- adds an enable_signaling function for the eviction fence, so that it
  can trigger the userqueue suspend,
- adds a delayed work to handle suspending of the eviction_fence
- adds a suspend function to handle suspending of userqueues which
  suspends all the queues under this userq manager and signals the
  eviction fence,
- adds a function to replace the old eviction fence with a new one and
  attach it to each of the objects,
- adds reference of userq manager in the eviction fence container so
  that it can be used in the suspend function.

V2: Addressed Christian's review comments:
    - schedule suspend work immediately

V4: Addressed Christian's review comments:
    - wait for pending uq fences before starting suspend, added
      queue->last_fence for the same
    - accommodate ev_fence_mgr into existing code
    - some bug fixes and NULL checks

V5: Addressed Christian's review comments (gitlab)
    - Wait for eviction fence to get signaled in destroy,
      don't signal it
    - Wait for eviction fence to get signaled in replace fence,
      don't signal it

V6: Addressed Christian's review comments
    - Do not destroy the old eviction fence until we have it replaced
    - Change the sequence of fence replacement sub-tasks
    - reusing the ev_fence delayed work for userqueue suspend as well
      (Shashank).

V7: Addressed Christian's review comments
    - give evf_mgr as argument (instead of fpriv) to replace_fence()
    - save ptr to evf_mgr in ev_fence (instead of uq_mgr)
    - modify suspend_all_queues logic to reflect error properly
    - remove the garbage drm_exec_lock section in wait_for_signal
    - grab the userqueue mutex before starting the wait for fence
    - remove the unrelated gobj check from signal_ioctl

V8: Added race condition fixes

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Acked-by: Christian Koenig <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/amdgpu/amdgpu_eviction_fence.c    | 127 ++++++++++++++++++
 .../drm/amd/amdgpu/amdgpu_eviction_fence.h    |   4 +
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   |  37 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 110 +++++++++++++++
 .../gpu/drm/amd/include/amdgpu_userqueue.h    |   9 ++
 5 files changed, 276 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
index 056798e2b050..189afb872775 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
@@ -22,8 +22,12 @@
  *
  */
 #include <linux/sched.h>
+#include <drm/drm_exec.h>
 #include "amdgpu.h"
 
+#define work_to_evf_mgr(w, name) container_of(w, struct amdgpu_eviction_fence_mgr, name)
+#define evf_mgr_to_fpriv(e) container_of(e, struct amdgpu_fpriv, evf_mgr)
+
 static const char *
 amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence)
 {
@@ -39,10 +43,131 @@ amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f)
 	return ef->timeline_name;
 }
 
+int
+amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+				    struct drm_exec *exec)
+{
+	struct amdgpu_eviction_fence *old_ef, *new_ef;
+	struct drm_gem_object *obj;
+	unsigned long index;
+	int ret;
+
+	/*
+	 * Steps to replace eviction fence:
+	 * * lock all objects in exec (caller)
+	 * * create a new eviction fence
+	 * * update new eviction fence in evf_mgr
+	 * * attach the new eviction fence to BOs
+	 * * release the old fence
+	 * * unlock the objects (caller)
+	 */
+	new_ef = amdgpu_eviction_fence_create(evf_mgr);
+	if (!new_ef) {
+		DRM_ERROR("Failed to create new eviction fence\n");
+		return -ENOMEM;
+	}
+
+	/* Update the eviction fence now */
+	spin_lock(&evf_mgr->ev_fence_lock);
+	old_ef = evf_mgr->ev_fence;
+	evf_mgr->ev_fence = new_ef;
+	spin_unlock(&evf_mgr->ev_fence_lock);
+
+	/* Attach the new fence */
+	drm_exec_for_each_locked_object(exec, index, obj) {
+		struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+		if (!bo)
+			continue;
+		ret = amdgpu_eviction_fence_attach(evf_mgr, bo);
+		if (ret) {
+			DRM_ERROR("Failed to attch new eviction fence\n");
+			goto free_err;
+		}
+	}
+
+	/* Free old fence */
+	dma_fence_put(&old_ef->base);
+	return 0;
+
+free_err:
+	kfree(new_ef);
+	return ret;
+}
+
+static void
+amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
+{
+	struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work);
+	struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr);
+	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
+	struct amdgpu_vm *vm = &fpriv->vm;
+	struct amdgpu_bo_va *bo_va;
+	struct drm_exec exec;
+	bool userq_active = amdgpu_userqueue_active(uq_mgr);
+	int ret;
+
+
+	/* For userqueues, the fence replacement happens in resume path */
+	if (userq_active) {
+		amdgpu_userqueue_suspend(uq_mgr);
+		return;
+	}
+
+	/* Signal old eviction fence */
+	amdgpu_eviction_fence_signal(evf_mgr);
+
+	/* Prepare the objects to replace eviction fence */
+	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+	drm_exec_until_all_locked(&exec) {
+		ret = amdgpu_vm_lock_pd(vm, &exec, 2);
+		drm_exec_retry_on_contention(&exec);
+		if (unlikely(ret))
+			goto unlock_drm;
+
+		/* Lock the done list */
+		list_for_each_entry(bo_va, &vm->done, base.vm_status) {
+			struct amdgpu_bo *bo = bo_va->base.bo;
+
+			if (!bo)
+				continue;
+
+			ret = drm_exec_lock_obj(&exec, &bo->tbo.base);
+			drm_exec_retry_on_contention(&exec);
+			if (unlikely(ret))
+				goto unlock_drm;
+		}
+	}
+
+	/* Replace old eviction fence with new one */
+	ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
+	if (ret)
+		DRM_ERROR("Failed to replace eviction fence\n");
+
+unlock_drm:
+	drm_exec_fini(&exec);
+}
+
+static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f)
+{
+	struct amdgpu_eviction_fence_mgr *evf_mgr;
+	struct amdgpu_eviction_fence *ev_fence;
+
+	if (!f)
+		return true;
+
+	ev_fence = to_ev_fence(f);
+	evf_mgr = ev_fence->evf_mgr;
+
+	schedule_delayed_work(&evf_mgr->suspend_work, 0);
+	return true;
+}
+
 static const struct dma_fence_ops amdgpu_eviction_fence_ops = {
 	.use_64bit_seqno = true,
 	.get_driver_name = amdgpu_eviction_fence_get_driver_name,
 	.get_timeline_name = amdgpu_eviction_fence_get_timeline_name,
+	.enable_signaling = amdgpu_eviction_fence_enable_signaling,
 };
 
 void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr)
@@ -140,5 +265,7 @@ int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
 	spin_lock(&evf_mgr->ev_fence_lock);
 	evf_mgr->ev_fence = ev_fence;
 	spin_unlock(&evf_mgr->ev_fence_lock);
+
+	INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
index aba12a43f433..12f168ec3ef0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
@@ -37,6 +37,7 @@ struct amdgpu_eviction_fence_mgr {
 	atomic_t		ev_fence_seq;
 	spinlock_t		ev_fence_lock;
 	struct amdgpu_eviction_fence *ev_fence;
+	struct delayed_work	suspend_work;
 };
 
 /* Eviction fence helper functions */
@@ -60,4 +61,7 @@ amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
 void
 amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr);
 
+int
+amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
+				    struct drm_exec *exec);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 6157a540c929..877cb17a14e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -466,6 +466,16 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
+	/* Save the fence to wait for during suspend */
+	mutex_lock(&userq_mgr->userq_mutex);
+
+	/* Retrieve the user queue */
+	queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
+	if (!queue) {
+		r = -ENOENT;
+		mutex_unlock(&userq_mgr->userq_mutex);
+	}
+
 	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
 		      (num_read_bo_handles + num_write_bo_handles));
 
@@ -473,30 +483,35 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
 	drm_exec_until_all_locked(&exec) {
 		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
 		drm_exec_retry_on_contention(&exec);
-		if (r)
+		if (r) {
+			mutex_unlock(&userq_mgr->userq_mutex);
 			goto exec_fini;
+		}
 
 		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
 		drm_exec_retry_on_contention(&exec);
-		if (r)
+		if (r) {
+			mutex_unlock(&userq_mgr->userq_mutex);
 			goto exec_fini;
-	}
-
-	/*Retrieve the user queue */
-	queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
-	if (!queue) {
-		r = -ENOENT;
-		goto exec_fini;
+		}
 	}
 
 	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
-	if (r)
+	if (r) {
+		mutex_unlock(&userq_mgr->userq_mutex);
 		goto exec_fini;
+	}
 
 	/* Create a new fence */
 	r = amdgpu_userq_fence_create(queue, wptr, &fence);
-	if (r)
+	if (r) {
+		mutex_unlock(&userq_mgr->userq_mutex);
 		goto exec_fini;
+	}
+
+	dma_fence_put(queue->last_fence);
+	queue->last_fence = dma_fence_get(fence);
+	mutex_unlock(&userq_mgr->userq_mutex);
 
 	for (i = 0; i < num_read_bo_handles; i++) {
 		if (!gobj_read || !gobj_read[i]->resv)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index e946c6d1dd6b..43fff8869ac9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -60,6 +60,16 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr,
 {
 	struct amdgpu_device *adev = uq_mgr->adev;
 	const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];
+	struct dma_fence *f = queue->last_fence;
+	int ret;
+
+	if (f && !dma_fence_is_signaled(f)) {
+		ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
+		if (ret <= 0) {
+			DRM_ERROR("Timed out waiting for fence f=%p\n", f);
+			return;
+		}
+	}
 
 	uq_funcs->mqd_destroy(uq_mgr, queue);
 	amdgpu_userq_fence_driver_free(queue);
@@ -67,6 +77,22 @@ amdgpu_userqueue_cleanup(struct amdgpu_userq_mgr *uq_mgr,
 	kfree(queue);
 }
 
+int
+amdgpu_userqueue_active(struct amdgpu_userq_mgr *uq_mgr)
+{
+	struct amdgpu_usermode_queue *queue;
+	int queue_id;
+	int ret = 0;
+
+	mutex_lock(&uq_mgr->userq_mutex);
+	/* Resume all the queues for this process */
+	idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id)
+		ret += queue->queue_active;
+
+	mutex_unlock(&uq_mgr->userq_mutex);
+	return ret;
+}
+
 #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
 static struct amdgpu_usermode_queue *
 amdgpu_userqueue_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
@@ -202,6 +228,7 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
 	amdgpu_bo_unpin(queue->db_obj.obj);
 	amdgpu_bo_unref(&queue->db_obj.obj);
 	amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id);
+	uq_mgr->num_userqs--;
 	mutex_unlock(&uq_mgr->userq_mutex);
 	return 0;
 }
@@ -277,6 +304,7 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args)
 		goto unlock;
 	}
 	args->out.queue_id = qid;
+	uq_mgr->num_userqs++;
 
 unlock:
 	mutex_unlock(&uq_mgr->userq_mutex);
@@ -317,11 +345,93 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
 }
 #endif
 
+static int
+amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr)
+{
+	struct amdgpu_device *adev = uq_mgr->adev;
+	const struct amdgpu_userq_funcs *userq_funcs;
+	struct amdgpu_usermode_queue *queue;
+	int queue_id;
+	int ret = 0;
+
+	userq_funcs = adev->userq_funcs[AMDGPU_HW_IP_GFX];
+
+	/* Try to suspend all the queues in this process ctx */
+	idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id)
+		ret += userq_funcs->suspend(uq_mgr, queue);
+
+	if (ret)
+		DRM_ERROR("Couldn't suspend all the queues\n");
+	return ret;
+}
+
+static int
+amdgpu_userqueue_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
+{
+	struct amdgpu_usermode_queue *queue;
+	int queue_id, ret;
+
+	idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
+		struct dma_fence *f = queue->last_fence;
+
+		if (!f || dma_fence_is_signaled(f))
+			continue;
+		ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
+		if (ret <= 0) {
+			DRM_ERROR("Timed out waiting for fence f=%p\n", f);
+			return -ETIMEDOUT;
+		}
+	}
+
+	return 0;
+}
+
+void
+amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr)
+{
+	int ret;
+	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+	struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;
+
+	mutex_lock(&uq_mgr->userq_mutex);
+
+	/* Wait for any pending userqueue fence to signal */
+	ret = amdgpu_userqueue_wait_for_signal(uq_mgr);
+	if (ret) {
+		DRM_ERROR("Not suspending userqueue, timeout waiting for work\n");
+		goto unlock;
+	}
+
+	ret = amdgpu_userqueue_suspend_all(uq_mgr);
+	if (ret) {
+		DRM_ERROR("Failed to evict userqueue\n");
+		goto unlock;
+	}
+
+	/* Signal current eviction fence */
+	amdgpu_eviction_fence_signal(evf_mgr);
+
+	/* Cleanup old eviction fence entry */
+	amdgpu_eviction_fence_destroy(evf_mgr);
+
+unlock:
+	mutex_unlock(&uq_mgr->userq_mutex);
+}
+
 int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev)
 {
+	struct amdgpu_fpriv *fpriv;
+
 	mutex_init(&userq_mgr->userq_mutex);
 	idr_init_base(&userq_mgr->userq_idr, 1);
 	userq_mgr->adev = adev;
+	userq_mgr->num_userqs = 0;
+
+	fpriv = uq_mgr_to_fpriv(userq_mgr);
+	if (!fpriv->evf_mgr.ev_fence) {
+		DRM_ERROR("Eviction fence not initialized yet\n");
+		return -EINVAL;
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
index ede0178e0332..7781ee59653b 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
@@ -27,6 +27,9 @@
 
 #define AMDGPU_MAX_USERQ_COUNT 512
 
+#define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base)
+#define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr)
+
 struct amdgpu_mqd_prop;
 
 struct amdgpu_userq_obj {
@@ -50,6 +53,7 @@ struct amdgpu_usermode_queue {
 	struct amdgpu_userq_obj wptr_obj;
 	struct xarray		fence_drv_xa;
 	struct amdgpu_userq_fence_driver *fence_drv;
+	struct dma_fence	*last_fence;
 };
 
 struct amdgpu_userq_funcs {
@@ -69,6 +73,7 @@ struct amdgpu_userq_mgr {
 	struct idr			userq_idr;
 	struct mutex			userq_mutex;
 	struct amdgpu_device		*adev;
+	int num_userqs;
 };
 
 int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
@@ -83,4 +88,8 @@ int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr,
 
 void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
 				     struct amdgpu_userq_obj *userq_obj);
+
+void amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr);
+
+int amdgpu_userqueue_active(struct amdgpu_userq_mgr *uq_mgr);
 #endif
-- 
2.51.0


From 44cfdf368fb72c03e4137709803d58288a22cb06 Mon Sep 17 00:00:00 2001
From: Shashank Sharma <shashank.sharma@amd.com>
Date: Wed, 20 Nov 2024 19:02:26 +0100
Subject: [PATCH 04/16] drm/amdgpu: resume gfx userqueues

This patch adds support for userqueue resume. What it typically does is
this:
- adds a new delayed work for resuming all the queues.
- schedules this delayed work from the suspend work.
- validates the BOs and replaces the eviction fence before resuming all
  the queues running under this instance of userq manager.

V2: Addressed Christian's review comments:
    - declare local variables like ret at the bottom.
    - lock all the object first, then start attaching the new fence.
    - dont replace old eviction fence, just attach new eviction fence.
    - no error logs for drm_exec_lock failures
    - no need to reserve bos after drm_exec_locked
    - schedule the resume worker immediately (not after 100 ms)
    - check for NULL BO (Arvind)

V5: Rebased wrt changes in suspend patch
    - moved amdgpu_userqueue_validate_vm_bo in this patch
    - initialized ret in resume_all

V6: Rebase
V7: Addressed review comments from Christian
    - Do not use list_for_each_safe() with vm->invalidated, its not
      correct way
V8: Fixed the race condition between suspend/close/fence

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Acked-by: Christian Koenig <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/amdgpu/amdgpu_eviction_fence.h    |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 183 +++++++++++++++++-
 .../gpu/drm/amd/include/amdgpu_userqueue.h    |   3 +-
 3 files changed, 181 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
index 12f168ec3ef0..787182bd1069 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
@@ -38,6 +38,7 @@ struct amdgpu_eviction_fence_mgr {
 	spinlock_t		ev_fence_lock;
 	struct amdgpu_eviction_fence *ev_fence;
 	struct delayed_work	suspend_work;
+	uint8_t fd_closing;
 };
 
 /* Eviction fence helper functions */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 43fff8869ac9..57e502d014a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -22,6 +22,7 @@
  *
  */
 
+#include <drm/drm_exec.h>
 #include "amdgpu.h"
 #include "amdgpu_vm.h"
 #include "amdgpu_userqueue.h"
@@ -216,6 +217,7 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
 	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
 	struct amdgpu_usermode_queue *queue;
 
+	cancel_delayed_work(&uq_mgr->resume_work);
 	mutex_lock(&uq_mgr->userq_mutex);
 
 	queue = amdgpu_userqueue_find(uq_mgr, queue_id);
@@ -228,7 +230,6 @@ amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
 	amdgpu_bo_unpin(queue->db_obj.obj);
 	amdgpu_bo_unref(&queue->db_obj.obj);
 	amdgpu_userqueue_cleanup(uq_mgr, queue, queue_id);
-	uq_mgr->num_userqs--;
 	mutex_unlock(&uq_mgr->userq_mutex);
 	return 0;
 }
@@ -304,10 +305,18 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args)
 		goto unlock;
 	}
 	args->out.queue_id = qid;
-	uq_mgr->num_userqs++;
 
 unlock:
 	mutex_unlock(&uq_mgr->userq_mutex);
+	if (!r) {
+		/*
+		* There could be a situation that we are creating a new queue while
+		* the other queues under this UQ_mgr are suspended. So if there is any
+		* resume work pending, wait for it to get done.
+		*/
+		flush_delayed_work(&uq_mgr->resume_work);
+	}
+
 	return r;
 }
 
@@ -345,6 +354,161 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
 }
 #endif
 
+static int
+amdgpu_userqueue_resume_all(struct amdgpu_userq_mgr *uq_mgr)
+{
+	struct amdgpu_device *adev = uq_mgr->adev;
+	const struct amdgpu_userq_funcs *userq_funcs;
+	struct amdgpu_usermode_queue *queue;
+	int queue_id;
+	int ret = 0;
+
+	userq_funcs = adev->userq_funcs[AMDGPU_HW_IP_GFX];
+
+	/* Resume all the queues for this process */
+	idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id)
+		ret = userq_funcs->resume(uq_mgr, queue);
+
+	if (ret)
+		DRM_ERROR("Failed to resume all the queue\n");
+	return ret;
+}
+
+static int
+amdgpu_userqueue_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
+{
+	struct ttm_operation_ctx ctx = { false, false };
+	int ret;
+
+	amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+
+	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (ret)
+		DRM_ERROR("Fail to validate\n");
+
+	return ret;
+}
+
+static int
+amdgpu_userqueue_validate_bos(struct amdgpu_userq_mgr *uq_mgr)
+{
+	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+	struct amdgpu_vm *vm = &fpriv->vm;
+	struct amdgpu_device *adev = uq_mgr->adev;
+	struct amdgpu_bo_va *bo_va;
+	struct ww_acquire_ctx *ticket;
+	struct drm_exec exec;
+	struct amdgpu_bo *bo;
+	struct dma_resv *resv;
+	bool clear, unlock;
+	int ret = 0;
+
+	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+	drm_exec_until_all_locked(&exec) {
+		ret = amdgpu_vm_lock_pd(vm, &exec, 2);
+		drm_exec_retry_on_contention(&exec);
+		if (unlikely(ret)) {
+			DRM_ERROR("Failed to lock PD\n");
+			goto unlock_all;
+		}
+
+		/* Lock the done list */
+		list_for_each_entry(bo_va, &vm->done, base.vm_status) {
+			bo = bo_va->base.bo;
+			if (!bo)
+				continue;
+
+			ret = drm_exec_lock_obj(&exec, &bo->tbo.base);
+			drm_exec_retry_on_contention(&exec);
+			if (unlikely(ret))
+				goto unlock_all;
+		}
+	}
+
+	spin_lock(&vm->status_lock);
+	while (!list_empty(&vm->moved)) {
+		bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
+					 base.vm_status);
+		spin_unlock(&vm->status_lock);
+
+		/* Per VM BOs never need to bo cleared in the page tables */
+		ret = amdgpu_vm_bo_update(adev, bo_va, false);
+		if (ret)
+			goto unlock_all;
+		spin_lock(&vm->status_lock);
+	}
+
+	ticket = &exec.ticket;
+	while (!list_empty(&vm->invalidated)) {
+		bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
+					 base.vm_status);
+		resv = bo_va->base.bo->tbo.base.resv;
+		spin_unlock(&vm->status_lock);
+
+		bo = bo_va->base.bo;
+		ret = amdgpu_userqueue_validate_vm_bo(NULL, bo);
+		if (ret) {
+			DRM_ERROR("Failed to validate BO\n");
+			goto unlock_all;
+		}
+
+		/* Try to reserve the BO to avoid clearing its ptes */
+		if (!adev->debug_vm && dma_resv_trylock(resv)) {
+			clear = false;
+			unlock = true;
+		/* The caller is already holding the reservation lock */
+		} else if (ticket && dma_resv_locking_ctx(resv) == ticket) {
+			clear = false;
+			unlock = false;
+		/* Somebody else is using the BO right now */
+		} else {
+			clear = true;
+			unlock = false;
+		}
+
+		ret = amdgpu_vm_bo_update(adev, bo_va, clear);
+
+		if (unlock)
+			dma_resv_unlock(resv);
+		if (ret)
+			goto unlock_all;
+
+		spin_lock(&vm->status_lock);
+	}
+	spin_unlock(&vm->status_lock);
+
+	ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
+	if (ret)
+		DRM_ERROR("Failed to replace eviction fence\n");
+
+unlock_all:
+	drm_exec_fini(&exec);
+	return ret;
+}
+
+static void amdgpu_userqueue_resume_worker(struct work_struct *work)
+{
+	struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work);
+	int ret;
+
+	mutex_lock(&uq_mgr->userq_mutex);
+
+	ret = amdgpu_userqueue_validate_bos(uq_mgr);
+	if (ret) {
+		DRM_ERROR("Failed to validate BOs to restore\n");
+		goto unlock;
+	}
+
+	ret = amdgpu_userqueue_resume_all(uq_mgr);
+	if (ret) {
+		DRM_ERROR("Failed to resume all queues\n");
+		goto unlock;
+	}
+
+unlock:
+	mutex_unlock(&uq_mgr->userq_mutex);
+}
+
 static int
 amdgpu_userqueue_suspend_all(struct amdgpu_userq_mgr *uq_mgr)
 {
@@ -393,6 +557,7 @@ amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr)
 	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
 	struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;
 
+
 	mutex_lock(&uq_mgr->userq_mutex);
 
 	/* Wait for any pending userqueue fence to signal */
@@ -411,8 +576,14 @@ amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr)
 	/* Signal current eviction fence */
 	amdgpu_eviction_fence_signal(evf_mgr);
 
-	/* Cleanup old eviction fence entry */
-	amdgpu_eviction_fence_destroy(evf_mgr);
+	if (evf_mgr->fd_closing) {
+		mutex_unlock(&uq_mgr->userq_mutex);
+		cancel_delayed_work(&uq_mgr->resume_work);
+		return;
+	}
+
+	/* Schedule a resume work */
+	schedule_delayed_work(&uq_mgr->resume_work, 0);
 
 unlock:
 	mutex_unlock(&uq_mgr->userq_mutex);
@@ -425,7 +596,6 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_devi
 	mutex_init(&userq_mgr->userq_mutex);
 	idr_init_base(&userq_mgr->userq_idr, 1);
 	userq_mgr->adev = adev;
-	userq_mgr->num_userqs = 0;
 
 	fpriv = uq_mgr_to_fpriv(userq_mgr);
 	if (!fpriv->evf_mgr.ev_fence) {
@@ -433,6 +603,7 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_devi
 		return -EINVAL;
 	}
 
+	INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userqueue_resume_worker);
 	return 0;
 }
 
@@ -441,6 +612,8 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
 	uint32_t queue_id;
 	struct amdgpu_usermode_queue *queue;
 
+	cancel_delayed_work(&userq_mgr->resume_work);
+
 	idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id)
 		amdgpu_userqueue_cleanup(userq_mgr, queue, queue_id);
 
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
index 7781ee59653b..2bf28f3454cb 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
@@ -29,6 +29,7 @@
 
 #define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base)
 #define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr)
+#define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name)
 
 struct amdgpu_mqd_prop;
 
@@ -73,7 +74,7 @@ struct amdgpu_userq_mgr {
 	struct idr			userq_idr;
 	struct mutex			userq_mutex;
 	struct amdgpu_device		*adev;
-	int num_userqs;
+	struct delayed_work		resume_work;
 };
 
 int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
-- 
2.51.0


From b8e6d3f68c3bd1ac54492e210ece87475e7f862b Mon Sep 17 00:00:00 2001
From: Shashank Sharma <shashank.sharma@amd.com>
Date: Wed, 20 Nov 2024 18:04:33 +0100
Subject: [PATCH 05/16] drm/amdgpu: handle eviction fence race
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

The eviction process can get into a race condition between the eviction
fence suspend work (which replaces the old fence with new) and kms_close
(which destroys the fence and doesn't expect a new one).

This patch:
- adds a flag to indicate that fd is closing, so fence replacement is
  not required (evf_mgr->fd_closing)
- adds a flush_work() during the ev_fence_destroy routine

V2: Addressed review comments from Christian:
    - Do not use mutex to sync
    - Use flush_work and wait for suspend_work to be done

V3: Fixed state machine for queue->active, which adds into race between
    suspend/resume and queue ops

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian KÃ¶nig <christian.koenig@amd.com>
Reviewed-by: Christian KÃ¶nig <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c | 7 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c            | 4 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c      | 3 ++-
 drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c   | 9 +++++----
 4 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
index 189afb872775..c22767a75348 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
@@ -117,6 +117,10 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
 	/* Signal old eviction fence */
 	amdgpu_eviction_fence_signal(evf_mgr);
 
+	/* Do not replace eviction fence is fd is getting closed */
+	if (evf_mgr->fd_closing)
+		return;
+
 	/* Prepare the objects to replace eviction fence */
 	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
 	drm_exec_until_all_locked(&exec) {
@@ -199,6 +203,9 @@ void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr)
 {
 	struct amdgpu_eviction_fence *ev_fence;
 
+	/* Wait for any pending work to execute */
+	flush_delayed_work(&evf_mgr->suspend_work);
+
 	spin_lock(&evf_mgr->ev_fence_lock);
 	ev_fence = evf_mgr->ev_fence;
 	spin_unlock(&evf_mgr->ev_fence_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 700442584f97..f8370da08038 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1490,10 +1490,12 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 		amdgpu_bo_unreserve(pd);
 	}
 
+	fpriv->evf_mgr.fd_closing = true;
+	amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
 	amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
+
 	amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
 	amdgpu_vm_fini(adev, &fpriv->vm);
-	amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
 
 	if (pasid)
 		amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 57e502d014a1..cba51bdf2e2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -614,9 +614,10 @@ void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
 
 	cancel_delayed_work(&userq_mgr->resume_work);
 
+	mutex_lock(&userq_mgr->userq_mutex);
 	idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id)
 		amdgpu_userqueue_cleanup(userq_mgr, queue, queue_id);
-
 	idr_destroy(&userq_mgr->userq_idr);
+	mutex_unlock(&userq_mgr->userq_mutex);
 	mutex_destroy(&userq_mgr->userq_mutex);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
index ee0a757fcfa3..b1b7bc47d39f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
@@ -139,6 +139,7 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr,
 		return r;
 	}
 
+	queue->queue_active = true;
 	DRM_DEBUG_DRIVER("Queue (doorbell:%d) mapped successfully\n", userq_props->doorbell_index);
 	return 0;
 }
@@ -160,6 +161,7 @@ static void mes_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
 	amdgpu_mes_unlock(&adev->mes);
 	if (r)
 		DRM_ERROR("Failed to unmap queue in HW, err (%d)\n", r);
+	queue->queue_active = false;
 }
 
 static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
@@ -331,7 +333,6 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
 		goto free_ctx;
 	}
 
-	queue->queue_active = true;
 	return 0;
 
 free_ctx:
@@ -350,12 +351,12 @@ static void
 mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
 			    struct amdgpu_usermode_queue *queue)
 {
-	mes_v11_0_userq_unmap(uq_mgr, queue);
-	amdgpu_bo_unref(&queue->wptr_obj.obj);
+	if (queue->queue_active)
+		mes_v11_0_userq_unmap(uq_mgr, queue);
+
 	amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj);
 	kfree(queue->userq_prop);
 	amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd);
-	queue->queue_active = false;
 }
 
 static int mes_v11_0_userq_suspend(struct amdgpu_userq_mgr *uq_mgr,
-- 
2.51.0


From 825f82cf936aea7f2d25d47efd27f90ba90e4ee2 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 18 Oct 2024 13:58:23 -0400
Subject: [PATCH 06/16] drm/amdgpu: add some additional members to
 amdgpu_mqd_prop

These are needed to make userqueue infrastructure generic.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8c6c3c5451e9..ea5a1e6f01c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -830,6 +830,9 @@ struct amdgpu_mqd_prop {
 	uint32_t hqd_queue_priority;
 	bool allow_tunneling;
 	bool hqd_active;
+	uint64_t shadow_addr;
+	uint64_t gds_bkup_addr;
+	uint64_t csa_addr;
 };
 
 struct amdgpu_mqd {
-- 
2.51.0


From 7179439e34bbeef28e1b5083c365e7295b07f9bd Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 18 Oct 2024 14:14:34 -0400
Subject: [PATCH 07/16] drm/amdgpu/gfx11: update mqd init for UQ

Set the addresses for the UQ metadata.

V2: Fix lower address (Shashank)
V3: Restore lower_32_bits() for MQD addresses (Alex)

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 933c4ad5eff9..b8f75e1ba72c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -4127,6 +4127,14 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
 	/* active the queue */
 	mqd->cp_gfx_hqd_active = 1;
 
+	/* set gfx UQ items */
+	mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
+	mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
+	mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr);
+	mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr);
+	mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
+	mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+
 	return 0;
 }
 
-- 
2.51.0


From f2234816a31d0ec85ab63899762ec962ab682704 Mon Sep 17 00:00:00 2001
From: Amaranath Somalapuram <amaranath.somalapuram@amd.com>
Date: Wed, 27 Nov 2024 17:06:45 +0100
Subject: [PATCH 08/16] drm/amdgpu: fix IGT CI regression with eviction fence

This patch fixes one of the regressions in eviction fence code with
IGT tests.

Reviewed-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Amaranath Somalapuram <amaranath.somalapuram@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
index c22767a75348..f7fb1674278c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
@@ -136,6 +136,9 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
 			if (!bo)
 				continue;
 
+			if (vm != bo_va->base.vm)
+				continue;
+
 			ret = drm_exec_lock_obj(&exec, &bo->tbo.base);
 			drm_exec_retry_on_contention(&exec);
 			if (unlikely(ret))
-- 
2.51.0


From ab328d9a7b614ba8c6f63096eae817dd6e7f72f1 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 18 Oct 2024 14:15:12 -0400
Subject: [PATCH 09/16] drm/amdgpu/gfx12: update mqd init for UQ

Set the addresses for the UQ metadata.

V2: Fix lower address mask (Shashank)
V3: Use lower_32_bits() for MQD objects (Alex)

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 4f9a73bae332..8b409b4a1cb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -3027,6 +3027,12 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
 	/* active the queue */
 	mqd->cp_gfx_hqd_active = 1;
 
+	/* set gfx UQ items */
+	mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
+	mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
+	mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
+	mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+
 	return 0;
 }
 
-- 
2.51.0


From d07a7fcb8d25724351b7f9c03739b814da343b72 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 18 Oct 2024 14:15:31 -0400
Subject: [PATCH 10/16] drm/amdgpu/sdma6: update mqd init for UQ

Set the addresses for the UQ metadata.

V2: Fix lower address mask (Shashank)
V3: Use lower_32_bits for MQD objects (Alex)

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index d4e1b2cd1f35..fe389379e890 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -892,6 +892,9 @@ static int sdma_v6_0_mqd_init(struct amdgpu_device *adev, void *mqd,
 	m->sdmax_rlcx_rb_aql_cntl = regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT;
 	m->sdmax_rlcx_dummy_reg = regSDMA0_QUEUE0_DUMMY_REG_DEFAULT;
 
+	m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
+	m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);
+
 	return 0;
 }
 
-- 
2.51.0


From 21926b5db8c1d4e82b47bbd484b085a0e604bfd6 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 18 Oct 2024 14:15:51 -0400
Subject: [PATCH 11/16] drm/amdgpu/sdma7: update mqd init for UQ

Set the addresses for the UQ metadata.

V2: Fix lower offset mask (Shashank)
V2: Use lower_32_bits for mqd objects(Alex)

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index b2706221df99..3a0f38fc003e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -935,6 +935,9 @@ static int sdma_v7_0_mqd_init(struct amdgpu_device *adev, void *mqd,
 	m->sdmax_rlcx_rb_aql_cntl = 0x4000;	//regSDMA0_QUEUE0_RB_AQL_CNTL_DEFAULT;
 	m->sdmax_rlcx_dummy_reg = 0xf;	//regSDMA0_QUEUE0_DUMMY_REG_DEFAULT;
 
+	m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
+	m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);
+
 	return 0;
 }
 
-- 
2.51.0


From b965c5d87108add7941528569d7acdfabcd86b55 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 21 Oct 2024 15:16:12 +0200
Subject: [PATCH 12/16] drm/amdgpu/uq: remove gfx11 specifics from UQ setup

This can all be handled by in the IP specific mpd init
code.

V2: Removed setting of gds_va, which was removed during UAPI
    review (Shashank)

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c  | 83 ++++++++-----------
 1 file changed, 35 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
index b1b7bc47d39f..1ba6b91b03c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
@@ -23,8 +23,6 @@
  */
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
-#include "v11_structs.h"
-#include "mes_v11_0.h"
 #include "mes_v11_0_userqueue.h"
 #include "amdgpu_userq_fence.h"
 
@@ -183,52 +181,6 @@ static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
 		return r;
 	}
 
-	/* Shadow, GDS and CSA objects come directly from userspace */
-	if (mqd_user->ip_type == AMDGPU_HW_IP_GFX) {
-		struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr;
-		struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11;
-
-		if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) {
-			DRM_ERROR("Invalid GFX MQD\n");
-			return -EINVAL;
-		}
-
-		mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
-		if (IS_ERR(mqd_gfx_v11)) {
-			DRM_ERROR("Failed to read user MQD\n");
-			amdgpu_userqueue_destroy_object(uq_mgr, ctx);
-			return -ENOMEM;
-		}
-
-		mqd->shadow_base_lo = mqd_gfx_v11->shadow_va & 0xFFFFFFFC;
-		mqd->shadow_base_hi = upper_32_bits(mqd_gfx_v11->shadow_va);
-
-		mqd->gds_bkup_base_lo = 0;
-		mqd->gds_bkup_base_hi = 0;
-
-		mqd->fw_work_area_base_lo = mqd_gfx_v11->csa_va & 0xFFFFFFFC;
-		mqd->fw_work_area_base_hi = upper_32_bits(mqd_gfx_v11->csa_va);
-		kfree(mqd_gfx_v11);
-	} else if (mqd_user->ip_type == AMDGPU_HW_IP_DMA) {
-		struct v11_sdma_mqd *mqd = queue->mqd.cpu_ptr;
-		struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11;
-
-		if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) {
-			DRM_ERROR("Invalid SDMA MQD\n");
-			return -EINVAL;
-		}
-
-		mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
-		if (IS_ERR(mqd_sdma_v11)) {
-			DRM_ERROR("Failed to read sdma user MQD\n");
-			amdgpu_userqueue_destroy_object(uq_mgr, ctx);
-			return -ENOMEM;
-		}
-
-		mqd->sdmax_rlcx_csa_addr_lo = mqd_sdma_v11->csa_va & 0xFFFFFFFC;
-		mqd->sdmax_rlcx_csa_addr_hi = upper_32_bits(mqd_sdma_v11->csa_va);
-	}
-
 	return 0;
 }
 
@@ -300,6 +252,41 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
 		userq_props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
 		userq_props->hqd_active = false;
 		kfree(compute_mqd);
+	} else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
+		struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11;
+
+		if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) {
+			DRM_ERROR("Invalid GFX MQD\n");
+			return -EINVAL;
+		}
+
+		mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
+		if (IS_ERR(mqd_gfx_v11)) {
+			DRM_ERROR("Failed to read user MQD\n");
+			amdgpu_userqueue_destroy_object(uq_mgr, ctx);
+			return -ENOMEM;
+		}
+
+		userq_props->shadow_addr = mqd_gfx_v11->shadow_va;
+		userq_props->csa_addr = mqd_gfx_v11->csa_va;
+		kfree(mqd_gfx_v11);
+	} else if (queue->queue_type == AMDGPU_HW_IP_DMA) {
+		struct drm_amdgpu_userq_mqd_sdma_gfx11 *mqd_sdma_v11;
+
+		if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) {
+			DRM_ERROR("Invalid SDMA MQD\n");
+			return -EINVAL;
+		}
+
+		mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
+		if (IS_ERR(mqd_sdma_v11)) {
+			DRM_ERROR("Failed to read sdma user MQD\n");
+			amdgpu_userqueue_destroy_object(uq_mgr, ctx);
+			return -ENOMEM;
+		}
+
+		userq_props->csa_addr = mqd_sdma_v11->csa_va;
+		kfree(mqd_sdma_v11);
 	}
 
 	queue->userq_prop = userq_props;
-- 
2.51.0


From 79819d9a0ac38bf83ac712e00be2d53104895b84 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 26 Nov 2024 15:45:19 +0100
Subject: [PATCH 13/16] drm/amdgpu/uq: make MES UQ setup generic

Now that all of the IP specific code has been moved into
the IP specific functions, we can make this code generic.

V2: Fixed build errors and porting logics (Shashank)

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile           |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c        | 10 +--
 ...{mes_v11_0_userqueue.c => mes_userqueue.c} | 77 ++++++++++---------
 ...{mes_v11_0_userqueue.h => mes_userqueue.h} |  6 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c        |  4 +-
 5 files changed, 51 insertions(+), 48 deletions(-)
 rename drivers/gpu/drm/amd/amdgpu/{mes_v11_0_userqueue.c => mes_userqueue.c} (84%)
 rename drivers/gpu/drm/amd/amdgpu/{mes_v11_0_userqueue.h => mes_userqueue.h} (91%)

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 0b0c8ec46516..513c4d64f554 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -177,7 +177,7 @@ amdgpu-y += \
 	mes_v12_0.o \
 
 # add GFX userqueue support
-amdgpu-$(CONFIG_DRM_AMDGPU_NAVI3X_USERQ) += mes_v11_0_userqueue.o
+amdgpu-$(CONFIG_DRM_AMDGPU_NAVI3X_USERQ) += mes_userqueue.o
 
 # add UVD block
 amdgpu-y += \
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index b8f75e1ba72c..63b7c7bfcc4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -48,7 +48,7 @@
 #include "gfx_v11_0_3.h"
 #include "nbio_v4_3.h"
 #include "mes_v11_0.h"
-#include "mes_v11_0_userqueue.h"
+#include "mes_userqueue.h"
 #include "amdgpu_userq_fence.h"
 
 #define GFX11_NUM_GFX_RINGS		1
@@ -1623,8 +1623,8 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
 		adev->gfx.mec.num_pipe_per_mec = 4;
 		adev->gfx.mec.num_queue_per_pipe = 4;
 #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
-		adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs;
-		adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs;
+		adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+		adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
 #endif
 		break;
 	case IP_VERSION(11, 0, 1):
@@ -1640,8 +1640,8 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
 		adev->gfx.mec.num_pipe_per_mec = 4;
 		adev->gfx.mec.num_queue_per_pipe = 4;
 #ifdef CONFIG_DRM_AMD_USERQ_GFX
-		adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_v11_0_funcs;
-		adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_v11_0_funcs;
+		adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+		adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
 #endif
 		break;
 	default:
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
similarity index 84%
rename from drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
rename to drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 1ba6b91b03c4..9c2fc8ae0d56 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -23,14 +23,15 @@
  */
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
-#include "mes_v11_0_userqueue.h"
+#include "mes_userqueue.h"
 #include "amdgpu_userq_fence.h"
+#include "v11_structs.h"
 
 #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
 #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
 
 static int
-mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
+mes_userq_map_gtt_bo_to_gart(struct amdgpu_bo *bo)
 {
 	int ret;
 
@@ -58,7 +59,7 @@ err_reserve_bo_failed:
 }
 
 static int
-mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
+mes_userq_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
 			      struct amdgpu_usermode_queue *queue,
 			      uint64_t wptr)
 {
@@ -86,7 +87,7 @@ mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
 		return -EINVAL;
 	}
 
-	ret = mes_v11_0_map_gtt_bo_to_gart(wptr_obj->obj);
+	ret = mes_userq_map_gtt_bo_to_gart(wptr_obj->obj);
 	if (ret) {
 		DRM_ERROR("Failed to map wptr bo to GART\n");
 		return ret;
@@ -96,9 +97,9 @@ mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
 	return 0;
 }
 
-static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr,
-			       struct amdgpu_usermode_queue *queue,
-			       struct amdgpu_mqd_prop *userq_props)
+static int mes_userq_map(struct amdgpu_userq_mgr *uq_mgr,
+			 struct amdgpu_usermode_queue *queue,
+			 struct amdgpu_mqd_prop *userq_props)
 {
 	struct amdgpu_device *adev = uq_mgr->adev;
 	struct amdgpu_userq_obj *ctx = &queue->fw_obj;
@@ -142,8 +143,8 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr,
 	return 0;
 }
 
-static void mes_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
-				  struct amdgpu_usermode_queue *queue)
+static void mes_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
+			    struct amdgpu_usermode_queue *queue)
 {
 	struct amdgpu_device *adev = uq_mgr->adev;
 	struct mes_remove_queue_input queue_input;
@@ -162,9 +163,9 @@ static void mes_v11_0_userq_unmap(struct amdgpu_userq_mgr *uq_mgr,
 	queue->queue_active = false;
 }
 
-static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
-					    struct amdgpu_usermode_queue *queue,
-					    struct drm_amdgpu_userq_in *mqd_user)
+static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
+				      struct amdgpu_usermode_queue *queue,
+				      struct drm_amdgpu_userq_in *mqd_user)
 {
 	struct amdgpu_userq_obj *ctx = &queue->fw_obj;
 	int r, size;
@@ -184,7 +185,7 @@ static int mes_v11_0_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
 	return 0;
 }
 
-static void mes_v11_0_userq_set_fence_space(struct amdgpu_usermode_queue *queue)
+static void mes_userq_set_fence_space(struct amdgpu_usermode_queue *queue)
 {
 	struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr;
 
@@ -192,9 +193,9 @@ static void mes_v11_0_userq_set_fence_space(struct amdgpu_usermode_queue *queue)
 	mqd->fenceaddress_hi = upper_32_bits(queue->fence_drv->gpu_addr);
 }
 
-static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
-				      struct drm_amdgpu_userq_in *args_in,
-				      struct amdgpu_usermode_queue *queue)
+static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
+				struct drm_amdgpu_userq_in *args_in,
+				struct amdgpu_usermode_queue *queue)
 {
 	struct amdgpu_device *adev = uq_mgr->adev;
 	struct amdgpu_mqd *mqd_hw_default = &adev->mqds[queue->queue_type];
@@ -257,14 +258,15 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
 
 		if (mqd_user->mqd_size != sizeof(*mqd_gfx_v11) || !mqd_user->mqd) {
 			DRM_ERROR("Invalid GFX MQD\n");
-			return -EINVAL;
+			r = -EINVAL;
+			goto free_mqd;
 		}
 
 		mqd_gfx_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
 		if (IS_ERR(mqd_gfx_v11)) {
 			DRM_ERROR("Failed to read user MQD\n");
-			amdgpu_userqueue_destroy_object(uq_mgr, ctx);
-			return -ENOMEM;
+			r = -ENOMEM;
+			goto free_mqd;
 		}
 
 		userq_props->shadow_addr = mqd_gfx_v11->shadow_va;
@@ -275,14 +277,15 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
 
 		if (mqd_user->mqd_size != sizeof(*mqd_sdma_v11) || !mqd_user->mqd) {
 			DRM_ERROR("Invalid SDMA MQD\n");
-			return -EINVAL;
+			r = -EINVAL;
+			goto free_mqd;
 		}
 
 		mqd_sdma_v11 = memdup_user(u64_to_user_ptr(mqd_user->mqd), mqd_user->mqd_size);
 		if (IS_ERR(mqd_sdma_v11)) {
 			DRM_ERROR("Failed to read sdma user MQD\n");
-			amdgpu_userqueue_destroy_object(uq_mgr, ctx);
-			return -ENOMEM;
+			r = -ENOMEM;
+			goto free_mqd;
 		}
 
 		userq_props->csa_addr = mqd_sdma_v11->csa_va;
@@ -298,23 +301,23 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
 	}
 
 	/* Create BO for FW operations */
-	r = mes_v11_0_userq_create_ctx_space(uq_mgr, queue, mqd_user);
+	r = mes_userq_create_ctx_space(uq_mgr, queue, mqd_user);
 	if (r) {
 		DRM_ERROR("Failed to allocate BO for userqueue (%d)", r);
 		goto free_mqd;
 	}
 
-	mes_v11_0_userq_set_fence_space(queue);
+	mes_userq_set_fence_space(queue);
 
 	/* FW expects WPTR BOs to be mapped into GART */
-	r = mes_v11_0_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr);
+	r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr);
 	if (r) {
 		DRM_ERROR("Failed to create WPTR mapping\n");
 		goto free_ctx;
 	}
 
 	/* Map userqueue into FW using MES */
-	r = mes_v11_0_userq_map(uq_mgr, queue, userq_props);
+	r = mes_userq_map(uq_mgr, queue, userq_props);
 	if (r) {
 		DRM_ERROR("Failed to init MQD\n");
 		goto free_ctx;
@@ -335,29 +338,29 @@ free_props:
 }
 
 static void
-mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
+mes_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
 			    struct amdgpu_usermode_queue *queue)
 {
 	if (queue->queue_active)
-		mes_v11_0_userq_unmap(uq_mgr, queue);
+		mes_userq_unmap(uq_mgr, queue);
 
 	amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj);
 	kfree(queue->userq_prop);
 	amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd);
 }
 
-static int mes_v11_0_userq_suspend(struct amdgpu_userq_mgr *uq_mgr,
+static int mes_userq_suspend(struct amdgpu_userq_mgr *uq_mgr,
 				   struct amdgpu_usermode_queue *queue)
 {
 	if (queue->queue_active) {
-		mes_v11_0_userq_unmap(uq_mgr, queue);
+		mes_userq_unmap(uq_mgr, queue);
 		queue->queue_active = false;
 	}
 
 	return 0;
 }
 
-static int mes_v11_0_userq_resume(struct amdgpu_userq_mgr *uq_mgr,
+static int mes_userq_resume(struct amdgpu_userq_mgr *uq_mgr,
 				  struct amdgpu_usermode_queue *queue)
 {
 	int ret;
@@ -365,7 +368,7 @@ static int mes_v11_0_userq_resume(struct amdgpu_userq_mgr *uq_mgr,
 	if (queue->queue_active)
 		return 0;
 
-	ret = mes_v11_0_userq_map(uq_mgr, queue, queue->userq_prop);
+	ret = mes_userq_map(uq_mgr, queue, queue->userq_prop);
 	if (ret) {
 		DRM_ERROR("Failed to resume queue\n");
 		return ret;
@@ -375,9 +378,9 @@ static int mes_v11_0_userq_resume(struct amdgpu_userq_mgr *uq_mgr,
 	return 0;
 }
 
-const struct amdgpu_userq_funcs userq_mes_v11_0_funcs = {
-	.mqd_create = mes_v11_0_userq_mqd_create,
-	.mqd_destroy = mes_v11_0_userq_mqd_destroy,
-	.suspend = mes_v11_0_userq_suspend,
-	.resume = mes_v11_0_userq_resume,
+const struct amdgpu_userq_funcs userq_mes_funcs = {
+	.mqd_create = mes_userq_mqd_create,
+	.mqd_destroy = mes_userq_mqd_destroy,
+	.suspend = mes_userq_suspend,
+	.resume = mes_userq_resume,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h
similarity index 91%
rename from drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h
rename to drivers/gpu/drm/amd/amdgpu/mes_userqueue.h
index 2c102361ca82..d0a521312ad4 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.h
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.h
@@ -22,9 +22,9 @@
  *
  */
 
-#ifndef MES_V11_0_USERQ_H
-#define MES_V11_0_USERQ_H
+#ifndef MES_USERQ_H
+#define MES_USERQ_H
 #include "amdgpu_userqueue.h"
 
-extern const struct amdgpu_userq_funcs userq_mes_v11_0_funcs;
+extern const struct amdgpu_userq_funcs userq_mes_funcs;
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index fe389379e890..9bc3c7a35d18 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -43,7 +43,7 @@
 #include "sdma_common.h"
 #include "sdma_v6_0.h"
 #include "v11_structs.h"
-#include "mes_v11_0_userqueue.h"
+#include "mes_userqueue.h"
 
 MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin");
 MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin");
@@ -1381,7 +1381,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
 		DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
 
 #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
-	adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_v11_0_funcs;
+	adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
 #endif
 	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
 	if (r)
-- 
2.51.0


From 988c9e704670a39ef2fd23f3eeb2d3b9c813dab1 Mon Sep 17 00:00:00 2001
From: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>
Date: Thu, 10 Oct 2024 20:08:06 +0200
Subject: [PATCH 14/16] drm/amdgpu: enable userqueue support for GFX12

This patch enables Usermode queue support across GFX, Compute
and SDMA IPs on GFX12/SDMA7. It typically reuses Navi3X userqueue
IP functions to create and destroy MQDs.

v2: rebase on proposed changes (Alex)

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Arvind Yadav <arvind.yadav@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 5 +++++
 drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 6 ++++++
 2 files changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 8b409b4a1cb3..ee65f4057872 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -44,6 +44,7 @@
 #include "gfx_v12_0.h"
 #include "nbif_v6_3_1.h"
 #include "mes_v12_0.h"
+#include "mes_userqueue.h"
 
 #define GFX12_NUM_GFX_RINGS	1
 #define GFX12_MEC_HPD_SIZE	2048
@@ -1417,6 +1418,10 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
 		adev->gfx.mec.num_mec = 1;
 		adev->gfx.mec.num_pipe_per_mec = 2;
 		adev->gfx.mec.num_queue_per_pipe = 4;
+#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
+		adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
+		adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
+#endif
 		break;
 	default:
 		adev->gfx.me.num_me = 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 3a0f38fc003e..3514089acd94 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -42,6 +42,7 @@
 #include "sdma_common.h"
 #include "sdma_v7_0.h"
 #include "v12_structs.h"
+#include "mes_userqueue.h"
 
 MODULE_FIRMWARE("amdgpu/sdma_7_0_0.bin");
 MODULE_FIRMWARE("amdgpu/sdma_7_0_1.bin");
@@ -1381,6 +1382,11 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
 	else
 		DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
 
+#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
+	adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
+#endif
+
+
 	return r;
 }
 
-- 
2.51.0


From dd5a376cd234c3fff79667598a1e06300cf75026 Mon Sep 17 00:00:00 2001
From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Date: Tue, 26 Nov 2024 15:51:08 +0100
Subject: [PATCH 15/16] drm/amdgpu: enable userqueue secure sem for GFX 12

- Add a field in struct amdgpu_mqd_prop for userqueue
  secure sem fence address since now we have a generic
  file for mes_userqueue.c
- Add secure sem fence address mqd support to gfx12 into
  their corresponding init functions.
- Enable secure semaphore IRQ handling

V2: Address review comment from Alex:
    Use fence_address instead of fenceaddress (Shashank)

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Signed-off-by: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c     |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c     | 27 +++++++++++-----------
 drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 11 +--------
 drivers/gpu/drm/amd/include/v11_structs.h  |  4 ++--
 drivers/gpu/drm/amd/include/v12_structs.h  |  4 ++--
 6 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ea5a1e6f01c3..3cdb5f8325aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -833,6 +833,7 @@ struct amdgpu_mqd_prop {
 	uint64_t shadow_addr;
 	uint64_t gds_bkup_addr;
 	uint64_t csa_addr;
+	uint64_t fence_address;
 };
 
 struct amdgpu_mqd {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 63b7c7bfcc4a..114284e65c7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -4134,6 +4134,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
 	mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr);
 	mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
 	mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+	mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+	mqd->fence_address_hi = upper_32_bits(prop->fence_address);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index ee65f4057872..1030f2985c7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -45,6 +45,7 @@
 #include "nbif_v6_3_1.h"
 #include "mes_v12_0.h"
 #include "mes_userqueue.h"
+#include "amdgpu_userq_fence.h"
 
 #define GFX12_NUM_GFX_RINGS	1
 #define GFX12_MEC_HPD_SIZE	2048
@@ -3037,6 +3038,8 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
 	mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
 	mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
 	mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
+	mqd->fence_address_lo = lower_32_bits(prop->fence_address);
+	mqd->fence_address_hi = upper_32_bits(prop->fence_address);
 
 	return 0;
 }
@@ -4819,25 +4822,23 @@ static int gfx_v12_0_eop_irq(struct amdgpu_device *adev,
 			     struct amdgpu_irq_src *source,
 			     struct amdgpu_iv_entry *entry)
 {
-	int i;
+	u32 doorbell_offset = entry->src_data[0];
 	u8 me_id, pipe_id, queue_id;
 	struct amdgpu_ring *ring;
-	uint32_t mes_queue_id = entry->src_data[0];
+	int i;
 
 	DRM_DEBUG("IH: CP EOP\n");
 
-	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
-		struct amdgpu_mes_queue *queue;
+	if (adev->enable_mes && doorbell_offset) {
+		struct amdgpu_userq_fence_driver *fence_drv = NULL;
+		struct xarray *xa = &adev->userq_xa;
+		unsigned long flags;
 
-		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
-
-		spin_lock(&adev->mes.queue_id_lock);
-		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
-		if (queue) {
-			DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
-			amdgpu_fence_process(queue->ring);
-		}
-		spin_unlock(&adev->mes.queue_id_lock);
+		xa_lock_irqsave(xa, flags);
+		fence_drv = xa_load(xa, doorbell_offset);
+		if (fence_drv)
+			amdgpu_userq_fence_driver_process(fence_drv);
+		xa_unlock_irqrestore(xa, flags);
 	} else {
 		me_id = (entry->ring_id & 0x0c) >> 2;
 		pipe_id = (entry->ring_id & 0x03) >> 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 9c2fc8ae0d56..1dde099382ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -185,14 +185,6 @@ static int mes_userq_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
 	return 0;
 }
 
-static void mes_userq_set_fence_space(struct amdgpu_usermode_queue *queue)
-{
-	struct v11_gfx_mqd *mqd = queue->mqd.cpu_ptr;
-
-	mqd->fenceaddress_lo = lower_32_bits(queue->fence_drv->gpu_addr);
-	mqd->fenceaddress_hi = upper_32_bits(queue->fence_drv->gpu_addr);
-}
-
 static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
 				struct drm_amdgpu_userq_in *args_in,
 				struct amdgpu_usermode_queue *queue)
@@ -231,6 +223,7 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
 	userq_props->mqd_gpu_addr = queue->mqd.gpu_addr;
 	userq_props->use_doorbell = true;
 	userq_props->doorbell_index = queue->doorbell_index;
+	userq_props->fence_address = queue->fence_drv->gpu_addr;
 
 	if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
 		struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;
@@ -307,8 +300,6 @@ static int mes_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
 		goto free_mqd;
 	}
 
-	mes_userq_set_fence_space(queue);
-
 	/* FW expects WPTR BOs to be mapped into GART */
 	r = mes_userq_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr);
 	if (r) {
diff --git a/drivers/gpu/drm/amd/include/v11_structs.h b/drivers/gpu/drm/amd/include/v11_structs.h
index 797ce6a1e56e..f6d4dab849eb 100644
--- a/drivers/gpu/drm/amd/include/v11_structs.h
+++ b/drivers/gpu/drm/amd/include/v11_structs.h
@@ -535,8 +535,8 @@ struct v11_gfx_mqd {
 	uint32_t reserved_507; // offset: 507  (0x1FB)
 	uint32_t reserved_508; // offset: 508  (0x1FC)
 	uint32_t reserved_509; // offset: 509  (0x1FD)
-	uint32_t fenceaddress_lo; // offset: 510  (0x1FE)
-	uint32_t fenceaddress_hi; // offset: 511  (0x1FF)
+	uint32_t fence_address_lo; // offset: 510  (0x1FE)
+	uint32_t fence_address_hi; // offset: 511  (0x1FF)
 };
 
 struct v11_sdma_mqd {
diff --git a/drivers/gpu/drm/amd/include/v12_structs.h b/drivers/gpu/drm/amd/include/v12_structs.h
index 5eabab611b02..5787c8a51b7c 100644
--- a/drivers/gpu/drm/amd/include/v12_structs.h
+++ b/drivers/gpu/drm/amd/include/v12_structs.h
@@ -535,8 +535,8 @@ struct v12_gfx_mqd {
     uint32_t reserved_507; // offset: 507  (0x1FB)
     uint32_t reserved_508; // offset: 508  (0x1FC)
     uint32_t reserved_509; // offset: 509  (0x1FD)
-    uint32_t reserved_510; // offset: 510  (0x1FE)
-    uint32_t reserved_511; // offset: 511  (0x1FF)
+    uint32_t fence_address_lo; // offset: 510  (0x1FE)
+    uint32_t fence_address_hi; // offset: 511  (0x1FF)
 };
 
 struct v12_sdma_mqd {
-- 
2.51.0


From a242a3e4b5be22ffd85fb768d8c96df79b018136 Mon Sep 17 00:00:00 2001
From: Shashank Sharma <shashank.sharma@amd.com>
Date: Wed, 11 Dec 2024 12:09:00 +0100
Subject: [PATCH 16/16] drm/amdgpu: simplify eviction fence suspend/resume
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

The basic idea in this redesign is to add an eviction fence only in UQ
resume path. When userqueue is not present, keep ev_fence as NULL

Main changes are:
 - do not create the eviction fence during evf_mgr_init, keeping
   evf_mgr->ev_fence=NULL until UQ get active.
 - do not replace the ev_fence in evf_resume path, but replace it only in
   uq_resume path, so remove all the unnecessary code from ev_fence_resume.
 - add a new helper function (amdgpu_userqueue_ensure_ev_fence) which
   will do the following:
   - flush any pending uq_resume work, so that it could create an
     eviction_fence
   - if there is no pending uq_resume_work, add a uq_resume work and
     wait for it to execute so that we always have a valid ev_fence
 - call this helper function from two places, to ensure we have a valid
   ev_fence:
   - when a new uq is created
   - when a new uq completion fence is created

v2: Worked on review comments by Christian.
v3: Addressed few more review comments by Christian.
v4: Move mutex lock outside of the amdgpu_userqueue_suspend()
    function (Christian).
v5: squash in build fix (Alex)

Cc: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian KÃ¶nig <christian.koenig@amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/amdgpu/amdgpu_eviction_fence.c    | 78 ++++---------------
 .../drm/amd/amdgpu/amdgpu_eviction_fence.h    |  3 +-
 .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 24 ++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 68 +++++++++-------
 .../gpu/drm/amd/include/amdgpu_userqueue.h    |  7 +-
 5 files changed, 69 insertions(+), 111 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
index f7fb1674278c..8358dc6b68ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
@@ -87,7 +87,8 @@ amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
 	}
 
 	/* Free old fence */
-	dma_fence_put(&old_ef->base);
+	if (old_ef)
+		dma_fence_put(&old_ef->base);
 	return 0;
 
 free_err:
@@ -101,58 +102,17 @@ amdgpu_eviction_fence_suspend_worker(struct work_struct *work)
 	struct amdgpu_eviction_fence_mgr *evf_mgr = work_to_evf_mgr(work, suspend_work.work);
 	struct amdgpu_fpriv *fpriv = evf_mgr_to_fpriv(evf_mgr);
 	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
-	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_bo_va *bo_va;
-	struct drm_exec exec;
-	bool userq_active = amdgpu_userqueue_active(uq_mgr);
-	int ret;
-
-
-	/* For userqueues, the fence replacement happens in resume path */
-	if (userq_active) {
-		amdgpu_userqueue_suspend(uq_mgr);
-		return;
-	}
-
-	/* Signal old eviction fence */
-	amdgpu_eviction_fence_signal(evf_mgr);
-
-	/* Do not replace eviction fence is fd is getting closed */
-	if (evf_mgr->fd_closing)
-		return;
-
-	/* Prepare the objects to replace eviction fence */
-	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
-	drm_exec_until_all_locked(&exec) {
-		ret = amdgpu_vm_lock_pd(vm, &exec, 2);
-		drm_exec_retry_on_contention(&exec);
-		if (unlikely(ret))
-			goto unlock_drm;
-
-		/* Lock the done list */
-		list_for_each_entry(bo_va, &vm->done, base.vm_status) {
-			struct amdgpu_bo *bo = bo_va->base.bo;
-
-			if (!bo)
-				continue;
-
-			if (vm != bo_va->base.vm)
-				continue;
+	struct amdgpu_eviction_fence *ev_fence;
 
-			ret = drm_exec_lock_obj(&exec, &bo->tbo.base);
-			drm_exec_retry_on_contention(&exec);
-			if (unlikely(ret))
-				goto unlock_drm;
-		}
-	}
+	mutex_lock(&uq_mgr->userq_mutex);
+	ev_fence = evf_mgr->ev_fence;
+	if (!ev_fence)
+		goto unlock;
 
-	/* Replace old eviction fence with new one */
-	ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
-	if (ret)
-		DRM_ERROR("Failed to replace eviction fence\n");
+	amdgpu_userqueue_suspend(uq_mgr, ev_fence);
 
-unlock_drm:
-	drm_exec_fini(&exec);
+unlock:
+	mutex_unlock(&uq_mgr->userq_mutex);
 }
 
 static bool amdgpu_eviction_fence_enable_signaling(struct dma_fence *f)
@@ -177,10 +137,11 @@ static const struct dma_fence_ops amdgpu_eviction_fence_ops = {
 	.enable_signaling = amdgpu_eviction_fence_enable_signaling,
 };
 
-void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr)
+void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
+				  struct amdgpu_eviction_fence *ev_fence)
 {
 	spin_lock(&evf_mgr->ev_fence_lock);
-	dma_fence_signal(&evf_mgr->ev_fence->base);
+	dma_fence_signal(&ev_fence->base);
 	spin_unlock(&evf_mgr->ev_fence_lock);
 }
 
@@ -244,6 +205,7 @@ int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
 		dma_resv_add_fence(resv, ef, DMA_RESV_USAGE_BOOKKEEP);
 	}
 	spin_unlock(&evf_mgr->ev_fence_lock);
+
 	return 0;
 }
 
@@ -259,23 +221,11 @@ void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
 
 int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
 {
-	struct amdgpu_eviction_fence *ev_fence;
-
 	/* This needs to be done one time per open */
 	atomic_set(&evf_mgr->ev_fence_seq, 0);
 	evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1);
 	spin_lock_init(&evf_mgr->ev_fence_lock);
 
-	ev_fence = amdgpu_eviction_fence_create(evf_mgr);
-	if (!ev_fence) {
-		DRM_ERROR("Failed to craete eviction fence\n");
-		return -ENOMEM;
-	}
-
-	spin_lock(&evf_mgr->ev_fence_lock);
-	evf_mgr->ev_fence = ev_fence;
-	spin_unlock(&evf_mgr->ev_fence_lock);
-
 	INIT_DELAYED_WORK(&evf_mgr->suspend_work, amdgpu_eviction_fence_suspend_worker);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
index 787182bd1069..fcd867b7147d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
@@ -60,7 +60,8 @@ int
 amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
 
 void
-amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr);
+amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr,
+			     struct amdgpu_eviction_fence *ev_fence);
 
 int
 amdgpu_eviction_fence_replace_fence(struct amdgpu_eviction_fence_mgr *evf_mgr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
index 877cb17a14e9..20c36dc97c2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -466,14 +466,11 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
-	/* Save the fence to wait for during suspend */
-	mutex_lock(&userq_mgr->userq_mutex);
-
 	/* Retrieve the user queue */
 	queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
 	if (!queue) {
 		r = -ENOENT;
-		mutex_unlock(&userq_mgr->userq_mutex);
+		goto put_gobj_write;
 	}
 
 	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
@@ -483,31 +480,26 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
 	drm_exec_until_all_locked(&exec) {
 		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
 		drm_exec_retry_on_contention(&exec);
-		if (r) {
-			mutex_unlock(&userq_mgr->userq_mutex);
+		if (r)
 			goto exec_fini;
-		}
 
 		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
 		drm_exec_retry_on_contention(&exec);
-		if (r) {
-			mutex_unlock(&userq_mgr->userq_mutex);
+		if (r)
 			goto exec_fini;
-		}
 	}
 
 	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
-	if (r) {
-		mutex_unlock(&userq_mgr->userq_mutex);
+	if (r)
 		goto exec_fini;
-	}
 
 	/* Create a new fence */
 	r = amdgpu_userq_fence_create(queue, wptr, &fence);
-	if (r) {
-		mutex_unlock(&userq_mgr->userq_mutex);
+	if (r)
 		goto exec_fini;
-	}
+
+	/* We are here means UQ is active, make sure the eviction fence is valid */
+	amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
 
 	dma_fence_put(queue->last_fence);
 	queue->last_fence = dma_fence_get(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index cba51bdf2e2c..c11fcdd604fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -101,6 +101,31 @@ amdgpu_userqueue_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
 	return idr_find(&uq_mgr->userq_idr, qid);
 }
 
+void
+amdgpu_userqueue_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
+				 struct amdgpu_eviction_fence_mgr *evf_mgr)
+{
+	struct amdgpu_eviction_fence *ev_fence;
+
+retry:
+	/* Flush any pending resume work to create ev_fence */
+	flush_delayed_work(&uq_mgr->resume_work);
+
+	mutex_lock(&uq_mgr->userq_mutex);
+	spin_lock(&evf_mgr->ev_fence_lock);
+	ev_fence = evf_mgr->ev_fence;
+	spin_unlock(&evf_mgr->ev_fence_lock);
+	if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) {
+		mutex_unlock(&uq_mgr->userq_mutex);
+		/*
+		 * Looks like there was no pending resume work,
+		 * add one now to create a valid eviction fence
+		 */
+		schedule_delayed_work(&uq_mgr->resume_work, 0);
+		goto retry;
+	}
+}
+
 int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr,
 				   struct amdgpu_userq_obj *userq_obj,
 				   int size)
@@ -253,7 +278,14 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args)
 		return -EINVAL;
 	}
 
-	mutex_lock(&uq_mgr->userq_mutex);
+	/*
+	 * There could be a situation that we are creating a new queue while
+	 * the other queues under this UQ_mgr are suspended. So if there is any
+	 * resume work pending, wait for it to get done.
+	 *
+	 * This will also make sure we have a valid eviction fence ready to be used.
+	 */
+	amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
 
 	uq_funcs = adev->userq_funcs[args->in.ip_type];
 	if (!uq_funcs) {
@@ -308,14 +340,6 @@ amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq *args)
 
 unlock:
 	mutex_unlock(&uq_mgr->userq_mutex);
-	if (!r) {
-		/*
-		* There could be a situation that we are creating a new queue while
-		* the other queues under this UQ_mgr are suspended. So if there is any
-		* resume work pending, wait for it to get done.
-		*/
-		flush_delayed_work(&uq_mgr->resume_work);
-	}
 
 	return r;
 }
@@ -551,58 +575,44 @@ amdgpu_userqueue_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
 }
 
 void
-amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr)
+amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr,
+			 struct amdgpu_eviction_fence *ev_fence)
 {
 	int ret;
 	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
 	struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;
 
-
-	mutex_lock(&uq_mgr->userq_mutex);
-
-	/* Wait for any pending userqueue fence to signal */
+	/* Wait for any pending userqueue fence work to finish */
 	ret = amdgpu_userqueue_wait_for_signal(uq_mgr);
 	if (ret) {
 		DRM_ERROR("Not suspending userqueue, timeout waiting for work\n");
-		goto unlock;
+		return;
 	}
 
 	ret = amdgpu_userqueue_suspend_all(uq_mgr);
 	if (ret) {
 		DRM_ERROR("Failed to evict userqueue\n");
-		goto unlock;
+		return;
 	}
 
 	/* Signal current eviction fence */
-	amdgpu_eviction_fence_signal(evf_mgr);
+	amdgpu_eviction_fence_signal(evf_mgr, ev_fence);
 
 	if (evf_mgr->fd_closing) {
-		mutex_unlock(&uq_mgr->userq_mutex);
 		cancel_delayed_work(&uq_mgr->resume_work);
 		return;
 	}
 
 	/* Schedule a resume work */
 	schedule_delayed_work(&uq_mgr->resume_work, 0);
-
-unlock:
-	mutex_unlock(&uq_mgr->userq_mutex);
 }
 
 int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev)
 {
-	struct amdgpu_fpriv *fpriv;
-
 	mutex_init(&userq_mgr->userq_mutex);
 	idr_init_base(&userq_mgr->userq_idr, 1);
 	userq_mgr->adev = adev;
 
-	fpriv = uq_mgr_to_fpriv(userq_mgr);
-	if (!fpriv->evf_mgr.ev_fence) {
-		DRM_ERROR("Eviction fence not initialized yet\n");
-		return -EINVAL;
-	}
-
 	INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userqueue_resume_worker);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
index 2bf28f3454cb..e7e8d79b689d 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
@@ -24,6 +24,7 @@
 
 #ifndef AMDGPU_USERQUEUE_H_
 #define AMDGPU_USERQUEUE_H_
+#include "amdgpu_eviction_fence.h"
 
 #define AMDGPU_MAX_USERQ_COUNT 512
 
@@ -90,7 +91,11 @@ int amdgpu_userqueue_create_object(struct amdgpu_userq_mgr *uq_mgr,
 void amdgpu_userqueue_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
 				     struct amdgpu_userq_obj *userq_obj);
 
-void amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr);
+void amdgpu_userqueue_suspend(struct amdgpu_userq_mgr *uq_mgr,
+			      struct amdgpu_eviction_fence *ev_fence);
 
 int amdgpu_userqueue_active(struct amdgpu_userq_mgr *uq_mgr);
+
+void amdgpu_userqueue_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr,
+				      struct amdgpu_eviction_fence_mgr *evf_mgr);
 #endif
-- 
2.51.0