amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
        atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
        atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
-       amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
+       amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+       amdgpu_ib.o amdgpu_pll.o \
        amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
        amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
        amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
 
        kfree(tlb_cb);
 }
 
+/**
+ * amdgpu_vm_tlb_flush - prepare TLB flush
+ *
+ * @params: parameters for update
+ * @fence: input fence to sync the TLB flush with, replaced by the TLB flush
+ *         fence for compute VMs
+ * @tlb_cb: the callback structure
+ *
+ * Increments the tlb sequence to make sure that future CS execute a VM flush.
+ * On locked updates for compute VMs this also creates a TLB flush fence and
+ * attaches it to the root PD so no page table is freed before the flush runs.
+ */
+static void
+amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
+                   struct dma_fence **fence,
+                   struct amdgpu_vm_tlb_seq_struct *tlb_cb)
+{
+       struct amdgpu_vm *vm = params->vm;
+
+       tlb_cb->vm = vm;
+       if (!fence || !*fence) {
+               /* No fence to wait for, bump the sequence and free tlb_cb */
+               amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
+               return;
+       }
+
+       if (!dma_fence_add_callback(*fence, &tlb_cb->cb,
+                                   amdgpu_vm_tlb_seq_cb)) {
+               dma_fence_put(vm->last_tlb_flush);
+               vm->last_tlb_flush = dma_fence_get(*fence);
+       } else {
+               amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
+       }
+
+       /* Prepare a TLB flush fence to be attached to PTs */
+       if (!params->unlocked && vm->is_compute_context) {
+               amdgpu_vm_tlb_fence_create(params->adev, vm, fence);
+
+               /* Makes sure no PD/PT is freed before the flush */
+               dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
+                                  DMA_RESV_USAGE_BOOKKEEP);
+       }
+}
+
 /**
  * amdgpu_vm_update_range - update a range in the vm page table
  *
                           struct ttm_resource *res, dma_addr_t *pages_addr,
                           struct dma_fence **fence)
 {
-       struct amdgpu_vm_update_params params;
        struct amdgpu_vm_tlb_seq_struct *tlb_cb;
+       struct amdgpu_vm_update_params params;
        struct amdgpu_res_cursor cursor;
        enum amdgpu_sync_mode sync_mode;
        int r, idx;
 
        tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
        if (!tlb_cb) {
-               r = -ENOMEM;
-               goto error_unlock;
+               drm_dev_exit(idx);
+               return -ENOMEM;
        }
 
        /* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
        params.immediate = immediate;
        params.pages_addr = pages_addr;
        params.unlocked = unlocked;
+       params.needs_flush = flush_tlb;
        params.allow_override = allow_override;
 
        /* Implicitly sync to command submissions in the same VM before
        }
 
        r = vm->update_funcs->commit(&params, fence);
+       if (r)
+               goto error_free;
 
-       if (flush_tlb || params.table_freed) {
-               tlb_cb->vm = vm;
-               if (fence && *fence &&
-                   !dma_fence_add_callback(*fence, &tlb_cb->cb,
-                                          amdgpu_vm_tlb_seq_cb)) {
-                       dma_fence_put(vm->last_tlb_flush);
-                       vm->last_tlb_flush = dma_fence_get(*fence);
-               } else {
-                       amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
-               }
+       if (params.needs_flush) {
+               amdgpu_vm_tlb_flush(&params, fence, tlb_cb);
                tlb_cb = NULL;
        }
 
 error_free:
        kfree(tlb_cb);
-
-error_unlock:
        amdgpu_vm_eviction_unlock(vm);
        drm_dev_exit(idx);
        return r;
 
        mutex_init(&vm->eviction_lock);
        vm->evicting = false;
+       vm->tlb_fence_context = dma_fence_context_alloc(1);
 
        r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
                                false, &root, xcp_id);
 
        unsigned int num_dw_left;
 
        /**
-        * @table_freed: return true if page table is freed when updating
+        * @needs_flush: true whenever we need to invalidate the TLB
         */
-       bool table_freed;
+       bool needs_flush;
 
        /**
         * @allow_override: true for memory that is not uncached: allows MTYPE
        atomic64_t              tlb_seq;
        struct dma_fence        *last_tlb_flush;
        atomic64_t              kfd_last_flushed_seq;
+       uint64_t                tlb_fence_context;
 
        /* How many times we had to re-generate the page tables */
        uint64_t                generation;
                                  uint64_t addr,
                                  uint32_t status,
                                  unsigned int vmhub);
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
+                                struct amdgpu_vm *vm,
+                                struct dma_fence **fence);
 
 #endif
 
 static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p,
                                struct dma_fence **fence)
 {
+       /* Bump the TLB sequence so future CS know they need a VM flush */
+       if (p->needs_flush)
+               atomic64_inc(&p->vm->tlb_seq);
+
        /* Flush HDP */
        mb();
        amdgpu_device_flush_hdp(p->adev, NULL);
        return 0;
 
                        while (cursor.pfn < frag_start) {
                                /* Make sure previous mapping is freed */
                                if (cursor.entry->bo) {
-                                       params->table_freed = true;
+                                       params->needs_flush = true;
                                        amdgpu_vm_pt_free_dfs(adev, params->vm,
                                                              &cursor,
                                                              params->unlocked);
 
 
        WARN_ON(ib->length_dw == 0);
        amdgpu_ring_pad_ib(ring, ib);
+
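+       /* Bump the TLB sequence so future CS know they need a VM flush */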
+       if (p->needs_flush)
+               atomic64_inc(&p->vm->tlb_seq);
+
        WARN_ON(ib->length_dw > p->num_dw_left);
        f = amdgpu_job_submit(p->job);
 
 
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_gmc.h"
+
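+/*
+ * Fence which is signalled from a worker once the TLB flush for the
+ * VM's PASID has been executed.
+ */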
+struct amdgpu_tlb_fence {
+       struct dma_fence        base;
+       struct amdgpu_device    *adev;
+       struct dma_fence        *dependency;
+       struct work_struct      work;
+       spinlock_t              lock;
+       uint16_t                pasid;
+};
+
+static const char *amdgpu_tlb_fence_get_driver_name(struct dma_fence *fence)
+{
+       return "amdgpu tlb fence";
+}
+
+static const char *amdgpu_tlb_fence_get_timeline_name(struct dma_fence *f)
+{
+       return "amdgpu tlb timeline";
+}
+
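+/* Worker: wait for the PT update fence, flush the TLB and signal the fence */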
+static void amdgpu_tlb_fence_work(struct work_struct *work)
+{
+       struct amdgpu_tlb_fence *f = container_of(work, typeof(*f), work);
+       int r;
+
+       if (f->dependency) {
+               dma_fence_wait(f->dependency, false);
+               dma_fence_put(f->dependency);
+               f->dependency = NULL;
+       }
+
+       r = amdgpu_gmc_flush_gpu_tlb_pasid(f->adev, f->pasid, 2, true, 0);
+       if (r) {
+               dev_err(f->adev->dev, "TLB flush failed for PASID %d.\n",
+                       f->pasid);
+               dma_fence_set_error(&f->base, r);
+       }
+
+       dma_fence_signal(&f->base);
+       dma_fence_put(&f->base);
+}
+
+static const struct dma_fence_ops amdgpu_tlb_fence_ops = {
+       .use_64bit_seqno = true,
+       .get_driver_name = amdgpu_tlb_fence_get_driver_name,
+       .get_timeline_name = amdgpu_tlb_fence_get_timeline_name
+};
+
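+/**
+ * amdgpu_vm_tlb_fence_create - add a TLB flush fence to the PT update
+ *
+ * @adev: amdgpu device the TLB is flushed on
+ * @vm: the VM the flush belongs to
+ * @fence: page table update fence on input, TLB flush fence on output
+ *
+ * Queues a worker which waits for the page table update, flushes the TLB for
+ * the VM's PASID and then signals the returned fence. If the fence can't be
+ * allocated the flush is executed synchronously instead.
+ */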
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+                               struct dma_fence **fence)
+{
+       struct amdgpu_tlb_fence *f;
+
+       f = kmalloc(sizeof(*f), GFP_KERNEL);
+       if (!f) {
+               /*
+                * We can't fail since the PDEs and PTEs are already updated, so
+                * just block for the dependency and execute the TLB flush
+                */
+               if (*fence)
+                       dma_fence_wait(*fence, false);
+
+               amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, 2, true, 0);
+               *fence = dma_fence_get_stub();
+               return;
+       }
+
+       f->adev = adev;
+       f->dependency = *fence;
+       f->pasid = vm->pasid;
+       INIT_WORK(&f->work, amdgpu_tlb_fence_work);
+       spin_lock_init(&f->lock);
+
+       dma_fence_init(&f->base, &amdgpu_tlb_fence_ops, &f->lock,
+                      vm->tlb_fence_context, atomic64_read(&vm->tlb_seq));
+
+       /* TODO: We probably need a separate wq here */
+       dma_fence_get(&f->base);
+       schedule_work(&f->work);
+
+       *fence = &f->base;
+}