]> www.infradead.org Git - nvme.git/commitdiff
drm/amdgpu: Add EXT_COHERENT memory allocation flags
author: David Francis <David.Francis@amd.com>
Fri, 21 Jul 2023 15:14:24 +0000 (11:14 -0400)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 20 Sep 2023 20:24:06 +0000 (16:24 -0400)
These flags (for GEM and SVM allocations) allocate
memory that allows for system-scope atomic semantics.

On GFX943 these flags cause caches to be avoided on
non-local memory.

On all other ASICs they are identical in functionality to the
equivalent COHERENT flags.

Corresponding Thunk patch is at
https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/pull/88

Reviewed-by: David Yat Sin <David.YatSin@amd.com>
Signed-off-by: David Francis <David.Francis@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
include/uapi/drm/amdgpu_drm.h
include/uapi/linux/kfd_ioctl.h

index 57189b8bc21480bb582a6db9dfd8e0fe9a5ec403..c48ca5adad8a5323d164007a391abe80fc62c800 100644 (file)
@@ -1691,6 +1691,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
        if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
                alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
+       if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT)
+               alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT;
        if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
                alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
 
index 12210598e5b8e64c8f88d50eaec69ff3f3161bac..76b618735dc09a88b0b4c57f3c3b512d8db4b089 100644 (file)
@@ -331,6 +331,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
 
                flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC |
                                         AMDGPU_GEM_CREATE_COHERENT |
+                                        AMDGPU_GEM_CREATE_EXT_COHERENT |
                                         AMDGPU_GEM_CREATE_UNCACHED);
        }
 
index d3da13f4c80e707d8b0618686cda8964bcd06a10..e582073b57c837ab8b8a72d7065a8f3bac6f47bd 100644 (file)
@@ -635,6 +635,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
        }
 
        if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+                              AMDGPU_GEM_CREATE_EXT_COHERENT |
                               AMDGPU_GEM_CREATE_UNCACHED))
                *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
                         AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
index e1f47f9c18811e9f44fcd9b7bdb69018af5183ee..69f65e9c4f931509bc01fe1bc63b8c5dcf0443e1 100644 (file)
@@ -543,6 +543,7 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
        }
 
        if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+                              AMDGPU_GEM_CREATE_EXT_COHERENT |
                               AMDGPU_GEM_CREATE_UNCACHED))
                *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
                         AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
index 268ee533e7c1d7e05025ed8d60710ae7aae39dd4..2936a0fb7527a94b78252fa1d2bde910813dd063 100644 (file)
@@ -1187,7 +1187,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
 {
        struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
        bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
-       bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
+       bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT | AMDGPU_GEM_CREATE_EXT_COHERENT);
+       bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
        bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
        struct amdgpu_vm *vm = mapping->bo_va->base.vm;
        unsigned int mtype_local, mtype;
@@ -1257,6 +1258,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
                snoop = true;
                if (uncached) {
                        mtype = MTYPE_UC;
+               } else if (ext_coherent) {
+                       mtype = is_local ? MTYPE_CC : MTYPE_UC;
                } else if (adev->flags & AMD_IS_APU) {
                        mtype = is_local ? mtype_local : MTYPE_NC;
                } else {
index ae5660861133afc014a02cae5e1b0a8123a210e3..9a1952114e5b431bdfd350a08ffa78af727dc3a3 100644 (file)
@@ -1189,7 +1189,8 @@ svm_range_get_pte_flags(struct kfd_node *node,
        uint32_t mapping_flags = 0;
        uint64_t pte_flags;
        bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
-       bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
+       bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT);
+       bool ext_coherent = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENT;
        bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/
        unsigned int mtype_local;
 
@@ -1237,6 +1238,13 @@ svm_range_get_pte_flags(struct kfd_node *node,
                snoop = true;
                if (uncached) {
                        mapping_flags |= AMDGPU_VM_MTYPE_UC;
+               } else if (ext_coherent) {
+                       /* local HBM region close to partition */
+                       if (bo_node->adev == node->adev &&
+                           (!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id))
+                               mapping_flags |= AMDGPU_VM_MTYPE_CC;
+                       else
+                               mapping_flags |= AMDGPU_VM_MTYPE_UC;
                } else if (domain == SVM_RANGE_VRAM_DOMAIN) {
                        /* local HBM region close to partition */
                        if (bo_node->adev == node->adev &&
index ec437120b083fe81a0b0961c9e6307ce2d2137ec..984fc16577ca520d33c762c46f71b3449b75bcc9 100644 (file)
@@ -150,7 +150,7 @@ extern "C" {
  */
 #define AMDGPU_GEM_CREATE_DISCARDABLE          (1 << 12)
 /* Flag that BO is shared coherently between multiple devices or CPU threads.
- * May depend on GPU instructions to flush caches explicitly
+ * May depend on GPU instructions to flush caches to system scope explicitly.
  *
  * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
  * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
@@ -163,6 +163,14 @@ extern "C" {
  * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
  */
 #define AMDGPU_GEM_CREATE_UNCACHED             (1 << 14)
+/* Flag that BO should be coherent across devices when using device-level
+ * atomics. May depend on GPU instructions to flush caches to device scope
+ * explicitly, promoting them to system scope automatically.
+ *
+ * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
+ * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
+ */
+#define AMDGPU_GEM_CREATE_EXT_COHERENT         (1 << 15)
 
 struct drm_amdgpu_gem_create_in  {
        /** the requested memory size */
index eeb2fdcbdcb70843b9eee5065e985473b751726b..f0ed68974c54390f060328343c0789621835abb3 100644 (file)
@@ -405,6 +405,7 @@ struct kfd_ioctl_acquire_vm_args {
 #define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM  (1 << 27)
 #define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT       (1 << 26)
 #define KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED       (1 << 25)
+#define KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT   (1 << 24)
 
 /* Allocate memory for later SVM (shared virtual memory) mapping.
  *
@@ -659,6 +660,8 @@ enum kfd_mmio_remap {
 #define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY     0x00000020
 /* Keep GPU memory mapping always valid as if XNACK is disable */
 #define KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED   0x00000040
+/* Fine grained coherency between all devices using device-scope atomics */
+#define KFD_IOCTL_SVM_FLAG_EXT_COHERENT        0x00000080
 
 /**
  * kfd_ioctl_svm_op - SVM ioctl operations