static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
 {
+       /*
+        * MTYPE/coherence selection is now done per-ASIC in the GMC
+        * get_vm_pte callbacks, driven by the BO's
+        * AMDGPU_GEM_CREATE_COHERENT/UNCACHED flags, so this helper only
+        * derives access-permission mapping flags from the KFD alloc_flags.
+        */
-       struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
-       bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
-       bool uncached = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
-       uint32_t mapping_flags;
-       uint64_t pte_flags;
-       bool snoop = false;
+       uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE |
+                                AMDGPU_VM_MTYPE_DEFAULT;
 
-       mapping_flags = AMDGPU_VM_PAGE_READABLE;
        if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
                mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
        if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
                mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
 
-       switch (adev->ip_versions[GC_HWIP][0]) {
-       case IP_VERSION(9, 4, 1):
-       case IP_VERSION(9, 4, 2):
-               if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
-                       if (bo_adev == adev) {
-                               if (uncached)
-                                       mapping_flags |= AMDGPU_VM_MTYPE_UC;
-                               else if (coherent)
-                                       mapping_flags |= AMDGPU_VM_MTYPE_CC;
-                               else
-                                       mapping_flags |= AMDGPU_VM_MTYPE_RW;
-                               if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) &&
-                                   adev->gmc.xgmi.connected_to_cpu)
-                                       snoop = true;
-                       } else {
-                               if (uncached || coherent)
-                                       mapping_flags |= AMDGPU_VM_MTYPE_UC;
-                               else
-                                       mapping_flags |= AMDGPU_VM_MTYPE_NC;
-                               if (amdgpu_xgmi_same_hive(adev, bo_adev))
-                                       snoop = true;
-                       }
-               } else {
-                       if (uncached || coherent)
-                               mapping_flags |= AMDGPU_VM_MTYPE_UC;
-                       else
-                               mapping_flags |= AMDGPU_VM_MTYPE_NC;
-                       snoop = true;
-               }
-               break;
-       default:
-               if (uncached || coherent)
-                       mapping_flags |= AMDGPU_VM_MTYPE_UC;
-               else
-                       mapping_flags |= AMDGPU_VM_MTYPE_NC;
-
-               if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
-                       snoop = true;
-       }
-
-       pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags);
-       pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
-
-       return pte_flags;
+       /* Translate the AMDGPU_VM_* mapping flags into hardware PTE bits */
+       return amdgpu_gem_va_map_flags(adev, mapping_flags);
 }
 
 /**
                }
        }
 
+       if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
+               alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
+       if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
+               alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
+
        *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
        if (!*mem) {
                ret = -ENOMEM;
 
        if (dma_buf->ops == &amdgpu_dmabuf_ops) {
                struct amdgpu_bo *other = gem_to_amdgpu_bo(dma_buf->priv);
 
-               flags |= other->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+               flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+                                        AMDGPU_GEM_CREATE_COHERENT |
+                                        AMDGPU_GEM_CREATE_UNCACHED);
        }
 
        ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE,
 
                                 struct amdgpu_bo_va_mapping *mapping,
                                 uint64_t *flags)
 {
+       struct amdgpu_bo *bo = mapping->bo_va->base.bo;
+
        *flags &= ~AMDGPU_PTE_EXECUTABLE;
        *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
 
                *flags |= AMDGPU_PTE_SYSTEM;
                *flags &= ~AMDGPU_PTE_VALID;
        }
+
+       if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+                              AMDGPU_GEM_CREATE_UNCACHED))
+               *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
+                        AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
 }
 
 static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
 
                                 struct amdgpu_bo_va_mapping *mapping,
                                 uint64_t *flags)
 {
+       struct amdgpu_bo *bo = mapping->bo_va->base.bo;
+
        *flags &= ~AMDGPU_PTE_EXECUTABLE;
        *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
 
                *flags |= AMDGPU_PTE_SYSTEM;
                *flags &= ~AMDGPU_PTE_VALID;
        }
+
+       if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+                              AMDGPU_GEM_CREATE_UNCACHED))
+               *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
+                        AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
 }
 
 static unsigned gmc_v11_0_get_vbios_fb_size(struct amdgpu_device *adev)
 
        }
 }
 
+/*
+ * gmc_v9_0_get_coherence_flags - choose MTYPE and SNOOPED PTE bits for a BO
+ * @adev: device that is mapping the BO into its page tables
+ * @bo: buffer object being mapped
+ * @mapping: the VA mapping (consulted for XGMI peer mappings via is_xgmi)
+ * @flags: PTE flags, updated in place
+ *
+ * Selects a GFX9 memory type based on the BO's placement (VRAM vs. other),
+ * whether the BO lives on this device or a peer, and the BO's
+ * AMDGPU_GEM_CREATE_COHERENT/UNCACHED flags. Arcturus/Aldebaran
+ * (GC 9.4.1/9.4.2) get the detailed treatment; other GFX9 parts take the
+ * default branch. Also sets AMDGPU_PTE_SNOOPED where CPU/peer snooping is
+ * required.
+ */
+static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
+                                        struct amdgpu_bo *bo,
+                                        struct amdgpu_bo_va_mapping *mapping,
+                                        uint64_t *flags)
+{
+       struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+       bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
+       bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
+       bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
+       unsigned int mtype;
+       bool snoop = false;
+
+       switch (adev->ip_versions[GC_HWIP][0]) {
+       case IP_VERSION(9, 4, 1):
+       case IP_VERSION(9, 4, 2):
+               if (is_vram) {
+                       if (bo_adev == adev) {
+                               if (uncached)
+                                       mtype = MTYPE_UC;
+                               else if (coherent)
+                                       mtype = MTYPE_CC;
+                               else
+                                       mtype = MTYPE_RW;
+                               /* FIXME: is this still needed? Or does
+                                * amdgpu_ttm_tt_pde_flags already handle this?
+                                */
+                               if (adev->ip_versions[GC_HWIP][0] ==
+                                       IP_VERSION(9, 4, 2) &&
+                                   adev->gmc.xgmi.connected_to_cpu)
+                                       snoop = true;
+                       } else {
+                               if (uncached || coherent)
+                                       mtype = MTYPE_UC;
+                               else
+                                       mtype = MTYPE_NC;
+                               if (mapping->bo_va->is_xgmi)
+                                       snoop = true;
+                       }
+               } else {
+                       if (uncached || coherent)
+                               mtype = MTYPE_UC;
+                       else
+                               mtype = MTYPE_NC;
+                       /* FIXME: is this still needed? Or does
+                        * amdgpu_ttm_tt_pde_flags already handle this?
+                        */
+                       snoop = true;
+               }
+               break;
+       default:
+               if (uncached || coherent)
+                       mtype = MTYPE_UC;
+               else
+                       mtype = MTYPE_NC;
+
+               /* FIXME: is this still needed? Or does
+                * amdgpu_ttm_tt_pde_flags already handle this?
+                */
+               if (!is_vram)
+                       snoop = true;
+       }
+
+       /* MTYPE_NC is the default; only rewrite the MTYPE field otherwise */
+       if (mtype != MTYPE_NC)
+               *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
+                        AMDGPU_PTE_MTYPE_VG10(mtype);
+       *flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+}
+
 static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
                                struct amdgpu_bo_va_mapping *mapping,
                                uint64_t *flags)
                *flags &= ~AMDGPU_PTE_VALID;
        }
 
-       if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
-            adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) &&
-           !(*flags & AMDGPU_PTE_SYSTEM) &&
-           mapping->bo_va->is_xgmi)
-               *flags |= AMDGPU_PTE_SNOOPED;
-
-       if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
-               *flags |= mapping->flags & AMDGPU_PTE_SNOOPED;
+       if (mapping->bo_va->base.bo)
+               gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.bo,
+                                            mapping, flags);
 }
 
 static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
 
  * content.
  */
 #define AMDGPU_GEM_CREATE_DISCARDABLE          (1 << 12)
+/* Flag that BO is shared coherently between multiple devices or CPU threads.
+ * May depend on GPU instructions to flush caches explicitly
+ *
+ * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
+ * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
+ */
+#define AMDGPU_GEM_CREATE_COHERENT             (1 << 13)
+/* Flag that BO should not be cached by GPU. Coherent without having to flush
+ * GPU caches explicitly
+ *
+ * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
+ * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
+ */
+#define AMDGPU_GEM_CREATE_UNCACHED             (1 << 14)
 
 struct drm_amdgpu_gem_create_in  {
        /** the requested memory size */