Let the get_vm_pde callback modify the PDE flags in addition to the address,
and also provide the level for which we need a PDE.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
        uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
                                     uint32_t flags);
        /* get the pde for a given mc addr */
-       u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr);
+       void (*get_vm_pde)(struct amdgpu_device *adev, int level,
+                          u64 *dst, u64 *flags);
        uint32_t (*get_invalidate_req)(unsigned int vm_id);
 };
 
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
-#define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr))
+#define amdgpu_gart_get_vm_pde(adev, level, dst, flags) (adev)->gart.gart_funcs->get_vm_pde((adev), (level), (dst), (flags))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
 
                                 struct amdgpu_vm_pt *parent,
                                 struct amdgpu_vm_pt *entry)
 {
-       struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL;
+       struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo;
        uint64_t pd_addr, shadow_addr = 0;
-       uint64_t pde, pt;
+       uint64_t pde, pt, flags;
+       unsigned level;
 
        /* Don't update huge pages here */
        if (entry->huge)
                        shadow_addr = amdgpu_bo_gpu_offset(shadow);
        }
 
+       for (level = 0, pbo = parent->base.bo->parent; pbo; ++level)
+               pbo = pbo->parent;
+
        pt = amdgpu_bo_gpu_offset(bo);
-       pt = amdgpu_gart_get_vm_pde(params->adev, pt);
+       flags = AMDGPU_PTE_VALID;
+       amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags);
        if (shadow) {
                pde = shadow_addr + (entry - parent->entries) * 8;
-               params->func(params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
+               params->func(params, pde, pt, 1, 0, flags);
        }
 
        pde = pd_addr + (entry - parent->entries) * 8;
-       params->func(params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
+       params->func(params, pde, pt, 1, 0, flags);
 }
 
 /*
            !(flags & AMDGPU_PTE_VALID)) {
 
                dst = amdgpu_bo_gpu_offset(entry->base.bo);
-               dst = amdgpu_gart_get_vm_pde(p->adev, dst);
                flags = AMDGPU_PTE_VALID;
        } else {
                /* Set the huge page flag to stop scanning at this PDE */
 
        if (!entry->huge && !(flags & AMDGPU_PDE_PTE))
                return;
-
        entry->huge = !!(flags & AMDGPU_PDE_PTE);
 
+       amdgpu_gart_get_vm_pde(p->adev, p->adev->vm_manager.num_level - 1,
+                              &dst, &flags);
+
        if (use_cpu_update) {
                /* In case a huge page is replaced with a system
                 * memory mapping, p->pages_addr != NULL and
 
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
        uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+       uint64_t flags = AMDGPU_PTE_VALID;
        unsigned eng = ring->vm_inv_eng;
 
-       pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-       pd_addr |= AMDGPU_PTE_VALID;
+       amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+       pd_addr |= flags;
 
        gfx_v9_0_write_data_to_reg(ring, usepfp, true,
                                   hub->ctx0_ptb_addr_lo32 + (2 * vm_id),
 
        return pte_flag;
 }
 
-static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level,
+                               uint64_t *addr, uint64_t *flags)
 {
-       BUG_ON(addr & 0xFFFFFF0000000FFFULL);
-       return addr;
+       BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
 }
 
 static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
 
        return pte_flag;
 }
 
-static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level,
+                               uint64_t *addr, uint64_t *flags)
 {
-       BUG_ON(addr & 0xFFFFFF0000000FFFULL);
-       return addr;
+       BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
 }
 
 /**
 
        return pte_flag;
 }
 
-static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr)
+static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level,
+                               uint64_t *addr, uint64_t *flags)
 {
-       BUG_ON(addr & 0xFFFFFF0000000FFFULL);
-       return addr;
+       BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
 }
 
 /**
 
        return pte_flag;
 }
 
-static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr)
+static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
+                               uint64_t *addr, uint64_t *flags)
 {
-       addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start;
-       BUG_ON(addr & 0xFFFF00000000003FULL);
-       return addr;
+       if (!(*flags & AMDGPU_PDE_PTE))
+               *addr = adev->vm_manager.vram_base_offset + *addr -
+                       adev->mc.vram_start;
+       BUG_ON(*addr & 0xFFFF00000000003FULL);
 }
 
 static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
 
 {
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
        uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+       uint64_t flags = AMDGPU_PTE_VALID;
        unsigned eng = ring->vm_inv_eng;
 
-       pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-       pd_addr |= AMDGPU_PTE_VALID;
+       amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+       pd_addr |= flags;
 
        amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
                          SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
 
 {
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
        uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-       uint32_t data0, data1, mask;
+       uint64_t flags = AMDGPU_PTE_VALID;
        unsigned eng = ring->vm_inv_eng;
+       uint32_t data0, data1, mask;
 
-       pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-       pd_addr |= AMDGPU_PTE_VALID;
+       amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+       pd_addr |= flags;
 
        data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
        data1 = upper_32_bits(pd_addr);
 {
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
        uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+       uint64_t flags = AMDGPU_PTE_VALID;
        unsigned eng = ring->vm_inv_eng;
 
-       pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-       pd_addr |= AMDGPU_PTE_VALID;
+       amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+       pd_addr |= flags;
 
        amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
        amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
 
 {
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
        uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+       uint64_t flags = AMDGPU_PTE_VALID;
        unsigned eng = ring->vm_inv_eng;
 
-       pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-       pd_addr |= AMDGPU_PTE_VALID;
+       amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+       pd_addr |= flags;
 
        amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
        amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
 
 {
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
        uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
-       uint32_t data0, data1, mask;
+       uint64_t flags = AMDGPU_PTE_VALID;
        unsigned eng = ring->vm_inv_eng;
+       uint32_t data0, data1, mask;
 
-       pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-       pd_addr |= AMDGPU_PTE_VALID;
+       amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+       pd_addr |= flags;
 
        data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
        data1 = upper_32_bits(pd_addr);
 {
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
        uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+       uint64_t flags = AMDGPU_PTE_VALID;
        unsigned eng = ring->vm_inv_eng;
 
-       pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
-       pd_addr |= AMDGPU_PTE_VALID;
+       amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
+       pd_addr |= flags;
 
        amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
        amdgpu_ring_write(ring,