#include <drm/ttm/ttm_execbuf_util.h>
 #include "amdgpu_sync.h"
 #include "amdgpu_vm.h"
+#include "amdgpu_xcp.h"
 
 extern uint64_t amdgpu_amdkfd_total_mem_size;
 
 
 struct amdgpu_kfd_dev {
        struct kfd_dev *dev;
-       int64_t vram_used;
-       uint64_t vram_used_aligned;
+       int64_t vram_used[MAX_XCP];
+       uint64_t vram_used_aligned[MAX_XCP];
        bool init_complete;
        struct work_struct reset_work;
 
 void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
                                        void *drm_priv);
 uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
-size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
+                                       uint8_t xcp_id);
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                struct amdgpu_device *adev, uint64_t va, uint64_t size,
                void *drm_priv, struct kgd_mem **mem,
 int amdgpu_amdkfd_criu_resume(void *p);
 bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
 int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
-               uint64_t size, u32 alloc_flag);
+               uint64_t size, u32 alloc_flag, int8_t xcp_id);
 void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
-               uint64_t size, u32 alloc_flag);
+               uint64_t size, u32 alloc_flag, int8_t xcp_id);
 
 #define KFD_XCP_MEM_ID(adev, xcp_id) \
                ((adev)->xcp_mgr && (xcp_id) >= 0 ?\
 
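- * Return: returns -ENOMEM in case of error, ZERO otherwise
+ * Return: -EINVAL when a required xcp_id is negative, -ENOMEM in case of
+ * any other error, ZERO otherwise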
  */
 int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
-               uint64_t size, u32 alloc_flag)
+               uint64_t size, u32 alloc_flag, int8_t xcp_id)
 {
        uint64_t reserved_for_pt =
                ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
        size_t system_mem_needed, ttm_mem_needed, vram_needed;
        int ret = 0;
+       uint64_t vram_size = 0;
 
        system_mem_needed = 0;
        ttm_mem_needed = 0;
                 * 2M BO chunk.
                 */
                vram_needed = size;
+               /*
+                * For GFX 9.4.3, get the VRAM size from XCP structs
+                */
+               if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+                       return -EINVAL;
+
+               vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
+               if (adev->gmc.is_app_apu) {
+                       system_mem_needed = size;
+                       ttm_mem_needed = size;
+               }
        } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
                system_mem_needed = size;
        } else if (!(alloc_flag &
             kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
            (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
             kfd_mem_limit.max_ttm_mem_limit) ||
-           (adev && adev->kfd.vram_used + vram_needed >
-            adev->gmc.real_vram_size - reserved_for_pt)) {
+           (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
+            vram_size - reserved_for_pt)) {
                ret = -ENOMEM;
                goto release;
        }
         */
        WARN_ONCE(vram_needed && !adev,
                  "adev reference can't be null when vram is used");
-       if (adev) {
-               adev->kfd.vram_used += vram_needed;
-               adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
+       if (adev && xcp_id >= 0) {
+               adev->kfd.vram_used[xcp_id] += vram_needed;
+               adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ?
+                               vram_needed :
+                               ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
        }
        kfd_mem_limit.system_mem_used += system_mem_needed;
        kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
 }
 
 void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
-               uint64_t size, u32 alloc_flag)
+               uint64_t size, u32 alloc_flag, int8_t xcp_id)
 {
        spin_lock(&kfd_mem_limit.mem_limit_lock);
 
        } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
                WARN_ONCE(!adev,
                          "adev reference can't be null when alloc mem flags vram is set");
+               if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id))
+                       goto release;
+
                if (adev) {
-                       adev->kfd.vram_used -= size;
-                       adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+                       adev->kfd.vram_used[xcp_id] -= size;
+                       if (adev->gmc.is_app_apu) {
+                               adev->kfd.vram_used_aligned[xcp_id] -= size;
+                               kfd_mem_limit.system_mem_used -= size;
+                               kfd_mem_limit.ttm_mem_used -= size;
+                       } else {
+                               adev->kfd.vram_used_aligned[xcp_id] -=
+                                       ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+                       }
                }
        } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
                kfd_mem_limit.system_mem_used -= size;
                pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
                goto release;
        }
-       WARN_ONCE(adev && adev->kfd.vram_used < 0,
-                 "KFD VRAM memory accounting unbalanced");
+       WARN_ONCE(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] < 0,
+                 "KFD VRAM memory accounting unbalanced for xcp: %d", xcp_id);
        WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
                  "KFD TTM memory accounting unbalanced");
        WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
        u32 alloc_flags = bo->kfd_bo->alloc_flags;
        u64 size = amdgpu_bo_size(bo);
 
-       amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags);
+       amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags,
+                                         bo->xcp_id);
 
        kfree(bo->kfd_bo);
 }
        return ret;
 }
 
-size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
+/**
+ * amdgpu_amdkfd_get_available_memory - estimate memory KFD can still allocate
+ * @adev: device whose KFD VRAM accounting is consulted
+ * @xcp_id: index into the per-partition accounting arrays; it is not
+ *          range-checked here, so the caller must pass a valid partition ID
+ *
+ * Subtracts the aligned KFD usage of partition @xcp_id, the pinned VRAM size
+ * and the page-table reservation estimate from the partition's memory size,
+ * all sampled under kfd_mem_limit.mem_limit_lock. When adev->gmc.is_app_apu
+ * is set, the result is further capped by the remaining system and TTM
+ * limits and aligned down to PAGE_SIZE; otherwise it is aligned down to
+ * VRAM_AVAILABLITY_ALIGN.
+ *
+ * Return: the available amount; never negative, 0 when nothing is left.
+ */
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
+                                         uint8_t xcp_id)
 {
        uint64_t reserved_for_pt =
                ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
        ssize_t available;
+       uint64_t system_mem_available, ttm_mem_available;
+       int64_t vram_available;
 
        spin_lock(&kfd_mem_limit.mem_limit_lock);
-       available = adev->gmc.real_vram_size
-               - adev->kfd.vram_used_aligned
+       vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+               - adev->kfd.vram_used_aligned[xcp_id]
                - atomic64_read(&adev->vram_pin_size)
                - reserved_for_pt;
+
+       /*
+        * Usage plus reservations can exceed the partition size. Clamp here,
+        * while the value is still signed: an unsigned wrap would look like
+        * a huge amount of free VRAM and let min3() below report the system
+        * or TTM headroom even though no VRAM is left.
+        */
+       if (vram_available < 0)
+               vram_available = 0;
+
+       if (adev->gmc.is_app_apu) {
+               system_mem_available = no_system_mem_limit ?
+                                       kfd_mem_limit.max_system_mem_limit :
+                                       kfd_mem_limit.max_system_mem_limit -
+                                       kfd_mem_limit.system_mem_used;
+
+               ttm_mem_available = kfd_mem_limit.max_ttm_mem_limit -
+                               kfd_mem_limit.ttm_mem_used;
+
+               /* vram_available is non-negative here, so the cast is lossless */
+               available = min3(system_mem_available, ttm_mem_available,
+                                (uint64_t)vram_available);
+               available = ALIGN_DOWN(available, PAGE_SIZE);
+       } else {
+               available = ALIGN_DOWN(vram_available, VRAM_AVAILABLITY_ALIGN);
+       }
+
        spin_unlock(&kfd_mem_limit.mem_limit_lock);
 
        if (available < 0)
                available = 0;
 
-       return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN);
+       return available;
 }
 
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
        amdgpu_sync_create(&(*mem)->sync);
 
-       ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
+       ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags,
+                                             xcp_id);
        if (ret) {
                pr_debug("Insufficient memory\n");
                goto err_reserve_limit;
        /* Don't unreserve system mem limit twice */
        goto err_reserve_limit;
 err_bo_create:
-       amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
+       amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
 err_reserve_limit:
        mutex_destroy(&(*mem)->lock);
        if (gobj)
 
        if (update_mem_usage && !p->xnack_enabled) {
                pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
                amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
-                                       KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+                                       KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
        }
        mutex_destroy(&prange->lock);
        mutex_destroy(&prange->migrate_mutex);
        p = container_of(svms, struct kfd_process, svms);
        if (!p->xnack_enabled && update_mem_usage &&
            amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
-                                           KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) {
+                                   KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) {
                pr_info("SVM mapping failed, exceeds resident system memory limit\n");
                kfree(prange);
                return NULL;
                        size = (pchild->last - pchild->start + 1) << PAGE_SHIFT;
                        if (xnack_enabled) {
                                amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
-                                               KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+                                       KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
                        } else {
                                r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
-                                               KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+                                       KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
                                if (r)
                                        goto out_unlock;
                                reserved_size += size;
                size = (prange->last - prange->start + 1) << PAGE_SHIFT;
                if (xnack_enabled) {
                        amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
-                                               KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+                                       KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
                } else {
                        r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
-                                               KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+                                       KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
                        if (r)
                                goto out_unlock;
                        reserved_size += size;
 
        if (r)
                amdgpu_amdkfd_unreserve_mem_limit(NULL, reserved_size,
-                                               KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+                                       KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
        else
                /* Change xnack mode must be inside svms lock, to avoid race with
                 * svm_range_deferred_list_work unreserve memory in parallel.