drm/amdkfd: Account pdd vram_usage for svm

author Philip Yang <Philip.Yang@amd.com>

Fri, 4 Oct 2024 20:28:07 +0000 (16:28 -0400)

committer Alex Deucher <alexander.deucher@amd.com>

Tue, 15 Oct 2024 15:22:19 +0000 (11:22 -0400)
author Philip Yang <Philip.Yang@amd.com>
Fri, 4 Oct 2024 20:28:07 +0000 (16:28 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 15 Oct 2024 15:22:19 +0000 (11:22 -0400)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

index a1f191a5984bf9b1eec30b9cce9a234ac1c7972b..065d8784145918f450bf42637c9df6569ff50394 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1148,7 +1148,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
  
                 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
                         size >>= 1;
-               WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+               atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
         }
  
         mutex_unlock(&p->mutex);
@@ -1219,7 +1219,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
                 kfd_process_device_remove_obj_handle(
                         pdd, GET_IDR_HANDLE(args->handle));
  
-       WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
+       atomic64_sub(size, &pdd->vram_usage);
  
  err_unlock:
  err_pdd:
@@ -2347,7 +2347,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
         } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
                 bo_bucket->restored_offset = offset;
                 /* Update the VRAM usage count */
-               WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+               atomic64_add(bo_bucket->size, &pdd->vram_usage);
         }
         return 0;
  }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h

index 6a5bf88cc23277d9a39349bc032f517b422d5f88..9e5ca0b93b2a256f511ca07820f89724bf888bf0 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -775,7 +775,7 @@ struct kfd_process_device {
         enum kfd_pdd_bound bound;
  
         /* VRAM usage */
-       uint64_t vram_usage;
+       atomic64_t vram_usage;
         struct attribute attr_vram;
         char vram_filename[MAX_SYSFS_FILENAME_LEN];
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c

index d665ecdcd12fcf42373dda0cfd6e6f1b495400bd..d4aa843aacfdd90634a87da4da69fc8045aeb3eb 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -332,7 +332,7 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
         } else if (strncmp(attr->name, "vram_", 5) == 0) {
                 struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
                                                               attr_vram);
-               return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+               return snprintf(buffer, PAGE_SIZE, "%llu\n", atomic64_read(&pdd->vram_usage));
         } else if (strncmp(attr->name, "sdma_", 5) == 0) {
                 struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
                                                               attr_sdma);
@@ -1625,7 +1625,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
         pdd->bound = PDD_UNBOUND;
         pdd->already_dequeued = false;
         pdd->runtime_inuse = false;
-       pdd->vram_usage = 0;
+       atomic64_set(&pdd->vram_usage, 0);
         pdd->sdma_past_activity_counter = 0;
         pdd->user_gpu_id = dev->id;
         atomic64_set(&pdd->evict_duration_counter, 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index 857ec6f23bba1b9c03856c97d31f70bbb68f6a73..3e2911895c740dd53fcd8d2f49d17a818250589b 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -405,6 +405,27 @@ static void svm_range_bo_release(struct kref *kref)
                 spin_lock(&svm_bo->list_lock);
         }
         spin_unlock(&svm_bo->list_lock);
+
+       if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+               struct kfd_process_device *pdd;
+               struct kfd_process *p;
+               struct mm_struct *mm;
+
+               mm = svm_bo->eviction_fence->mm;
+               /*
+                * A forked child process takes a reference on the svm_bo device pages,
+                * so the svm_bo may be released after the parent process is gone.
+                */
+               p = kfd_lookup_process_by_mm(mm);
+               if (p) {
+                       pdd = kfd_get_process_device_data(svm_bo->node, p);
+                       if (pdd)
+                               atomic64_sub(amdgpu_bo_size(svm_bo->bo), &pdd->vram_usage);
+                       kfd_unref_process(p);
+               }
+               mmput(mm);
+       }
+
         if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
                 /* We're not in the eviction worker. Signal the fence. */
                 dma_fence_signal(&svm_bo->eviction_fence->base);
@@ -532,6 +553,7 @@ int
  svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
                         bool clear)
  {
+       struct kfd_process_device *pdd;
         struct amdgpu_bo_param bp;
         struct svm_range_bo *svm_bo;
         struct amdgpu_bo_user *ubo;
@@ -623,6 +645,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
         list_add(&prange->svm_bo_list, &svm_bo->range_list);
         spin_unlock(&svm_bo->list_lock);
  
+       pdd = svm_range_get_pdd_by_node(prange, node);
+       if (pdd)
+               atomic64_add(amdgpu_bo_size(bo), &pdd->vram_usage);
+
         return 0;
  
  reserve_bo_failed:
author	Philip Yang <Philip.Yang@amd.com>
	Fri, 4 Oct 2024 20:28:07 +0000 (16:28 -0400)
committer	Alex Deucher <alexander.deucher@amd.com>
	Tue, 15 Oct 2024 15:22:19 +0000 (11:22 -0400)
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c		patch \| blob \| history
drivers/gpu/drm/amd/amdkfd/kfd_priv.h		patch \| blob \| history
drivers/gpu/drm/amd/amdkfd/kfd_process.c		patch \| blob \| history
drivers/gpu/drm/amd/amdkfd/kfd_svm.c		patch \| blob \| history