www.infradead.org Git - linux.git/commitdiff
drm/amdgpu: Add enforce_isolation sysfs attribute
author Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Mon, 27 May 2024 02:00:47 +0000 (07:30 +0530)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 21 Aug 2024 02:06:52 +0000 (22:06 -0400)
This commit adds a new sysfs attribute 'enforce_isolation' to control
the 'enforce_isolation' setting per GPU. The attribute can be read and
written, and accepts values 0 (disabled) and 1 (enabled). On devices
with compute partitions (XCPs) it takes one space-separated value per
partition.

When 'enforce_isolation' is enabled, reserved VMIDs are allocated for
each ring. When it's disabled, the reserved VMIDs are freed.

The set function locks a mutex before changing the 'enforce_isolation'
flag and the VMIDs, and unlocks it afterwards. This ensures that these
operations are atomic and prevents race conditions and other concurrency
issues.

Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Suggested-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h

index 0dceeea235cf781ac0b10aeee7716dc087007c41..aa97bbefe934b2ec071844c9f377e9497f2f915c 100644 (file)
@@ -1164,6 +1164,8 @@ struct amdgpu_device {
        bool                            debug_enable_ras_aca;
 
        bool                            enforce_isolation[MAX_XCP];
+       /* Serializes updates to enforce_isolation[] and the reserved VMIDs */
+       struct mutex                    enforce_isolation_mutex;
 };
 
 static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
index e623af740aa353a3502c336a9d4d8cc33fe87697..2f1bc02309fe9ce97497d71999baa2118cbdb035 100644 (file)
@@ -4065,6 +4065,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        mutex_init(&adev->pm.stable_pstate_ctx_lock);
        mutex_init(&adev->benchmark_mutex);
        mutex_init(&adev->gfx.reset_sem_mutex);
+       /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
+       mutex_init(&adev->enforce_isolation_mutex);
 
        amdgpu_device_init_apu_flags(adev);
 
index 4ed69fcfe9c14f66411fcb4c0b2142ee0de2649c..2e35fc2577f9b7c0f4f7f7e7f379dfad21326786 100644 (file)
@@ -1391,6 +1391,88 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
        return sysfs_emit(buf, "%s\n", supported_partition);
 }
 
+/*
+ * sysfs show handler for the enforce_isolation attribute.
+ *
+ * Emits the enforce_isolation flag of every partition on one line, separated
+ * by single spaces and terminated by a newline. When the device has no XCP
+ * manager only enforce_isolation[0] is reported.
+ *
+ * Returns the number of bytes placed in @buf.
+ */
+static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
+                                               struct device_attribute *attr,
+                                               char *buf)
+{
+       struct drm_device *ddev = dev_get_drvdata(dev);
+       struct amdgpu_device *adev = drm_to_adev(ddev);
+       /* Same partition-count rule the store handler uses */
+       int num_partitions = adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1;
+       ssize_t size = 0;
+       int i;
+
+       for (i = 0; i < num_partitions; i++) {
+               /* Separator goes before every value except the first */
+               if (i)
+                       size += sysfs_emit_at(buf, size, " ");
+               size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
+       }
+
+       /*
+        * Emit the newline through sysfs_emit_at() instead of storing into
+        * buf directly, so it goes through the same bounds handling as the
+        * values above.
+        */
+       size += sysfs_emit_at(buf, size, "\n");
+
+       return size;
+}
+
+/*
+ * sysfs store handler for the enforce_isolation attribute.
+ *
+ * Parses space-separated integers from @buf, one per partition (a single
+ * value when the device has no XCP manager). The write is rejected with
+ * -EINVAL unless exactly that many values were parsed and each is 0 or 1.
+ *
+ * Under enforce_isolation_mutex, a reserved VMID is allocated for each
+ * partition switching from 0 to 1 and freed for each switching from 1 to 0,
+ * then the new flag is stored.
+ *
+ * Returns @count on success.
+ */
+static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
+                                               struct device_attribute *attr,
+                                               const char *buf, size_t count)
+{
+       struct drm_device *ddev = dev_get_drvdata(dev);
+       struct amdgpu_device *adev = drm_to_adev(ddev);
+       int expected = adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1;
+       long requested[MAX_XCP] = {0};
+       const char *cursor = buf;
+       int parsed = 0;
+       int i;
+
+       while (parsed < expected) {
+               if (sscanf(cursor, "%ld", &requested[parsed]) <= 0)
+                       break;
+               parsed++;
+
+               /* Step past the next space; no space means no more values */
+               cursor = strchr(cursor, ' ');
+               if (!cursor)
+                       break;
+               cursor++;
+       }
+
+       /* Exactly one value per partition is required */
+       if (parsed != expected)
+               return -EINVAL;
+
+       for (i = 0; i < parsed; i++) {
+               if (requested[i] < 0 || requested[i] > 1)
+                       return -EINVAL;
+       }
+
+       mutex_lock(&adev->enforce_isolation_mutex);
+
+       for (i = 0; i < parsed; i++) {
+               bool enable = requested[i];
+
+               /* Only touch the reserved VMID when the setting changes */
+               if (adev->enforce_isolation[i] != enable) {
+                       if (enable)
+                               amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
+                       else
+                               amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i));
+               }
+               adev->enforce_isolation[i] = enable;
+       }
+
+       mutex_unlock(&adev->enforce_isolation_mutex);
+
+       return count;
+}
+
+static DEVICE_ATTR(enforce_isolation, 0644, /* mode 0644: owner read/write, others read-only */
+                  amdgpu_gfx_get_enforce_isolation, /* show handler */
+                  amdgpu_gfx_set_enforce_isolation); /* store handler */
+
 static DEVICE_ATTR(current_compute_partition, 0644, /* mode 0644: owner read/write, others read-only */
                   amdgpu_gfx_get_current_compute_partition,
                   amdgpu_gfx_set_compute_partition);
@@ -1417,6 +1499,25 @@ void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
        device_remove_file(adev->dev, &dev_attr_available_compute_partition);
 }
 
+/*
+ * Create the enforce_isolation sysfs file on the device.
+ *
+ * Nothing is created when amdgpu_sriov_vf(adev) is true.
+ *
+ * Returns 0 on success or when creation is skipped, otherwise the error
+ * returned by device_create_file().
+ */
+int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
+{
+       if (amdgpu_sriov_vf(adev))
+               return 0;
+
+       return device_create_file(adev->dev, &dev_attr_enforce_isolation);
+}
+
+/*
+ * Remove the enforce_isolation sysfs file from the device.
+ *
+ * Mirrors the init path: nothing is removed when amdgpu_sriov_vf(adev) is
+ * true, since the file is not created in that case.
+ */
+void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
+{
+       if (amdgpu_sriov_vf(adev))
+               return;
+
+       device_remove_file(adev->dev, &dev_attr_enforce_isolation);
+}
+
 int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
                                      unsigned int cleaner_shader_size)
 {
index 09379ef7388fac39db3d08c958293b1790e1a6ab..f7b37c340e3665bab43195e957c4843f9907a5e5 100644 (file)
@@ -561,6 +561,8 @@ void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev);
 void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
                                    unsigned int cleaner_shader_size,
                                    const void *cleaner_shader_ptr);
+int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev);
+void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev);
 
 static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
 {