www.infradead.org Git - users/hch/misc.git/commitdiff
drm/amdgpu: Create a debug option to disable ring reset
author: André Almeida <andrealmeid@igalia.com>
Wed, 26 Feb 2025 13:11:18 +0000 (10:11 -0300)
committer: Alex Deucher <alexander.deucher@amd.com>
Thu, 27 Feb 2025 21:50:04 +0000 (16:50 -0500)
Prior to the addition of ring reset, the debug option
`debug_disable_soft_recovery` could be used to force a full device
reset. Now that we have ring reset, create a debug option to disable
it in amdgpu, forcing the driver to take the full device reset path
again when both options are combined.

This option is useful for testing and debugging purposes when one wants
to test the full reset from userspace.

Signed-off-by: André Almeida <andrealmeid@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index 2b1990ea9639d429e5e9a8efaefc3436af053440..2a9a41f4e7485e2cccc4f182384f7deaeb476efa 100644 (file)
@@ -1192,6 +1192,7 @@ struct amdgpu_device {
        bool                            debug_use_vram_fw_buf;
        bool                            debug_enable_ras_aca;
        bool                            debug_exp_resets;
+       bool                            debug_disable_gpu_ring_reset;
 
        bool                            enforce_isolation[MAX_XCP];
        /* Added this mutex for cleaner shader isolation between GFX and compute processes */
index 1819166cb4cf425aea77df1f0f4032251485e01c..8c1aa9feda53d6181f13b1ace60c43d5925b5090 100644 (file)
@@ -137,6 +137,7 @@ enum AMDGPU_DEBUG_MASK {
        AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3),
        AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4),
        AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5),
+       AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
 };
 
 unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -2223,6 +2224,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
                pr_info("debug: enable experimental reset features\n");
                adev->debug_exp_resets = true;
        }
+
+       if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_RING_RESET) {
+               pr_info("debug: ring reset disabled\n");
+               adev->debug_disable_gpu_ring_reset = true;
+       }
 }
 
 static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
index c37bc683253a4c2a696ccb2c9035e175957b2a8b..5537c8bfd2270ab3b6ab91663e0c065b85b3b9e2 100644 (file)
@@ -131,8 +131,9 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
        }
 
        /* attempt a per ring reset */
-       if (amdgpu_gpu_recovery &&
-           ring->funcs->reset) {
+       if (unlikely(adev->debug_disable_gpu_ring_reset)) {
+               dev_err(adev->dev, "Ring reset disabled by debug mask\n");
+       } else if (amdgpu_gpu_recovery && ring->funcs->reset) {
                bool is_guilty;
 
                dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name);