drm/amdgpu: Add init level for post reset reinit
author     Lijo Lazar <lijo.lazar@amd.com>
           Fri, 15 Nov 2024 05:38:02 +0000 (11:08 +0530)
committer  Alex Deucher <alexander.deucher@amd.com>
           Wed, 20 Nov 2024 15:03:05 +0000 (10:03 -0500)
When a device needs to be reset before initialization, not all IPs are
required to be initialized before the reset. In such cases, the driver
needs to identify whether an IP/feature is being initialized for the
first time or reinitialized after a reset.

Add a RESET_RECOVERY init level to identify the post-reset
reinitialization phase. This only provides device-level identification;
IPs/features may also choose to track their state independently.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Acked-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
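
The intended usage pattern, roughly: a reset handler raises the level before
restoring IPs and drops it back to default once reinitialization succeeds.
The sketch below is illustrative only and assumes a hypothetical
example_restore_ip() helper standing in for the ASIC-specific restore path;
the real changes are in the diffs that follow.

        /*
         * Sketch only (not part of this patch): example_restore_ip() is a
         * hypothetical stand-in for the ASIC-specific mode2 restore path.
         */
        static int example_mode2_restore_hwcontext(struct amdgpu_device *adev)
        {
                int r;

                /* Mark the device as reinitializing after a reset */
                amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY);

                r = example_restore_ip(adev);
                if (r)
                        return r;

                /* Reinit is complete, drop back to the default init level */
                amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT);
                amdgpu_irq_gpu_reset_resume_helper(adev);

                return amdgpu_ib_ring_tests(adev);
        }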
drivers/gpu/drm/amd/amdgpu/aldebaran.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c

drivers/gpu/drm/amd/amdgpu/aldebaran.c
index 3a588fecb0c58362e149a556ea22ae95db3bc03d..f44de9d4b6a17f212bb9911bb3f33df69a349315 100644
@@ -330,6 +330,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
        }
 
        list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
+               amdgpu_set_init_level(tmp_adev,
+                               AMDGPU_INIT_LEVEL_RESET_RECOVERY);
                dev_info(tmp_adev->dev,
                         "GPU reset succeeded, trying to resume\n");
                r = aldebaran_mode2_restore_ip(tmp_adev);
@@ -375,6 +377,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
                                                        tmp_adev);
 
                if (!r) {
+                       amdgpu_set_init_level(tmp_adev,
+                                             AMDGPU_INIT_LEVEL_DEFAULT);
                        amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
 
                        r = amdgpu_ib_ring_tests(tmp_adev);
drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d8bc6da5001614e3add6999f8b53df2df42be6a3..4653a8d2823a6d6f645c620ba56caf0769e0d2b1 100644
@@ -839,6 +839,7 @@ struct amdgpu_mqd {
 enum amdgpu_init_lvl_id {
        AMDGPU_INIT_LEVEL_DEFAULT,
        AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
+       AMDGPU_INIT_LEVEL_RESET_RECOVERY,
 };
 
 struct amdgpu_init_level {
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0171d240fcb05e37d9a8a037b4d94e9103a57a36..5ef95161e632c9874661234df5ba7a6340407d68 100644
@@ -156,6 +156,11 @@ struct amdgpu_init_level amdgpu_init_default = {
        .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
 };
 
+struct amdgpu_init_level amdgpu_init_recovery = {
+       .level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
+       .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
+};
+
 /*
  * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
  * is used for cases like reset on initialization where the entire hive needs to
@@ -182,6 +187,9 @@ void amdgpu_set_init_level(struct amdgpu_device *adev,
        case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
                adev->init_lvl = &amdgpu_init_minimal_xgmi;
                break;
+       case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
+               adev->init_lvl = &amdgpu_init_recovery;
+               break;
        case AMDGPU_INIT_LEVEL_DEFAULT:
                fallthrough;
        default:
@@ -5419,7 +5427,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
        struct list_head *device_list_handle;
        bool full_reset, vram_lost = false;
        struct amdgpu_device *tmp_adev;
-       int r;
+       int r, init_level;
 
        device_list_handle = reset_context->reset_device_list;
 
@@ -5428,10 +5436,18 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
 
        full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
 
+       /*
+        * If it's a reset on init, use the default init level, otherwise keep
+        * the level as recovery level.
+        */
+       if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
+               init_level = AMDGPU_INIT_LEVEL_DEFAULT;
+       else
+               init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
+
        r = 0;
        list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
-               /* After reset, it's default init level */
-               amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
+               amdgpu_set_init_level(tmp_adev, init_level);
                if (full_reset) {
                        /* post card */
                        amdgpu_ras_set_fed(tmp_adev, false);
@@ -5518,6 +5534,9 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
 
 out:
                if (!r) {
+                       /* IP init is complete now, set level as default */
+                       amdgpu_set_init_level(tmp_adev,
+                                             AMDGPU_INIT_LEVEL_DEFAULT);
                        amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
                        r = amdgpu_ib_ring_tests(tmp_adev);
                        if (r) {
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 24dae7cdbe9547ed76b7bcd69ab2a87072235dd9..a0acb65f4b40afbcd9d1a305b4893aa66fadfde7 100644
@@ -342,3 +342,8 @@ void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
                strscpy(buf, "unknown", len);
        }
 }
+
+bool amdgpu_reset_in_recovery(struct amdgpu_device *adev)
+{
+       return (adev->init_lvl->level == AMDGPU_INIT_LEVEL_RESET_RECOVERY);
+}
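
The new helper gives IP code a single device-level check. A hypothetical
consumer (not part of this patch) might look like the sketch below; all
example_* names are placeholders, only amdgpu_reset_in_recovery() comes from
this change.

        /* Sketch only; example_* names are illustrative, not existing amdgpu code. */
        static int example_ip_hw_init(struct amdgpu_device *adev)
        {
                int r;

                if (!amdgpu_reset_in_recovery(adev)) {
                        /* First-time-only setup, skipped when recovering from a reset */
                        r = example_alloc_persistent_state(adev);
                        if (r)
                                return r;
                }

                /* Programming that must run on every init, including post-reset */
                return example_program_hw(adev);
        }

As the commit message notes, this is only a device-level hint; IP blocks that
need finer-grained tracking can still keep their own state.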
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index f8628bc898df45183c6b6bf3155a3c1c82f32d92..4d9b9701139be520c2cfcc94bf6b1a182130959a 100644
@@ -158,4 +158,6 @@ extern struct amdgpu_reset_handler xgmi_reset_on_init_handler;
 int amdgpu_reset_do_xgmi_reset_on_init(
        struct amdgpu_reset_context *reset_context);
 
+bool amdgpu_reset_in_recovery(struct amdgpu_device *adev);
+
 #endif
drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
index 9b01e074af471e14790416eb56dce53edace03c1..2594467bdd8735dbea28648bfe3065614f757f65 100644
@@ -220,6 +220,7 @@ sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
        int r;
        struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
 
+       amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY);
        dev_info(tmp_adev->dev,
                        "GPU reset succeeded, trying to resume\n");
        r = sienna_cichlid_mode2_restore_ip(tmp_adev);
@@ -237,6 +238,7 @@ sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
 
        amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
 
+       amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
        r = amdgpu_ib_ring_tests(tmp_adev);
        if (r) {
                dev_err(tmp_adev->dev,
drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
index e70ebad3f9fac499406deffa23bd66761cd7d001..70569ea906bca7652b4047496c0414781ce67eb3 100644
@@ -221,6 +221,7 @@ smu_v13_0_10_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
        int r;
        struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
 
+       amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_RESET_RECOVERY);
        dev_info(tmp_adev->dev,
                        "GPU reset succeeded, trying to resume\n");
        r = smu_v13_0_10_mode2_restore_ip(tmp_adev);
@@ -234,6 +235,7 @@ smu_v13_0_10_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
 
        amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
 
+       amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT);
        r = amdgpu_ib_ring_tests(tmp_adev);
        if (r) {
                dev_err(tmp_adev->dev,