www.infradead.org Git - users/hch/misc.git/commitdiff
drm/amdgpu: Set dpc status appropriately
author Lijo Lazar <lijo.lazar@amd.com>
Thu, 24 Jul 2025 07:28:10 +0000 (12:58 +0530)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 6 Aug 2025 18:18:35 +0000 (14:18 -0400)
Set the dpc status based on hardware state. Also, clear the status before
reinitialization after a successful reset.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Ce Sun <cesun102@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 7c7eecdd68d258589026f1616839e7317630fb3e..1055f5cda53c4be44b322211884a2888a0a65ff0 100644 (file)
@@ -5828,6 +5828,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
                amdgpu_set_init_level(tmp_adev, init_level);
                if (full_reset) {
                        /* post card */
+                       amdgpu_reset_set_dpc_status(tmp_adev, false);
                        amdgpu_ras_clear_err_state(tmp_adev);
                        r = amdgpu_device_asic_init(tmp_adev);
                        if (r) {
@@ -6883,11 +6884,6 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
 
        dev_info(adev->dev, "PCI error: detected callback!!\n");
 
-       if (!amdgpu_dpm_is_link_reset_supported(adev)) {
-               dev_warn(adev->dev, "No support for XGMI hive yet...\n");
-               return PCI_ERS_RESULT_DISCONNECT;
-       }
-
        adev->pci_channel_state = state;
 
        switch (state) {
@@ -6897,10 +6893,23 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
        case pci_channel_io_frozen:
                /* Fatal error, prepare for slot reset */
                dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
+               if (hive) {
+                       /* Hive devices should be able to support FW based
+                        * link reset on other devices, if not return.
+                        */
+                       if (!amdgpu_dpm_is_link_reset_supported(adev)) {
+                               dev_warn(adev->dev,
+                                        "No support for XGMI hive yet...\n");
+                               return PCI_ERS_RESULT_DISCONNECT;
+                       }
+                       /* Set dpc status only if device is part of hive
+                        * Non-hive devices should be able to recover after
+                        * link reset.
+                        */
+                       amdgpu_reset_set_dpc_status(adev, true);
 
-               if (hive)
                        mutex_lock(&hive->hive_lock);
-               amdgpu_reset_set_dpc_status(adev, true);
+               }
                memset(&reset_context, 0, sizeof(reset_context));
                INIT_LIST_HEAD(&device_list);
 
@@ -7063,7 +7072,6 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
        amdgpu_device_sched_resume(&device_list, NULL, NULL);
        amdgpu_device_gpu_resume(adev, &device_list, false);
        amdgpu_device_recovery_put_reset_lock(adev, &device_list);
-       amdgpu_reset_set_dpc_status(adev, false);
 
        if (hive) {
                mutex_unlock(&hive->hive_lock);