struct ras_ih_if ih_info = {
                .cb = amdgpu_gfx_process_ras_data_cb,
        };
+       struct ras_query_if info = { 0 };
 
        if (!adev->gfx.ras_if) {
                adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
                if (!adev->gfx.ras_if)
                        return -ENOMEM;
                strcpy(adev->gfx.ras_if->name, "gfx");
        }
        fs_info.head = ih_info.head = *adev->gfx.ras_if;
-
        r = amdgpu_ras_late_init(adev, adev->gfx.ras_if,
                                 &fs_info, &ih_info);
        if (r)
                goto free;
 
        if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
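+               /*
+                * On A+A parts (XGMI link to the CPU) the banked-up GFX error
+                * count is consumed with a query; on other parts any stale
+                * error status is cleared before the ECC interrupt is enabled.
+                */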
+               if (adev->gmc.xgmi.connected_to_cpu) {
+                       info.head = *adev->gfx.ras_if;
+                       amdgpu_ras_query_error_status(adev, &info);
+               } else {
+                       amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
+               }
+
                r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
                if (r)
                        goto late_fini;
 
        void (*reset_ras_error_count) (struct amdgpu_device *adev);
        void (*init_spm_golden)(struct amdgpu_device *adev);
        void (*query_ras_error_status) (struct amdgpu_device *adev);
+       void (*reset_ras_error_status) (struct amdgpu_device *adev);
        void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
        void (*enable_watchdog_timer)(struct amdgpu_device *adev);
-       void (*query_sq_timeout_status)(struct amdgpu_device *adev);
 };
 
 struct sq_work {
 
        ssize_t s;
        char val[128];
 
-       if (amdgpu_ras_error_query(obj->adev, &info))
+       if (amdgpu_ras_query_error_status(obj->adev, &info))
                return -EINVAL;
 
        s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n",
                return snprintf(buf, PAGE_SIZE,
                                "Query currently inaccessible\n");
 
-       if (amdgpu_ras_error_query(obj->adev, &info))
+       if (amdgpu_ras_query_error_status(obj->adev, &info))
                return -EINVAL;
 
        return snprintf(buf, PAGE_SIZE, "%s: %lu\n%s: %lu\n",
 /* feature ctl end */
 
 /* query/inject/cure begin */
-int amdgpu_ras_error_query(struct amdgpu_device *adev,
-               struct ras_query_if *info)
+int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
+               struct ras_query_if *info)
 {
        struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
        struct ras_err_data err_data = {0, 0, 0, NULL};
        case AMDGPU_RAS_BLOCK__GFX:
                if (adev->gfx.funcs->query_ras_error_count)
                        adev->gfx.funcs->query_ras_error_count(adev, &err_data);
+
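+               /* also dump the detailed status registers, not just the counts */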
+               if (adev->gfx.funcs->query_ras_error_status)
+                       adev->gfx.funcs->query_ras_error_status(adev);
                break;
        case AMDGPU_RAS_BLOCK__MMHUB:
                if (adev->mmhub.funcs->query_ras_error_count)
                        adev->mmhub.funcs->query_ras_error_count(adev, &err_data);
+
+               if (adev->mmhub.funcs->query_ras_error_status)
+                       adev->mmhub.funcs->query_ras_error_status(adev);
                break;
        case AMDGPU_RAS_BLOCK__PCIE_BIF:
                if (adev->nbio.funcs->query_ras_error_count)
        return 0;
 }
 
+int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
+               enum amdgpu_ras_block block)
+{
+       if (!amdgpu_ras_is_supported(adev, block))
+               return -EINVAL;
+
+       switch (block) {
+       case AMDGPU_RAS_BLOCK__GFX:
+               if (adev->gfx.funcs->reset_ras_error_count)
+                       adev->gfx.funcs->reset_ras_error_count(adev);
+
+               if (adev->gfx.funcs->reset_ras_error_status)
+                       adev->gfx.funcs->reset_ras_error_status(adev);
+               break;
+       case AMDGPU_RAS_BLOCK__MMHUB:
+               if (adev->mmhub.funcs->reset_ras_error_count)
+                       adev->mmhub.funcs->reset_ras_error_count(adev);
+               break;
+       case AMDGPU_RAS_BLOCK__SDMA:
+               if (adev->sdma.funcs->reset_ras_error_count)
+                       adev->sdma.funcs->reset_ras_error_count(adev);
+               break;
+       default:
+               break;
+       }
+
+       return 0;
+}
+
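
A minimal caller-side sketch (hypothetical, not part of this patch): harvest
the counters with a query first, then clear the sticky status so the next
query starts from zero. The helper name and the dev_info format are
illustrative only.

        static void example_gfx_ras_harvest(struct amdgpu_device *adev)
        {
                struct ras_query_if info = { 0 };

                info.head = *adev->gfx.ras_if;
                if (!amdgpu_ras_query_error_status(adev, &info))
                        dev_info(adev->dev, "gfx ce %lu ue %lu\n",
                                 info.ce_count, info.ue_count);
                amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
        }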
 /* Trigger XGMI/WAFL error */
 static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
                                 struct ta_ras_trigger_error_input *block_info)
                        .head = obj->head,
                };
 
-               if (amdgpu_ras_error_query(adev, &info))
+               if (amdgpu_ras_query_error_status(adev, &info))
                        return 0;
 
                data.ce_count += info.ce_count;
                if (info.head.block == AMDGPU_RAS_BLOCK__PCIE_BIF)
                        continue;
 
-               amdgpu_ras_error_query(adev, &info);
+               amdgpu_ras_query_error_status(adev, &info);
        }
 }
 
        case AMDGPU_RAS_BLOCK__GFX:
                if (adev->gfx.funcs->query_ras_error_status)
                        adev->gfx.funcs->query_ras_error_status(adev);
-
-               if (adev->gfx.funcs->query_sq_timeout_status)
-                       adev->gfx.funcs->query_sq_timeout_status(adev);
                break;
        case AMDGPU_RAS_BLOCK__MMHUB:
                if (adev->mmhub.funcs->query_ras_error_status)
 
 
 void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev);
 
-int amdgpu_ras_error_query(struct amdgpu_device *adev,
+int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
                struct ras_query_if *info);
 
+int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
+               enum amdgpu_ras_block block);
+
 int amdgpu_ras_error_inject(struct amdgpu_device *adev,
                struct ras_inject_if *info);
 
 
        .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
        .reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count,
        .query_ras_error_status = &gfx_v9_4_2_query_ras_error_status,
+       .reset_ras_error_status = &gfx_v9_4_2_reset_ras_error_status,
        .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
-       .query_sq_timeout_status = &gfx_v9_4_2_query_sq_timeout_status,
 };
 
 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
        if (adev->asic_type == CHIP_ALDEBARAN)
                gfx_v9_4_2_set_power_brake_sequence(adev);
 
-       if (adev->gfx.funcs->enable_watchdog_timer)
-               adev->gfx.funcs->enable_watchdog_timer(adev);
-
        return r;
 }
 
        if (r)
                return r;
 
-       if (adev->gfx.funcs &&
-           adev->gfx.funcs->reset_ras_error_count)
-               adev->gfx.funcs->reset_ras_error_count(adev);
-
        r = amdgpu_gfx_ras_late_init(adev);
        if (r)
                return r;
 
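+       /* enable the SQ watchdog only after RAS late init has completed */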
+       if (adev->gfx.funcs->enable_watchdog_timer)
+               adev->gfx.funcs->enable_watchdog_timer(adev);
+
        return 0;
 }
 
 
        SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20),
 };
 
+static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev);
+static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev);
+
 void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev,
                                      uint32_t die_id)
 {
 
        gfx_v9_4_2_query_sram_edc_count(adev, NULL, NULL);
        gfx_v9_4_2_query_utc_edc_count(adev, NULL, NULL);
-       gfx_v9_4_2_reset_utc_err_status(adev);
-       gfx_v9_4_2_reset_ea_err_status(adev);
 }
 
 int gfx_v9_4_2_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
                        if (reg_value)
                                dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n",
                                                j, reg_value);
+                       /* clear after read */
+                       WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_regs), 0x10);
                }
        }
 
        uint32_t data;
 
        data = RREG32_SOC15(GC, 0, regUTCL2_MEM_ECC_STATUS);
-       if (!data)
+       if (data) {
                dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data);
+               WREG32_SOC15(GC, 0, regUTCL2_MEM_ECC_STATUS, 0x3);
+       }
 
        data = RREG32_SOC15(GC, 0, regVML2_MEM_ECC_STATUS);
-       if (!data)
+       if (data) {
                dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data);
+               WREG32_SOC15(GC, 0, regVML2_MEM_ECC_STATUS, 0x3);
+       }
 
        data = RREG32_SOC15(GC, 0, regVML2_WALKER_MEM_ECC_STATUS);
-       if (!data)
+       if (data) {
                dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data);
+               WREG32_SOC15(GC, 0, regVML2_WALKER_MEM_ECC_STATUS, 0x3);
+       }
 }
 
 void gfx_v9_4_2_query_ras_error_status(struct amdgpu_device *adev)
 
        gfx_v9_4_2_query_ea_err_status(adev);
        gfx_v9_4_2_query_utc_err_status(adev);
+       gfx_v9_4_2_query_sq_timeout_status(adev);
+}
+
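+/* clear the sticky EA, UTC and SQ status registers so a later query starts clean */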
+void gfx_v9_4_2_reset_ras_error_status(struct amdgpu_device *adev)
+{
+       if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+               return;
+
+       gfx_v9_4_2_reset_utc_err_status(adev);
+       gfx_v9_4_2_reset_ea_err_status(adev);
+       gfx_v9_4_2_reset_sq_timeout_status(adev);
 }
 
 void gfx_v9_4_2_enable_watchdog_timer(struct amdgpu_device *adev)
        }
 }
 
-void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev)
+static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev)
 {
        uint32_t se_idx, sh_idx, cu_idx;
        uint32_t status;
        }
        gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);
+}
+
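+/* clear the latched SQ_TIMEOUT_STATUS on every CU */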
+static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev)
+{
+       uint32_t se_idx, sh_idx, cu_idx;
+
+       mutex_lock(&adev->grbm_idx_mutex);
+       for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines;
+            se_idx++) {
+               for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se;
+                    sh_idx++) {
+                       for (cu_idx = 0;
+                            cu_idx < adev->gfx.config.max_cu_per_sh;
+                            cu_idx++) {
+                               gfx_v9_4_2_select_se_sh(adev, se_idx, sh_idx,
+                                                       cu_idx);
+                               WREG32_SOC15(GC, 0, regSQ_TIMEOUT_STATUS, 0);
+                       }
+               }
+       }
+       gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       mutex_unlock(&adev->grbm_idx_mutex);
 }
 
 void gfx_v9_4_2_query_ras_error_status(struct amdgpu_device *adev);
 int gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev,
                                   void *ras_error_status);
-
+void gfx_v9_4_2_reset_ras_error_status(struct amdgpu_device *adev);
 void gfx_v9_4_2_enable_watchdog_timer(struct amdgpu_device *adev);
-void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev);
 #endif /* __GFX_V9_4_2_H__ */