From 56316ee91bcefaf535dbe8ba601cf5f14051d09a Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sun, 26 Jan 2025 16:32:57 +0800 Subject: [PATCH] drm/amdgpu: Include ACA error type in aca bank ACA error types managed by driver a direct 1:1 correspondence with those managed by firmware. To address this, for each ACA bank, include both the ACA error type and the ACA SMU type. This addition is useful for creating CPER records. Signed-off-by: Hawking Zhang Reviewed-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 ++ drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 2 ++ drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 2 ++ drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 2 ++ 9 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 9d6345146495..1a26b8ad14cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -168,7 +168,7 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_ if (ret) return ret; - bank.type = type; + bank.smu_err_type = type; aca_smu_bank_dump(adev, i, count, &bank, qctx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index f3289d289913..3cd0115b0244 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -108,13 +108,15 @@ enum aca_error_type { }; enum aca_smu_type { + ACA_SMU_TYPE_INVALID = -1, ACA_SMU_TYPE_UE = 0, ACA_SMU_TYPE_CE, ACA_SMU_TYPE_COUNT, }; struct aca_bank { - enum aca_smu_type type; + enum aca_error_type aca_err_type; + enum aca_smu_type smu_err_type; u64 regs[ACA_MAX_REGS_COUNT]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 74b4349e345a..c98b6b35cfdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1123,10 +1123,12 @@ static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_ban if (ext_error_code != 0 && ext_error_code != 9) count = 0ULL; + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count); break; case ACA_SMU_TYPE_CE: count = ext_error_code == 6 ? count : 0ULL; + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, count); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index f4635fc8a7ca..f5b1d2edf805 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -891,10 +891,12 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, switch (type) { case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, 1ULL); break; case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0)); break; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 574083c7378e..c67ba961de91 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -1291,10 +1291,12 @@ static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_ban misc0 = bank->regs[ACA_REG_IDX_MISC0]; switch (type) { case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, 1ULL); break; case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0)); break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index ce013a715b86..58d22f0d5a68 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -746,10 +746,12 @@ static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank misc0 = bank->regs[ACA_REG_IDX_MISC0]; switch (type) { case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, 1ULL); break; case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0)); break; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 5e0066cd6c51..3dc0ffa81484 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -2392,10 +2392,12 @@ static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_ban misc0 = bank->regs[ACA_REG_IDX_MISC0]; switch (type) { case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, 1ULL); break; case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0)); break; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index a7b9c358a2d4..74f57b2d30a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -415,6 +415,7 @@ static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank err_type = ACA_ERROR_TYPE_CE; else return 0; + bank->aca_err_type = err_type; ret = aca_bank_info_decode(bank, &info); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index f1f07badb9b7..75211366f8f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1925,10 +1925,12 @@ static int vcn_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank misc0 = bank->regs[ACA_REG_IDX_MISC0]; switch (type) { case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, 1ULL); break; case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_ERROR_TYPE_CE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0)); break; -- 2.50.1