]> www.infradead.org Git - users/dwmw2/linux.git/commitdiff
drm/amdgpu: add interface to get die id from memory address
author Tao Zhou <tao.zhou1@amd.com>
Wed, 30 Oct 2024 08:42:42 +0000 (16:42 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 10 Dec 2024 15:26:47 +0000 (10:26 -0500)
And implement it for UMC v12_0. The die id is calculated from the IPID
register in the bad page retirement flow, but it is not stored in the
EEPROM; it can also be derived from the physical address.

v2: get PA_C4 and PA_R13 from the MCA address, since they may be cleared
in the retired page.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
drivers/gpu/drm/amd/amdgpu/umc_v12_0.c

index f97c45b4eeb8e2ae0c26219d4e0e5922b59e1d14..a4a7e61817aa7c32578630e5043ecd51d8f09209 100644 (file)
@@ -91,6 +91,8 @@ struct amdgpu_umc_ras {
                        struct ta_ras_query_address_input *addr_in,
                        struct ta_ras_query_address_output *addr_out,
                        bool dump_addr);
+       uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev,
+                       uint64_t mca_addr, uint64_t retired_page);
 };
 
 struct amdgpu_umc_funcs {
index cce93b4ffb587e4e69cd2307b53510b414f0acc6..30ee4cb9aaabffca530e6af619ba815b68576eae 100644 (file)
@@ -619,6 +619,31 @@ static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev,
        mutex_unlock(&con->umc_ecc_log.lock);
 }
 
+static uint32_t umc_v12_0_get_die_id(struct amdgpu_device *adev,
+               uint64_t mca_addr, uint64_t retired_page)
+{
+       uint32_t die = 0;
+
+       /* die id bit 0: XOR of retired_page bits 12/20/27/34/41; nps1 mode only for now */
+       die += ((((retired_page >> 12) & 0x1ULL)^
+           ((retired_page >> 20) & 0x1ULL) ^
+           ((retired_page >> 27) & 0x1ULL) ^
+           ((retired_page >> 34) & 0x1ULL) ^
+           ((retired_page >> 41) & 0x1ULL)) << 0);
+
+       /* die id bit 1: XOR of retired_page bits 13/28/42 and mca_addr bits 5/23;
+        * PA_C4 and PA_R13 may be cleared in retired_page, so read them from mca_addr.
+        */
+       die += ((((retired_page >> 13) & 0x1ULL) ^
+           ((mca_addr >> 5) & 0x1ULL) ^
+           ((retired_page >> 28) & 0x1ULL) ^
+           ((mca_addr >> 23) & 0x1ULL) ^
+           ((retired_page >> 42) & 0x1ULL)) << 1);
+       die &= 3;
+
+       return die;
+}
+
 struct amdgpu_umc_ras umc_v12_0_ras = {
        .ras_block = {
                .hw_ops = &umc_v12_0_ras_hw_ops,
@@ -630,5 +655,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
        .check_ecc_err_status = umc_v12_0_check_ecc_err_status,
        .update_ecc_status = umc_v12_0_update_ecc_status,
        .convert_ras_err_addr = umc_v12_0_convert_error_address,
+       .get_die_id_from_pa = umc_v12_0_get_die_id,
 };