www.infradead.org Git - users/dwmw2/linux.git/commitdiff
drm/amdgpu: store only one RAS bad page record for all pages in one row
author Tao Zhou <tao.zhou1@amd.com>
Fri, 18 Oct 2024 10:58:54 +0000 (18:58 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 10 Dec 2024 15:26:46 +0000 (10:26 -0500)
Storing a single record per memory row saves EEPROM space; the legacy format, which stores one record per bad page, remains supported.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

index 4df9a8dfe9eb359b90e420126f9bef30b6b3c7d7..882a33e134d8e3e46cb7c38aace7b3c46197fd60 100644 (file)
@@ -2849,7 +2849,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_err_handler_data *data;
        struct amdgpu_ras_eeprom_control *control;
-       int save_count;
+       int save_count, unit_num, bad_page_num, i;
 
        if (!con || !con->eh_data) {
                if (new_cnt)
@@ -2861,19 +2861,38 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
        mutex_lock(&con->recovery_lock);
        control = &con->eeprom_control;
        data = con->eh_data;
-       save_count = data->count - control->ras_num_recs;
+       bad_page_num = control->ras_num_recs;
+       /* one record on eeprom stands for all pages in one memory row
+        * in this mode
+        */
+       if (control->rec_type == AMDGPU_RAS_EEPROM_REC_MCA)
+               bad_page_num = control->ras_num_recs * adev->umc.retire_unit;
+
+       save_count = data->count - bad_page_num;
        mutex_unlock(&con->recovery_lock);
 
+       unit_num = save_count / adev->umc.retire_unit;
        if (new_cnt)
-               *new_cnt = save_count / adev->umc.retire_unit;
+               *new_cnt = unit_num;
 
        /* only new entries are saved */
        if (save_count > 0) {
-               if (amdgpu_ras_eeprom_append(control,
-                                            &data->bps[control->ras_num_recs],
-                                            save_count)) {
-                       dev_err(adev->dev, "Failed to save EEPROM table data!");
-                       return -EIO;
+               if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) {
+                       if (amdgpu_ras_eeprom_append(control,
+                                                    &data->bps[control->ras_num_recs],
+                                                    save_count)) {
+                               dev_err(adev->dev, "Failed to save EEPROM table data!");
+                               return -EIO;
+                       }
+               } else {
+                       for (i = 0; i < unit_num; i++) {
+                               if (amdgpu_ras_eeprom_append(control,
+                                               &data->bps[bad_page_num + i * adev->umc.retire_unit],
+                                               1)) {
+                                       dev_err(adev->dev, "Failed to save EEPROM table data!");
+                                       return -EIO;
+                               }
+                       }
                }
 
                dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);