www.infradead.org Git - users/dwmw2/linux.git/commitdiff
md/raid10: Do not add spare disk when recovery fails
author Li Nan <linan122@huawei.com>
Fri, 2 Jun 2023 09:18:38 +0000 (17:18 +0800)
committer Song Liu <song@kernel.org>
Tue, 13 Jun 2023 22:25:43 +0000 (15:25 -0700)
In raid10_sync_request(), if data cannot be read from any disk for
recovery, it will go to 'giveup' and increment 'chunks_skipped'. After
repeated 'giveup's, once 'chunks_skipped >= geo.raid_disks', it will
return 'max_sector', indicating that the recovery has been completed.
However, the recovery was actually aborted and the data remains
inconsistent.

Fix it by setting mirror->recovery_disabled, which will prevent the spare
disk from being added to this mirror. The same issue also exists during
resync; it will be fixed afterwards.

Signed-off-by: Li Nan <linan122@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230602091839.743798-2-linan666@huaweicloud.com
drivers/md/raid10.c

index ab0e2485b2b7a2010dd819f4da384ccedf094f3e..1b953e788ce1de45762f6ed1c25ec757497efbc0 100644 (file)
@@ -3311,6 +3311,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
        int chunks_skipped = 0;
        sector_t chunk_mask = conf->geo.chunk_mask;
        int page_idx = 0;
+       int error_disk = -1;
 
        /*
         * Allow skipping a full rebuild for incremental assembly
@@ -3394,8 +3395,21 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                return reshape_request(mddev, sector_nr, skipped);
 
        if (chunks_skipped >= conf->geo.raid_disks) {
-               /* if there has been nothing to do on any drive,
-                * then there is nothing to do at all..
+               pr_err("md/raid10:%s: %s fails\n", mdname(mddev),
+                       test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?  "resync" : "recovery");
+               if (error_disk >= 0 &&
+                   !test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
+                       /*
+                        * recovery fails, set mirrors.recovery_disabled,
+                        * device shouldn't be added to there.
+                        */
+                       conf->mirrors[error_disk].recovery_disabled =
+                                               mddev->recovery_disabled;
+                       return 0;
+               }
+               /*
+                * if there has been nothing to do on any drive,
+                * then there is nothing to do at all.
                 */
                *skipped = 1;
                return (max_sector - sector_nr) + sectors_skipped;
@@ -3646,6 +3660,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                                                       mdname(mddev));
                                        mirror->recovery_disabled
                                                = mddev->recovery_disabled;
+                               } else {
+                                       error_disk = i;
                                }
                                put_buf(r10_bio);
                                if (rb2)