]> www.infradead.org Git - nvme.git/commitdiff
md: Don't register sync_thread for reshape directly
authorYu Kuai <yukuai3@huawei.com>
Thu, 1 Feb 2024 09:25:49 +0000 (17:25 +0800)
committerSong Liu <song@kernel.org>
Thu, 15 Feb 2024 22:17:22 +0000 (14:17 -0800)
Currently, if reshape is interrupted, then reassemble the array will
register sync_thread directly from pers->run(), in this case
'MD_RECOVERY_RUNNING' is set directly, however, there is no guarantee
that md_do_sync() will be executed, hence stop_sync_thread() will hang
because 'MD_RECOVERY_RUNNING' can't be cleared.

Last patch make sure that md_do_sync() will set MD_RECOVERY_DONE,
however, following hang can still be triggered by dm-raid test
shell/lvconvert-raid-reshape.sh occasionally:

[root@fedora ~]# cat /proc/1982/stack
[<0>] stop_sync_thread+0x1ab/0x270 [md_mod]
[<0>] md_frozen_sync_thread+0x5c/0xa0 [md_mod]
[<0>] raid_presuspend+0x1e/0x70 [dm_raid]
[<0>] dm_table_presuspend_targets+0x40/0xb0 [dm_mod]
[<0>] __dm_destroy+0x2a5/0x310 [dm_mod]
[<0>] dm_destroy+0x16/0x30 [dm_mod]
[<0>] dev_remove+0x165/0x290 [dm_mod]
[<0>] ctl_ioctl+0x4bb/0x7b0 [dm_mod]
[<0>] dm_ctl_ioctl+0x11/0x20 [dm_mod]
[<0>] vfs_ioctl+0x21/0x60
[<0>] __x64_sys_ioctl+0xb9/0xe0
[<0>] do_syscall_64+0xc6/0x230
[<0>] entry_SYSCALL_64_after_hwframe+0x6c/0x74

Meanwhile mddev->recovery is:
MD_RECOVERY_RUNNING |
MD_RECOVERY_INTR |
MD_RECOVERY_RESHAPE |
MD_RECOVERY_FROZEN

Fix this problem by remove the code to register sync_thread directly
from raid10 and raid5. And let md_check_recovery() to register
sync_thread.

Fixes: f67055780caa ("[PATCH] md: Checkpoint and allow restart of raid5 reshape")
Fixes: f52f5c71f3d4 ("md: fix stopping sync thread")
Cc: stable@vger.kernel.org # v6.7+
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240201092559.910982-5-yukuai1@huaweicloud.com
drivers/md/md.c
drivers/md/raid10.c
drivers/md/raid5.c

index fbf04160d0098d8cb7c6cad1383e40b652552279..dcde67a816473438765357bd5d4feeca925fcb9e 100644 (file)
@@ -9376,6 +9376,7 @@ static void md_start_sync(struct work_struct *ws)
        struct mddev *mddev = container_of(ws, struct mddev, sync_work);
        int spares = 0;
        bool suspend = false;
+       char *name;
 
        if (md_spares_need_change(mddev))
                suspend = true;
@@ -9408,8 +9409,10 @@ static void md_start_sync(struct work_struct *ws)
        if (spares)
                md_bitmap_write_all(mddev->bitmap);
 
+       name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ?
+                       "reshape" : "resync";
        rcu_assign_pointer(mddev->sync_thread,
-                          md_register_thread(md_do_sync, mddev, "resync"));
+                          md_register_thread(md_do_sync, mddev, name));
        if (!mddev->sync_thread) {
                pr_warn("%s: could not start resync thread...\n",
                        mdname(mddev));
index 7412066ea22c7a525ed3e9ff1cfc1b5db2b2b527..a5f8419e2df1d5624f587a3615c3e46348532701 100644 (file)
@@ -4175,11 +4175,7 @@ static int raid10_run(struct mddev *mddev)
                clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
                set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
-               set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-               rcu_assign_pointer(mddev->sync_thread,
-                       md_register_thread(md_do_sync, mddev, "reshape"));
-               if (!mddev->sync_thread)
-                       goto out_free_conf;
+               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        }
 
        return 0;
@@ -4573,16 +4569,8 @@ out:
        clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
        clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
        set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
-       set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-
-       rcu_assign_pointer(mddev->sync_thread,
-                          md_register_thread(md_do_sync, mddev, "reshape"));
-       if (!mddev->sync_thread) {
-               ret = -EAGAIN;
-               goto abort;
-       }
+       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        conf->reshape_checkpoint = jiffies;
-       md_wakeup_thread(mddev->sync_thread);
        md_new_event();
        return 0;
 
index 8497880135ee4269ef329e58a10757870ae2df18..6a7a32f7fb912019754f75338104373009604051 100644 (file)
@@ -7936,11 +7936,7 @@ static int raid5_run(struct mddev *mddev)
                clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
                set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
-               set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-               rcu_assign_pointer(mddev->sync_thread,
-                       md_register_thread(md_do_sync, mddev, "reshape"));
-               if (!mddev->sync_thread)
-                       goto abort;
+               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        }
 
        /* Ok, everything is just fine now */
@@ -8506,29 +8502,8 @@ static int raid5_start_reshape(struct mddev *mddev)
        clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
        clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
        set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
-       set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-       rcu_assign_pointer(mddev->sync_thread,
-                          md_register_thread(md_do_sync, mddev, "reshape"));
-       if (!mddev->sync_thread) {
-               mddev->recovery = 0;
-               spin_lock_irq(&conf->device_lock);
-               write_seqcount_begin(&conf->gen_lock);
-               mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
-               mddev->new_chunk_sectors =
-                       conf->chunk_sectors = conf->prev_chunk_sectors;
-               mddev->new_layout = conf->algorithm = conf->prev_algo;
-               rdev_for_each(rdev, mddev)
-                       rdev->new_data_offset = rdev->data_offset;
-               smp_wmb();
-               conf->generation --;
-               conf->reshape_progress = MaxSector;
-               mddev->reshape_position = MaxSector;
-               write_seqcount_end(&conf->gen_lock);
-               spin_unlock_irq(&conf->device_lock);
-               return -EAGAIN;
-       }
+       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        conf->reshape_checkpoint = jiffies;
-       md_wakeup_thread(mddev->sync_thread);
        md_new_event();
        return 0;
 }