www.infradead.org Git - users/dwmw2/linux.git/commitdiff
md/raid10: avoid deadlock on recovery.
author Vitaly Mayatskikh <vmayatskikh@digitalocean.com>
Tue, 3 Mar 2020 18:14:40 +0000 (13:14 -0500)
committer Song Liu <songliubraving@fb.com>
Wed, 22 Jul 2020 18:44:54 +0000 (11:44 -0700)
When a disk failure happens and the array has a spare drive, the resync
thread kicks in and starts to refill the spare. However, it may get blocked
by a retry thread that resubmits failed IO to a mirror, and that retry thread
can itself get blocked on a barrier raised by the resync thread.

Acked-by: Nigel Croxon <ncroxon@redhat.com>
Signed-off-by: Vitaly Mayatskikh <vmayatskikh@digitalocean.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
drivers/md/raid10.c

index 14b1ba732cd7d572ab987ce412a81813b996e8b3..cefda2abd34f870f5f5c3f9c98c31f80c364ddb3 100644 (file)
@@ -980,6 +980,7 @@ static void wait_barrier(struct r10conf *conf)
 {
        spin_lock_irq(&conf->resync_lock);
        if (conf->barrier) {
+               struct bio_list *bio_list = current->bio_list;
                conf->nr_waiting++;
                /* Wait for the barrier to drop.
                 * However if there are already pending
@@ -994,9 +995,16 @@ static void wait_barrier(struct r10conf *conf)
                wait_event_lock_irq(conf->wait_barrier,
                                    !conf->barrier ||
                                    (atomic_read(&conf->nr_pending) &&
-                                    current->bio_list &&
-                                    (!bio_list_empty(&current->bio_list[0]) ||
-                                     !bio_list_empty(&current->bio_list[1]))),
+                                    bio_list &&
+                                    (!bio_list_empty(&bio_list[0]) ||
+                                     !bio_list_empty(&bio_list[1]))) ||
+                                    /* move on if recovery thread is
+                                     * blocked by us
+                                     */
+                                    (conf->mddev->thread->tsk == current &&
+                                     test_bit(MD_RECOVERY_RUNNING,
+                                              &conf->mddev->recovery) &&
+                                     conf->nr_queued > 0),
                                    conf->resync_lock);
                conf->nr_waiting--;
                if (!conf->nr_waiting)