]> www.infradead.org Git - users/hch/xfs.git/commitdiff
md/raid1: remove rcu protection to access rdev from conf
authorYu Kuai <yukuai3@huawei.com>
Sat, 25 Nov 2023 08:16:02 +0000 (16:16 +0800)
committerSong Liu <song@kernel.org>
Mon, 27 Nov 2023 23:49:05 +0000 (15:49 -0800)
Because it's safe to accees rdev from conf:
 - If any spinlock is held, because synchronize_rcu() from
   md_kick_rdev_from_array() will prevent 'rdev' to be freed until
   spinlock is released;
 - If 'reconfig_lock' is held, because rdev can't be added or removed from
   array;
 - If there is normal IO inflight, because mddev_suspend() will prevent
   rdev to be added or removed from array;
 - If there is sync IO inflight, because 'MD_RECOVERY_RUNNING' is
   checked in remove_and_add_spares().

And these will cover all the scenarios in raid1.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20231125081604.3939938-4-yukuai1@huaweicloud.com
drivers/md/raid1.c

index a678e0e6e102fc8d2b6e42a5f567825811338b9d..9348f17095120414b106a9bb1e02a1707e78bff5 100644 (file)
@@ -609,7 +609,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
        int choose_first;
        int choose_next_idle;
 
-       rcu_read_lock();
        /*
         * Check if we can balance. We can balance on the whole
         * device if no resync is going on, or below the resync window.
@@ -642,7 +641,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
                unsigned int pending;
                bool nonrot;
 
-               rdev = rcu_dereference(conf->mirrors[disk].rdev);
+               rdev = conf->mirrors[disk].rdev;
                if (r1_bio->bios[disk] == IO_BLOCKED
                    || rdev == NULL
                    || test_bit(Faulty, &rdev->flags))
@@ -773,7 +772,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
        }
 
        if (best_disk >= 0) {
-               rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
+               rdev = conf->mirrors[best_disk].rdev;
                if (!rdev)
                        goto retry;
                atomic_inc(&rdev->nr_pending);
@@ -784,7 +783,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 
                conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
        }
-       rcu_read_unlock();
        *max_sectors = sectors;
 
        return best_disk;
@@ -1235,14 +1233,12 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 
        if (r1bio_existed) {
                /* Need to get the block device name carefully */
-               struct md_rdev *rdev;
-               rcu_read_lock();
-               rdev = rcu_dereference(conf->mirrors[r1_bio->read_disk].rdev);
+               struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
+
                if (rdev)
                        snprintf(b, sizeof(b), "%pg", rdev->bdev);
                else
                        strcpy(b, "???");
-               rcu_read_unlock();
        }
 
        /*
@@ -1396,10 +1392,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 
        disks = conf->raid_disks * 2;
        blocked_rdev = NULL;
-       rcu_read_lock();
        max_sectors = r1_bio->sectors;
        for (i = 0;  i < disks; i++) {
-               struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+               struct md_rdev *rdev = conf->mirrors[i].rdev;
 
                /*
                 * The write-behind io is only attempted on drives marked as
@@ -1465,7 +1460,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                }
                r1_bio->bios[i] = bio;
        }
-       rcu_read_unlock();
 
        if (unlikely(blocked_rdev)) {
                /* Wait for this device to become unblocked */
@@ -1617,15 +1611,16 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
        struct r1conf *conf = mddev->private;
        int i;
 
+       lockdep_assert_held(&mddev->lock);
+
        seq_printf(seq, " [%d/%d] [", conf->raid_disks,
                   conf->raid_disks - mddev->degraded);
-       rcu_read_lock();
        for (i = 0; i < conf->raid_disks; i++) {
-               struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+               struct md_rdev *rdev = READ_ONCE(conf->mirrors[i].rdev);
+
                seq_printf(seq, "%s",
                           rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
        }
-       rcu_read_unlock();
        seq_printf(seq, "]");
 }
 
@@ -1691,16 +1686,15 @@ static void print_conf(struct r1conf *conf)
        pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
                 conf->raid_disks);
 
-       rcu_read_lock();
+       lockdep_assert_held(&conf->mddev->reconfig_mutex);
        for (i = 0; i < conf->raid_disks; i++) {
-               struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+               struct md_rdev *rdev = conf->mirrors[i].rdev;
                if (rdev)
                        pr_debug(" disk %d, wo:%d, o:%d, dev:%pg\n",
                                 i, !test_bit(In_sync, &rdev->flags),
                                 !test_bit(Faulty, &rdev->flags),
                                 rdev->bdev);
        }
-       rcu_read_unlock();
 }
 
 static void close_sync(struct r1conf *conf)
@@ -1810,7 +1804,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                         */
                        if (rdev->saved_raid_disk < 0)
                                conf->fullsync = 1;
-                       rcu_assign_pointer(p->rdev, rdev);
+                       WRITE_ONCE(p->rdev, rdev);
                        break;
                }
                if (test_bit(WantReplacement, &p->rdev->flags) &&
@@ -1826,7 +1820,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                rdev->raid_disk = repl_slot;
                err = 0;
                conf->fullsync = 1;
-               rcu_assign_pointer(p[conf->raid_disks].rdev, rdev);
+               WRITE_ONCE(p[conf->raid_disks].rdev, rdev);
        }
 
        print_conf(conf);
@@ -1862,7 +1856,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                        err = -EBUSY;
                        goto abort;
                }
-               p->rdev = NULL;
+               WRITE_ONCE(p->rdev, NULL);
                if (conf->mirrors[conf->raid_disks + number].rdev) {
                        /* We just removed a device that is being replaced.
                         * Move down the replacement.  We drain all IO before
@@ -1883,7 +1877,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                                goto abort;
                        }
                        clear_bit(Replacement, &repl->flags);
-                       p->rdev = repl;
+                       WRITE_ONCE(p->rdev, repl);
                        conf->mirrors[conf->raid_disks + number].rdev = NULL;
                        unfreeze_array(conf);
                }
@@ -2281,8 +2275,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
                        sector_t first_bad;
                        int bad_sectors;
 
-                       rcu_read_lock();
-                       rdev = rcu_dereference(conf->mirrors[d].rdev);
+                       rdev = conf->mirrors[d].rdev;
                        if (rdev &&
                            (test_bit(In_sync, &rdev->flags) ||
                             (!test_bit(Faulty, &rdev->flags) &&
@@ -2290,15 +2283,14 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
                            is_badblock(rdev, sect, s,
                                        &first_bad, &bad_sectors) == 0) {
                                atomic_inc(&rdev->nr_pending);
-                               rcu_read_unlock();
                                if (sync_page_io(rdev, sect, s<<9,
                                         conf->tmppage, REQ_OP_READ, false))
                                        success = 1;
                                rdev_dec_pending(rdev, mddev);
                                if (success)
                                        break;
-                       } else
-                               rcu_read_unlock();
+                       }
+
                        d++;
                        if (d == conf->raid_disks * 2)
                                d = 0;
@@ -2317,29 +2309,24 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
                        if (d==0)
                                d = conf->raid_disks * 2;
                        d--;
-                       rcu_read_lock();
-                       rdev = rcu_dereference(conf->mirrors[d].rdev);
+                       rdev = conf->mirrors[d].rdev;
                        if (rdev &&
                            !test_bit(Faulty, &rdev->flags)) {
                                atomic_inc(&rdev->nr_pending);
-                               rcu_read_unlock();
                                r1_sync_page_io(rdev, sect, s,
                                                conf->tmppage, WRITE);
                                rdev_dec_pending(rdev, mddev);
-                       } else
-                               rcu_read_unlock();
+                       }
                }
                d = start;
                while (d != read_disk) {
                        if (d==0)
                                d = conf->raid_disks * 2;
                        d--;
-                       rcu_read_lock();
-                       rdev = rcu_dereference(conf->mirrors[d].rdev);
+                       rdev = conf->mirrors[d].rdev;
                        if (rdev &&
                            !test_bit(Faulty, &rdev->flags)) {
                                atomic_inc(&rdev->nr_pending);
-                               rcu_read_unlock();
                                if (r1_sync_page_io(rdev, sect, s,
                                                    conf->tmppage, READ)) {
                                        atomic_add(s, &rdev->corrected_errors);
@@ -2350,8 +2337,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
                                                rdev->bdev);
                                }
                                rdev_dec_pending(rdev, mddev);
-                       } else
-                               rcu_read_unlock();
+                       }
                }
                sectors -= s;
                sect += s;
@@ -2732,7 +2718,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 
        r1_bio = raid1_alloc_init_r1buf(conf);
 
-       rcu_read_lock();
        /*
         * If we get a correctably read error during resync or recovery,
         * we might want to read from a different device.  So we
@@ -2753,7 +2738,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
                struct md_rdev *rdev;
                bio = r1_bio->bios[i];
 
-               rdev = rcu_dereference(conf->mirrors[i].rdev);
+               rdev = conf->mirrors[i].rdev;
                if (rdev == NULL ||
                    test_bit(Faulty, &rdev->flags)) {
                        if (i < conf->raid_disks)
@@ -2811,7 +2796,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
                                bio->bi_opf |= MD_FAILFAST;
                }
        }
-       rcu_read_unlock();
        if (disk < 0)
                disk = wonly;
        r1_bio->read_disk = disk;