]> www.infradead.org Git - linux.git/commitdiff
bcachefs: Kill unnecessary mark_lock usage
authorKent Overstreet <kent.overstreet@linux.dev>
Sun, 8 Dec 2024 09:11:21 +0000 (04:11 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sat, 21 Dec 2024 06:36:22 +0000 (01:36 -0500)
We can't hold mark_lock while calling fsck_err() - that's a deadlock,
mark_lock is meant to be a leaf node lock.

It's also unnecessary for gc_bucket() and bucket_gen(); rcu suffices
since the bucket_gens array describes its size, and we can't race with
device removal or resize during gc/fsck since that takes state lock.

Reported-by: syzbot+38641fcbda1aaffefdd4@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_foreground.c
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_gc.c
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/ec.c
fs/bcachefs/errcode.h
fs/bcachefs/super.c

index 57d5f14c93d07ed205d85630d9bb5aba2eb5233f..6df41c331a52e3cc8345ff771d4a57ca25ed50c4 100644 (file)
@@ -107,14 +107,10 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
                return;
        }
 
-       percpu_down_read(&c->mark_lock);
        spin_lock(&ob->lock);
-
        ob->valid = false;
        ob->data_type = 0;
-
        spin_unlock(&ob->lock);
-       percpu_up_read(&c->mark_lock);
 
        spin_lock(&c->freelist_lock);
        bch2_open_bucket_hash_remove(c, ob);
index e6cd93e1ed0f35027a35d91f840cc3f12951dcd5..3a3cb79d85186566cd9bc84c3ee5b6df3696876e 100644 (file)
@@ -547,15 +547,13 @@ struct bch_dev {
 
        /*
         * Buckets:
-        * Per-bucket arrays are protected by c->mark_lock, bucket_lock and
-        * gc_gens_lock, for device resize - holding any is sufficient for
-        * access: Or rcu_read_lock(), but only for dev_ptr_stale():
+        * Per-bucket arrays are protected by either rcu_read_lock or
+        * state_lock, for device resize.
         */
        GENRADIX(struct bucket) buckets_gc;
        struct bucket_gens __rcu *bucket_gens;
        u8                      *oldest_gen;
        unsigned long           *buckets_nouse;
-       struct rw_semaphore     bucket_lock;
 
        struct bch_dev_usage __percpu   *usage;
 
index 24f2f3bdf70411a99b79ec1dab6fbaf619882b5c..e5ba7d1429b9c098a5c3903d21e3ca5f06be8747 100644 (file)
@@ -811,7 +811,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
        old = bch2_alloc_to_v4(k, &old_convert);
        gc = new = *old;
 
-       percpu_down_read(&c->mark_lock);
        __bucket_m_to_alloc(&gc, *gc_bucket(ca, iter->pos.offset));
 
        old_gc = gc;
@@ -822,7 +821,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
                gc.data_type = old->data_type;
                gc.dirty_sectors = old->dirty_sectors;
        }
-       percpu_up_read(&c->mark_lock);
 
        /*
         * gc.data_type doesn't yet include need_discard & need_gc_gen states -
@@ -840,11 +838,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
                 * safe w.r.t. transaction restarts, so fixup the gc_bucket so
                 * we don't run it twice:
                 */
-               percpu_down_read(&c->mark_lock);
                struct bucket *gc_m = gc_bucket(ca, iter->pos.offset);
                gc_m->data_type = gc.data_type;
                gc_m->dirty_sectors = gc.dirty_sectors;
-               percpu_up_read(&c->mark_lock);
        }
 
        if (fsck_err_on(new.data_type != gc.data_type,
@@ -1088,7 +1084,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
        if (unlikely(test_bit(BCH_FS_going_ro, &c->flags)))
                return -EROFS;
 
-       percpu_down_read(&c->mark_lock);
        rcu_read_lock();
        bkey_for_each_ptr(ptrs, ptr) {
                struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
@@ -1097,7 +1092,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
 
                if (dev_ptr_stale(ca, ptr) > 16) {
                        rcu_read_unlock();
-                       percpu_up_read(&c->mark_lock);
                        goto update;
                }
        }
@@ -1112,7 +1106,6 @@ static int gc_btree_gens_key(struct btree_trans *trans,
                        *gen = ptr->gen;
        }
        rcu_read_unlock();
-       percpu_up_read(&c->mark_lock);
        return 0;
 update:
        u = bch2_bkey_make_mut(trans, iter, &k, 0);
index afd35c93fcfbb7502b7f7cc80e41e2398258e717..eb2ed4edbbbc89f1ed6dcdc9deda01750d9c1aa3 100644 (file)
@@ -262,8 +262,6 @@ int bch2_check_fix_ptrs(struct btree_trans *trans,
        struct printbuf buf = PRINTBUF;
        int ret = 0;
 
-       percpu_down_read(&c->mark_lock);
-
        bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) {
                ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update);
                if (ret)
@@ -364,7 +362,6 @@ found:
                        bch_info(c, "new key %s", buf.buf);
                }
 
-               percpu_up_read(&c->mark_lock);
                struct btree_iter iter;
                bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level,
                                          BTREE_ITER_intent|BTREE_ITER_all_snapshots);
@@ -373,8 +370,6 @@ found:
                                          BTREE_UPDATE_internal_snapshot_node|
                                          BTREE_TRIGGER_norun);
                bch2_trans_iter_exit(trans, &iter);
-               percpu_down_read(&c->mark_lock);
-
                if (ret)
                        goto err;
 
@@ -382,7 +377,6 @@ found:
                        bch2_btree_node_update_key_early(trans, btree, level - 1, k, new);
        }
 err:
-       percpu_up_read(&c->mark_lock);
        printbuf_exit(&buf);
        return ret;
 }
@@ -603,13 +597,12 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
        }
 
        if (flags & BTREE_TRIGGER_gc) {
-               percpu_down_read(&c->mark_lock);
                struct bucket *g = gc_bucket(ca, bucket.offset);
                if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n  %s",
                                            p.ptr.dev,
                                            (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
                        ret = -BCH_ERR_trigger_pointer;
-                       goto err_unlock;
+                       goto err;
                }
 
                bucket_lock(g);
@@ -617,8 +610,6 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
                ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new);
                alloc_to_bucket(g, new);
                bucket_unlock(g);
-err_unlock:
-               percpu_up_read(&c->mark_lock);
 
                if (!ret)
                        ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
@@ -996,11 +987,10 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *
        struct bch_fs *c = trans->c;
        int ret = 0;
 
-       percpu_down_read(&c->mark_lock);
        struct bucket *g = gc_bucket(ca, b);
        if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s",
                                    ca->dev_idx, bch2_data_type_str(data_type)))
-               goto err_unlock;
+               goto err;
 
        bucket_lock(g);
        struct bch_alloc_v4 old = bucket_m_to_alloc(*g);
@@ -1010,26 +1000,24 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *
                        "different types of data in same bucket: %s, %s",
                        bch2_data_type_str(g->data_type),
                        bch2_data_type_str(data_type)))
-               goto err;
+               goto err_unlock;
 
        if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c,
                        "bucket %u:%llu gen %u data type %s sector count overflow: %u + %u > bucket size",
                        ca->dev_idx, b, g->gen,
                        bch2_data_type_str(g->data_type ?: data_type),
                        g->dirty_sectors, sectors))
-               goto err;
+               goto err_unlock;
 
        g->data_type = data_type;
        g->dirty_sectors += sectors;
        struct bch_alloc_v4 new = bucket_m_to_alloc(*g);
        bucket_unlock(g);
-       percpu_up_read(&c->mark_lock);
        ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
        return ret;
-err:
-       bucket_unlock(g);
 err_unlock:
-       percpu_up_read(&c->mark_lock);
+       bucket_unlock(g);
+err:
        return -BCH_ERR_metadata_bucket_inconsistency;
 }
 
@@ -1295,7 +1283,11 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
        bool resize = ca->bucket_gens != NULL;
        int ret;
 
-       BUG_ON(resize && ca->buckets_nouse);
+       if (resize)
+               lockdep_assert_held(&c->state_lock);
+
+       if (resize && ca->buckets_nouse)
+               return -BCH_ERR_no_resize_with_buckets_nouse;
 
        bucket_gens = kvmalloc(struct_size(bucket_gens, b, nbuckets),
                               GFP_KERNEL|__GFP_ZERO);
@@ -1309,11 +1301,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
        bucket_gens->nbuckets_minus_first =
                bucket_gens->nbuckets - bucket_gens->first_bucket;
 
-       if (resize) {
-               down_write(&ca->bucket_lock);
-               percpu_down_write(&c->mark_lock);
-       }
-
        old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1);
 
        if (resize) {
@@ -1331,11 +1318,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 
        nbuckets = ca->mi.nbuckets;
 
-       if (resize) {
-               percpu_up_write(&c->mark_lock);
-               up_write(&ca->bucket_lock);
-       }
-
        ret = 0;
 err:
        if (bucket_gens)
index 3bebc4c3044f2d6f61d9c746fde706f71d9ca7ef..a9acdd6c0c8656a7279da4ffa1710236d6728934 100644 (file)
@@ -82,16 +82,15 @@ static inline void bucket_lock(struct bucket *b)
 
 static inline struct bucket *gc_bucket(struct bch_dev *ca, size_t b)
 {
-       return genradix_ptr(&ca->buckets_gc, b);
+       return bucket_valid(ca, b)
+               ? genradix_ptr(&ca->buckets_gc, b)
+               : NULL;
 }
 
 static inline struct bucket_gens *bucket_gens(struct bch_dev *ca)
 {
        return rcu_dereference_check(ca->bucket_gens,
-                                    !ca->fs ||
-                                    percpu_rwsem_is_held(&ca->fs->mark_lock) ||
-                                    lockdep_is_held(&ca->fs->state_lock) ||
-                                    lockdep_is_held(&ca->bucket_lock));
+                                    lockdep_is_held(&ca->fs->state_lock));
 }
 
 static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)
index df9c0e4533919af20a41f9b48ca6b172c35e6728..3e7e41cd8380603e6050cbc6c23ef7d17234f876 100644 (file)
@@ -305,13 +305,12 @@ static int mark_stripe_bucket(struct btree_trans *trans,
        }
 
        if (flags & BTREE_TRIGGER_gc) {
-               percpu_down_read(&c->mark_lock);
                struct bucket *g = gc_bucket(ca, bucket.offset);
                if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n  %s",
                                            ptr->dev,
                                            (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
                        ret = -BCH_ERR_mark_stripe;
-                       goto err_unlock;
+                       goto err;
                }
 
                bucket_lock(g);
@@ -319,8 +318,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
                ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags);
                alloc_to_bucket(g, new);
                bucket_unlock(g);
-err_unlock:
-               percpu_up_read(&c->mark_lock);
+
                if (!ret)
                        ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
        }
index 5e4dd85ac6690b136271aac00f23cc35ea115d59..a6a9561a890d2dbd559893f9309a56db7dc52b18 100644 (file)
        x(EINVAL,                       opt_parse_error)                        \
        x(EINVAL,                       remove_with_metadata_missing_unimplemented)\
        x(EINVAL,                       remove_would_lose_data)                 \
+       x(EINVAL,                       no_resize_with_buckets_nouse)           \
        x(EROFS,                        erofs_trans_commit)                     \
        x(EROFS,                        erofs_no_writes)                        \
        x(EROFS,                        erofs_journal_err)                      \
index 14157820705d1f3fe36bd1a19ba2aba3f0238519..2b2e0835c8fea7adbd70255c07daad4ead0587ca 100644 (file)
@@ -1311,8 +1311,6 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
        init_completion(&ca->ref_completion);
        init_completion(&ca->io_ref_completion);
 
-       init_rwsem(&ca->bucket_lock);
-
        INIT_WORK(&ca->io_error_work, bch2_io_error_work);
 
        bch2_time_stats_quantiles_init(&ca->io_latency[READ]);