From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Wed, 26 Mar 2025 15:41:07 +0000 (-0400)
Subject: bcachefs: Recovery no longer holds state_lock
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=2dd202dbaf0acfa4af7fcdf258f35866e31f7425;p=users%2Fjedix%2Flinux-maple.git

bcachefs: Recovery no longer holds state_lock

state_lock guards against devices coming or leaving, changing state, or
the filesystem changing between ro <-> rw.

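But it's not necessary for running recovery passes, and holding it
blocks asynchronous events that would cause us to go RO or kick out
devices.

Instead, the code that actually depends on state_lock now takes it
itself: bch2_alloc_read(), bch2_check_extents_to_backpointers() and
bch2_check_allocations() take it for read, and
bch2_fs_read_write_early() takes it for write. bch2_fs_start() now only
holds it around the initial superblock/allocator setup and around the
final ro/rw transition, and sysfs_opt_store() no longer takes it.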

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---

diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 5fb396be91272..b6dbeaa9c7ab0 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -589,6 +589,8 @@ iter_err:
 
 int bch2_alloc_read(struct bch_fs *c)
 {
+	down_read(&c->state_lock);
+
 	struct btree_trans *trans = bch2_trans_get(c);
 	struct bch_dev *ca = NULL;
 	int ret;
@@ -652,6 +654,7 @@ int bch2_alloc_read(struct bch_fs *c)
 	bch2_dev_put(ca);
 	bch2_trans_put(trans);
 
+	up_read(&c->state_lock);
 	bch_err_fn(c, ret);
 	return ret;
 }
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 20c497f0c2cb4..f08ab98853a62 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -1023,7 +1023,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
 	 * Can't allow devices to come/go/resize while we have bucket bitmaps
 	 * allocated
 	 */
-	lockdep_assert_held(&c->state_lock);
+	down_read(&c->state_lock);
 
 	for_each_member_device(c, ca) {
 		BUG_ON(ca->bucket_backpointer_mismatches);
@@ -1108,6 +1108,7 @@ err_free_bitmaps:
 		ca->bucket_backpointer_mismatches = NULL;
 	}
 
+	up_read(&c->state_lock);
 	bch_err_fn(c, ret);
 	return ret;
 }
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index ff681e7335983..fc44e7885ac5f 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -1021,8 +1021,7 @@ int bch2_check_allocations(struct bch_fs *c)
 {
 	int ret;
 
-	lockdep_assert_held(&c->state_lock);
-
+	down_read(&c->state_lock);
 	down_write(&c->gc_lock);
 
 	bch2_btree_interior_updates_flush(c);
@@ -1060,6 +1059,7 @@ out:
 	percpu_up_write(&c->mark_lock);
 
 	up_write(&c->gc_lock);
+	up_read(&c->state_lock);
 
 	/*
 	 * At startup, allocations can happen directly instead of via the
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 101806d7ebe1d..5c8e40dea7119 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -5,6 +5,8 @@
 #define BCH_ERRCODES()								\
 	x(ERANGE,			ERANGE_option_too_small)		\
 	x(ERANGE,			ERANGE_option_too_big)			\
+	x(EINVAL,			injected)				\
+	x(BCH_ERR_injected,		injected_fs_start)			\
 	x(EINVAL,			mount_option)				\
 	x(BCH_ERR_mount_option,		option_name)				\
 	x(BCH_ERR_mount_option,		option_value)				\
diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c
index 4eea51edafca6..55bfa73f34def 100644
--- a/fs/bcachefs/opts.c
+++ b/fs/bcachefs/opts.c
@@ -482,14 +482,12 @@ void bch2_opts_to_text(struct printbuf *out,
 
 int bch2_opt_check_may_set(struct bch_fs *c, struct bch_dev *ca, int id, u64 v)
 {
-	lockdep_assert_held(&c->state_lock);
-
 	int ret = 0;
 
 	switch (id) {
 	case Opt_state:
 		if (ca)
-			return __bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED);
+			return bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED);
 		break;
 
 	case Opt_compression:
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
index 0b3c951c32da9..593ff142530df 100644
--- a/fs/bcachefs/recovery_passes.c
+++ b/fs/bcachefs/recovery_passes.c
@@ -234,28 +234,22 @@ static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
 
 int bch2_run_online_recovery_passes(struct bch_fs *c)
 {
-	int ret = 0;
-
-	down_read(&c->state_lock);
-
 	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
 		struct recovery_pass_fn *p = recovery_pass_fns + i;
 
 		if (!(p->when & PASS_ONLINE))
 			continue;
 
-		ret = bch2_run_recovery_pass(c, i);
+		int ret = bch2_run_recovery_pass(c, i);
 		if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
 			i = c->curr_recovery_pass;
 			continue;
 		}
 		if (ret)
-			break;
+			return ret;
 	}
 
-	up_read(&c->state_lock);
-
-	return ret;
+	return 0;
 }
 
 int bch2_run_recovery_passes(struct bch_fs *c)
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 64432233303ab..20208f3c5d8b0 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -533,9 +533,11 @@ int bch2_fs_read_write(struct bch_fs *c)
 
 int bch2_fs_read_write_early(struct bch_fs *c)
 {
-	lockdep_assert_held(&c->state_lock);
+	down_write(&c->state_lock);
+	int ret = __bch2_fs_read_write(c, true);
+	up_write(&c->state_lock);
 
-	return __bch2_fs_read_write(c, true);
+	return ret;
 }
 
 /* Filesystem startup/shutdown: */
@@ -1019,38 +1021,39 @@ static void print_mount_opts(struct bch_fs *c)
 int bch2_fs_start(struct bch_fs *c)
 {
 	time64_t now = ktime_get_real_seconds();
-	int ret;
+	int ret = 0;
 
 	print_mount_opts(c);
 
 	down_write(&c->state_lock);
+	mutex_lock(&c->sb_lock);
 
 	BUG_ON(test_bit(BCH_FS_started, &c->flags));
 
-	mutex_lock(&c->sb_lock);
+	if (!bch2_sb_field_get_minsize(&c->disk_sb, ext,
+			sizeof(struct bch_sb_field_ext) / sizeof(u64))) {
+		mutex_unlock(&c->sb_lock);
+		up_write(&c->state_lock);
+		ret = -BCH_ERR_ENOSPC_sb;
+		goto err;
+	}
 
 	ret = bch2_sb_members_v2_init(c);
 	if (ret) {
 		mutex_unlock(&c->sb_lock);
+		up_write(&c->state_lock);
 		goto err;
 	}
 
 	for_each_online_member(c, ca)
 		bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now);
 
-	struct bch_sb_field_ext *ext =
-		bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
 	mutex_unlock(&c->sb_lock);
 
-	if (!ext) {
-		bch_err(c, "insufficient space in superblock for sb_field_ext");
-		ret = -BCH_ERR_ENOSPC_sb;
-		goto err;
-	}
-
 	for_each_rw_member(c, ca)
 		bch2_dev_allocator_add(c, ca);
 	bch2_recalc_capacity(c);
+	up_write(&c->state_lock);
 
 	c->recovery_task = current;
 	ret = BCH_SB_INITIALIZED(c->disk_sb.sb)
@@ -1066,31 +1069,28 @@ int bch2_fs_start(struct bch_fs *c)
 		goto err;
 
 	if (bch2_fs_init_fault("fs_start")) {
-		bch_err(c, "fs_start fault injected");
-		ret = -EINVAL;
+		ret = -BCH_ERR_injected_fs_start;
 		goto err;
 	}
 
 	set_bit(BCH_FS_started, &c->flags);
 	wake_up(&c->ro_ref_wait);
 
+	down_write(&c->state_lock);
 	if (c->opts.read_only) {
 		bch2_fs_read_only(c);
 	} else {
 		ret = !test_bit(BCH_FS_rw, &c->flags)
 			? bch2_fs_read_write(c)
 			: bch2_fs_read_write_late(c);
-		if (ret)
-			goto err;
 	}
+	up_write(&c->state_lock);
 
-	ret = 0;
 err:
 	if (ret)
 		bch_err_msg(c, ret, "starting filesystem");
 	else
 		bch_verbose(c, "done starting filesystem");
-	up_write(&c->state_lock);
 	return ret;
 }
 
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 251ba8224c1f0..74c186d65d1f4 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -631,8 +631,6 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
 	if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)))
 		return -EROFS;
 
-	down_write(&c->state_lock);
-
 	char *tmp = kstrdup(buf, GFP_KERNEL);
 	if (!tmp) {
 		ret = -ENOMEM;
@@ -675,7 +673,6 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
 
 	ret = size;
 err:
-	up_write(&c->state_lock);
 	bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
 	return ret;
 }