From 1c8dfd7ba50dbbb72113caf4fa7868512cdad2f4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 15 Apr 2025 23:35:48 -0400 Subject: [PATCH 01/16] bcachefs: sb_validate() no longer requires members_v1 Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 8ebc5e3f3ea3..872707e5fa95 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -384,7 +384,6 @@ static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out) int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, enum bch_validate_flags flags, struct printbuf *out) { - struct bch_sb_field_members_v1 *mi; enum bch_opt_id opt_id; int ret; @@ -539,14 +538,17 @@ int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, } } + struct bch_sb_field *mi = + bch2_sb_field_get_id(sb, BCH_SB_FIELD_members_v2) ?: + bch2_sb_field_get_id(sb, BCH_SB_FIELD_members_v1); + /* members must be validated first: */ - mi = bch2_sb_field_get(sb, members_v1); if (!mi) { prt_printf(out, "Invalid superblock: member info area missing"); return -BCH_ERR_invalid_sb_members_missing; } - ret = bch2_sb_field_validate(sb, &mi->field, flags, out); + ret = bch2_sb_field_validate(sb, mi, flags, out); if (ret) return ret; -- 2.51.0 From 0dc73809e93aeb905acf9fa88502c73534cfa83d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 16 Apr 2025 06:48:31 -0400 Subject: [PATCH 02/16] bcachefs: Shrink superblock downgrade table Don't generate entries for versions that won't be able to mount. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-downgrade.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index badd0e17ada5..296c6c925386 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -374,6 +374,9 @@ int bch2_sb_downgrade_update(struct bch_fs *c) if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version))) continue; + if (src->version < c->sb.version_incompat) + continue; + struct bch_sb_field_downgrade_entry *dst; unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors; -- 2.51.0 From 576493133f26a172b8db4313448206d30750c9b2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 16 Apr 2025 09:23:15 -0400 Subject: [PATCH 03/16] bcachefs: Print features on startup with -o verbose Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index b34c91dd51b1..60e632e22b98 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1054,6 +1054,11 @@ static void print_mount_opts(struct bch_fs *c) bch2_version_to_text(&p, c->sb.version_incompat_allowed); } + if (c->opts.verbose) { + prt_printf(&p, "\n features: "); + prt_bitflags(&p, bch2_sb_features, c->sb.features); + } + bch_info(c, "%s", p.buf); printbuf_exit(&p); } -- 2.51.0 From 203852d9db68e14b50b119cbd123def7e7c9efd0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 3 Apr 2025 14:19:23 -0400 Subject: [PATCH 04/16] bcachefs: BCH_FEATURE_no_alloc_info If a filesystem is going to only be used read-only, and will be a deployable image, we can strip out alloc info for a substantial reduction in metadata size - around half, due to backpointers. Alloc info will be regenerated on first read-write mount. Remounting RW is disallowed for now, since we don't yet have check_allocations running in RW mode. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 ++- fs/bcachefs/errcode.h | 1 + fs/bcachefs/recovery.c | 28 ++++++++++++++++++++++------ fs/bcachefs/recovery.h | 1 + fs/bcachefs/recovery_passes.c | 14 +++++++++++++- fs/bcachefs/recovery_passes_types.h | 18 ++++++++++-------- fs/bcachefs/sb-members.c | 6 ++++++ fs/bcachefs/super.c | 6 ++++++ 8 files changed, 61 insertions(+), 16 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 22ee49408d11..c0041391e2e8 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -923,7 +923,8 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u x(alloc_v2, 17) \ x(extents_across_btree_nodes, 18) \ x(incompat_version_field, 19) \ - x(casefolding, 20) + x(casefolding, 20) \ + x(no_alloc_info, 21) #define BCH_SB_FEATURES_ALWAYS \ (BIT_ULL(BCH_FEATURE_new_extent_overwrite)| \ diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 051938657cc9..8a4435660d86 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -221,6 +221,7 @@ x(EROFS, erofs_unfixed_errors) \ x(EROFS, erofs_norecovery) \ x(EROFS, erofs_nochanges) \ + x(EROFS, erofs_no_alloc_info) \ x(EROFS, insufficient_devices) \ x(0, operation_blocked) \ x(BCH_ERR_operation_blocked, btree_cache_cannibalize_lock_blocked) \ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 4c336f20d5eb..b5ab77f3c692 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -33,7 +33,6 @@ #include #include - int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) { u64 b = BIT_ULL(btree); @@ -114,11 +113,8 @@ static void kill_btree(struct bch_fs *c, enum btree_id btree) } /* for -o reconstruct_alloc: */ -static void bch2_reconstruct_alloc(struct bch_fs *c) +void bch2_reconstruct_alloc(struct bch_fs *c) { - bch2_journal_log_msg(c, "dropping alloc info"); - bch_info(c, "dropping and reconstructing all alloc info"); - 
mutex_lock(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); @@ -160,6 +156,8 @@ static void bch2_reconstruct_alloc(struct bch_fs *c) c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_no_alloc_info)); + bch2_write_super(c); mutex_unlock(&c->sb_lock); @@ -889,8 +887,26 @@ use_clean: if (ret) goto err; - if (c->opts.reconstruct_alloc) + if (!c->opts.read_only && + (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))) { + bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate"); + bch2_reconstruct_alloc(c); + } else if (c->opts.reconstruct_alloc) { + bch2_journal_log_msg(c, "dropping alloc info"); + bch_info(c, "dropping and reconstructing all alloc info"); + + bch2_reconstruct_alloc(c); + } + + if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) { + /* We can't go RW to fix errors without alloc info */ + if (c->opts.fix_errors == FSCK_FIX_yes || + c->opts.fix_errors == FSCK_FIX_ask) + c->opts.fix_errors = FSCK_FIX_no; + if (c->opts.errors == BCH_ON_ERROR_fix_safe) + c->opts.errors = BCH_ON_ERROR_continue; + } /* * After an unclean shutdown, skip then next few journal sequence diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index b0d55754b21b..d858ba674eaa 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -3,6 +3,7 @@ #define _BCACHEFS_RECOVERY_H int bch2_btree_lost_data(struct bch_fs *, enum btree_id); +void bch2_reconstruct_alloc(struct bch_fs *); int bch2_journal_replay(struct bch_fs *); diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index b4de21f80811..87150dd30f4b 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -47,8 +47,18 @@ static int bch2_set_may_go_rw(struct bch_fs *c) set_bit(BCH_FS_may_go_rw, &c->flags); - if (keys->nr || !c->opts.read_only || c->opts.fsck || !c->sb.clean || 
c->opts.recovery_passes) + if (keys->nr || + !c->opts.read_only || + !c->sb.clean || + c->opts.recovery_passes || + (c->opts.fsck && !(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)))) { + if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) { + bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate"); + bch2_reconstruct_alloc(c); + } + return bch2_fs_read_write_early(c); + } return 0; } @@ -240,6 +250,8 @@ static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pa { struct recovery_pass_fn *p = recovery_pass_fns + pass; + if ((p->when & PASS_ALLOC) && (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))) + return false; if (c->opts.recovery_passes_exclude & BIT_ULL(pass)) return false; if (c->opts.recovery_passes & BIT_ULL(pass)) diff --git a/fs/bcachefs/recovery_passes_types.h b/fs/bcachefs/recovery_passes_types.h index 4671ccf2d560..f9d565bb50dd 100644 --- a/fs/bcachefs/recovery_passes_types.h +++ b/fs/bcachefs/recovery_passes_types.h @@ -7,6 +7,8 @@ #define PASS_UNCLEAN BIT(2) #define PASS_ALWAYS BIT(3) #define PASS_ONLINE BIT(4) +#define PASS_ALLOC BIT(5) +#define PASS_FSCK_ALLOC (PASS_FSCK|PASS_ALLOC) #ifdef CONFIG_BCACHEFS_DEBUG #define PASS_FSCK_DEBUG BIT(1) @@ -27,17 +29,17 @@ x(stripes_read, 1, 0) \ x(initialize_subvolumes, 2, 0) \ x(snapshots_read, 3, PASS_ALWAYS) \ - x(check_allocations, 5, PASS_FSCK) \ - x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \ - x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \ + x(check_allocations, 5, PASS_FSCK_ALLOC) \ + x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT|PASS_ALLOC) \ + x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT|PASS_ALLOC) \ x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \ x(journal_replay, 9, PASS_ALWAYS) \ - x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK) \ - x(check_lrus, 11, PASS_ONLINE|PASS_FSCK) \ - x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK) \ + x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK_ALLOC) \ + x(check_lrus, 11, 
PASS_ONLINE|PASS_FSCK_ALLOC) \ + x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK_DEBUG) \ - x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK) \ - x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK) \ + x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK_ALLOC) \ + x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ x(bucket_gens_init, 17, 0) \ x(reconstruct_snapshots, 38, 0) \ diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 77809ee23c45..39ce94875dde 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -190,6 +190,12 @@ static int validate_member(struct printbuf *err, return -BCH_ERR_invalid_sb_members; } + if (BCH_MEMBER_FREESPACE_INITIALIZED(&m) && + sb->features[0] & cpu_to_le64(BIT_ULL(BCH_FEATURE_no_alloc_info))) { + prt_printf(err, "device %u: freespace initialized but fs has no alloc info", i); + return -BCH_ERR_invalid_sb_members; + } + return 0; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 60e632e22b98..6ab3e63ef139 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -443,6 +443,9 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) BUG_ON(!test_bit(BCH_FS_may_go_rw, &c->flags)); + if (WARN_ON(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))) + return -BCH_ERR_erofs_no_alloc_info; + if (test_bit(BCH_FS_initial_gc_unfixed, &c->flags)) { bch_err(c, "cannot go rw, unfixed btree errors"); return -BCH_ERR_erofs_unfixed_errors; @@ -535,6 +538,9 @@ int bch2_fs_read_write(struct bch_fs *c) if (c->opts.nochanges) return -BCH_ERR_erofs_nochanges; + if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) + return -BCH_ERR_erofs_no_alloc_info; + return __bch2_fs_read_write(c, false); } -- 2.51.0 From 530112d88ebd7405fb61711892b2a680048984c7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 15 Apr 2025 15:15:36 -0400 
Subject: [PATCH 05/16] bcachefs: BCH_FEATURE_small_image We can't go RW if it's an image file that hasn't been resized. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 8 +++++--- fs/bcachefs/bcachefs_format.h | 3 ++- fs/bcachefs/errcode.h | 1 + fs/bcachefs/journal.c | 9 ++++++++- fs/bcachefs/journal_reclaim.c | 26 ++++++++++++++------------ fs/bcachefs/recovery.c | 5 +++++ fs/bcachefs/super.c | 5 +++++ 7 files changed, 40 insertions(+), 17 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 8b8c2344855f..6ac8bd49c629 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -2392,14 +2392,16 @@ bkey_err: int bch2_fs_freespace_init(struct bch_fs *c) { - int ret = 0; - bool doing_init = false; + if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) + return 0; + /* * We can crash during the device add path, so we need to check this on * every mount: */ + bool doing_init = false; for_each_member_device(c, ca) { if (ca->mi.freespace_initialized) continue; @@ -2409,7 +2411,7 @@ int bch2_fs_freespace_init(struct bch_fs *c) doing_init = true; } - ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets); + int ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets); if (ret) { bch2_dev_put(ca); bch_err_fn(c, ret); diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index c0041391e2e8..7ce475c565b5 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -924,7 +924,8 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u x(extents_across_btree_nodes, 18) \ x(incompat_version_field, 19) \ x(casefolding, 20) \ - x(no_alloc_info, 21) + x(no_alloc_info, 21) \ + x(small_image, 22) #define BCH_SB_FEATURES_ALWAYS \ (BIT_ULL(BCH_FEATURE_new_extent_overwrite)| \ diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 8a4435660d86..6a4b3fe9ea99 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h 
@@ -222,6 +222,7 @@ x(EROFS, erofs_norecovery) \ x(EROFS, erofs_nochanges) \ x(EROFS, erofs_no_alloc_info) \ + x(EROFS, erofs_filesystem_full) \ x(EROFS, insufficient_devices) \ x(0, operation_blocked) \ x(BCH_ERR_operation_blocked, btree_cache_cannibalize_lock_blocked) \ diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 5442d526a448..3694b83af8cc 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1295,9 +1295,16 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) { + struct bch_fs *c = ca->fs; + if (!(ca->mi.data_allowed & BIT(BCH_DATA_journal))) return 0; + if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) { + bch_err(c, "cannot allocate journal, filesystem is an unresized image file"); + return -BCH_ERR_erofs_filesystem_full; + } + unsigned nr; int ret; @@ -1318,7 +1325,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) min(1 << 13, (1 << 24) / ca->mi.bucket_size)); - ret = bch2_set_nr_journal_buckets_loop(ca->fs, ca, nr, new_fs); + ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, new_fs); err: bch_err_fn(ca, ret); return ret; diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index cc00b0fc40d8..a02f483a016a 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -215,18 +215,20 @@ void bch2_journal_space_available(struct journal *j) j->can_discard = can_discard; if (nr_online < metadata_replicas_required(c)) { - struct printbuf buf = PRINTBUF; - buf.atomic++; - prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n" - "rw journal devs:", nr_online, metadata_replicas_required(c)); - - rcu_read_lock(); - for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) - prt_printf(&buf, " %s", ca->name); - rcu_read_unlock(); - - bch_err(c, "%s", buf.buf); - printbuf_exit(&buf); + if (!(c->sb.features & BIT_ULL(BCH_FEATURE_small_image))) { + struct 
printbuf buf = PRINTBUF; + buf.atomic++; + prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n" + "rw journal devs:", nr_online, metadata_replicas_required(c)); + + rcu_read_lock(); + for_each_member_device_rcu(c, ca, &c->rw_devs[BCH_DATA_journal]) + prt_printf(&buf, " %s", ca->name); + rcu_read_unlock(); + + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); + } ret = -BCH_ERR_insufficient_journal_devices; goto out; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index b5ab77f3c692..2436f334dde4 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -734,6 +734,11 @@ int bch2_fs_recovery(struct bch_fs *c) c->opts.read_only = true; } + if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) { + bch_info(c, "filesystem is an unresized image file, mounting ro"); + c->opts.read_only = true; + } + mutex_lock(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); bool write_sb = false; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 6ab3e63ef139..7cd075303f95 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -451,6 +451,11 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) return -BCH_ERR_erofs_unfixed_errors; } + if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) { + bch_err(c, "cannot go rw, filesystem is an unresized image file"); + return -BCH_ERR_erofs_filesystem_full; + } + if (test_bit(BCH_FS_rw, &c->flags)) return 0; -- 2.51.0 From 0ca375b1779f22f703f956b22ea2bdbc69c247eb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 15 Apr 2025 14:09:34 -0400 Subject: [PATCH 06/16] bcachefs: BCH_MEMBER_RESIZE_ON_MOUNT Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 18 +++++++--- fs/bcachefs/sb-members.c | 1 + fs/bcachefs/sb-members.h | 1 + fs/bcachefs/sb-members_format.h | 2 ++ fs/bcachefs/sb-members_types.h | 1 + fs/bcachefs/super.c | 64 +++++++++++++++++++++++++++++---- fs/bcachefs/super.h | 2 ++ 7 files changed, 77 
insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 2436f334dde4..2a8bcb9b1dd2 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -734,11 +734,6 @@ int bch2_fs_recovery(struct bch_fs *c) c->opts.read_only = true; } - if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) { - bch_info(c, "filesystem is an unresized image file, mounting ro"); - c->opts.read_only = true; - } - mutex_lock(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); bool write_sb = false; @@ -892,6 +887,17 @@ use_clean: if (ret) goto err; + ret = bch2_fs_resize_on_mount(c); + if (ret) { + up_write(&c->state_lock); + goto err; + } + + if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) { + bch_info(c, "filesystem is an unresized image file, mounting ro"); + c->opts.read_only = true; + } + if (!c->opts.read_only && (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))) { bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate"); @@ -954,6 +960,8 @@ use_clean: set_bit(BCH_FS_btree_running, &c->flags); ret = bch2_sb_set_upgrade_extra(c); + if (ret) + goto err; ret = bch2_run_recovery_passes(c); if (ret) diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 39ce94875dde..462a2c21a9de 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -294,6 +294,7 @@ static void member_to_text(struct printbuf *out, prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(&m)); prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(&m)); + prt_printf(out, "Resize on mount:\t%llu\n", BCH_MEMBER_RESIZE_ON_MOUNT(&m)); printbuf_indent_sub(out, 2); } diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 0f1741fffcb6..424143f5e330 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -353,6 +353,7 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi) ? 
BCH_MEMBER_DURABILITY(mi) - 1 : 1, .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi), + .resize_on_mount = BCH_MEMBER_RESIZE_ON_MOUNT(mi), .valid = bch2_member_alive(mi), .btree_bitmap_shift = mi->btree_bitmap_shift, .btree_allocated_bitmap = le64_to_cpu(mi->btree_allocated_bitmap), diff --git a/fs/bcachefs/sb-members_format.h b/fs/bcachefs/sb-members_format.h index 3affec823b3f..472218a59102 100644 --- a/fs/bcachefs/sb-members_format.h +++ b/fs/bcachefs/sb-members_format.h @@ -88,6 +88,8 @@ LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags, 20, 28) LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags, 28, 30) LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED, struct bch_member, flags, 30, 31) +LE64_BITMASK(BCH_MEMBER_RESIZE_ON_MOUNT, + struct bch_member, flags, 31, 32) #if 0 LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20); diff --git a/fs/bcachefs/sb-members_types.h b/fs/bcachefs/sb-members_types.h index c0eda888fe39..d6443e186872 100644 --- a/fs/bcachefs/sb-members_types.h +++ b/fs/bcachefs/sb-members_types.h @@ -13,6 +13,7 @@ struct bch_member_cpu { u8 data_allowed; u8 durability; u8 freespace_initialized; + u8 resize_on_mount; u8 valid; u8 btree_bitmap_shift; u64 btree_allocated_bitmap; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 7cd075303f95..839b1582c1f1 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1141,6 +1141,9 @@ int bch2_fs_start(struct bch_fs *c) for_each_online_member(c, ca) bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now); + /* + * Don't write superblock yet: recovery might have to downgrade + */ mutex_unlock(&c->sb_lock); for_each_rw_member(c, ca) @@ -2039,6 +2042,18 @@ int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) return 0; } +static int __bch2_dev_resize_alloc(struct bch_dev *ca, u64 old_nbuckets, u64 new_nbuckets) +{ + struct bch_fs *c = ca->fs; + u64 v[3] = { new_nbuckets - old_nbuckets, 0, 0 }; + + return
bch2_trans_commit_do(ca->fs, NULL, NULL, 0, + bch2_disk_accounting_mod2(trans, false, v, dev_data_type, + .dev = ca->dev_idx, + .data_type = BCH_DATA_free)) ?: + bch2_dev_freespace_init(c, ca, old_nbuckets, new_nbuckets); +} + int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) { struct bch_member *m; @@ -2086,13 +2101,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) mutex_unlock(&c->sb_lock); if (ca->mi.freespace_initialized) { - u64 v[3] = { nbuckets - old_nbuckets, 0, 0 }; - - ret = bch2_trans_commit_do(ca->fs, NULL, NULL, 0, - bch2_disk_accounting_mod2(trans, false, v, dev_data_type, - .dev = ca->dev_idx, - .data_type = BCH_DATA_free)) ?: - bch2_dev_freespace_init(c, ca, old_nbuckets, nbuckets); + ret = __bch2_dev_resize_alloc(ca, old_nbuckets, nbuckets); if (ret) goto err; } @@ -2103,6 +2112,47 @@ err: return ret; } +int bch2_fs_resize_on_mount(struct bch_fs *c) +{ + for_each_online_member(c, ca) { + u64 old_nbuckets = ca->mi.nbuckets; + u64 new_nbuckets = div64_u64(get_capacity(ca->disk_sb.bdev->bd_disk), + ca->mi.bucket_size); + + if (ca->mi.resize_on_mount && + new_nbuckets > ca->mi.nbuckets) { + bch_info(ca, "resizing to size %llu", new_nbuckets * ca->mi.bucket_size); + int ret = bch2_dev_buckets_resize(c, ca, new_nbuckets); + bch_err_fn(ca, ret); + if (ret) { + percpu_ref_put(&ca->io_ref[READ]); + up_write(&c->state_lock); + return ret; + } + + mutex_lock(&c->sb_lock); + struct bch_member *m = + bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + m->nbuckets = cpu_to_le64(new_nbuckets); + SET_BCH_MEMBER_RESIZE_ON_MOUNT(m, false); + + c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_small_image)); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + + if (ca->mi.freespace_initialized) { + ret = __bch2_dev_resize_alloc(ca, old_nbuckets, new_nbuckets); + if (ret) { + percpu_ref_put(&ca->io_ref[READ]); + up_write(&c->state_lock); + return ret; + } + } + } + } + return 0; +} + /* return with ref 
on ca->ref: */ struct bch_dev *bch2_dev_lookup(struct bch_fs *c, const char *name) { diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index 23533bce5709..50588ab20be2 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -35,6 +35,8 @@ void bch2_fs_read_only(struct bch_fs *); int bch2_fs_read_write(struct bch_fs *); int bch2_fs_read_write_early(struct bch_fs *); +int bch2_fs_resize_on_mount(struct bch_fs *); + void __bch2_fs_stop(struct bch_fs *); void bch2_fs_free(struct bch_fs *); void bch2_fs_stop(struct bch_fs *); -- 2.51.0 From ecedc87cfaf016e7e857a209e1b2685a28d59566 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 3 Apr 2025 19:33:54 -0400 Subject: [PATCH 07/16] bcachefs: export bch2_move_data_phys() Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 20 ++++++++++---------- fs/bcachefs/move.h | 5 +++++ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index a4678a205da6..29981ebcb972 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -900,16 +900,16 @@ err: return ret; } -static int bch2_move_data_phys(struct bch_fs *c, - unsigned dev, - u64 start, - u64 end, - unsigned data_types, - struct bch_ratelimit *rate, - struct bch_move_stats *stats, - struct write_point_specifier wp, - bool wait_on_copygc, - move_pred_fn pred, void *arg) +int bch2_move_data_phys(struct bch_fs *c, + unsigned dev, + u64 start, + u64 end, + unsigned data_types, + struct bch_ratelimit *rate, + struct bch_move_stats *stats, + struct write_point_specifier wp, + bool wait_on_copygc, + move_pred_fn pred, void *arg) { struct moving_context ctxt; diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 51e0505a8156..1ab6dd4621d6 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -135,6 +135,11 @@ int bch2_move_data(struct bch_fs *, bool, move_pred_fn, void *); +int bch2_move_data_phys(struct bch_fs *, unsigned, u64, u64, unsigned, + struct bch_ratelimit *, struct bch_move_stats *, + struct 
write_point_specifier, bool, + move_pred_fn, void *); + int bch2_evacuate_bucket(struct moving_context *, struct move_bucket_in_flight *, struct bpos, int, -- 2.51.0 From f3c8eaf7a133ef122dfd97e6f6f972265cc84fb0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 3 Apr 2025 19:42:02 -0400 Subject: [PATCH 08/16] bcachefs: Plumb target parameter through btree_node_rewrite_pos() Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 2 +- fs/bcachefs/btree_update_interior.c | 37 +++++++++++++++++++---------- fs/bcachefs/btree_update_interior.h | 4 ++-- fs/bcachefs/move.c | 5 ++-- 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index b6f5e0dfc9f1..c1c671e340c7 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1918,7 +1918,7 @@ static void btree_node_scrub_work(struct work_struct *work) bch_err(c, "error validating btree node during scrub on %s at btree %s", scrub->ca->name, err.buf); - ret = bch2_btree_node_rewrite(trans, &iter, b, 0); + ret = bch2_btree_node_rewrite(trans, &iter, b, 0, 0); } err: bch2_trans_iter_exit(trans, &iter); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 2be7c10fc59c..3155b4360fbc 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -284,6 +284,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct disk_reservation *res, struct closure *cl, bool interior_node, + unsigned target, unsigned flags) { struct bch_fs *c = trans->c; @@ -317,6 +318,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, mutex_unlock(&c->btree_reserve_cache_lock); retry: ret = bch2_alloc_sectors_start_trans(trans, + target ?: c->opts.metadata_target ?: c->opts.foreground_target, 0, @@ -325,7 +327,9 @@ retry: res->nr_replicas, min(res->nr_replicas, c->opts.metadata_replicas_required), - watermark, 0, cl, &wp); + watermark, + target ? 
BCH_WRITE_only_specified_devs : 0, + cl, &wp); if (unlikely(ret)) goto err; @@ -505,6 +509,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans * static int bch2_btree_reserve_get(struct btree_trans *trans, struct btree_update *as, unsigned nr_nodes[2], + unsigned target, unsigned flags, struct closure *cl) { @@ -527,7 +532,7 @@ static int bch2_btree_reserve_get(struct btree_trans *trans, while (p->nr < nr_nodes[interior]) { b = __bch2_btree_node_alloc(trans, &as->disk_res, cl, - interior, flags); + interior, target, flags); if (IS_ERR(b)) { ret = PTR_ERR(b); goto err; @@ -1116,7 +1121,8 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans * static struct btree_update * bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, - unsigned level_start, bool split, unsigned flags) + unsigned level_start, bool split, + unsigned target, unsigned flags) { struct bch_fs *c = trans->c; struct btree_update *as; @@ -1226,7 +1232,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, if (ret) goto err; - ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, NULL); + ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, NULL); if (bch2_err_matches(ret, ENOSPC) || bch2_err_matches(ret, ENOMEM)) { struct closure cl; @@ -1245,7 +1251,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, closure_init_stack(&cl); do { - ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl); + ret = bch2_btree_reserve_get(trans, as, nr_nodes, target, flags, &cl); bch2_trans_unlock(trans); bch2_wait_on_allocator(c, &cl); @@ -1878,7 +1884,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans, as = bch2_btree_update_start(trans, trans->paths + path, trans->paths[path].level, - true, flags); + true, 0, flags); if (IS_ERR(as)) return PTR_ERR(as); @@ -1948,7 +1954,8 @@ int bch2_btree_increase_depth(struct btree_trans *trans, btree_path_idx_t path, 
return bch2_btree_split_leaf(trans, path, flags); struct btree_update *as = - bch2_btree_update_start(trans, trans->paths + path, b->c.level, true, flags); + bch2_btree_update_start(trans, trans->paths + path, b->c.level, + true, 0, flags); if (IS_ERR(as)) return PTR_ERR(as); @@ -2077,7 +2084,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, parent = btree_node_parent(trans->paths + path, b); as = bch2_btree_update_start(trans, trans->paths + path, level, false, - BCH_TRANS_COMMIT_no_enospc|flags); + 0, BCH_TRANS_COMMIT_no_enospc|flags); ret = PTR_ERR_OR_ZERO(as); if (ret) goto err; @@ -2184,6 +2191,7 @@ err: int bch2_btree_node_rewrite(struct btree_trans *trans, struct btree_iter *iter, struct btree *b, + unsigned target, unsigned flags) { struct bch_fs *c = trans->c; @@ -2196,7 +2204,8 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, struct btree_path *path = btree_iter_path(trans, iter); parent = btree_node_parent(path, b); - as = bch2_btree_update_start(trans, path, b->c.level, false, flags); + as = bch2_btree_update_start(trans, path, b->c.level, + false, target, flags); ret = PTR_ERR_OR_ZERO(as); if (ret) goto out; @@ -2261,7 +2270,7 @@ static int bch2_btree_node_rewrite_key(struct btree_trans *trans, bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(k); ret = found - ? bch2_btree_node_rewrite(trans, &iter, b, flags) + ? 
bch2_btree_node_rewrite(trans, &iter, b, 0, flags) : -ENOENT; out: bch2_trans_iter_exit(trans, &iter); @@ -2270,7 +2279,9 @@ out: int bch2_btree_node_rewrite_pos(struct btree_trans *trans, enum btree_id btree, unsigned level, - struct bpos pos, unsigned flags) + struct bpos pos, + unsigned target, + unsigned flags) { BUG_ON(!level); @@ -2282,7 +2293,7 @@ int bch2_btree_node_rewrite_pos(struct btree_trans *trans, if (ret) goto err; - ret = bch2_btree_node_rewrite(trans, &iter, b, flags); + ret = bch2_btree_node_rewrite(trans, &iter, b, target, flags); err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -2296,7 +2307,7 @@ int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *trans, if (ret) return ret == -BCH_ERR_btree_node_dying ? 0 : ret; - ret = bch2_btree_node_rewrite(trans, &iter, b, flags); + ret = bch2_btree_node_rewrite(trans, &iter, b, 0, flags); bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index be71cd73b864..ff9b95aac554 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -168,10 +168,10 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, } int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *, - struct btree *, unsigned); + struct btree *, unsigned, unsigned); int bch2_btree_node_rewrite_pos(struct btree_trans *, enum btree_id, unsigned, - struct bpos, unsigned); + struct bpos, unsigned, unsigned); int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *, struct btree *, unsigned); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 29981ebcb972..d40e2d14ec52 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -872,7 +872,8 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, if (!bp.v->level) ret = bch2_move_extent(ctxt, bucket_in_flight, &iter, k, io_opts, data_opts); else if (!data_opts.scrub) - ret = bch2_btree_node_rewrite_pos(trans, 
bp.v->btree_id, bp.v->level, k.k->p, 0); + ret = bch2_btree_node_rewrite_pos(trans, bp.v->btree_id, bp.v->level, + k.k->p, data_opts.target, 0); else ret = bch2_btree_node_scrub(trans, bp.v->btree_id, bp.v->level, k, data_opts.read_dev); @@ -1022,7 +1023,7 @@ retry: if (!pred(c, arg, b, &io_opts, &data_opts)) goto next; - ret = bch2_btree_node_rewrite(trans, &iter, b, 0) ?: ret; + ret = bch2_btree_node_rewrite(trans, &iter, b, 0, 0) ?: ret; if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) -- 2.51.0 From 7a274285d3706608d788efcbd9982f08531dd9ec Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 3 Apr 2025 19:51:05 -0400 Subject: [PATCH 09/16] bcachefs: plumb btree_id through move_pred_fd Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 20 +++++++++++--------- fs/bcachefs/move.h | 2 +- fs/bcachefs/rebalance.c | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index d40e2d14ec52..07cea68b04f0 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -667,7 +667,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, continue; memset(&data_opts, 0, sizeof(data_opts)); - if (!pred(c, arg, k, io_opts, &data_opts)) + if (!pred(c, arg, extent_iter->btree_id, k, io_opts, &data_opts)) goto next; /* @@ -851,7 +851,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, } struct data_update_opts data_opts = {}; - if (!pred(c, arg, k, &io_opts, &data_opts)) { + if (!pred(c, arg, bp.v->btree_id, k, &io_opts, &data_opts)) { bch2_trans_iter_exit(trans, &iter); goto next; } @@ -934,7 +934,8 @@ struct evacuate_bucket_arg { struct data_update_opts data_opts; }; -static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, struct bkey_s_c k, +static bool evacuate_bucket_pred(struct bch_fs *c, void *_arg, + enum btree_id btree, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { @@ -1048,7 +1049,7 @@ next: } static bool 
rereplicate_pred(struct bch_fs *c, void *arg, - struct bkey_s_c k, + enum btree_id btree, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { @@ -1080,7 +1081,7 @@ static bool rereplicate_pred(struct bch_fs *c, void *arg, } static bool migrate_pred(struct bch_fs *c, void *arg, - struct bkey_s_c k, + enum btree_id btree, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { @@ -1107,7 +1108,7 @@ static bool rereplicate_btree_pred(struct bch_fs *c, void *arg, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { - return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); + return rereplicate_pred(c, arg, b->c.btree_id, bkey_i_to_s_c(&b->key), io_opts, data_opts); } /* @@ -1163,7 +1164,7 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) } static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg, - struct bkey_s_c k, + enum btree_id btree, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { @@ -1196,11 +1197,12 @@ static bool drop_extra_replicas_btree_pred(struct bch_fs *c, void *arg, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { - return drop_extra_replicas_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); + return drop_extra_replicas_pred(c, arg, b->c.btree_id, bkey_i_to_s_c(&b->key), + io_opts, data_opts); } static bool scrub_pred(struct bch_fs *c, void *_arg, - struct bkey_s_c k, + enum btree_id btree, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 1ab6dd4621d6..9c6c229e583e 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -72,7 +72,7 @@ do { \ break; \ } while (1) -typedef bool (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c, +typedef bool (*move_pred_fn)(struct bch_fs *, void *, enum btree_id, struct bkey_s_c, struct bch_io_opts *, struct 
data_update_opts *); extern const char * const bch2_data_ops_strs[]; diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 3c45500c1a28..d2a7001cf872 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -454,7 +454,7 @@ out: } static bool rebalance_pred(struct bch_fs *c, void *arg, - struct bkey_s_c k, + enum btree_id btree, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { -- 2.51.0 From 3484840ece849ee700c7cf8e0d44d5536b29fa08 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 13 Apr 2025 16:31:34 -0400 Subject: [PATCH 10/16] bcachefs: bch2_move_data_btree() can move btree nodes Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 37 ++++++++++++++++++++++++------------- fs/bcachefs/move.h | 2 ++ 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 07cea68b04f0..a8ad8d4538e0 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -423,6 +423,9 @@ static struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, struct bch_io_opts *opts_ret = &io_opts->fs_io_opts; int ret = 0; + if (extent_iter->min_depth) + return opts_ret; + if (extent_k.k->type == KEY_TYPE_reflink_v) goto out; @@ -573,11 +576,11 @@ static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans * return k; } -static int bch2_move_data_btree(struct moving_context *ctxt, - struct bpos start, - struct bpos end, - move_pred_fn pred, void *arg, - enum btree_id btree_id) +int bch2_move_data_btree(struct moving_context *ctxt, + struct bpos start, + struct bpos end, + move_pred_fn pred, void *arg, + enum btree_id btree_id, unsigned level) { struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; @@ -604,10 +607,10 @@ static int bch2_move_data_btree(struct moving_context *ctxt, } bch2_trans_begin(trans); - bch2_trans_iter_init(trans, &iter, btree_id, start, - BTREE_ITER_prefetch| - BTREE_ITER_not_extents| - 
BTREE_ITER_all_snapshots); + bch2_trans_node_iter_init(trans, &iter, btree_id, start, 0, level, + BTREE_ITER_prefetch| + BTREE_ITER_not_extents| + BTREE_ITER_all_snapshots); if (ctxt->rate) bch2_ratelimit_reset(ctxt->rate); @@ -627,7 +630,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt, if (ret) break; - if (bkey_ge(bkey_start_pos(k.k), end)) + if (bkey_gt(bkey_start_pos(k.k), end)) break; if (ctxt->stats) @@ -677,7 +680,14 @@ static int bch2_move_data_btree(struct moving_context *ctxt, bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts); + if (!level) + ret2 = bch2_move_extent(ctxt, NULL, extent_iter, k, *io_opts, data_opts); + else if (!data_opts.scrub) + ret2 = bch2_btree_node_rewrite_pos(trans, btree_id, level, + k.k->p, data_opts.target, 0); + else + ret2 = bch2_btree_node_scrub(trans, btree_id, level, k, data_opts.read_dev); + if (ret2) { if (bch2_err_matches(ret2, BCH_ERR_transaction_restart)) continue; @@ -695,7 +705,8 @@ next: if (ctxt->stats) atomic64_add(k.k->size, &ctxt->stats->sectors_seen); next_nondata: - bch2_btree_iter_advance(trans, &iter); + if (!bch2_btree_iter_advance(trans, &iter)) + break; } bch2_trans_iter_exit(trans, &reflink_iter); @@ -727,7 +738,7 @@ int __bch2_move_data(struct moving_context *ctxt, ret = bch2_move_data_btree(ctxt, id == start.btree ? start.pos : POS_MIN, id == end.btree ? 
end.pos : POS_MAX, - pred, arg, id); + pred, arg, id, 0); if (ret) break; } diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 9c6c229e583e..0c620a5f728d 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -122,6 +122,8 @@ int bch2_move_extent(struct moving_context *, struct bch_io_opts, struct data_update_opts); +int bch2_move_data_btree(struct moving_context *, struct bpos, struct bpos, + move_pred_fn, void *, enum btree_id, unsigned); int __bch2_move_data(struct moving_context *, struct bbpos, struct bbpos, -- 2.51.0 From fe27298b92001d51797ddc26ca0d7c3d4a0f04d4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 15 Apr 2025 21:35:28 -0400 Subject: [PATCH 11/16] bcachefs: bch2_move_data_btree() can now walk roots Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 47 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index a8ad8d4538e0..ff56d8886c32 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -606,7 +606,52 @@ int bch2_move_data_btree(struct moving_context *ctxt, ctxt->stats->pos = BBPOS(btree_id, start); } +retry_root: bch2_trans_begin(trans); + + if (level == bch2_btree_id_root(c, btree_id)->level + 1) { + bch2_trans_node_iter_init(trans, &iter, btree_id, start, 0, level - 1, + BTREE_ITER_prefetch| + BTREE_ITER_not_extents| + BTREE_ITER_all_snapshots); + struct btree *b = bch2_btree_iter_peek_node(trans, &iter); + ret = PTR_ERR_OR_ZERO(b); + if (ret) + goto root_err; + + if (b != btree_node_root(c, b)) { + bch2_trans_iter_exit(trans, &iter); + goto retry_root; + } + + k = bkey_i_to_s_c(&b->key); + + io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, + iter.pos, &iter, k); + ret = PTR_ERR_OR_ZERO(io_opts); + if (ret) + goto root_err; + + memset(&data_opts, 0, sizeof(data_opts)); + if (!pred(c, arg, iter.btree_id, k, io_opts, &data_opts)) + goto out; + + + if (!data_opts.scrub) + ret = 
bch2_btree_node_rewrite_pos(trans, btree_id, level, + k.k->p, data_opts.target, 0); + else + ret = bch2_btree_node_scrub(trans, btree_id, level, k, data_opts.read_dev); + +root_err: + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + bch2_trans_iter_exit(trans, &iter); + goto retry_root; + } + + goto out; + } + bch2_trans_node_iter_init(trans, &iter, btree_id, start, 0, level, BTREE_ITER_prefetch| BTREE_ITER_not_extents| @@ -708,7 +753,7 @@ next_nondata: if (!bch2_btree_iter_advance(trans, &iter)) break; } - +out: bch2_trans_iter_exit(trans, &reflink_iter); bch2_trans_iter_exit(trans, &iter); bch2_bkey_buf_exit(&sk, c); -- 2.51.0 From 9e260e4590e044dc5887f9eb21dfaf479226e7d4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 20 Apr 2024 17:40:47 -0400 Subject: [PATCH 12/16] docs: bcachefs: idle work scheduling design doc People have been asking to see the plan for this, so - bcachefs has various background tasks that need to be scheduled to balance efficiency, predictability of performance, etc. The design and philosophy hasn't changed too much since bcache, which was primarily designed for server usage, with sustained load in mind. These days we're seeing more desktop usage - where we really want to let the system idle effectively, to reduce total power usage - while also still balancing previous concerns, we still want to let work accumulate to a degree. This lays out all the requirements and starts to sketch out the algorithm I have in mind. 
Signed-off-by: Kent Overstreet --- .../filesystems/bcachefs/future/idle_work.rst | 78 +++++++++++++++++++ Documentation/filesystems/bcachefs/index.rst | 7 ++ 2 files changed, 85 insertions(+) create mode 100644 Documentation/filesystems/bcachefs/future/idle_work.rst diff --git a/Documentation/filesystems/bcachefs/future/idle_work.rst b/Documentation/filesystems/bcachefs/future/idle_work.rst new file mode 100644 index 000000000000..59a332509dcd --- /dev/null +++ b/Documentation/filesystems/bcachefs/future/idle_work.rst @@ -0,0 +1,78 @@ +Idle/background work classes design doc: + +Right now, our behaviour at idle isn't ideal, it was designed for servers that +would be under sustained load, to keep pending work at a "medium" level, to +let work build up so we can process it in more efficient batches, while also +giving headroom for bursts in load. + +But for desktops or mobile - scenarios where work is less sustained and power +usage is more important - we want to operate differently, with a "rush to +idle" so the system can go to sleep. We don't want to be dribbling out +background work while the system should be idle. + +The complicating factor is that there are a number of background tasks, which +form a hierarchy (or a digraph, depending on how you divide it up) - one +background task may generate work for another. + +Thus proper idle detection needs to model this hierarchy. + +- Foreground writes +- Page cache writeback +- Copygc, rebalance +- Journal reclaim + +When we implement idle detection and rush to idle, we need to be careful not +to disturb too much the existing behaviour that works reasonably well when the +system is under sustained load (or perhaps improve it in the case of +rebalance, which currently does not actively attempt to let work batch up). 
+ +SUSTAINED LOAD REGIME +--------------------- + +When the system is under continuous load, we want these jobs to run +continuously - this is perhaps best modelled with a P/D controller, where +they'll be trying to keep a target value (i.e. fragmented disk space, +available journal space) roughly in the middle of some range. + +The goal under sustained load is to balance our ability to handle load spikes +without running out of x resource (free disk space, free space in the +journal), while also letting some work accumulate to be batched (or become +unnecessary). + +For example, we don't want to run copygc too aggressively, because then it +will be evacuating buckets that would have become empty (been overwritten or +deleted) anyways, and we don't want to wait until we're almost out of free +space because then the system will behave unpredictably - suddenly we're doing +a lot more work to service each write and the system becomes much slower. + +IDLE REGIME +----------- + +When the system becomes idle, we should start flushing our pending work +quicker so the system can go to sleep. + +Note that the definition of "idle" depends on where in the hierarchy a task +is - a task should start flushing work more quickly when the task above it has +stopped generating new work. + +e.g. rebalance should start flushing more quickly when page cache writeback is +idle, and journal reclaim should only start flushing more quickly when both +copygc and rebalance are idle. + +It's important to let work accumulate when more work is still incoming and we +still have room, because flushing is always more efficient if we let it batch +up. New writes may overwrite data before rebalance moves it, and tasks may be +generating more updates for the btree nodes that journal reclaim needs to flush. + +On idle, how much work we do at each interval should be proportional to the +length of time we have been idle for. 
If we're idle only for a short duration, +we shouldn't flush everything right away; the system might wake up and start +generating new work soon, and flushing immediately might end up doing a lot of +work that would have been unnecessary if we'd allowed things to batch more. + +To summarize, we will need: + + - A list of classes for background tasks that generate work, which will + include one "foreground" class. + - Tracking for each class - "Am I doing work, or have I gone to sleep?" + - And each class should check the class above it when deciding how much work to issue. diff --git a/Documentation/filesystems/bcachefs/index.rst b/Documentation/filesystems/bcachefs/index.rst index 3864d0ae89c1..e5c4c2120b93 100644 --- a/Documentation/filesystems/bcachefs/index.rst +++ b/Documentation/filesystems/bcachefs/index.rst @@ -29,3 +29,10 @@ At this moment, only a few of these are described here. casefolding errorcodes + +Future design +------------- +.. toctree:: + :maxdepth: 1 + + future/idle_work -- 2.51.0 From 62095464e9d2a2340a6b08a90fb280ea2b091a28 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 17 Apr 2025 12:42:13 -0400 Subject: [PATCH 13/16] bcachefs: Fix struct with flex member ABI warning This pops up when building in userspace. The structs aren't actually variable length, but no way to tell the compiler that... 
Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 24 ++++++++++++------------ fs/bcachefs/disk_accounting.h | 8 ++++---- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 1f0422bfae35..e399237e124a 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -631,17 +631,17 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) } static int bch2_disk_accounting_validate_late(struct btree_trans *trans, - struct disk_accounting_pos acc, + struct disk_accounting_pos *acc, u64 *v, unsigned nr) { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; int ret = 0, invalid_dev = -1; - switch (acc.type) { + switch (acc->type) { case BCH_DISK_ACCOUNTING_replicas: { struct bch_replicas_padded r; - __accounting_to_replicas(&r.e, &acc); + __accounting_to_replicas(&r.e, acc); for (unsigned i = 0; i < r.e.nr_devs; i++) if (r.e.devs[i] != BCH_SB_MEMBER_INVALID && @@ -660,7 +660,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, trans, accounting_replicas_not_marked, "accounting not marked in superblock replicas\n%s", (printbuf_reset(&buf), - bch2_accounting_key_to_text(&buf, &acc), + bch2_accounting_key_to_text(&buf, acc), buf.buf))) { /* * We're not RW yet and still single threaded, dropping @@ -676,8 +676,8 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, } case BCH_DISK_ACCOUNTING_dev_data_type: - if (!bch2_dev_exists(c, acc.dev_data_type.dev)) { - invalid_dev = acc.dev_data_type.dev; + if (!bch2_dev_exists(c, acc->dev_data_type.dev)) { + invalid_dev = acc->dev_data_type.dev; goto invalid_device; } break; @@ -691,13 +691,13 @@ invalid_device: "accounting entry points to invalid device %i\n%s", invalid_dev, (printbuf_reset(&buf), - bch2_accounting_key_to_text(&buf, &acc), + bch2_accounting_key_to_text(&buf, acc), buf.buf))) { for (unsigned i = 0; i < nr; i++) v[i] = -v[i]; ret 
= commit_do(trans, NULL, NULL, 0, - bch2_disk_accounting_mod(trans, &acc, v, nr, false)) ?: + bch2_disk_accounting_mod(trans, acc, v, nr, false)) ?: -BCH_ERR_remove_disk_accounting_entry; } else { ret = -BCH_ERR_remove_disk_accounting_entry; @@ -748,7 +748,7 @@ int bch2_accounting_read(struct bch_fs *c) if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) break; - if (!bch2_accounting_is_mem(acc_k)) { + if (!bch2_accounting_is_mem(&acc_k)) { struct disk_accounting_pos next; memset(&next, 0, sizeof(next)); next.type = acc_k.type + 1; @@ -770,7 +770,7 @@ int bch2_accounting_read(struct bch_fs *c) struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, i->k->k.p); - if (!bch2_accounting_is_mem(acc_k)) + if (!bch2_accounting_is_mem(&acc_k)) continue; struct bkey_s_c k = bkey_i_to_s_c(i->k); @@ -826,7 +826,7 @@ int bch2_accounting_read(struct bch_fs *c) */ ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters) ? -BCH_ERR_remove_disk_accounting_entry - : bch2_disk_accounting_validate_late(trans, acc_k, v, i->nr_counters); + : bch2_disk_accounting_validate_late(trans, &acc_k, v, i->nr_counters); if (ret == -BCH_ERR_remove_disk_accounting_entry) { free_percpu(i->v[0]); @@ -939,7 +939,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c) if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) break; - if (!bch2_accounting_is_mem(acc_k)) { + if (!bch2_accounting_is_mem(&acc_k)) { struct disk_accounting_pos next; memset(&next, 0, sizeof(next)); next.type = acc_k.type + 1; diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index d557b99b3c0a..54cb8a5b117d 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -139,10 +139,10 @@ int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, enum int bch2_accounting_mem_insert_locked(struct bch_fs *, struct bkey_s_c_accounting, enum bch_accounting_mode); void bch2_accounting_mem_gc(struct bch_fs *); -static inline bool bch2_accounting_is_mem(struct 
disk_accounting_pos acc) +static inline bool bch2_accounting_is_mem(struct disk_accounting_pos *acc) { - return acc.type < BCH_DISK_ACCOUNTING_TYPE_NR && - acc.type != BCH_DISK_ACCOUNTING_inum; + return acc->type < BCH_DISK_ACCOUNTING_TYPE_NR && + acc->type != BCH_DISK_ACCOUNTING_inum; } /* @@ -163,7 +163,7 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, if (gc && !acc->gc_running) return 0; - if (!bch2_accounting_is_mem(acc_k)) + if (!bch2_accounting_is_mem(&acc_k)) return 0; if (mode == BCH_ACCOUNTING_normal) { -- 2.51.0 From 09279bba72f809eeb1f02d39a462e8e1d06fa32a Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Fri, 18 Apr 2025 15:52:10 +0800 Subject: [PATCH 14/16] bcachefs: Kill dead code Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_reclaim.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index a02f483a016a..fd7a140c9fd6 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -637,8 +637,6 @@ static u64 journal_seq_to_flush(struct journal *j) /* Try to keep the journal at most half full: */ nr_buckets = ja->nr / 2; - nr_buckets = min(nr_buckets, ja->nr); - bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr; seq_to_flush = max(seq_to_flush, ja->bucket_seq[bucket_to_flush]); -- 2.51.0 From 834f9475aabd84f60760ac8ceffc45eedff4a176 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 14 Mar 2025 09:46:25 -0400 Subject: [PATCH 15/16] bcachefs: bch2_check_rebalance_work() Add a pass for checking the rebalance_work btree. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/rebalance.c | 116 ++++++++++++++++++++++++++++ fs/bcachefs/rebalance.h | 2 + fs/bcachefs/recovery_passes_types.h | 1 + 3 files changed, 119 insertions(+) diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index d2a7001cf872..26c87ab019e8 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -712,3 +712,119 @@ void bch2_fs_rebalance_init(struct bch_fs *c) { bch2_pd_controller_init(&c->rebalance.pd); } + +static int check_rebalance_work_one(struct btree_trans *trans, + struct btree_iter *extent_iter, + struct btree_iter *rebalance_iter, + struct bkey_buf *last_flushed) +{ + struct bch_fs *c = trans->c; + struct bkey_s_c extent_k, rebalance_k; + struct printbuf buf = PRINTBUF; + + int ret = bkey_err(extent_k = bch2_btree_iter_peek(trans, extent_iter)) ?: + bkey_err(rebalance_k = bch2_btree_iter_peek(trans, rebalance_iter)); + if (ret) + return ret; + + if (!extent_k.k && + extent_iter->btree_id == BTREE_ID_reflink && + (!rebalance_k.k || + rebalance_k.k->p.inode >= BCACHEFS_ROOT_INO)) { + bch2_trans_iter_exit(trans, extent_iter); + bch2_trans_iter_init(trans, extent_iter, + BTREE_ID_extents, POS_MIN, + BTREE_ITER_prefetch| + BTREE_ITER_all_snapshots); + return -BCH_ERR_transaction_restart_nested; + } + + if (!extent_k.k && !rebalance_k.k) + return 1; + + int cmp = bpos_cmp(extent_k.k ? extent_k.k->p : SPOS_MAX, + rebalance_k.k ? 
rebalance_k.k->p : SPOS_MAX); + + struct bkey deleted; + bkey_init(&deleted); + + if (cmp < 0) { + deleted.p = extent_k.k->p; + rebalance_k.k = &deleted; + } else if (cmp > 0) { + deleted.p = rebalance_k.k->p; + extent_k.k = &deleted; + } + + bool should_have_rebalance = + bch2_bkey_sectors_need_rebalance(c, extent_k) != 0; + bool have_rebalance = rebalance_k.k->type == KEY_TYPE_set; + + if (should_have_rebalance != have_rebalance) { + ret = bch2_btree_write_buffer_maybe_flush(trans, extent_k, last_flushed); + if (ret) + return ret; + + bch2_bkey_val_to_text(&buf, c, extent_k); + } + + if (fsck_err_on(!should_have_rebalance && have_rebalance, + trans, rebalance_work_incorrectly_set, + "rebalance work incorrectly set\n%s", buf.buf)) { + ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, + extent_k.k->p, false); + if (ret) + goto err; + } + + if (fsck_err_on(should_have_rebalance && !have_rebalance, + trans, rebalance_work_incorrectly_unset, + "rebalance work incorrectly unset\n%s", buf.buf)) { + ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, + extent_k.k->p, true); + if (ret) + goto err; + } + + if (cmp <= 0) + bch2_btree_iter_advance(trans, extent_iter); + if (cmp >= 0) + bch2_btree_iter_advance(trans, rebalance_iter); +err: +fsck_err: + printbuf_exit(&buf); + return ret; +} + +int bch2_check_rebalance_work(struct bch_fs *c) +{ + struct btree_trans *trans = bch2_trans_get(c); + struct btree_iter rebalance_iter, extent_iter; + int ret = 0; + + bch2_trans_iter_init(trans, &extent_iter, + BTREE_ID_reflink, POS_MIN, + BTREE_ITER_prefetch); + bch2_trans_iter_init(trans, &rebalance_iter, + BTREE_ID_rebalance_work, POS_MIN, + BTREE_ITER_prefetch); + + struct bkey_buf last_flushed; + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + + while (!ret) { + bch2_trans_begin(trans); + + ret = check_rebalance_work_one(trans, &extent_iter, &rebalance_iter, &last_flushed); + + if (bch2_err_matches(ret, 
BCH_ERR_transaction_restart)) + ret = 0; + } + + bch2_bkey_buf_exit(&last_flushed, c); + bch2_trans_iter_exit(trans, &extent_iter); + bch2_trans_iter_exit(trans, &rebalance_iter); + bch2_trans_put(trans); + return ret < 0 ? ret : 0; +} diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index e5e8eb4a2dd1..b7c8c0652ad6 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -54,4 +54,6 @@ void bch2_rebalance_stop(struct bch_fs *); int bch2_rebalance_start(struct bch_fs *); void bch2_fs_rebalance_init(struct bch_fs *); +int bch2_check_rebalance_work(struct bch_fs *); + #endif /* _BCACHEFS_REBALANCE_H */ diff --git a/fs/bcachefs/recovery_passes_types.h b/fs/bcachefs/recovery_passes_types.h index f9d565bb50dd..be3185fc6ef4 100644 --- a/fs/bcachefs/recovery_passes_types.h +++ b/fs/bcachefs/recovery_passes_types.h @@ -59,6 +59,7 @@ x(check_subvolume_structure, 36, PASS_ONLINE|PASS_FSCK) \ x(check_directory_structure, 30, PASS_ONLINE|PASS_FSCK) \ x(check_nlinks, 31, PASS_FSCK) \ + x(check_rebalance_work, 43, PASS_ONLINE|PASS_FSCK) \ x(resume_logged_ops, 23, PASS_ALWAYS) \ x(delete_dead_inodes, 32, PASS_ALWAYS) \ x(fix_reflink_p, 33, 0) \ -- 2.51.0 From c53be0ffaa501d25f58ac2e56b7e5710f3408a50 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 19 Apr 2025 00:57:55 -0400 Subject: [PATCH 16/16] bcachefs: bch2_target_to_text() no longer depends on io_ref Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_groups.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index 2ca3cbf12b71..4e2f237338c2 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -554,14 +554,12 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) ? 
rcu_dereference(c->devs[t.dev]) : NULL; - if (ca && percpu_ref_tryget(&ca->io_ref[READ])) { + if (ca && ca->disk_sb.bdev) prt_printf(out, "/dev/%s", ca->name); - percpu_ref_put(&ca->io_ref[READ]); - } else if (ca) { + else if (ca) prt_printf(out, "offline device %u", t.dev); - } else { + else prt_printf(out, "invalid device %u", t.dev); - } rcu_read_unlock(); out->atomic--; -- 2.51.0