From d65d126c0256cf2349e118a3e8627d8281295eee Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 21 Sep 2024 23:27:59 -0400 Subject: [PATCH 01/16] bcachefs: Add locking for bch_fs.curr_recovery_pass Recovery can rewind in certain situations - when we discover we need to run a pass that doesn't normally run. This can happen from another thread for btree node read errors, so we need a bit of locking. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/recovery_passes.c | 76 ++++++++++++++++++++++++++--------- fs/bcachefs/super.c | 1 + 3 files changed, 59 insertions(+), 19 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 7db81e182c3c..fbd89f91625d 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -1060,6 +1060,7 @@ struct bch_fs { u64 recovery_passes_complete; /* never rewinds version of curr_recovery_pass */ enum bch_recovery_pass recovery_pass_done; + spinlock_t recovery_pass_lock; struct semaphore online_fsck_mutex; /* DEBUG JUNK */ diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 1cc010bf1695..5e7722cc0879 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -100,8 +100,8 @@ u64 bch2_recovery_passes_from_stable(u64 v) /* * For when we need to rewind recovery passes and run a pass we skipped: */ -int bch2_run_explicit_recovery_pass(struct bch_fs *c, - enum bch_recovery_pass pass) +static int __bch2_run_explicit_recovery_pass(struct bch_fs *c, + enum bch_recovery_pass pass) { if (c->opts.recovery_passes & BIT_ULL(pass)) return 0; @@ -109,6 +109,13 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns)) return -BCH_ERR_not_in_recovery; + if (pass < BCH_RECOVERY_PASS_set_may_go_rw && + c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) { + bch_info(c, "need recovery pass %s (%u), but already rw", + bch2_recovery_passes[pass], pass); + return -BCH_ERR_cannot_rewind_recovery; + } + bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", bch2_recovery_passes[pass], pass, bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); @@ -124,6 +131,16 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, } } +int bch2_run_explicit_recovery_pass(struct bch_fs *c, + enum bch_recovery_pass pass) +{ + unsigned long flags; + spin_lock_irqsave(&c->recovery_pass_lock, flags); + int ret = __bch2_run_explicit_recovery_pass(c, pass); + spin_unlock_irqrestore(&c->recovery_pass_lock, flags); + return ret; +} + int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, enum bch_recovery_pass pass) { @@ -237,30 +254,51 @@ int bch2_run_recovery_passes(struct bch_fs *c) c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw; while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { + spin_lock_irq(&c->recovery_pass_lock); + unsigned pass = c->curr_recovery_pass; + if (c->opts.recovery_pass_last && - c->curr_recovery_pass > c->opts.recovery_pass_last) + c->curr_recovery_pass > c->opts.recovery_pass_last) { + spin_unlock_irq(&c->recovery_pass_lock); break; + } - if (should_run_recovery_pass(c, c->curr_recovery_pass)) { - unsigned pass = c->curr_recovery_pass; - - ret = bch2_run_recovery_pass(c, c->curr_recovery_pass) ?: - bch2_journal_flush(&c->journal); - if (bch2_err_matches(ret, BCH_ERR_restart_recovery) || - (ret && c->curr_recovery_pass < pass)) - continue; - if (ret) - break; - - c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass); + if 
(!should_run_recovery_pass(c, pass)) { + c->curr_recovery_pass++; + c->recovery_pass_done = max(c->recovery_pass_done, pass); + spin_unlock_irq(&c->recovery_pass_lock); + continue; + } + spin_unlock_irq(&c->recovery_pass_lock); + + ret = bch2_run_recovery_pass(c, pass) ?: + bch2_journal_flush(&c->journal); + + spin_lock_irq(&c->recovery_pass_lock); + if (c->curr_recovery_pass < pass) { + /* + * bch2_run_explicit_recovery_pass() was called: we + * can't always catch -BCH_ERR_restart_recovery because + * it may have been called from another thread (btree + * node read completion) + */ + spin_unlock_irq(&c->recovery_pass_lock); + continue; + } else if (c->curr_recovery_pass == pass) { + c->curr_recovery_pass++; + } else { + BUG(); } spin_unlock_irq(&c->recovery_pass_lock); - c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass); + if (ret) + break; - if (!test_bit(BCH_FS_error, &c->flags)) - bch2_clear_recovery_pass_required(c, c->curr_recovery_pass); + c->recovery_passes_complete |= BIT_ULL(pass); + c->recovery_pass_done = max(c->recovery_pass_done, pass); - c->curr_recovery_pass++; + if (!test_bit(BCH_FS_error, &c->flags)) + bch2_clear_recovery_pass_required(c, pass); } return ret; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 17442df7326d..d6411324cd3f 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -766,6 +766,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) refcount_set(&c->ro_ref, 1); init_waitqueue_head(&c->ro_ref_wait); + spin_lock_init(&c->recovery_pass_lock); sema_init(&c->online_fsck_mutex, 1); init_rwsem(&c->gc_lock); -- 2.51.0 From e3c43dbe8e5ff64e77b6f927b32f489bccc7d75e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 21 Sep 2024 23:40:01 -0400 Subject: [PATCH 02/16] bcachefs: bch2_btree_lost_data() now uses run_explicit_recovery_pass_persistent() Also get a bit more fine-grained about which passes to run for which btrees.
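In outline, the new flow batches everything under a single sb_lock critical section, so the lost-data flag and the newly required passes reach the superblock in one write (a condensed sketch of the code below, not a drop-in):

	mutex_lock(&c->sb_lock);
	bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(BIT_ULL(btree));
	/* the _locked variant records the pass in the superblock, then schedules it: */
	ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info);
	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);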
Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 63 +++++++++++++++++++++++------------ fs/bcachefs/recovery.h | 2 +- fs/bcachefs/recovery_passes.c | 11 ++++++ fs/bcachefs/recovery_passes.h | 1 + 4 files changed, 54 insertions(+), 23 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 3c7f941dde39..b1c83e72c0d8 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -34,21 +34,52 @@ #define QSTR(n) { { { .len = strlen(n) } }, .name = n } -void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) +int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) { - if (btree >= BTREE_ID_NR_MAX) - return; - u64 b = BIT_ULL(btree); + int ret = 0; + + mutex_lock(&c->sb_lock); if (!(c->sb.btrees_lost_data & b)) { bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree)); - - mutex_lock(&c->sb_lock); bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); } + + switch (btree) { + case BTREE_ID_alloc: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_backpointers: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; + goto out; + case BTREE_ID_need_discard: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_freespace: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_bucket_gens: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_lru: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_accounting: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + goto out; + default: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret; + goto out; + } +out: + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + + return ret; } /* for -o reconstruct_alloc: */ @@ -524,22 +555,10 @@ static int read_btree_roots(struct bch_fs *c) c, btree_root_read_error, "error reading btree root %s l=%u: %s", bch2_btree_id_str(i), r->level, bch2_err_str(ret))) { - if (btree_id_is_alloc(i)) { - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs); - c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); + if (btree_id_is_alloc(i)) r->error = 0; - } else if (!(c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) { - bch_info(c, "will run btree node scan"); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes); - c->opts.recovery_passes |= 
BIT_ULL(BCH_RECOVERY_PASS_check_topology); - } - ret = 0; - bch2_btree_lost_data(c, i); + ret = bch2_btree_lost_data(c, i); } } diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index 4bf818de1f2f..b0d55754b21b 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_RECOVERY_H #define _BCACHEFS_RECOVERY_H -void bch2_btree_lost_data(struct bch_fs *, enum btree_id); +int bch2_btree_lost_data(struct bch_fs *, enum btree_id); int bch2_journal_replay(struct bch_fs *); diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 5e7722cc0879..1240c5c19fea 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -141,6 +141,17 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, return ret; } +int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c, + enum bch_recovery_pass pass) +{ + lockdep_assert_held(&c->sb_lock); + + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required); + + return bch2_run_explicit_recovery_pass(c, pass); +} + int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, enum bch_recovery_pass pass) { diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h index 99b464e127b8..7d7339c8fa29 100644 --- a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h @@ -9,6 +9,7 @@ u64 bch2_recovery_passes_from_stable(u64 v); u64 bch2_fsck_recovery_passes(void); int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); +int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *, enum bch_recovery_pass); int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass); int bch2_run_online_recovery_passes(struct bch_fs *); -- 2.51.0 From 0269e27ce3f7be2bd1e565cc17a88e4074facad1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 9 Oct 2024 21:26:05 -0400 Subject: [PATCH 03/16] bcachefs: improved bkey_val_copy() Factor out some common code, add typechecking. 
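Usage sketch (the check_snapshot_tree patch later in this series adds a call of exactly this shape); the new BUILD_BUG_ON turns a mismatched destination type into a compile error instead of a silently truncated copy:

	struct bch_snapshot s;

	bkey_val_copy(&s, snapshot_k);	/* ok: *dst matches snapshot_k's value type */
	/* bkey_val_copy(&some_other_struct, snapshot_k) now fails to build */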
Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.h | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 550db3654f2c..dda07a320488 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -594,13 +594,18 @@ static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans, bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \ _btree_id, _pos, _flags, KEY_TYPE_##_type)) +static inline void __bkey_val_copy(void *dst_v, unsigned dst_size, struct bkey_s_c src_k) +{ + unsigned b = min_t(unsigned, dst_size, bkey_val_bytes(src_k.k)); + memcpy(dst_v, src_k.v, b); + if (unlikely(b < dst_size)) + memset(dst_v + b, 0, dst_size - b); +} + #define bkey_val_copy(_dst_v, _src_k) \ do { \ - unsigned b = min_t(unsigned, sizeof(*_dst_v), \ - bkey_val_bytes(_src_k.k)); \ - memcpy(_dst_v, _src_k.v, b); \ - if (b < sizeof(*_dst_v)) \ - memset((void *) (_dst_v) + b, 0, sizeof(*_dst_v) - b); \ + BUILD_BUG_ON(!__typecheck(*_dst_v, *_src_k.v)); \ + __bkey_val_copy(_dst_v, sizeof(*_dst_v), _src_k.s_c); \ } while (0) static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, @@ -609,17 +614,10 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, unsigned val_size, void *val) { struct btree_iter iter; - struct bkey_s_c k; - int ret; - - k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type); - ret = bkey_err(k); + struct bkey_s_c k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type); + int ret = bkey_err(k); if (!ret) { - unsigned b = min_t(unsigned, bkey_val_bytes(k.k), val_size); - - memcpy(val, k.v, b); - if (unlikely(b < sizeof(*val))) - memset((void *) val + b, 0, sizeof(*val) - b); + __bkey_val_copy(val, val_size, k); bch2_trans_iter_exit(trans, &iter); } -- 2.51.0 From 106480e9a869e8d2dd2db34819d04e15ccfd896c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 9 Oct 2024 21:51:05 -0400 Subject: [PATCH 04/16] bcachefs: Factor out jset_entry_log_msg_bytes() Needed for improved userspace cmd_list_journal Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 9 +++++++++ fs/bcachefs/journal_io.c | 3 +-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 79a80a78c2d8..c5e3824d5771 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1219,6 +1219,15 @@ struct jset_entry_log { u8 d[]; } __packed __aligned(8); +static inline unsigned jset_entry_log_msg_bytes(struct jset_entry_log *l) +{ + unsigned b = vstruct_bytes(&l->entry) - offsetof(struct jset_entry_log, d); + + while (b && !l->d[b - 1]) + --b; + return b; +} + struct jset_entry_datetime { struct jset_entry entry; __le64 seconds; diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index fb35dd336331..7c7595e5369b 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -738,9 +738,8 @@ static void journal_entry_log_to_text(struct printbuf *out, struct bch_fs *c, struct jset_entry *entry) { struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry); - unsigned bytes = vstruct_bytes(entry) - offsetof(struct jset_entry_log, d); - prt_printf(out, "%.*s", bytes, l->d); + prt_printf(out, "%.*s", jset_entry_log_msg_bytes(l), l->d); } static int journal_entry_overwrite_validate(struct bch_fs *c, -- 2.51.0 From 9e2f5f79882b855156cd522acb7354e5a7901418 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 
9 Oct 2024 21:27:11 -0400 Subject: [PATCH 05/16] bcachefs: better error message in check_snapshot_tree() If we find a snapshot node and it didn't match the snapshot tree, we should print it. Signed-off-by: Kent Overstreet --- fs/bcachefs/snapshot.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index feaf2aa0d900..34e01bd8127f 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -506,7 +506,6 @@ static int bch2_snapshot_tree_master_subvol(struct btree_trans *trans, break; } } - bch2_trans_iter_exit(trans, &iter); if (!ret && !found) { @@ -536,6 +535,7 @@ static int check_snapshot_tree(struct btree_trans *trans, struct bch_snapshot s; struct bch_subvolume subvol; struct printbuf buf = PRINTBUF; + struct btree_iter snapshot_iter = {}; u32 root_id; int ret; @@ -545,16 +545,27 @@ static int check_snapshot_tree(struct btree_trans *trans, st = bkey_s_c_to_snapshot_tree(k); root_id = le32_to_cpu(st.v->root_snapshot); - ret = bch2_snapshot_lookup(trans, root_id, &s); + struct bkey_s_c_snapshot snapshot_k = + bch2_bkey_get_iter_typed(trans, &snapshot_iter, BTREE_ID_snapshots, + POS(0, root_id), 0, snapshot); + ret = bkey_err(snapshot_k); if (ret && !bch2_err_matches(ret, ENOENT)) goto err; + if (!ret) + bkey_val_copy(&s, snapshot_k); + if (fsck_err_on(ret || root_id != bch2_snapshot_root(c, root_id) || st.k->p.offset != le32_to_cpu(s.tree), trans, snapshot_tree_to_missing_snapshot, "snapshot tree points to missing/incorrect snapshot:\n %s", - (bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) { + (bch2_bkey_val_to_text(&buf, c, st.s_c), + prt_newline(&buf), + ret + ? prt_printf(&buf, "(%s)", bch2_err_str(ret)) + : bch2_bkey_val_to_text(&buf, c, snapshot_k.s_c), + buf.buf))) { ret = bch2_btree_delete_at(trans, iter, 0); goto err; } @@ -605,6 +616,7 @@ static int check_snapshot_tree(struct btree_trans *trans, } err: fsck_err: + bch2_trans_iter_exit(trans, &snapshot_iter); printbuf_exit(&buf); return ret; } -- 2.51.0 From db514cf6775fa58b45780969e407f678e0a5132c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 9 Oct 2024 23:02:04 -0400 Subject: [PATCH 06/16] bcachefs: Avoid bch2_btree_id_str() Prefer bch2_btree_id_to_text() - it prints out the integer ID when unknown. 
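The call-site conversion is mechanical: build the name into a printbuf rather than formatting a bare string (sketch, assuming a printbuf is in scope at the call site):

	struct printbuf buf = PRINTBUF;

	/* old: prt_printf(out, "btree=%s l=%u", bch2_btree_id_str(id), level); */
	bch2_btree_id_level_to_text(&buf, id, level);	/* "btree=<name> level=<n>",
							   or the integer ID if unknown */
	printbuf_exit(&buf);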
Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 24 ++++++++------- fs/bcachefs/bbpos.h | 2 +- fs/bcachefs/btree_cache.c | 37 +++++++++++++++--------- fs/bcachefs/btree_cache.h | 3 +- fs/bcachefs/btree_gc.c | 45 +++++++++++++++++------------ fs/bcachefs/btree_io.c | 13 +++++---- fs/bcachefs/btree_iter.c | 32 ++++++++++---------- fs/bcachefs/btree_journal_iter.c | 5 +++- fs/bcachefs/btree_node_scan.c | 10 ++++--- fs/bcachefs/btree_update_interior.c | 23 ++++++++------- fs/bcachefs/debug.c | 4 ++- fs/bcachefs/disk_accounting.c | 3 +- fs/bcachefs/journal_io.c | 3 +- fs/bcachefs/recovery.c | 25 +++++++++++----- fs/bcachefs/sysfs.c | 3 +- 15 files changed, 140 insertions(+), 92 deletions(-) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 654a58132a4d..f323ce4b0b33 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -81,12 +81,11 @@ fsck_err: void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) { - prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=", - bch2_btree_id_str(bp->btree_id), - bp->level, - (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), - (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), - bp->bucket_len); + bch2_btree_id_level_to_text(out, bp->btree_id, bp->level); + prt_printf(out, " offset=%llu:%u len=%u pos=", + (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), + (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), + bp->bucket_len); bch2_bpos_to_text(out, bp->pos); } @@ -501,9 +500,13 @@ found: goto err; prt_str(&buf, "extents pointing to same space, but first extent checksum bad:"); - prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree)); + prt_printf(&buf, "\n "); + bch2_btree_id_to_text(&buf, btree); + prt_str(&buf, " "); bch2_bkey_val_to_text(&buf, c, extent); - prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree)); + prt_printf(&buf, "\n "); + bch2_btree_id_to_text(&buf, o_btree); + prt_str(&buf, " "); bch2_bkey_val_to_text(&buf, c, extent2); struct nonce nonce = extent_nonce(extent.k->bversion, p.crc); @@ -638,8 +641,9 @@ check_existing_bp: goto err; missing: printbuf_reset(&buf); - prt_printf(&buf, "missing backpointer for btree=%s l=%u ", - bch2_btree_id_str(bp.btree_id), bp.level); + prt_str(&buf, "missing backpointer for btree="); + bch2_btree_id_to_text(&buf, bp.btree_id); + prt_printf(&buf, " l=%u ", bp.level); bch2_bkey_val_to_text(&buf, c, orig_k); prt_printf(&buf, "\n got: "); bch2_bkey_val_to_text(&buf, c, bp_k); diff --git a/fs/bcachefs/bbpos.h b/fs/bcachefs/bbpos.h index be2edced5213..63abe17f35ea 100644 --- a/fs/bcachefs/bbpos.h +++ b/fs/bcachefs/bbpos.h @@ -29,7 +29,7 @@ static inline struct bbpos bbpos_successor(struct bbpos pos) static inline void bch2_bbpos_to_text(struct printbuf *out, struct bbpos pos) { - prt_str(out, bch2_btree_id_str(pos.btree)); + bch2_btree_id_to_text(out, pos.btree); prt_char(out, ':'); bch2_bpos_to_text(out, pos.pos); } diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 7123019ab3bc..a0a406b0c7bc 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -1004,16 +1004,14 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) return; prt_printf(&buf, - "btree node header doesn't match ptr\n" - "btree %s level %u\n" - "ptr: ", - bch2_btree_id_str(b->c.btree_id), b->c.level); + "btree node header doesn't match ptr: "); + bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); + prt_str(&buf, "\nptr: "); 
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - prt_printf(&buf, "\nheader: btree %s level %llu\n" - "min ", - bch2_btree_id_str(BTREE_NODE_ID(b->data)), - BTREE_NODE_LEVEL(b->data)); + prt_str(&buf, "\nheader: "); + bch2_btree_id_level_to_text(&buf, BTREE_NODE_ID(b->data), BTREE_NODE_LEVEL(b->data)); + prt_str(&buf, "\nmin "); bch2_bpos_to_text(&buf, b->data->min_key); prt_printf(&buf, "\nmax "); @@ -1398,12 +1396,19 @@ void bch2_btree_id_to_text(struct printbuf *out, enum btree_id btree) prt_printf(out, "(unknown btree %u)", btree); } +void bch2_btree_id_level_to_text(struct printbuf *out, enum btree_id btree, unsigned level) +{ + prt_str(out, "btree="); + bch2_btree_id_to_text(out, btree); + prt_printf(out, " level=%u", level); +} + void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) { - prt_printf(out, "%s level %u/%u\n ", - bch2_btree_id_str(b->c.btree_id), - b->c.level, - bch2_btree_id_root(c, b->c.btree_id)->level); + bch2_btree_id_to_text(out, b->c.btree_id); + prt_printf(out, " level %u/%u\n ", + b->c.level, + bch2_btree_id_root(c, b->c.btree_id)->level); bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); } @@ -1478,8 +1483,12 @@ void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock); prt_newline(out); - for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) - prt_btree_cache_line(out, c, bch2_btree_id_str(i), bc->nr_by_btree[i]); + for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) { + bch2_btree_id_to_text(out, i); + prt_printf(out, "\t"); + prt_human_readable_u64(out, bc->nr_by_btree[i] * c->opts.btree_node_size); + prt_printf(out, " (%zu)\n", bc->nr_by_btree[i]); + } prt_newline(out); prt_printf(out, "freed:\t%zu\n", bc->nr_freed); diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index 66e86d1a178d..6cfacacb6769 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -138,8 +138,9 @@ static inline struct btree *btree_node_root(struct bch_fs *c, struct btree *b) return bch2_btree_id_root(c, b->c.btree_id)->b; } -const char *bch2_btree_id_str(enum btree_id); +const char *bch2_btree_id_str(enum btree_id); /* avoid */ void bch2_btree_id_to_text(struct printbuf *, enum btree_id); +void bch2_btree_id_level_to_text(struct printbuf *, enum btree_id, unsigned); void bch2_btree_pos_to_text(struct printbuf *, struct bch_fs *, const struct btree *); void bch2_btree_node_to_text(struct printbuf *, struct bch_fs *, const struct btree *); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 81dcf9e512c0..3c4e66da1ca4 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -56,8 +56,8 @@ void bch2_gc_pos_to_text(struct printbuf *out, struct gc_pos *p) { prt_str(out, bch2_gc_phase_strs[p->phase]); prt_char(out, ' '); - bch2_btree_id_to_text(out, p->btree); - prt_printf(out, " l=%u ", p->level); + bch2_btree_id_level_to_text(out, p->btree, p->level); + prt_char(out, ' '); bch2_bpos_to_text(out, p->pos); } @@ -209,8 +209,9 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * if (bpos_eq(expected_start, cur->data->min_key)) return 0; - prt_printf(&buf, " at btree %s level %u:\n parent: ", - bch2_btree_id_str(b->c.btree_id), b->c.level); + prt_printf(&buf, " at "); + bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); + prt_printf(&buf, ":\n parent: "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); if (prev) { @@ -277,8 +278,9 @@ static 
int btree_repair_node_end(struct btree_trans *trans, struct btree *b, if (bpos_eq(child->key.k.p, b->key.k.p)) return 0; - prt_printf(&buf, "at btree %s level %u:\n parent: ", - bch2_btree_id_str(b->c.btree_id), b->c.level); + prt_printf(&buf, " at "); + bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); + prt_printf(&buf, ":\n parent: "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); prt_str(&buf, "\n child: "); @@ -341,14 +343,14 @@ again: ret = PTR_ERR_OR_ZERO(cur); printbuf_reset(&buf); + bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level - 1); + prt_char(&buf, ' '); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), trans, btree_node_unreadable, - "Topology repair: unreadable btree node at btree %s level %u:\n" + "Topology repair: unreadable btree node at\n" " %s", - bch2_btree_id_str(b->c.btree_id), - b->c.level - 1, buf.buf)) { bch2_btree_node_evict(trans, cur_k.k); cur = NULL; @@ -370,7 +372,7 @@ again: break; if (bch2_btree_node_is_stale(c, cur)) { - bch_info(c, "btree node %s older than nodes found by scanning", buf.buf); + bch_info(c, "btree node older than nodes found by scanning\n %s", buf.buf); six_unlock_read(&cur->c.lock); bch2_btree_node_evict(trans, cur_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, @@ -478,14 +480,13 @@ again: } printbuf_reset(&buf); + bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); + prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); if (mustfix_fsck_err_on(!have_child, trans, btree_node_topology_interior_node_empty, - "empty interior btree node at btree %s level %u\n" - " %s", - bch2_btree_id_str(b->c.btree_id), - b->c.level, buf.buf)) + "empty interior btree node at %s", buf.buf)) ret = DROP_THIS_NODE; err: fsck_err: @@ -511,6 +512,7 @@ int bch2_check_topology(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); struct bpos pulled_from_scan = POS_MIN; + struct printbuf buf = PRINTBUF; int ret = 0; bch2_trans_srcu_unlock(trans); @@ -519,19 +521,21 @@ int bch2_check_topology(struct bch_fs *c) struct btree_root *r = bch2_btree_id_root(c, i); bool reconstructed_root = false; + bch2_btree_id_to_text(&buf, i); + if (r->error) { ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes); if (ret) break; reconstruct_root: - bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i)); + bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); r->alive = false; r->error = 0; if (!bch2_btree_has_scanned_nodes(c, i)) { mustfix_fsck_err(trans, btree_root_unreadable_and_scan_found_nothing, - "no nodes found for btree %s, continue?", bch2_btree_id_str(i)); + "no nodes found for btree %s, continue?", buf.buf); bch2_btree_root_alloc_fake_trans(trans, i, 0); } else { bch2_btree_root_alloc_fake_trans(trans, i, 1); @@ -560,13 +564,14 @@ reconstruct_root: if (!reconstructed_root) goto reconstruct_root; - bch_err(c, "empty btree root %s", bch2_btree_id_str(i)); + bch_err(c, "empty btree root %s", buf.buf); bch2_btree_root_alloc_fake_trans(trans, i, 0); r->alive = false; ret = 0; } } fsck_err: + printbuf_exit(&buf); bch2_trans_put(trans); return ret; } @@ -713,6 +718,7 @@ static int bch2_gc_btrees(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); enum btree_id ids[BTREE_ID_NR]; + struct printbuf buf = PRINTBUF; unsigned i; int ret = 0; @@ -731,10 +737,13 @@ static int bch2_gc_btrees(struct bch_fs *c) if (mustfix_fsck_err_on(bch2_err_matches(ret, 
EIO), trans, btree_node_read_error, "btree node read error for %s", - bch2_btree_id_str(btree))) + (printbuf_reset(&buf), + bch2_btree_id_to_text(&buf, btree), + buf.buf))) ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology); } fsck_err: + printbuf_exit(&buf); bch2_trans_put(trans); bch_err_fn(c, ret); return ret; diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 839d68802e42..89a42ee81e5c 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -25,9 +25,8 @@ static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn) { - prt_printf(out, "btree=%s l=%u seq %llux\n", - bch2_btree_id_str(BTREE_NODE_ID(bn)), - (unsigned) BTREE_NODE_LEVEL(bn), bn->keys.seq); + bch2_btree_id_level_to_text(out, BTREE_NODE_ID(bn), BTREE_NODE_LEVEL(bn)); + prt_printf(out, " seq %llux\n", bn->keys.seq); prt_str(out, "min: "); bch2_bpos_to_text(out, bn->min_key); prt_newline(out); @@ -1343,9 +1342,11 @@ start: !btree_node_read_error(b) && c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { printbuf_reset(&buf); - bch2_bpos_to_text(&buf, b->key.k.p); - bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error", - __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf); + bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); + prt_str(&buf, " "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s", + __func__, buf.buf); bch2_btree_node_rewrite_async(c, b); } diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 01152fd5ac57..07bce85dafaf 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1448,10 +1448,11 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) trans_for_each_update(trans, i) { struct bkey_s_c old = { &i->old_k, i->old_v }; - prt_printf(buf, "update: btree=%s cached=%u %pS\n", - bch2_btree_id_str(i->btree_id), - i->cached, - (void *) i->ip_allocated); + prt_str(buf, "update: btree="); + bch2_btree_id_to_text(buf, i->btree_id); + prt_printf(buf, " cached=%u %pS\n", + i->cached, + (void *) i->ip_allocated); prt_printf(buf, " old "); bch2_bkey_val_to_text(buf, trans->c, old); @@ -1484,13 +1485,13 @@ static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_tra { struct btree_path *path = trans->paths + path_idx; - prt_printf(out, "path: idx %3u ref %u:%u %c %c %c btree=%s l=%u pos ", + prt_printf(out, "path: idx %3u ref %u:%u %c %c %c ", path_idx, path->ref, path->intent_ref, path->preserve ? 'P' : ' ', path->should_be_locked ? 'S' : ' ', - path->cached ? 'C' : 'B', - bch2_btree_id_str(path->btree_id), - path->level); + path->cached ? 'C' : 'B'); + bch2_btree_id_level_to_text(out, path->btree_id, path->level); + prt_str(out, " pos "); bch2_bpos_to_text(out, path->pos); if (!path->cached && btree_node_locked(path, path->level)) { @@ -3336,8 +3337,9 @@ bch2_btree_bkey_cached_common_to_text(struct printbuf *out, pid = owner ? owner->pid : 0; rcu_read_unlock(); - prt_printf(out, "\t%px %c l=%u %s:", b, b->cached ? 'c' : 'b', - b->level, bch2_btree_id_str(b->btree_id)); + prt_printf(out, "\t%px %c ", b, b->cached ? 
'c' : 'b'); + bch2_btree_id_to_text(out, b->btree_id); + prt_printf(out, " l=%u:", b->level); bch2_bpos_to_text(out, btree_node_pos(b)); prt_printf(out, "\t locks %u:%u:%u held by pid %u", @@ -3376,11 +3378,11 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) if (!path->nodes_locked) continue; - prt_printf(out, " path %u %c l=%u %s:", - idx, - path->cached ? 'c' : 'b', - path->level, - bch2_btree_id_str(path->btree_id)); + prt_printf(out, " path %u %c ", + idx, + path->cached ? 'c' : 'b'); + bch2_btree_id_to_text(out, path->btree_id); + prt_printf(out, " l=%u:", path->level); bch2_bpos_to_text(out, path->pos); prt_newline(out); diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index c1657182c275..924b5e3a4390 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -628,8 +628,11 @@ void bch2_journal_keys_dump(struct bch_fs *c) darray_for_each(*keys, i) { printbuf_reset(&buf); + prt_printf(&buf, "btree="); + bch2_btree_id_to_text(&buf, i->btree_id); + prt_printf(&buf, " l=%u ", i->level); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k)); - pr_err("%s l=%u %s", bch2_btree_id_str(i->btree_id), i->level, buf.buf); + pr_err("%s", buf.buf); } printbuf_exit(&buf); } diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 30131c3bdd97..4b4df31d4b95 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -22,9 +22,9 @@ struct find_btree_nodes_worker { static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n) { - prt_printf(out, "%s l=%u seq=%u journal_seq=%llu cookie=%llx ", - bch2_btree_id_str(n->btree_id), n->level, n->seq, - n->journal_seq, n->cookie); + bch2_btree_id_level_to_text(out, n->btree_id, n->level); + prt_printf(out, " seq=%u journal_seq=%llu cookie=%llx ", + n->seq, n->journal_seq, n->cookie); bch2_bpos_to_text(out, n->min_key); prt_str(out, "-"); bch2_bpos_to_text(out, n->max_key); @@ -499,7 +499,9 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, if (c->opts.verbose) { struct printbuf buf = PRINTBUF; - prt_printf(&buf, "recovering %s l=%u ", bch2_btree_id_str(btree), level); + prt_str(&buf, "recovery "); + bch2_btree_id_level_to_text(&buf, btree, level); + prt_str(&buf, " "); bch2_bpos_to_text(&buf, node_min); prt_str(&buf, " - "); bch2_bpos_to_text(&buf, node_max); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index d596ef93239f..d62de3f79b29 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -97,9 +97,9 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) bch2_topology_error(c); printbuf_reset(&buf); - prt_str(&buf, "end of prev node doesn't match start of next node\n"), - prt_printf(&buf, " in btree %s level %u node ", - bch2_btree_id_str(b->c.btree_id), b->c.level); + prt_str(&buf, "end of prev node doesn't match start of next node\n in "); + bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); + prt_str(&buf, " node "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); prt_str(&buf, "\n prev "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); @@ -118,9 +118,9 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) bch2_topology_error(c); printbuf_reset(&buf); - prt_str(&buf, "empty interior node\n"); - prt_printf(&buf, " in btree %s level %u node ", - bch2_btree_id_str(b->c.btree_id), b->c.level); + 
prt_str(&buf, "empty interior node\n in "); + bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); + prt_str(&buf, " node "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); need_fsck_err(trans, btree_node_topology_empty_interior_node, "%s", buf.buf); @@ -129,9 +129,9 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) bch2_topology_error(c); printbuf_reset(&buf); - prt_str(&buf, "last child node doesn't end at end of parent node\n"); - prt_printf(&buf, " in btree %s level %u node ", - bch2_btree_id_str(b->c.btree_id), b->c.level); + prt_str(&buf, "last child node doesn't end at end of parent node\n in "); + bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); + prt_str(&buf, " node "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); prt_str(&buf, "\n last key "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); @@ -2575,8 +2575,9 @@ static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update prt_printf(out, "%ps: ", (void *) as->ip_started); bch2_trans_commit_flags_to_text(out, as->flags); - prt_printf(out, " btree=%s l=%u-%u mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", - bch2_btree_id_str(as->btree_id), + prt_str(out, " "); + bch2_btree_id_to_text(out, as->btree_id); + prt_printf(out, " l=%u-%u mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", as->update_level_start, as->update_level_end, bch2_btree_update_modes[as->mode], diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 45aec1afdb0e..b5de52a50d10 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -472,7 +472,9 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * if (!out->nr_tabstops) printbuf_tabstop_push(out, 32); - prt_printf(out, "%px btree=%s l=%u\n", b, bch2_btree_id_str(b->c.btree_id), b->c.level); + prt_printf(out, "%px ", b); + bch2_btree_id_level_to_text(out, b->c.btree_id, b->c.level); + prt_printf(out, "\n"); printbuf_indent_add(out, 2); diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 07eb8fa1b026..38b563113cfb 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -217,7 +217,8 @@ void bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_po prt_printf(out, "id=%u", k->snapshot.id); break; case BCH_DISK_ACCOUNTING_btree: - prt_printf(out, "btree=%s", bch2_btree_id_str(k->btree.id)); + prt_str(out, "btree="); + bch2_btree_id_to_text(out, k->btree.id); break; } } diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 7c7595e5369b..9bc0caa9d5e4 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -421,7 +421,8 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs bch2_prt_jset_entry_type(out, entry->type); prt_str(out, ": "); } - prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level); + bch2_btree_id_level_to_text(out, entry->btree_id, entry->level); + prt_char(out, ' '); bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k)); first = false; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index b1c83e72c0d8..0e5a53541ce4 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -42,7 +42,10 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) mutex_lock(&c->sb_lock); if (!(c->sb.btrees_lost_data & b)) { - bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree)); + struct printbuf buf = PRINTBUF; + bch2_btree_id_to_text(&buf, btree); + bch_err(c, 
"flagging btree %s lost data", buf.buf); + printbuf_exit(&buf); bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b); } @@ -385,10 +388,13 @@ int bch2_journal_replay(struct bch_fs *c) ? BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim : 0), bch2_journal_replay_key(trans, k)); - bch_err_msg(c, ret, "while replaying key at btree %s level %u:", - bch2_btree_id_str(k->btree_id), k->level); - if (ret) + if (ret) { + struct printbuf buf = PRINTBUF; + bch2_btree_id_level_to_text(&buf, k->btree_id, k->level); + bch_err_msg(c, ret, "while replaying key at %s:", buf.buf); + printbuf_exit(&buf); goto err; + } BUG_ON(k->btree_id != BTREE_ID_accounting && !k->overwritten); } @@ -536,6 +542,7 @@ static int journal_replay_early(struct bch_fs *c, static int read_btree_roots(struct bch_fs *c) { + struct printbuf buf = PRINTBUF; int ret = 0; for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { @@ -547,14 +554,17 @@ static int read_btree_roots(struct bch_fs *c) if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc) continue; + printbuf_reset(&buf); + bch2_btree_id_level_to_text(&buf, i, r->level); + if (mustfix_fsck_err_on((ret = r->error), c, btree_root_bkey_invalid, "invalid btree root %s", - bch2_btree_id_str(i)) || + buf.buf) || mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)), c, btree_root_read_error, - "error reading btree root %s l=%u: %s", - bch2_btree_id_str(i), r->level, bch2_err_str(ret))) { + "error reading btree root %s: %s", + buf.buf, bch2_err_str(ret))) { if (btree_id_is_alloc(i)) r->error = 0; @@ -572,6 +582,7 @@ static int read_btree_roots(struct bch_fs *c) } } fsck_err: + printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 03e59f86f360..3270bfab9466 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -302,7 +302,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c) { - prt_printf(out, "%s: ", bch2_btree_id_str(c->gc_gens_btree)); + bch2_btree_id_to_text(out, c->gc_gens_btree); + prt_printf(out, ": "); bch2_bpos_to_text(out, c->gc_gens_pos); prt_printf(out, "\n"); } -- 2.51.0 From d55d4a0ca27adea2e6bb404eb9b65a19036dd047 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 1 Sep 2024 14:57:26 -0400 Subject: [PATCH 07/16] bcachefs: Refactor new stripe path to reduce dependencies on ec_stripe_head We need to add a path for reshaping existing stripes (for e.g. device removal), and this new path won't necessarily use ec_stripe_head. Refactor the code to avoid unnecessary references to it for clarity. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/ec.c | 196 +++++++++++++++++++++++++---------------------- 1 file changed, 104 insertions(+), 92 deletions(-) diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 075bfd1cbb15..8b727d63af3e 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1707,7 +1707,7 @@ static void ec_stripe_key_init(struct bch_fs *c, set_bkey_val_u64s(&s->k, u64s); } -static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) +static struct ec_stripe_new *ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) { struct ec_stripe_new *s; @@ -1715,7 +1715,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) - return -BCH_ERR_ENOMEM_ec_new_stripe_alloc; + return NULL; mutex_init(&s->lock); closure_init(&s->iodone, NULL); @@ -1730,10 +1730,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) ec_stripe_key_init(c, &s->new_stripe.key, s->nr_data, s->nr_parity, h->blocksize, h->disk_label); - - h->s = s; - h->nr_created++; - return 0; + return s; } static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h) @@ -1878,25 +1875,26 @@ err: return h; } -static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h, +static int new_stripe_alloc_buckets(struct btree_trans *trans, + struct ec_stripe_head *h, struct ec_stripe_new *s, enum bch_watermark watermark, struct closure *cl) { struct bch_fs *c = trans->c; struct bch_devs_mask devs = h->devs; struct open_bucket *ob; struct open_buckets buckets; - struct bch_stripe *v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v; + struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v; unsigned i, j, nr_have_parity = 0, nr_have_data = 0; bool have_cache = true; int ret = 0; - BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity); - BUG_ON(v->nr_redundant != h->s->nr_parity); + BUG_ON(v->nr_blocks != s->nr_data + s->nr_parity); + BUG_ON(v->nr_redundant != s->nr_parity); /* * We bypass the sector allocator which normally does this: */ bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX); - for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) { + for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) { /* * Note: we don't yet repair invalid blocks (failed/removed * devices) when reusing stripes - we still need a codepath to @@ -1906,21 +1904,21 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ if (v->ptrs[i].dev != BCH_SB_MEMBER_INVALID) __clear_bit(v->ptrs[i].dev, devs.d); - if (i < h->s->nr_data) + if (i < s->nr_data) nr_have_data++; else nr_have_parity++; } - BUG_ON(nr_have_data > h->s->nr_data); - BUG_ON(nr_have_parity > h->s->nr_parity); + BUG_ON(nr_have_data > s->nr_data); + BUG_ON(nr_have_parity > s->nr_parity); buckets.nr = 0; - if (nr_have_parity < h->s->nr_parity) { + if (nr_have_parity < s->nr_parity) { ret = bch2_bucket_alloc_set_trans(trans, &buckets, &h->parity_stripe, &devs, - h->s->nr_parity, + s->nr_parity, &nr_have_parity, &have_cache, 0, BCH_DATA_parity, @@ -1928,14 +1926,14 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ cl); open_bucket_for_each(c, &buckets, ob, i) { - j = find_next_zero_bit(h->s->blocks_gotten, - h->s->nr_data + h->s->nr_parity, - h->s->nr_data); - BUG_ON(j >= h->s->nr_data + h->s->nr_parity); + j = find_next_zero_bit(s->blocks_gotten, + s->nr_data + s->nr_parity, + s->nr_data); + BUG_ON(j >= s->nr_data + s->nr_parity); - 
h->s->blocks[j] = buckets.v[i]; + s->blocks[j] = buckets.v[i]; v->ptrs[j] = bch2_ob_ptr(c, ob); - __set_bit(j, h->s->blocks_gotten); + __set_bit(j, s->blocks_gotten); } if (ret) @@ -1943,11 +1941,11 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ } buckets.nr = 0; - if (nr_have_data < h->s->nr_data) { + if (nr_have_data < s->nr_data) { ret = bch2_bucket_alloc_set_trans(trans, &buckets, &h->block_stripe, &devs, - h->s->nr_data, + s->nr_data, &nr_have_data, &have_cache, 0, BCH_DATA_user, @@ -1955,13 +1953,13 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ cl); open_bucket_for_each(c, &buckets, ob, i) { - j = find_next_zero_bit(h->s->blocks_gotten, - h->s->nr_data, 0); - BUG_ON(j >= h->s->nr_data); + j = find_next_zero_bit(s->blocks_gotten, + s->nr_data, 0); + BUG_ON(j >= s->nr_data); - h->s->blocks[j] = buckets.v[i]; + s->blocks[j] = buckets.v[i]; v->ptrs[j] = bch2_ob_ptr(c, ob); - __set_bit(j, h->s->blocks_gotten); + __set_bit(j, s->blocks_gotten); } if (ret) @@ -2007,73 +2005,78 @@ static s64 get_existing_stripe(struct bch_fs *c, return ret; } -static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h) +static int init_new_stripe_from_existing(struct bch_fs *c, struct ec_stripe_new *s) { - struct bch_fs *c = trans->c; - struct bch_stripe *new_v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v; - struct bch_stripe *existing_v; + struct bch_stripe *new_v = &bkey_i_to_stripe(&s->new_stripe.key)->v; + struct bch_stripe *existing_v = &bkey_i_to_stripe(&s->existing_stripe.key)->v; unsigned i; - s64 idx; - int ret; - /* - * If we can't allocate a new stripe, and there's no stripes with empty - * blocks for us to reuse, that means we have to wait on copygc: - */ - idx = get_existing_stripe(c, h); - if (idx < 0) - return -BCH_ERR_stripe_alloc_blocked; - - ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe); - bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c, - "reading stripe key: %s", bch2_err_str(ret)); - if (ret) { - bch2_stripe_close(c, h->s); - return ret; - } - - existing_v = &bkey_i_to_stripe(&h->s->existing_stripe.key)->v; - - BUG_ON(existing_v->nr_redundant != h->s->nr_parity); - h->s->nr_data = existing_v->nr_blocks - + BUG_ON(existing_v->nr_redundant != s->nr_parity); + s->nr_data = existing_v->nr_blocks - existing_v->nr_redundant; - ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize); + int ret = ec_stripe_buf_init(&s->existing_stripe, 0, le16_to_cpu(existing_v->sectors)); if (ret) { - bch2_stripe_close(c, h->s); + bch2_stripe_close(c, s); return ret; } - BUG_ON(h->s->existing_stripe.size != h->blocksize); - BUG_ON(h->s->existing_stripe.size != le16_to_cpu(existing_v->sectors)); + BUG_ON(s->existing_stripe.size != le16_to_cpu(existing_v->sectors)); /* * Free buckets we initially allocated - they might conflict with * blocks from the stripe we're reusing: */ - for_each_set_bit(i, h->s->blocks_gotten, new_v->nr_blocks) { - bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]); - h->s->blocks[i] = 0; + for_each_set_bit(i, s->blocks_gotten, new_v->nr_blocks) { + bch2_open_bucket_put(c, c->open_buckets + s->blocks[i]); + s->blocks[i] = 0; } - memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten)); - memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated)); + memset(s->blocks_gotten, 0, sizeof(s->blocks_gotten)); + memset(s->blocks_allocated, 0, sizeof(s->blocks_allocated)); - for (i = 0; i < existing_v->nr_blocks; 
i++) { + for (unsigned i = 0; i < existing_v->nr_blocks; i++) { if (stripe_blockcount_get(existing_v, i)) { - __set_bit(i, h->s->blocks_gotten); - __set_bit(i, h->s->blocks_allocated); + __set_bit(i, s->blocks_gotten); + __set_bit(i, s->blocks_allocated); } - ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone); + ec_block_io(c, &s->existing_stripe, READ, i, &s->iodone); } - bkey_copy(&h->s->new_stripe.key, &h->s->existing_stripe.key); - h->s->have_existing_stripe = true; + bkey_copy(&s->new_stripe.key, &s->existing_stripe.key); + s->have_existing_stripe = true; return 0; } -static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h) +static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h, + struct ec_stripe_new *s) +{ + struct bch_fs *c = trans->c; + s64 idx; + int ret; + + /* + * If we can't allocate a new stripe, and there's no stripes with empty + * blocks for us to reuse, that means we have to wait on copygc: + */ + idx = get_existing_stripe(c, h); + if (idx < 0) + return -BCH_ERR_stripe_alloc_blocked; + + ret = get_stripe_key_trans(trans, idx, &s->existing_stripe); + bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c, + "reading stripe key: %s", bch2_err_str(ret)); + if (ret) { + bch2_stripe_close(c, s); + return ret; + } + + return init_new_stripe_from_existing(c, s); +} + +static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h, + struct ec_stripe_new *s) { struct bch_fs *c = trans->c; struct btree_iter iter; @@ -2082,15 +2085,19 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint)); int ret; - if (!h->s->res.sectors) { - ret = bch2_disk_reservation_get(c, &h->s->res, + if (!s->res.sectors) { + ret = bch2_disk_reservation_get(c, &s->res, h->blocksize, - h->s->nr_parity, + s->nr_parity, BCH_DISK_RESERVATION_NOFAIL); if (ret) return ret; } + /* + * Allocate stripe slot + * XXX: we're going to need a bitrange btree of free stripes + */ for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos, BTREE_ITER_slots|BTREE_ITER_intent, k, ret) { if (bkey_gt(k.k->p, POS(0, U32_MAX))) { @@ -2105,7 +2112,7 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st } if (bkey_deleted(k.k) && - bch2_try_open_stripe(c, h->s, k.k->p.offset)) + bch2_try_open_stripe(c, s, k.k->p.offset)) break; } @@ -2116,16 +2123,16 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st ret = ec_stripe_mem_alloc(trans, &iter); if (ret) { - bch2_stripe_close(c, h->s); + bch2_stripe_close(c, s); goto err; } - h->s->new_stripe.key.k.p = iter.pos; + s->new_stripe.key.k.p = iter.pos; out: bch2_trans_iter_exit(trans, &iter); return ret; err: - bch2_disk_reservation_put(c, &h->s->res); + bch2_disk_reservation_put(c, &s->res); goto out; } @@ -2156,22 +2163,27 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, return h; if (!h->s) { - ret = ec_new_stripe_alloc(c, h); - if (ret) { + h->s = ec_new_stripe_alloc(c, h); + if (!h->s) { + ret = -BCH_ERR_ENOMEM_ec_new_stripe_alloc; bch_err(c, "failed to allocate new stripe"); goto err; } + + h->nr_created++; } - if (h->s->allocated) + struct ec_stripe_new *s = h->s; + + if (s->allocated) goto allocated; - if (h->s->have_existing_stripe) + if (s->have_existing_stripe) goto alloc_existing; /* First, try to allocate a full stripe: */ - ret = 
new_stripe_alloc_buckets(trans, h, BCH_WATERMARK_stripe, NULL) ?: - __bch2_ec_stripe_head_reserve(trans, h); + ret = new_stripe_alloc_buckets(trans, h, s, BCH_WATERMARK_stripe, NULL) ?: + __bch2_ec_stripe_head_reserve(trans, h, s); if (!ret) goto allocate_buf; if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || @@ -2183,15 +2195,15 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, * existing stripe: */ while (1) { - ret = __bch2_ec_stripe_head_reuse(trans, h); + ret = __bch2_ec_stripe_head_reuse(trans, h, s); if (!ret) break; if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked) goto err; if (watermark == BCH_WATERMARK_copygc) { - ret = new_stripe_alloc_buckets(trans, h, watermark, NULL) ?: - __bch2_ec_stripe_head_reserve(trans, h); + ret = new_stripe_alloc_buckets(trans, h, s, watermark, NULL) ?: + __bch2_ec_stripe_head_reserve(trans, h, s); if (ret) goto err; goto allocate_buf; @@ -2209,19 +2221,19 @@ alloc_existing: * Retry allocating buckets, with the watermark for this * particular write: */ - ret = new_stripe_alloc_buckets(trans, h, watermark, cl); + ret = new_stripe_alloc_buckets(trans, h, s, watermark, cl); if (ret) goto err; allocate_buf: - ret = ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize); + ret = ec_stripe_buf_init(&s->new_stripe, 0, h->blocksize); if (ret) goto err; - h->s->allocated = true; + s->allocated = true; allocated: - BUG_ON(!h->s->idx); - BUG_ON(!h->s->new_stripe.data[0]); + BUG_ON(!s->idx); + BUG_ON(!s->new_stripe.data[0]); BUG_ON(trans->restarted); return h; err: -- 2.51.0 From 8b22abb4c84058e9533d71a4814e54316ba2621f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 11 Oct 2024 22:53:09 -0400 Subject: [PATCH 08/16] bcachefs: -o norecovery now bails out of recovery earlier -o norecovery (used by the dump tool) should be doing the absolute minimum amount of work to get the filesystem up and readable; we shouldn't be running check and repair code, or going read-write. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 0e5a53541ce4..bc2fd174bb32 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -690,8 +690,13 @@ int bch2_fs_recovery(struct bch_fs *c) goto err; } - if (c->opts.norecovery) - c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1; + if (c->opts.norecovery) { + c->opts.recovery_pass_last = c->opts.recovery_pass_last + ? min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read) + : BCH_RECOVERY_PASS_snapshots_read; + c->opts.nochanges = true; + c->opts.read_only = true; + } mutex_lock(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); -- 2.51.0 From fb8c835b18d48dac953a5d755a8e90b0d8fb9c29 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 11 Oct 2024 22:50:48 -0400 Subject: [PATCH 09/16] bcachefs: bch2_journal_meta() takes ref on c->writes This is part of addressing https://github.com/koverstreet/bcachefs/issues/656, where we're getting stuck in bch2_journal_meta() in the dump tool. We shouldn't be invoking the journal without a ref on c->writes (if we're not RW), and there's no reason for the dump tool to be going read-write.
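For context, BCH_WRITE_REFS() is an x-macro list, so the x(journal) entry added below defines the BCH_WRITE_REF_journal enumerator used by the tryget/put pair (assumed expansion, following the usual x-macro idiom):

	enum bch_write_ref {
	#define x(n)	BCH_WRITE_REF_##n,
		BCH_WRITE_REFS()
	#undef x
		BCH_WRITE_REF_NR,
	};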
Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/journal.c | 27 +++++++++++++++++---------- fs/bcachefs/recovery.c | 4 +--- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index fbd89f91625d..d4d95ef6791f 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -688,6 +688,7 @@ struct btree_trans_buf { ((subvol_inum) { BCACHEFS_ROOT_SUBVOL, BCACHEFS_ROOT_INO }) #define BCH_WRITE_REFS() \ + x(journal) \ x(trans) \ x(write) \ x(promote) \ diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 2dc0d60c1745..2cf8f24d50cc 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -831,19 +831,14 @@ out: return ret; } -int bch2_journal_meta(struct journal *j) +static int __bch2_journal_meta(struct journal *j) { - struct journal_buf *buf; - struct journal_res res; - int ret; - - memset(&res, 0, sizeof(res)); - - ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); + struct journal_res res = {}; + int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0); if (ret) return ret; - buf = j->buf + (res.seq & JOURNAL_BUF_MASK); + struct journal_buf *buf = j->buf + (res.seq & JOURNAL_BUF_MASK); buf->must_flush = true; if (!buf->flush_time) { @@ -856,6 +851,18 @@ int bch2_journal_meta(struct journal *j) return bch2_journal_flush_seq(j, res.seq, TASK_UNINTERRUPTIBLE); } +int bch2_journal_meta(struct journal *j) +{ + struct bch_fs *c = container_of(j, struct bch_fs, journal); + + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_journal)) + return -EROFS; + + int ret = __bch2_journal_meta(j); + bch2_write_ref_put(c, BCH_WRITE_REF_journal); + return ret; +} + /* block/unlock the journal: */ void bch2_journal_unblock(struct journal *j) @@ -1193,7 +1200,7 @@ void bch2_fs_journal_stop(struct journal *j) * Always write a new journal entry, to make sure the clock hands are up * to date (and match the superblock) */ - bch2_journal_meta(j); + __bch2_journal_meta(j); journal_quiesce(j); cancel_delayed_work_sync(&j->write_work); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index bc2fd174bb32..431698189090 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -910,11 +910,9 @@ use_clean: set_bit(BCH_FS_accounting_replay_done, &c->flags); /* fsync if we fixed errors */ - if (test_bit(BCH_FS_errors_fixed, &c->flags) && - bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) { + if (test_bit(BCH_FS_errors_fixed, &c->flags)) { bch2_journal_flush_all_pins(&c->journal); bch2_journal_meta(&c->journal); - bch2_write_ref_put(c, BCH_WRITE_REF_fsync); } /* If we fixed errors, verify that fs is actually clean now: */ -- 2.51.0 From be5a7be1062b2e588519d7ed68ff2e8f4ed0a42a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 12 Oct 2024 14:07:44 -0400 Subject: [PATCH 10/16] bcachefs: Fix warning about passing flex array member by value this showed up when building in userspace Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 38b563113cfb..55a00018dc8b 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -244,10 +244,10 @@ void bch2_accounting_swab(struct bkey_s k) } static inline void __accounting_to_replicas(struct bch_replicas_entry_v1 *r, - struct disk_accounting_pos acc) + struct disk_accounting_pos *acc) { - unsafe_memcpy(r, &acc.replicas, - replicas_entry_bytes(&acc.replicas), + unsafe_memcpy(r, 
&acc->replicas,
+ replicas_entry_bytes(&acc->replicas),
 "variable length struct");
 }
 
@@ -258,7 +258,7 @@ static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struc
 
 switch (acc_k.type) {
 case BCH_DISK_ACCOUNTING_replicas:
- __accounting_to_replicas(r, acc_k);
+ __accounting_to_replicas(r, &acc_k);
 return true;
 default:
 return false;
@@ -626,7 +626,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans,
 switch (acc.type) {
 case BCH_DISK_ACCOUNTING_replicas: {
 struct bch_replicas_padded r;
- __accounting_to_replicas(&r.e, acc);
+ __accounting_to_replicas(&r.e, &acc);
 
 for (unsigned i = 0; i < r.e.nr_devs; i++)
 if (r.e.devs[i] != BCH_SB_MEMBER_INVALID &&
-- 
2.51.0

From 27de0ee39f810dab2e948d2c465f8fcf8cbf9f8c Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Mon, 14 Oct 2024 21:35:44 -0400
Subject: [PATCH 11/16] bcachefs: Add block plugging to read paths

This will help with some of the btree_trans srcu lock hold time warnings
that are still turning up; submit_bio() can block for a while if the
device is sufficiently congested.

It's not a perfect solution since blk_plug bios are submitted when
scheduling; we might want a way to disable the "submit on context
switch" behaviour, or switch to our own plugging in the future.

Signed-off-by: Kent Overstreet
---
 fs/bcachefs/fs-io-buffered.c | 19 ++++++++++++++++++-
 fs/bcachefs/fs-io-direct.c | 5 +++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index 95972809e76d..0923f38a2fcd 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -248,6 +248,7 @@ void bch2_readahead(struct readahead_control *ractl)
 struct bch_io_opts opts;
 struct folio *folio;
 struct readpages_iter readpages_iter;
+ struct blk_plug plug;
 
 bch2_inode_opts_get(&opts, c, &inode->ei_inode);
 
@@ -255,6 +256,16 @@ void bch2_readahead(struct readahead_control *ractl)
 if (ret)
 return;
 
+ /*
+ * Besides being a general performance optimization, plugging helps with
+ * avoiding btree transaction srcu warnings - submitting a bio can
+ * block, and we don't want to do that with the transaction locked.
+ *
+ * However, plugged bios are submitted when we schedule; we ideally
+ * would have our own scheduler hook to call unlock_long() before
+ * scheduling. 
+ */ + blk_start_plug(&plug); bch2_pagecache_add_get(inode); struct btree_trans *trans = bch2_trans_get(c); @@ -281,7 +292,7 @@ void bch2_readahead(struct readahead_control *ractl) bch2_trans_put(trans); bch2_pagecache_add_put(inode); - + blk_finish_plug(&plug); darray_exit(&readpages_iter.folios); } @@ -296,9 +307,13 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_read_bio *rbio; struct bch_io_opts opts; + struct blk_plug plug; int ret; DECLARE_COMPLETION_ONSTACK(done); + BUG_ON(folio_test_uptodate(folio)); + BUG_ON(folio_test_dirty(folio)); + if (!bch2_folio_create(folio, GFP_KERNEL)) return -ENOMEM; @@ -313,7 +328,9 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping) rbio->bio.bi_iter.bi_sector = folio_sector(folio); BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0)); + blk_start_plug(&plug); bch2_trans_run(c, (bchfs_read(trans, rbio, inode_inum(inode), NULL), 0)); + blk_finish_plug(&plug); wait_for_completion(&done); ret = blk_status_to_errno(rbio->bio.bi_status); diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 6d3a05ae5da8..2089c36b5866 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -70,6 +70,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) struct bch_io_opts opts; struct dio_read *dio; struct bio *bio; + struct blk_plug plug; loff_t offset = req->ki_pos; bool sync = is_sync_kiocb(req); size_t shorten; @@ -128,6 +129,8 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) */ dio->should_dirty = iter_is_iovec(iter); + blk_start_plug(&plug); + goto start; while (iter->count) { bio = bio_alloc_bioset(NULL, @@ -160,6 +163,8 @@ start: bch2_read(c, rbio_init(bio, opts), inode_inum(inode)); } + blk_finish_plug(&plug); + iter->count += shorten; if (sync) { -- 2.51.0 From e0c8369bc8444daf0d68d23bcae472d11680d49f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 20 Oct 2024 19:02:44 -0400 Subject: [PATCH 12/16] bcachefs: Add version check for bch_btree_ptr_v2.sectors_written validate A user popped up with a very old (0.11) filesystem that needed repair and wasn't recently backed up. Reported-by: Manoa Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 37e3d69bec06..85b98c782e1b 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -203,7 +203,8 @@ int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, c, btree_ptr_v2_min_key_bad, "min_key > key"); - if (flags & BCH_VALIDATE_write) + if ((flags & BCH_VALIDATE_write) && + c->sb.version_min >= bcachefs_metadata_version_btree_ptr_sectors_written) bkey_fsck_err_on(!bp.v->sectors_written, c, btree_ptr_v2_written_0, "sectors_written == 0"); -- 2.51.0 From de902e3b4a9881ae02d414d427ad56cc384c9bf1 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 19 Oct 2024 14:25:27 +0200 Subject: [PATCH 13/16] bcachefs: Use str_write_read() helper function Remove hard-coded strings by using the helper function str_write_read(). 
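The helper comes from <linux/string_choices.h> and is equivalent to the
open-coded ternary it replaces. A userspace approximation, for reference
(the kernel version is a static inline with the same semantics):

#include <stdbool.h>
#include <stdio.h>

/* mirrors str_write_read() from <linux/string_choices.h> */
static const char *str_write_read(bool is_write)
{
	return is_write ? "write" : "read";
}

int main(void)
{
	/* journal_entry_clock_to_text() output: "write=42" / "read=7" */
	printf("%s=%llu\n", str_write_read(true), 42ULL);
	printf("%s=%llu\n", str_write_read(false), 7ULL);
	return 0;
}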
Signed-off-by: Thorsten Blum
Signed-off-by: Kent Overstreet
---
 fs/bcachefs/journal_io.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 9bc0caa9d5e4..768a3b950997 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -17,6 +17,8 @@
 #include "sb-clean.h"
 #include "trace.h"
 
+#include <linux/string_choices.h>
+
 void bch2_journal_pos_from_member_info_set(struct bch_fs *c)
 {
 lockdep_assert_held(&c->sb_lock);
@@ -666,7 +668,7 @@ static void journal_entry_clock_to_text(struct printbuf *out, struct bch_fs *c,
 struct jset_entry_clock *clock =
 container_of(entry, struct jset_entry_clock, entry);
 
- prt_printf(out, "%s=%llu", clock->rw ? "write" : "read", le64_to_cpu(clock->time));
+ prt_printf(out, "%s=%llu", str_write_read(clock->rw), le64_to_cpu(clock->time));
 }
 
 static int journal_entry_dev_usage_validate(struct bch_fs *c,
-- 
2.51.0

From 751d869710ca91b6b2c6f4235137c71eb054ce02 Mon Sep 17 00:00:00 2001
From: Thorsten Blum
Date: Sun, 20 Oct 2024 13:20:46 +0200
Subject: [PATCH 14/16] bcachefs: Use str_write_read() helper in
 ec_block_endio()

Remove hard-coded strings by using the helper function str_write_read().

Signed-off-by: Thorsten Blum
Signed-off-by: Kent Overstreet
---
 fs/bcachefs/ec.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 8b727d63af3e..b46bf00c4a67 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -26,6 +26,7 @@
 #include "util.h"
 
 #include <linux/sort.h>
+#include <linux/string_choices.h>
 
 #ifdef __KERNEL__
 
@@ -732,7 +733,7 @@ static void ec_block_endio(struct bio *bio)
 ? BCH_MEMBER_ERROR_write
 : BCH_MEMBER_ERROR_read,
 "erasure coding %s error: %s",
- bio_data_dir(bio) ? "write" : "read",
+ str_write_read(bio_data_dir(bio)),
 bch2_blk_status_to_str(bio->bi_status)))
 clear_bit(ec_bio->idx, ec_bio->buf->valid);
-- 
2.51.0

From ac9826f14739023bccf1345e6e4ddb0461fa9a2e Mon Sep 17 00:00:00 2001
From: Thorsten Blum
Date: Sat, 26 Oct 2024 12:47:23 +0200
Subject: [PATCH 15/16] bcachefs: Use str_write_read() helper in
 write_super_endio()

Remove hard-coded strings by using the str_write_read() helper function.

Signed-off-by: Thorsten Blum
Signed-off-by: Kent Overstreet
---
 fs/bcachefs/super-io.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 7c71594f6a8b..c83bd3dedb1b 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -23,6 +23,7 @@
 
 #include <linux/backing-dev.h>
 #include <linux/sort.h>
+#include <linux/string_choices.h>
 
 static const struct blk_holder_ops bch2_sb_handle_bdev_ops = {
 };
 
@@ -878,7 +879,7 @@ static void write_super_endio(struct bio *bio)
 ? BCH_MEMBER_ERROR_write
 : BCH_MEMBER_ERROR_read,
 "superblock %s error: %s",
- bio_data_dir(bio) ? "write" : "read",
+ str_write_read(bio_data_dir(bio)),
 bch2_blk_status_to_str(bio->bi_status)))
 ca->sb_write_error = 1;
 
-- 
2.51.0

From 901ff6555ba02dd917aa65b1105c9715e25dc994 Mon Sep 17 00:00:00 2001
From: Thorsten Blum
Date: Sat, 26 Oct 2024 17:47:04 +0200
Subject: [PATCH 16/16] bcachefs: Annotate struct bucket_gens with
 __counted_by()

Add the __counted_by compiler attribute to the flexible array member b
to improve access bounds-checking via CONFIG_UBSAN_BOUNDS and
CONFIG_FORTIFY_SOURCE.

Use struct_size() to calculate the number of bytes to be allocated.

Update bucket_gens->nbuckets and bucket_gens->nbuckets_minus_first
when resizing.

Compile-tested only.
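To illustrate the annotation, a simplified standalone sketch - not the
kernel definitions (the real attribute macro lives in
<linux/compiler_attributes.h>, and the kernel's struct_size() from
<linux/overflow.h> additionally guards against overflow), with the struct
cut down to just the count plus the counted flex array:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#ifndef __has_attribute
#define __has_attribute(x) 0
#endif
#if __has_attribute(counted_by)
#define __counted_by(m) __attribute__((counted_by(m)))
#else
#define __counted_by(m)
#endif

/* simplified struct_size(): no overflow checking here */
#define struct_size(p, member, n) \
	(sizeof(*(p)) + sizeof(*(p)->member) * (size_t)(n))

struct gens {
	size_t	nbuckets;
	uint8_t	b[] __counted_by(nbuckets);
};

int main(void)
{
	size_t nbuckets = 128;
	struct gens *g = calloc(1, struct_size(g, b, nbuckets));

	if (!g)
		return 1;
	/*
	 * The count member must be kept in sync with the allocation before
	 * b[] is accessed - hence updating nbuckets (and
	 * nbuckets_minus_first) in the resize path before the memcpy().
	 */
	g->nbuckets = nbuckets;
	memset(g->b, 0, g->nbuckets);
	free(g);
	return 0;
}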
Signed-off-by: Thorsten Blum Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 13 ++++++++----- fs/bcachefs/buckets_types.h | 2 +- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index ec7d9a59bea9..8bd17667e243 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1266,8 +1266,9 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) BUG_ON(resize && ca->buckets_nouse); - if (!(bucket_gens = kvmalloc(sizeof(struct bucket_gens) + nbuckets, - GFP_KERNEL|__GFP_ZERO))) { + bucket_gens = kvmalloc(struct_size(bucket_gens, b, nbuckets), + GFP_KERNEL|__GFP_ZERO); + if (!bucket_gens) { ret = -BCH_ERR_ENOMEM_bucket_gens; goto err; } @@ -1285,11 +1286,13 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); if (resize) { - size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets); - + bucket_gens->nbuckets = min(bucket_gens->nbuckets, + old_bucket_gens->nbuckets); + bucket_gens->nbuckets_minus_first = + bucket_gens->nbuckets - bucket_gens->first_bucket; memcpy(bucket_gens->b, old_bucket_gens->b, - n); + bucket_gens->nbuckets); } rcu_assign_pointer(ca->bucket_gens, bucket_gens); diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index 28bd09a253c8..7174047b8e92 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -24,7 +24,7 @@ struct bucket_gens { u16 first_bucket; size_t nbuckets; size_t nbuckets_minus_first; - u8 b[]; + u8 b[] __counted_by(nbuckets); }; struct bch_dev_usage { -- 2.51.0