From 600a9207c8def056b4681fde8158c463576d5aca Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 24 Apr 2025 09:27:10 -0400 Subject: [PATCH] bcachefs: Plumb printbuf through bch2_btree_lost_data() Part of the ongoing project to improve error messages by building them up in printbufs and emitting them all at once, so that we can easily see what events are related in the log. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 28 +++++++++++++++-------- fs/bcachefs/recovery.c | 42 ++++++++++++++++++----------------- fs/bcachefs/recovery.h | 2 +- fs/bcachefs/recovery_passes.c | 9 ++++---- fs/bcachefs/recovery_passes.h | 4 +++- 5 files changed, 50 insertions(+), 35 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 84dae4c1ec13..41df1035ba2f 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1304,7 +1304,6 @@ fsck_err: retry_read = 1; } else { set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); } goto out; } @@ -1372,15 +1371,16 @@ start: if (!can_retry) { set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); break; } } - - async_object_list_del(c, btree_read_bio, rb->list_idx); - bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], - rb->start_time); - bio_put(&rb->bio); + if (btree_node_read_error(b)) { + struct printbuf buf = PRINTBUF; + bch2_btree_lost_data(c, &buf, b->c.btree_id); + if (buf.pos) + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); + } if ((saw_error || btree_node_need_rewrite(b)) && @@ -1398,6 +1398,10 @@ start: bch2_btree_node_rewrite_async(c, b); } + async_object_list_del(c, btree_read_bio, rb->list_idx); + bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], + rb->start_time); + bio_put(&rb->bio); printbuf_exit(&buf); clear_btree_node_read_in_flight(b); smp_mb__after_atomic(); @@ -1587,7 +1591,12 @@ fsck_err: if (ret) { set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); + + struct printbuf buf = PRINTBUF; + bch2_btree_lost_data(c, &buf, b->c.btree_id); + if (buf.pos) + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); } else if (*saw_error) bch2_btree_node_rewrite_async(c, b); @@ -1721,6 +1730,8 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, prt_str(&buf, "btree node read error: no device to read from\n at "); bch2_btree_pos_to_text(&buf, c, b); + prt_newline(&buf); + bch2_btree_lost_data(c, &buf, b->c.btree_id); bch_err_ratelimited(c, "%s", buf.buf); if (c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology) && @@ -1728,7 +1739,6 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, bch2_fatal_error(c); set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); clear_btree_node_read_in_flight(b); smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index b1afbe446d9e..d13a6df289c7 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -33,7 +33,9 @@ #include #include -int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) +int bch2_btree_lost_data(struct bch_fs *c, + struct printbuf *msg, + enum btree_id btree) { u64 b = BIT_ULL(btree); int ret = 0; @@ -42,32 +44,32 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); if (!(c->sb.btrees_lost_data & b)) { - struct printbuf buf = PRINTBUF; - bch2_btree_id_to_text(&buf, btree); - bch_err(c, "flagging btree %s lost data", buf.buf); - printbuf_exit(&buf); + prt_printf(msg, "flagging btree "); + bch2_btree_id_to_text(msg, btree); + prt_printf(msg, " lost data\n"); + ext->btrees_lost_data |= cpu_to_le64(b); } /* Once we have runtime self healing for topology errors we won't need this: */ - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_topology) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_topology) ?: ret; /* Btree node accounting will be off: */ __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; #ifdef CONFIG_BCACHEFS_DEBUG /* * These are much more minor, and don't need to be corrected right away, * but in debug mode we want the next fsck run to be clean: */ - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_lrus) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_lrus) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; #endif switch (btree) { case BTREE_ID_alloc: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); @@ -77,30 +79,30 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); goto out; case BTREE_ID_backpointers: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; goto out; case BTREE_ID_need_discard: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_freespace: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_bucket_gens: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_lru: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_accounting: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; goto out; case BTREE_ID_snapshots: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; default: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; } out: diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index d858ba674eaa..c023f52fc2d6 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_RECOVERY_H #define _BCACHEFS_RECOVERY_H -int bch2_btree_lost_data(struct bch_fs *, enum btree_id); +int bch2_btree_lost_data(struct bch_fs *, struct printbuf *, enum btree_id); void bch2_reconstruct_alloc(struct bch_fs *); int bch2_journal_replay(struct bch_fs *); diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 9be715a49454..347e17fe7901 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -141,13 +141,13 @@ static int __bch2_run_explicit_recovery_pass(struct printbuf *out, if (pass < BCH_RECOVERY_PASS_set_may_go_rw && c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) { if (print) - prt_printf(out, "need recovery pass %s (%u), but already rw", + prt_printf(out, "need recovery pass %s (%u), but already rw\n", bch2_recovery_passes[pass], pass); return -BCH_ERR_cannot_rewind_recovery; } if (print) - prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)", + prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)\n", bch2_recovery_passes[pass], pass, bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); @@ -194,14 +194,15 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, } int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, - enum bch_recovery_pass pass) + struct printbuf *out, + enum bch_recovery_pass pass) { lockdep_assert_held(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required); - return bch2_run_explicit_recovery_pass(c, pass); + return bch2_run_explicit_recovery_pass_printbuf(c, out, pass); } static void bch2_clear_recovery_pass_required(struct bch_fs *c, diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h index 62957e268a66..1f91be4258c5 100644 --- a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h @@ -12,7 +12,9 @@ int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *, struct printbuf *, enum bch_recovery_pass); int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); -int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass); + +int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *, + enum bch_recovery_pass); int bch2_run_online_recovery_passes(struct bch_fs *); int bch2_run_recovery_passes(struct bch_fs *); -- 2.50.1