From 3be132f93cff2586be482cb81807ff83899f572e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 24 Apr 2025 09:09:56 -0400 Subject: [PATCH 01/16] bcachefs: bch2_btree_lost_data() now handles snapshots tree We have a consolidated place for "this btree lost data, run this repair", so use it. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 4 ++++ fs/bcachefs/snapshot.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 2a8bcb9b1dd2..8f45d9e3a47e 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -95,6 +95,10 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) case BTREE_ID_accounting: ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; goto out; + case BTREE_ID_snapshots: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + goto out; default: ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 14ea09ccee37..94cf60f76b64 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1743,10 +1743,6 @@ int bch2_snapshots_read(struct bch_fs *c) BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) && test_bit(BCH_FS_may_go_rw, &c->flags)); - if (bch2_err_matches(ret, EIO) || - (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots))) - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots); - return ret; } -- 2.51.0 From 3aecbb01a168bf6396955e5da0533f6e5f000441 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 24 Apr 2025 09:13:28 -0400 Subject: [PATCH 02/16] bcachefs: Remove redundant calls to btree_lost_data() The btree node read path calls this before 
returning the read error. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 7 ------- fs/bcachefs/recovery.c | 3 --- 2 files changed, 10 deletions(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index fecf88079127..92ae31737a24 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -378,10 +378,6 @@ again: b->c.level, cur_k.k->k.p); if (ret) break; - - ret = bch2_btree_lost_data(c, b->c.btree_id); - if (ret) - break; continue; } @@ -543,9 +539,6 @@ int bch2_check_topology(struct bch_fs *c) bch2_btree_id_to_text(&buf, i); if (r->error) { - ret = bch2_btree_lost_data(c, i); - if (ret) - break; reconstruct_root: bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 8f45d9e3a47e..a0b42cca86fb 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -587,9 +587,6 @@ static int read_btree_roots(struct bch_fs *c) buf.buf, bch2_err_str(ret))) { if (btree_id_is_alloc(i)) r->error = 0; - - ret = bch2_btree_lost_data(c, i); - BUG_ON(ret); } } -- 2.51.0 From 300904700f14e4e05db2a16cf8e3890c8e856cf8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 24 Apr 2025 09:28:56 -0400 Subject: [PATCH 03/16] bcachefs: kill bch2_run_explicit_recovery_pass_persistent() No longer has users, so we can kill it and rename bch2_run_explicit_recovery_pass_persistent_locked(). 
Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 30 +++++++++++++++--------------- fs/bcachefs/recovery_passes.c | 19 +------------------ fs/bcachefs/recovery_passes.h | 1 - 3 files changed, 16 insertions(+), 34 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index a0b42cca86fb..b1afbe446d9e 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -50,24 +50,24 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) } /* Once we have runtime self healing for topology errors we won't need this: */ - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_topology) ?: ret; /* Btree node accounting will be off: */ __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; #ifdef CONFIG_BCACHEFS_DEBUG /* * These are much more minor, and don't need to be corrected right away, * but in debug mode we want the next fsck run to be clean: */ - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_lrus) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_lrus) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; #endif switch (btree) { case BTREE_ID_alloc: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); 
__set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); @@ -77,30 +77,30 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); goto out; case BTREE_ID_backpointers: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; goto out; case BTREE_ID_need_discard: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_freespace: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_bucket_gens: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_lru: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_accounting: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; goto out; case BTREE_ID_snapshots: - ret = 
bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; default: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; } out: diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 87150dd30f4b..9be715a49454 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -193,7 +193,7 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, return ret; } -int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c, +int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, enum bch_recovery_pass pass) { lockdep_assert_held(&c->sb_lock); @@ -204,23 +204,6 @@ int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c, return bch2_run_explicit_recovery_pass(c, pass); } -int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, - enum bch_recovery_pass pass) -{ - enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass); - - mutex_lock(&c->sb_lock); - struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - - if (!test_bit_le64(s, ext->recovery_passes_required)) { - __set_bit_le64(s, ext->recovery_passes_required); - bch2_write_super(c); - } - mutex_unlock(&c->sb_lock); - - return bch2_run_explicit_recovery_pass(c, pass); -} - static void bch2_clear_recovery_pass_required(struct bch_fs *c, enum bch_recovery_pass pass) { diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h index e19a8aaba2f8..62957e268a66 100644 --- 
a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h @@ -12,7 +12,6 @@ int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *, struct printbuf *, enum bch_recovery_pass); int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); -int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *, enum bch_recovery_pass); int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass); int bch2_run_online_recovery_passes(struct bch_fs *); -- 2.51.0 From 600a9207c8def056b4681fde8158c463576d5aca Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 24 Apr 2025 09:27:10 -0400 Subject: [PATCH 04/16] bcachefs: Plumb printbuf through bch2_btree_lost_data() Part of the ongoing project to improve error messages by building them up in printbufs and emitting them all at once, so that we can easily see what events are related in the log. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 28 +++++++++++++++-------- fs/bcachefs/recovery.c | 42 ++++++++++++++++++----------------- fs/bcachefs/recovery.h | 2 +- fs/bcachefs/recovery_passes.c | 9 ++++---- fs/bcachefs/recovery_passes.h | 4 +++- 5 files changed, 50 insertions(+), 35 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 84dae4c1ec13..41df1035ba2f 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1304,7 +1304,6 @@ fsck_err: retry_read = 1; } else { set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); } goto out; } @@ -1372,15 +1371,16 @@ start: if (!can_retry) { set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); break; } } - - async_object_list_del(c, btree_read_bio, rb->list_idx); - bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], - rb->start_time); - bio_put(&rb->bio); + if (btree_node_read_error(b)) { + struct printbuf buf = PRINTBUF; + bch2_btree_lost_data(c, &buf, b->c.btree_id); + if (buf.pos) + bch_err(c, "%s", buf.buf); + 
printbuf_exit(&buf); + } if ((saw_error || btree_node_need_rewrite(b)) && @@ -1398,6 +1398,10 @@ start: bch2_btree_node_rewrite_async(c, b); } + async_object_list_del(c, btree_read_bio, rb->list_idx); + bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], + rb->start_time); + bio_put(&rb->bio); printbuf_exit(&buf); clear_btree_node_read_in_flight(b); smp_mb__after_atomic(); @@ -1587,7 +1591,12 @@ fsck_err: if (ret) { set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); + + struct printbuf buf = PRINTBUF; + bch2_btree_lost_data(c, &buf, b->c.btree_id); + if (buf.pos) + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); } else if (*saw_error) bch2_btree_node_rewrite_async(c, b); @@ -1721,6 +1730,8 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, prt_str(&buf, "btree node read error: no device to read from\n at "); bch2_btree_pos_to_text(&buf, c, b); + prt_newline(&buf); + bch2_btree_lost_data(c, &buf, b->c.btree_id); bch_err_ratelimited(c, "%s", buf.buf); if (c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology) && @@ -1728,7 +1739,6 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, bch2_fatal_error(c); set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); clear_btree_node_read_in_flight(b); smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index b1afbe446d9e..d13a6df289c7 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -33,7 +33,9 @@ #include #include -int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) +int bch2_btree_lost_data(struct bch_fs *c, + struct printbuf *msg, + enum btree_id btree) { u64 b = BIT_ULL(btree); int ret = 0; @@ -42,32 +44,32 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); if (!(c->sb.btrees_lost_data & b)) { - struct printbuf buf = 
PRINTBUF; - bch2_btree_id_to_text(&buf, btree); - bch_err(c, "flagging btree %s lost data", buf.buf); - printbuf_exit(&buf); + prt_printf(msg, "flagging btree "); + bch2_btree_id_to_text(msg, btree); + prt_printf(msg, " lost data\n"); + ext->btrees_lost_data |= cpu_to_le64(b); } /* Once we have runtime self healing for topology errors we won't need this: */ - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_topology) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_topology) ?: ret; /* Btree node accounting will be off: */ __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; #ifdef CONFIG_BCACHEFS_DEBUG /* * These are much more minor, and don't need to be corrected right away, * but in debug mode we want the next fsck run to be clean: */ - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_lrus) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_lrus) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; #endif switch (btree) { case BTREE_ID_alloc: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); @@ -77,30 +79,30 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, 
ext->errors_silent); goto out; case BTREE_ID_backpointers: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; goto out; case BTREE_ID_need_discard: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_freespace: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_bucket_gens: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_lru: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_accounting: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; goto out; case BTREE_ID_snapshots: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, 
BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; default: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; } out: diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index d858ba674eaa..c023f52fc2d6 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_RECOVERY_H #define _BCACHEFS_RECOVERY_H -int bch2_btree_lost_data(struct bch_fs *, enum btree_id); +int bch2_btree_lost_data(struct bch_fs *, struct printbuf *, enum btree_id); void bch2_reconstruct_alloc(struct bch_fs *); int bch2_journal_replay(struct bch_fs *); diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 9be715a49454..347e17fe7901 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -141,13 +141,13 @@ static int __bch2_run_explicit_recovery_pass(struct printbuf *out, if (pass < BCH_RECOVERY_PASS_set_may_go_rw && c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) { if (print) - prt_printf(out, "need recovery pass %s (%u), but already rw", + prt_printf(out, "need recovery pass %s (%u), but already rw\n", bch2_recovery_passes[pass], pass); return -BCH_ERR_cannot_rewind_recovery; } if (print) - prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)", + prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)\n", bch2_recovery_passes[pass], pass, bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); @@ -194,14 +194,15 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, } int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, - enum bch_recovery_pass pass) + struct printbuf *out, + enum bch_recovery_pass pass) { 
lockdep_assert_held(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required); - return bch2_run_explicit_recovery_pass(c, pass); + return bch2_run_explicit_recovery_pass_printbuf(c, out, pass); } static void bch2_clear_recovery_pass_required(struct bch_fs *c, diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h index 62957e268a66..1f91be4258c5 100644 --- a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h @@ -12,7 +12,9 @@ int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *, struct printbuf *, enum bch_recovery_pass); int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); -int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass); + +int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *, + enum bch_recovery_pass); int bch2_run_online_recovery_passes(struct bch_fs *); int bch2_run_recovery_passes(struct bch_fs *); -- 2.51.0 From d31f155964aee6e6141967fc392a9a99b221e117 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Apr 2025 09:02:15 -0400 Subject: [PATCH 05/16] bcachefs: bch2_fsck_err_opt() Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 1 + fs/bcachefs/error.c | 42 ++++++++++++++++++++++++++++++++++++++++++ fs/bcachefs/error.h | 4 ++++ 3 files changed, 47 insertions(+) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 1a52edc7c8d8..4aac0182cbed 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -175,6 +175,7 @@ x(0, backpointer_to_overwritten_btree_node) \ x(0, journal_reclaim_would_deadlock) \ x(EINVAL, fsck) \ + x(BCH_ERR_fsck, fsck_ask) \ x(BCH_ERR_fsck, fsck_fix) \ x(BCH_ERR_fsck, fsck_delete_bkey) \ x(BCH_ERR_fsck, fsck_ignore) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 20495062d6e1..731733e12e6b 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c 
@@ -393,6 +393,48 @@ bool __bch2_count_fsck_err(struct bch_fs *c, return print && !repeat; } +int bch2_fsck_err_opt(struct bch_fs *c, + enum bch_fsck_flags flags, + enum bch_sb_error_id err) +{ + if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) + flags |= fsck_flags_extra[err]; + + if (test_bit(BCH_FS_fsck_running, &c->flags)) { + if (!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) + return -BCH_ERR_fsck_repair_unimplemented; + + switch (c->opts.fix_errors) { + case FSCK_FIX_exit: + return -BCH_ERR_fsck_errors_not_fixed; + case FSCK_FIX_yes: + if (flags & FSCK_CAN_FIX) + return -BCH_ERR_fsck_fix; + fallthrough; + case FSCK_FIX_no: + if (flags & FSCK_CAN_IGNORE) + return -BCH_ERR_fsck_ignore; + return -BCH_ERR_fsck_errors_not_fixed; + case FSCK_FIX_ask: + if (flags & FSCK_AUTOFIX) + return -BCH_ERR_fsck_fix; + return -BCH_ERR_fsck_ask; + default: + BUG(); + } + } else { + if ((flags & FSCK_AUTOFIX) && + (c->opts.errors == BCH_ON_ERROR_continue || + c->opts.errors == BCH_ON_ERROR_fix_safe)) + return -BCH_ERR_fsck_fix; + + if (c->opts.errors == BCH_ON_ERROR_continue && + (flags & FSCK_CAN_IGNORE)) + return -BCH_ERR_fsck_ignore; + return -BCH_ERR_fsck_errors_not_fixed; + } +} + int __bch2_fsck_err(struct bch_fs *c, struct btree_trans *trans, enum bch_fsck_flags flags, diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 0b3ede1c2015..d89dd270b2e5 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -80,6 +80,10 @@ bool __bch2_count_fsck_err(struct bch_fs *, enum bch_sb_error_id, struct printbu #define bch2_count_fsck_err(_c, _err, ...) 
\ __bch2_count_fsck_err(_c, BCH_FSCK_ERR_##_err, __VA_ARGS__) +int bch2_fsck_err_opt(struct bch_fs *, + enum bch_fsck_flags, + enum bch_sb_error_id); + __printf(5, 6) __cold int __bch2_fsck_err(struct bch_fs *, struct btree_trans *, enum bch_fsck_flags, -- 2.51.0 From 9c2472658be20d04c6dc34d5314a7e99cc4fed25 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Apr 2025 20:38:50 -0400 Subject: [PATCH 06/16] bcachefs: bch2_mark_btree_validate_failure() Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 31 ++++++++++++++++++++++++++----- fs/bcachefs/extents.h | 1 + fs/bcachefs/extents_types.h | 1 + 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 8a881b30fd4c..c4fe4ffd41f1 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -56,6 +56,14 @@ void bch2_io_failures_to_text(struct printbuf *out, for (struct bch_dev_io_failures *f = failed->devs; f < failed->devs + failed->nr; f++) { + unsigned errflags = + ((!!f->failed_io) << 0) | + ((!!f->failed_csum_nr) << 1) | + ((!!f->failed_ec) << 2); + + if (!errflags) + continue; + bch2_printbuf_make_room(out, 1024); rcu_read_lock(); out->atomic++; @@ -69,11 +77,6 @@ void bch2_io_failures_to_text(struct printbuf *out, prt_char(out, ' '); - unsigned errflags = - ((!!f->failed_io) << 0) | - ((!!f->failed_csum_nr) << 1) | - ((!!f->failed_ec) << 2); - if (is_power_of_2(errflags)) { prt_bitflags(out, error_types, errflags); prt_str(out, " error"); @@ -119,6 +122,22 @@ void bch2_mark_io_failure(struct bch_io_failures *failed, f->failed_csum_nr++; } +void bch2_mark_btree_validate_failure(struct bch_io_failures *failed, + unsigned dev) +{ + struct bch_dev_io_failures *f = bch2_dev_io_failures(failed, dev); + + if (!f) { + BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs)); + + f = &failed->devs[failed->nr++]; + memset(f, 0, sizeof(*f)); + f->dev = dev; + } + + f->failed_btree_validate = true; +} + static inline u64 dev_latency(struct bch_dev *ca) { 
return ca ? atomic64_read(&ca->cur_latency[READ]) : S64_MAX; @@ -219,6 +238,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, if (ca && ca->mi.state != BCH_MEMBER_STATE_failed) { have_io_errors |= f->failed_io; + have_io_errors |= f->failed_btree_validate; have_io_errors |= f->failed_ec; } have_csum_errors |= !!f->failed_csum_nr; @@ -226,6 +246,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, if (p.has_ec && (f->failed_io || f->failed_csum_nr)) p.do_ec_reconstruct = true; else if (f->failed_io || + f->failed_btree_validate || f->failed_csum_nr > c->opts.checksum_err_retry_nr) continue; } diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 9dd2655a5774..b8590e51b76e 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -398,6 +398,7 @@ struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *, unsigned); void bch2_mark_io_failure(struct bch_io_failures *, struct extent_ptr_decoded *, bool); +void bch2_mark_btree_validate_failure(struct bch_io_failures *, unsigned); int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, struct bch_io_failures *, struct extent_ptr_decoded *, int); diff --git a/fs/bcachefs/extents_types.h b/fs/bcachefs/extents_types.h index e51529dca4c2..b23ce4a373c0 100644 --- a/fs/bcachefs/extents_types.h +++ b/fs/bcachefs/extents_types.h @@ -34,6 +34,7 @@ struct bch_io_failures { u8 dev; unsigned failed_csum_nr:6, failed_io:1, + failed_btree_validate:1, failed_ec:1; } devs[BCH_REPLICAS_MAX + 1]; }; -- 2.51.0 From cd3cdb1ef706a1ac725194d81858d58375739b25 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Apr 2025 09:14:19 -0400 Subject: [PATCH 07/16] bcachefs: Single err message for btree node reads Like we just did with the data read path, emit a single error message per btree node read, nicely formatted, with all the actions we took grouped together. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 254 +++++++++++++++++++++++------------------ fs/bcachefs/btree_io.h | 4 +- fs/bcachefs/debug.c | 4 +- 3 files changed, 151 insertions(+), 111 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 41df1035ba2f..e079e12adf86 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -516,19 +516,23 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) static void btree_err_msg(struct printbuf *out, struct bch_fs *c, struct bch_dev *ca, + bool print_pos, struct btree *b, struct bset *i, struct bkey_packed *k, - unsigned offset, int write) + unsigned offset, int rw) { - prt_printf(out, bch2_log_msg(c, "%s"), - write == READ - ? "error validating btree node " - : "corrupt btree node before write "); + if (print_pos) { + prt_str(out, rw == READ + ? "error validating btree node " + : "corrupt btree node before write "); + prt_printf(out, "at btree "); + bch2_btree_pos_to_text(out, c, b); + prt_newline(out); + } + if (ca) - prt_printf(out, "on %s ", ca->name); - prt_printf(out, "at btree "); - bch2_btree_pos_to_text(out, c, b); + prt_printf(out, "%s ", ca->name); - prt_printf(out, "\nnode offset %u/%u", + prt_printf(out, "node offset %u/%u", b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key))); if (i) prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s)); @@ -539,75 +543,110 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, prt_str(out, ": "); } -__printf(10, 11) +__printf(11, 12) static int __btree_err(int ret, struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, struct bkey_packed *k, - int write, - bool have_retry, + int rw, enum bch_sb_error_id err_type, + struct bch_io_failures *failed, + struct printbuf *err_msg, const char *fmt, ...) 
{ - bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes; + if (c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes) + return -BCH_ERR_fsck_fix; + + bool have_retry = false; + int ret2; + + if (ca) { + bch2_mark_btree_validate_failure(failed, ca->dev_idx); + + struct extent_ptr_decoded pick; + have_retry = !bch2_bkey_pick_read_device(c, + bkey_i_to_s_c(&b->key), + failed, &pick, -1); + } if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry) ret = -BCH_ERR_btree_node_read_err_fixable; if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry) ret = -BCH_ERR_btree_node_read_err_bad_node; - if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable) - bch2_sb_error_count(c, err_type); + bch2_sb_error_count(c, err_type); + + bool print_deferred = err_msg && + rw == READ && + !(test_bit(BCH_FS_fsck_running, &c->flags) && + c->opts.fix_errors == FSCK_FIX_ask); struct printbuf out = PRINTBUF; - if (write != WRITE && ret != -BCH_ERR_btree_node_read_err_fixable) { - printbuf_indent_add_nextline(&out, 2); -#ifdef BCACHEFS_LOG_PREFIX - prt_printf(&out, bch2_log_msg(c, "")); -#endif - } + bch2_log_msg_start(c, &out); + + if (!print_deferred) + err_msg = &out; - btree_err_msg(&out, c, ca, b, i, k, b->written, write); + btree_err_msg(err_msg, c, ca, !print_deferred, b, i, k, b->written, rw); va_list args; va_start(args, fmt); - prt_vprintf(&out, fmt, args); + prt_vprintf(err_msg, fmt, args); va_end(args); - if (write == WRITE) { + if (print_deferred) { + prt_newline(err_msg); + + switch (ret) { + case -BCH_ERR_btree_node_read_err_fixable: + ret2 = bch2_fsck_err_opt(c, FSCK_CAN_FIX, err_type); + if (ret2 != -BCH_ERR_fsck_fix && + ret2 != -BCH_ERR_fsck_ignore) { + ret = ret2; + goto fsck_err; + } + + if (!have_retry) + ret = -BCH_ERR_fsck_fix; + goto out; + case -BCH_ERR_btree_node_read_err_bad_node: + prt_str(&out, ", "); + ret = __bch2_topology_error(c, &out); + break; + } + + goto out; + } + + if (rw == WRITE) { 
prt_str(&out, ", "); ret = __bch2_inconsistent_error(c, &out) ? -BCH_ERR_fsck_errors_not_fixed : 0; - silent = false; + goto print; } switch (ret) { case -BCH_ERR_btree_node_read_err_fixable: - ret = !silent - ? __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf) - : -BCH_ERR_fsck_fix; - if (ret != -BCH_ERR_fsck_fix && - ret != -BCH_ERR_fsck_ignore) + ret2 = __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf); + if (ret2 != -BCH_ERR_fsck_fix && + ret2 != -BCH_ERR_fsck_ignore) { + ret = ret2; goto fsck_err; - ret = -BCH_ERR_fsck_fix; + } + + if (!have_retry) + ret = -BCH_ERR_fsck_fix; goto out; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); ret = __bch2_topology_error(c, &out); - if (ret) - silent = false; - break; - case -BCH_ERR_btree_node_read_err_incompatible: - ret = -BCH_ERR_fsck_errors_not_fixed; - silent = false; break; } - - if (!silent) - bch2_print_str(c, KERN_ERR, out.buf); +print: + bch2_print_str(c, KERN_ERR, out.buf); out: fsck_err: printbuf_exit(&out); @@ -616,8 +655,9 @@ fsck_err: #define btree_err(type, c, ca, b, i, k, _err_type, msg, ...) \ ({ \ - int _ret = __btree_err(type, c, ca, b, i, k, write, have_retry, \ + int _ret = __btree_err(type, c, ca, b, i, k, write, \ BCH_FSCK_ERR_##_err_type, \ + failed, err_msg, \ msg, ##__VA_ARGS__); \ \ if (_ret != -BCH_ERR_fsck_fix) { \ @@ -625,7 +665,7 @@ fsck_err: goto fsck_err; \ } \ \ - *saw_error = true; \ + true; \ }) #define btree_err_on(cond, ...) ((cond) ? 
btree_err(__VA_ARGS__) : false) @@ -683,8 +723,9 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b) static int validate_bset(struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, - unsigned offset, unsigned sectors, - int write, bool have_retry, bool *saw_error) + unsigned offset, unsigned sectors, int write, + struct bch_io_failures *failed, + struct printbuf *err_msg) { unsigned version = le16_to_cpu(i->version); unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); @@ -897,7 +938,8 @@ static inline int btree_node_read_bkey_cmp(const struct btree *b, static int validate_bset_keys(struct bch_fs *c, struct btree *b, struct bset *i, int write, - bool have_retry, bool *saw_error) + struct bch_io_failures *failed, + struct printbuf *err_msg) { unsigned version = le16_to_cpu(i->version); struct bkey_packed *k, *prev = NULL; @@ -1010,7 +1052,9 @@ fsck_err: } int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, - struct btree *b, bool have_retry, bool *saw_error) + struct btree *b, + struct bch_io_failures *failed, + struct printbuf *err_msg) { struct btree_node_entry *bne; struct sort_iter *iter; @@ -1023,7 +1067,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); u64 max_journal_seq = 0; struct printbuf buf = PRINTBUF; - int ret = 0, retry_read = 0, write = READ; + int ret = 0, write = READ; u64 start_time = local_clock(); b->version_ondisk = U16_MAX; @@ -1157,15 +1201,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, b->version_ondisk = min(b->version_ondisk, le16_to_cpu(i->version)); - ret = validate_bset(c, ca, b, i, b->written, sectors, - READ, have_retry, saw_error); + ret = validate_bset(c, ca, b, i, b->written, sectors, READ, failed, err_msg); if (ret) goto fsck_err; if (!b->written) btree_node_set_format(b, b->data->format); - ret = validate_bset_keys(c, b, i, READ, have_retry, 
saw_error); + ret = validate_bset_keys(c, b, i, READ, failed, err_msg); if (ret) goto fsck_err; @@ -1293,19 +1336,11 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, if (!ptr_written) set_btree_node_need_rewrite(b); -out: +fsck_err: mempool_free(iter, &c->fill_iter); printbuf_exit(&buf); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time); - return retry_read; -fsck_err: - if (ret == -BCH_ERR_btree_node_read_err_want_retry || - ret == -BCH_ERR_btree_node_read_err_must_retry) { - retry_read = 1; - } else { - set_btree_node_read_error(b); - } - goto out; + return ret; } static void btree_node_read_work(struct work_struct *work) @@ -1317,15 +1352,25 @@ static void btree_node_read_work(struct work_struct *work) struct btree *b = rb->b; struct bio *bio = &rb->bio; struct bch_io_failures failed = { .nr = 0 }; + int ret = 0; + struct printbuf buf = PRINTBUF; - bool saw_error = false; - bool retry = false; - bool can_retry; + bch2_log_msg_start(c, &buf); + + prt_printf(&buf, "btree node read error at btree "); + bch2_btree_pos_to_text(&buf, c, b); + prt_newline(&buf); goto start; while (1) { - retry = true; - bch_info(c, "retrying read"); + ret = bch2_bkey_pick_read_device(c, + bkey_i_to_s_c(&b->key), + &failed, &rb->pick, -1); + if (ret) { + set_btree_node_read_error(b); + break; + } + ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read); rb->have_ioref = ca != NULL; rb->start_time = local_clock(); @@ -1343,60 +1388,54 @@ static void btree_node_read_work(struct work_struct *work) bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, rb->start_time, !bio->bi_status); start: - printbuf_reset(&buf); - bch2_btree_pos_to_text(&buf, c, b); - - if (ca && bio->bi_status) - bch_err_dev_ratelimited(ca, - "btree read error %s for %s", - bch2_blk_status_to_str(bio->bi_status), buf.buf); if (rb->have_ioref) enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_read); rb->have_ioref = false; - 
bch2_mark_io_failure(&failed, &rb->pick, false); - - can_retry = bch2_bkey_pick_read_device(c, - bkey_i_to_s_c(&b->key), - &failed, &rb->pick, -1) > 0; - - if (!bio->bi_status && - !bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) { - if (retry) - bch_info(c, "retry success"); - break; + if (bio->bi_status) { + bch2_mark_io_failure(&failed, &rb->pick, false); + continue; } - saw_error = true; + ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf); + if (ret == -BCH_ERR_btree_node_read_err_want_retry || + ret == -BCH_ERR_btree_node_read_err_must_retry) + continue; - if (!can_retry) { + if (ret) set_btree_node_read_error(b); - break; - } + + break; } - if (btree_node_read_error(b)) { - struct printbuf buf = PRINTBUF; + + bch2_io_failures_to_text(&buf, c, &failed); + + if (btree_node_read_error(b)) bch2_btree_lost_data(c, &buf, b->c.btree_id); - if (buf.pos) - bch_err(c, "%s", buf.buf); - printbuf_exit(&buf); + + /* + * only print retry success if we read from a replica with no errors + */ + if (btree_node_read_error(b)) + prt_printf(&buf, "ret %s", bch2_err_str(ret)); + else if (failed.nr) { + if (!bch2_dev_io_failures(&failed, rb->pick.ptr.dev)) + prt_printf(&buf, "retry success"); + else + prt_printf(&buf, "repair success"); } - if ((saw_error || + if ((failed.nr || btree_node_need_rewrite(b)) && !btree_node_read_error(b) && c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { - if (saw_error) { - printbuf_reset(&buf); - bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); - prt_str(&buf, " "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s", - __func__, buf.buf); - } - + prt_printf(&buf, " (rewriting node)"); bch2_btree_node_rewrite_async(c, b); } + prt_newline(&buf); + + if (failed.nr) + bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); async_object_list_del(c, btree_read_bio, rb->list_idx); 
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], @@ -1486,12 +1525,13 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) struct btree *b = ra->b; struct printbuf buf = PRINTBUF; bool dump_bset_maps = false; - bool have_retry = false; int ret = 0, best = -1, write = READ; unsigned i, written = 0, written2 = 0; __le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2 ? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0; bool _saw_error = false, *saw_error = &_saw_error; + struct printbuf *err_msg = NULL; + struct bch_io_failures *failed = NULL; for (i = 0; i < ra->nr; i++) { struct btree_node *bn = ra->buf[i]; @@ -1584,7 +1624,7 @@ fsck_err: if (best >= 0) { memcpy(b->data, ra->buf[best], btree_buf_bytes(b)); - ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error); + ret = bch2_btree_node_read_done(c, NULL, b, NULL, NULL); } else { ret = -1; } @@ -2211,8 +2251,6 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - bool saw_error; - int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), (struct bkey_validate_context) { .from = BKEY_VALIDATE_btree_node, @@ -2225,8 +2263,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, return ret; } - ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?: - validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error); + ret = validate_bset_keys(c, b, i, WRITE, NULL, NULL) ?: + validate_bset(c, NULL, b, i, b->written, sectors, WRITE, NULL, NULL); if (ret) { bch2_inconsistent_error(c); dump_stack(); diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index afdb11a9f71c..30a5180532c8 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -134,7 +134,9 @@ void bch2_btree_build_aux_trees(struct btree *); void bch2_btree_init_next(struct btree_trans *, struct btree *); int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *, - struct btree 
*, bool, bool *); + struct btree *, + struct bch_io_failures *, + struct printbuf *); void bch2_btree_node_read(struct btree_trans *, struct btree *, bool); int bch2_btree_root_read(struct bch_fs *, enum btree_id, const struct bkey_i *, unsigned); diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 079bc2b359cd..4ee5d486b305 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -42,7 +42,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, struct btree_node *n_sorted = c->verify_data->data; struct bset *sorted, *inmemory = &b->data->keys; struct bio *bio; - bool failed = false, saw_error = false; + bool failed = false; struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_verify_replicas); @@ -66,7 +66,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, memcpy(n_ondisk, n_sorted, btree_buf_bytes(b)); v->written = 0; - if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error) + if (bch2_btree_node_read_done(c, ca, v, NULL, NULL)) return false; n_sorted = c->verify_data->data; -- 2.51.0 From c21f41f6905be4fc5059a10a5bba94105ba87269 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 24 Apr 2025 17:55:20 -0400 Subject: [PATCH 08/16] bcachefs: bch2_dirent_to_text() shows casefolded dirents Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index a51195088227..d198001838f3 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -212,12 +212,19 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); struct qstr d_name = bch2_dirent_get_name(d); - prt_printf(out, "%.*s -> ", d_name.len, d_name.name); + prt_printf(out, "%.*s", d_name.len, d_name.name); + + if (d.v->d_casefold) { + struct qstr d_name = bch2_dirent_get_lookup_name(d); + 
prt_printf(out, " (casefold %.*s)", d_name.len, d_name.name); + } + + prt_str(out, " ->"); if (d.v->d_type != DT_SUBVOL) - prt_printf(out, "%llu", le64_to_cpu(d.v->d_inum)); + prt_printf(out, " %llu", le64_to_cpu(d.v->d_inum)); else - prt_printf(out, "%u -> %u", + prt_printf(out, " %u -> %u", le32_to_cpu(d.v->d_parent_subvol), le32_to_cpu(d.v->d_child_subvol)); -- 2.51.0 From aff2b6a7fc285287f7ffc6691aca333a63b18230 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 12:38:53 -0400 Subject: [PATCH 09/16] bcachefs: provide unlocked version of run_explicit_recovery_pass_persistent Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/recovery.c | 30 +++++++++++++++--------------- fs/bcachefs/recovery_passes.c | 20 +++++++++++++++++--- fs/bcachefs/recovery_passes.h | 2 ++ fs/bcachefs/super-io.c | 3 +++ 5 files changed, 38 insertions(+), 18 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 8989ea4a3934..0369dd656d32 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -844,6 +844,7 @@ struct bch_fs { unsigned nsec_per_time_unit; u64 features; u64 compat; + u64 recovery_passes_required; unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)]; u64 btrees_lost_data; } sb; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d13a6df289c7..375111b56029 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -52,24 +52,24 @@ int bch2_btree_lost_data(struct bch_fs *c, } /* Once we have runtime self healing for topology errors we won't need this: */ - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_topology) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_topology) ?: ret; /* Btree node accounting will be off: */ __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; + 
ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; #ifdef CONFIG_BCACHEFS_DEBUG /* * These are much more minor, and don't need to be corrected right away, * but in debug mode we want the next fsck run to be clean: */ - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_lrus) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_lrus) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; #endif switch (btree) { case BTREE_ID_alloc: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); @@ -79,30 +79,30 @@ int bch2_btree_lost_data(struct bch_fs *c, __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); goto out; case BTREE_ID_backpointers: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; goto out; case BTREE_ID_need_discard: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; 
case BTREE_ID_freespace: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_bucket_gens: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_lru: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_accounting: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; goto out; case BTREE_ID_snapshots: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; default: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; } out: diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 347e17fe7901..97af1e0629eb 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -193,9 +193,9 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, return ret; } -int 
bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, - struct printbuf *out, - enum bch_recovery_pass pass) +int __bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, + struct printbuf *out, + enum bch_recovery_pass pass) { lockdep_assert_held(&c->sb_lock); @@ -205,6 +205,20 @@ int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, return bch2_run_explicit_recovery_pass_printbuf(c, out, pass); } +int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, + struct printbuf *out, + enum bch_recovery_pass pass) +{ + if (c->sb.recovery_passes_required & BIT_ULL(pass)) + return 0; + + mutex_lock(&c->sb_lock); + int ret = __bch2_run_explicit_recovery_pass_persistent(c, out, pass); + mutex_unlock(&c->sb_lock); + + return ret; +} + static void bch2_clear_recovery_pass_required(struct bch_fs *c, enum bch_recovery_pass pass) { diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h index 1f91be4258c5..94fbc64e9b7e 100644 --- a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h @@ -13,6 +13,8 @@ int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *, enum bch_recovery_pass); int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); +int __bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *, + enum bch_recovery_pass); int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *, enum bch_recovery_pass); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index d53cbc5f9925..8730d2e78d1d 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -623,6 +623,9 @@ static void bch2_sb_update(struct bch_fs *c) struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext); if (ext) { + c->sb.recovery_passes_required = + bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent, sizeof(c->sb.errors_silent) * 8); c->sb.btrees_lost_data = 
le64_to_cpu(ext->btrees_lost_data); -- 2.51.0 From 7677859a47a464f1c5603077809d4bc13f2d549f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 12:39:17 -0400 Subject: [PATCH 10/16] bcachefs: Run most explicit recovery passes persistent If we detect an error that requires running a recovery pass, and we're not in recovery, we won't be able to fix it until the next mount - make sure we're noting in the superblock that it needs to run. Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 4 ++-- fs/bcachefs/error.c | 2 +- fs/bcachefs/recovery_passes.c | 2 +- fs/bcachefs/recovery_passes.h | 3 --- fs/bcachefs/sb-members.c | 2 +- fs/bcachefs/subvolume.c | 2 +- 6 files changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 3ec33a7e9d92..596edc7bba2f 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -399,7 +399,7 @@ static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf bool print = __bch2_count_fsck_err(c, id, buf); - int ret = bch2_run_explicit_recovery_pass_printbuf(c, buf, + int ret = bch2_run_explicit_recovery_pass_persistent(c, buf, BCH_RECOVERY_PASS_check_allocations); if (insert) { @@ -972,7 +972,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, bool print = bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &buf); - bch2_run_explicit_recovery_pass_printbuf(c, &buf, + bch2_run_explicit_recovery_pass_persistent(c, &buf, BCH_RECOVERY_PASS_check_allocations); if (print) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 731733e12e6b..d7bc70fd7762 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -104,7 +104,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) __bch2_inconsistent_error(c, out); return -BCH_ERR_btree_need_topology_repair; } else { - return bch2_run_explicit_recovery_pass_printbuf(c, out, BCH_RECOVERY_PASS_check_topology) ?: + return 
bch2_run_explicit_recovery_pass_persistent(c, out, BCH_RECOVERY_PASS_check_topology) ?: -BCH_ERR_btree_node_read_validate_error; } } diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 97af1e0629eb..e14aca00cb7d 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -162,7 +162,7 @@ static int __bch2_run_explicit_recovery_pass(struct printbuf *out, } } -int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *c, +static int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *c, struct printbuf *out, enum bch_recovery_pass pass) { diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h index 94fbc64e9b7e..f33dd005beb4 100644 --- a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h @@ -8,9 +8,6 @@ u64 bch2_recovery_passes_from_stable(u64 v); u64 bch2_fsck_recovery_passes(void); -int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *, - struct printbuf *, - enum bch_recovery_pass); int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); int __bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *, diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 462a2c21a9de..9ab4d9a4b421 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -20,7 +20,7 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) bool print = bch2_count_fsck_err(c, ptr_to_invalid_device, &buf); - int ret = bch2_run_explicit_recovery_pass_printbuf(c, &buf, + int ret = bch2_run_explicit_recovery_pass_persistent(c, &buf, BCH_RECOVERY_PASS_check_allocations); if (print) diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index ff20ce98a476..51ab2ee10706 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -23,7 +23,7 @@ static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid) prt_printf(&buf, "missing subvolume %u", subvolid); bool print = bch2_count_fsck_err(c, 
subvol_missing, &buf); - int ret = bch2_run_explicit_recovery_pass_printbuf(c, &buf, + int ret = bch2_run_explicit_recovery_pass_persistent(c, &buf, BCH_RECOVERY_PASS_check_inodes); if (print) bch2_print_str(c, KERN_ERR, buf.buf); -- 2.51.0 From cf95296295bebadcf8b4a695064d2df35e0c127e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 11:45:56 -0400 Subject: [PATCH 11/16] bcachefs: bch2_trans_update_ip() Allow btree_insert_entry.ip_allocated to be passed in, so we get better info on where alloc updates are coming from. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 2 +- fs/bcachefs/btree_update.c | 7 ++++--- fs/bcachefs/btree_update.h | 12 ++++++++++-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index c63348c4b874..002e3853f8cf 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -484,7 +484,7 @@ struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, if (ret) return ERR_PTR(ret); - ret = bch2_trans_update(trans, &iter, &a->k_i, flags); + ret = bch2_trans_update_ip(trans, &iter, &a->k_i, flags, _RET_IP_); bch2_trans_iter_exit(trans, &iter); return unlikely(ret) ? 
ERR_PTR(ret) : a; } diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 2bffd5121c31..ce83cd037551 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -511,8 +511,9 @@ static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans, return 0; } -int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, - struct bkey_i *k, enum btree_iter_update_trigger_flags flags) +int __must_check bch2_trans_update_ip(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_i *k, enum btree_iter_update_trigger_flags flags, + unsigned long ip) { kmsan_check_memory(k, bkey_bytes(&k->k)); @@ -548,7 +549,7 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter path_idx = iter->key_cache_path; } - return bch2_trans_update_by_path(trans, path_idx, k, flags, _RET_IP_); + return bch2_trans_update_by_path(trans, path_idx, k, flags, ip); } int bch2_btree_insert_clone_trans(struct btree_trans *trans, diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index e674419c299e..62d24b081e27 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -102,8 +102,16 @@ int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter * int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *, enum btree_id, struct bpos); -int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *, - struct bkey_i *, enum btree_iter_update_trigger_flags); +int __must_check bch2_trans_update_ip(struct btree_trans *, struct btree_iter *, + struct bkey_i *, enum btree_iter_update_trigger_flags, + unsigned long); + +static inline int __must_check +bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, + struct bkey_i *k, enum btree_iter_update_trigger_flags flags) +{ + return bch2_trans_update_ip(trans, iter, k, flags, _THIS_IP_); +} struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *, 
unsigned); -- 2.51.0 From a349868b5e2503271bedf5f0b6e3638552047e0f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 14:50:07 -0400 Subject: [PATCH 12/16] bcachefs: bch2_fs_open() now takes a darray Signed-off-by: Kent Overstreet --- fs/bcachefs/darray.h | 1 + fs/bcachefs/fs.c | 4 ++-- fs/bcachefs/fsck.c | 4 ++-- fs/bcachefs/super.c | 24 ++++++++++++------------ fs/bcachefs/super.h | 2 +- fs/bcachefs/util.c | 4 ++-- fs/bcachefs/util.h | 4 ++-- 7 files changed, 22 insertions(+), 21 deletions(-) diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index 88f0ca3f0af5..50ec3decfe8c 100644 --- a/fs/bcachefs/darray.h +++ b/fs/bcachefs/darray.h @@ -21,6 +21,7 @@ struct { \ typedef DARRAY(char) darray_char; typedef DARRAY(char *) darray_str; +typedef DARRAY(const char *) darray_const_str; typedef DARRAY(u8) darray_u8; typedef DARRAY(u16) darray_u16; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index cdf84180829a..9916bd38a599 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -2441,7 +2441,7 @@ static int bch2_fs_get_tree(struct fs_context *fc) struct inode *vinode; struct bch2_opts_parse *opts_parse = fc->fs_private; struct bch_opts opts = opts_parse->opts; - darray_str devs; + darray_const_str devs; darray_fs devs_to_fs = {}; int ret; @@ -2465,7 +2465,7 @@ static int bch2_fs_get_tree(struct fs_context *fc) if (!IS_ERR(sb)) goto got_sb; - c = bch2_fs_open(devs.data, devs.nr, opts); + c = bch2_fs_open(&devs, &opts); ret = PTR_ERR_OR_ZERO(c); if (ret) goto err; diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index d927fdafd43a..ef2d6cbffcc2 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -3059,7 +3059,7 @@ long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg) { struct bch_ioctl_fsck_offline arg; struct fsck_thread *thr = NULL; - darray_str(devs) = {}; + darray_const_str devs = {}; long ret = 0; if (copy_from_user(&arg, user_arg, sizeof(arg))) @@ -3117,7 +3117,7 @@ long bch2_ioctl_fsck_offline(struct 
bch_ioctl_fsck_offline __user *user_arg) bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops); - thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts); + thr->c = bch2_fs_open(&devs, &thr->opts); if (!IS_ERR(thr->c) && thr->c->opts.errors == BCH_ON_ERROR_panic) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index f29965469b28..5fcd7099bc6a 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -807,7 +807,7 @@ static int bch2_fs_init_rw(struct bch_fs *c) return 0; } -static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts, +static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, bch_sb_handles *sbs) { struct bch_fs *c; @@ -821,7 +821,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts, goto out; } - c->stdio = (void *)(unsigned long) opts.stdio; + c->stdio = (void *)(unsigned long) opts->stdio; __module_get(THIS_MODULE); @@ -921,7 +921,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts, if (ret) goto err; - bch2_opts_apply(&c->opts, opts); + bch2_opts_apply(&c->opts, *opts); c->btree_key_cache_btrees |= 1U << BTREE_ID_alloc; if (c->opts.inodes_use_key_cache) @@ -2273,8 +2273,8 @@ static inline int sb_cmp(struct bch_sb *l, struct bch_sb *r) cmp_int(le64_to_cpu(l->write_time), le64_to_cpu(r->write_time)); } -struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, - struct bch_opts opts) +struct bch_fs *bch2_fs_open(darray_const_str *devices, + struct bch_opts *opts) { bch_sb_handles sbs = {}; struct bch_fs *c = NULL; @@ -2285,26 +2285,26 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, if (!try_module_get(THIS_MODULE)) return ERR_PTR(-ENODEV); - if (!nr_devices) { + if (!devices->nr) { ret = -EINVAL; goto err; } - ret = darray_make_room(&sbs, nr_devices); + ret = darray_make_room(&sbs, devices->nr); if (ret) goto err; - for (unsigned i = 0; i < nr_devices; i++) { + darray_for_each(*devices, i) { 
struct bch_sb_handle sb = { NULL }; - ret = bch2_read_super(devices[i], &opts, &sb); + ret = bch2_read_super(*i, opts, &sb); if (ret) goto err; BUG_ON(darray_push(&sbs, sb)); } - if (opts.nochanges && !opts.read_only) { + if (opts->nochanges && !opts->read_only) { ret = -BCH_ERR_erofs_nochanges; goto err_print; } @@ -2314,7 +2314,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices, best = sb; darray_for_each_reverse(sbs, sb) { - ret = bch2_dev_in_fs(best, sb, &opts); + ret = bch2_dev_in_fs(best, sb, opts); if (ret == -BCH_ERR_device_has_been_removed || ret == -BCH_ERR_device_splitbrain) { @@ -2358,7 +2358,7 @@ out: return c; err_print: pr_err("bch_fs_open err opening %s: %s", - devices[0], bch2_err_str(ret)); + devices->data[0], bch2_err_str(ret)); err: if (!IS_ERR_OR_NULL(c)) bch2_fs_stop(c); diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index a1566f2d77c3..be75603fefe9 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -45,7 +45,7 @@ void bch2_fs_free(struct bch_fs *); void bch2_fs_stop(struct bch_fs *); int bch2_fs_start(struct bch_fs *); -struct bch_fs *bch2_fs_open(char * const *, unsigned, struct bch_opts); +struct bch_fs *bch2_fs_open(darray_const_str *, struct bch_opts *); extern const struct blk_holder_ops bch2_sb_handle_bdev_ops; diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 1cff407c8c9d..dc3817f545fa 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -1016,14 +1016,14 @@ u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr) return ret; } -void bch2_darray_str_exit(darray_str *d) +void bch2_darray_str_exit(darray_const_str *d) { darray_for_each(*d, i) kfree(*i); darray_exit(d); } -int bch2_split_devs(const char *_dev_name, darray_str *ret) +int bch2_split_devs(const char *_dev_name, darray_const_str *ret) { darray_init(ret); diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 7a93e187a49a..14cb2c7dfda4 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -690,8 +690,8 @@ 
static inline bool qstr_eq(const struct qstr l, const struct qstr r) return l.len == r.len && !memcmp(l.name, r.name, l.len); } -void bch2_darray_str_exit(darray_str *); -int bch2_split_devs(const char *, darray_str *); +void bch2_darray_str_exit(darray_const_str *); +int bch2_split_devs(const char *, darray_const_str *); #ifdef __KERNEL__ -- 2.51.0 From 98e5e36d8c58ab41c28367d3bfc9ec4e8795e421 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 28 Apr 2025 22:00:01 -0400 Subject: [PATCH 13/16] bcachefs: bch2_dev_add() can run on a non-started fs Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 5fcd7099bc6a..e89b659514b2 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1894,6 +1894,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err_unlock; } unsigned dev_idx = ret; + ret = 0; /* success: */ @@ -1913,27 +1914,29 @@ int bch2_dev_add(struct bch_fs *c, const char *path) bch2_write_super(c); mutex_unlock(&c->sb_lock); - ret = bch2_dev_usage_init(ca, false); - if (ret) - goto err_late; + if (test_bit(BCH_FS_started, &c->flags)) { + ret = bch2_dev_usage_init(ca, false); + if (ret) + goto err_late; - ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); - bch_err_msg(ca, ret, "marking new superblock"); - if (ret) - goto err_late; + ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); + bch_err_msg(ca, ret, "marking new superblock"); + if (ret) + goto err_late; - ret = bch2_fs_freespace_init(c); - bch_err_msg(ca, ret, "initializing free space"); - if (ret) - goto err_late; + ret = bch2_fs_freespace_init(c); + bch_err_msg(ca, ret, "initializing free space"); + if (ret) + goto err_late; - if (ca->mi.state == BCH_MEMBER_STATE_rw) - __bch2_dev_read_write(c, ca); + if (ca->mi.state == BCH_MEMBER_STATE_rw) + __bch2_dev_read_write(c, ca); - ret = 
bch2_dev_journal_alloc(ca, false); - bch_err_msg(c, ret, "allocating journal"); - if (ret) - goto err_late; + ret = bch2_dev_journal_alloc(ca, false); + bch_err_msg(c, ret, "allocating journal"); + if (ret) + goto err_late; + } up_write(&c->state_lock); out: -- 2.51.0 From ae0386e111253eee0f71ae3f32635a3ba22e5a7b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 30 Apr 2025 13:22:01 -0600 Subject: [PATCH 14/16] bcachefs: Avoid -Wflex-array-member-not-at-end warnings -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. Refactor a couple of structs that contain flexible arrays in the middle by replacing them with unions. So, with these changes, fix the following warnings: fs/bcachefs/disk_accounting.c:429:51: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] fs/bcachefs/ec_types.h:8:41: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: Gustavo A. R. Silva Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 14 ++++++++------ fs/bcachefs/ec_types.h | 7 ++++--- fs/bcachefs/journal_io.c | 6 +++--- fs/bcachefs/journal_reclaim.c | 2 +- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index e399237e124a..195dc3fcec1d 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -287,7 +287,7 @@ static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struc static int bch2_accounting_update_sb_one(struct bch_fs *c, struct bpos p) { - struct bch_replicas_padded r; + union bch_replicas_padded r; return accounting_to_replicas(&r.e, p) ? 
bch2_mark_replicas(c, &r.e) : 0; @@ -361,7 +361,7 @@ err: int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, enum bch_accounting_mode mode) { - struct bch_replicas_padded r; + union bch_replicas_padded r; if (mode != BCH_ACCOUNTING_read && accounting_to_replicas(&r.e, a.k->p) && @@ -379,7 +379,7 @@ int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, int bch2_accounting_mem_insert_locked(struct bch_fs *c, struct bkey_s_c_accounting a, enum bch_accounting_mode mode) { - struct bch_replicas_padded r; + union bch_replicas_padded r; if (mode != BCH_ACCOUNTING_read && accounting_to_replicas(&r.e, a.k->p) && @@ -438,10 +438,12 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage) percpu_down_read(&c->mark_lock); darray_for_each(acc->k, i) { - struct { + union { + u8 bytes[struct_size_t(struct bch_replicas_usage, r.devs, + BCH_BKEY_PTRS_MAX)]; struct bch_replicas_usage r; - u8 pad[BCH_BKEY_PTRS_MAX]; } u; + u.r.r.nr_devs = BCH_BKEY_PTRS_MAX; if (!accounting_to_replicas(&u.r.r, i->pos)) continue; @@ -640,7 +642,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, switch (acc->type) { case BCH_DISK_ACCOUNTING_replicas: { - struct bch_replicas_padded r; + union bch_replicas_padded r; __accounting_to_replicas(&r.e, acc); for (unsigned i = 0; i < r.e.nr_devs; i++) diff --git a/fs/bcachefs/ec_types.h b/fs/bcachefs/ec_types.h index 06144bfd9c19..809446c78951 100644 --- a/fs/bcachefs/ec_types.h +++ b/fs/bcachefs/ec_types.h @@ -4,9 +4,10 @@ #include "bcachefs_format.h" -struct bch_replicas_padded { +union bch_replicas_padded { + u8 bytes[struct_size_t(struct bch_replicas_entry_v1, + devs, BCH_BKEY_PTRS_MAX)]; struct bch_replicas_entry_v1 e; - u8 pad[BCH_BKEY_PTRS_MAX]; }; struct stripe { @@ -28,7 +29,7 @@ struct gc_stripe { u16 block_sectors[BCH_BKEY_PTRS_MAX]; struct bch_extent_ptr ptrs[BCH_BKEY_PTRS_MAX]; - struct bch_replicas_padded r; + union bch_replicas_padded r; }; #endif /* 
_BCACHEFS_EC_TYPES_H */ diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 8f38e9485cd8..be86fd21de2a 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1406,7 +1406,7 @@ int bch2_journal_read(struct bch_fs *c, } genradix_for_each(&c->journal_entries, radix_iter, _i) { - struct bch_replicas_padded replicas = { + union bch_replicas_padded replicas = { .e.data_type = BCH_DATA_journal, .e.nr_devs = 0, .e.nr_required = 1, @@ -1634,7 +1634,7 @@ static CLOSURE_CALLBACK(journal_write_done) closure_type(w, struct journal_buf, io); struct journal *j = container_of(w, struct journal, buf[w->idx]); struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bch_replicas_padded replicas; + union bch_replicas_padded replicas; u64 seq = le64_to_cpu(w->data->seq); int err = 0; @@ -2057,7 +2057,7 @@ CLOSURE_CALLBACK(bch2_journal_write) closure_type(w, struct journal_buf, io); struct journal *j = container_of(w, struct journal, buf[w->idx]); struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bch_replicas_padded replicas; + union bch_replicas_padded replicas; unsigned nr_rw_members = dev_mask_nr(&c->rw_devs[BCH_DATA_journal]); int ret; diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index bb339be54e7b..ce9e0bd7ec4f 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -963,7 +963,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) seq = 0; spin_lock(&j->lock); while (!ret) { - struct bch_replicas_padded replicas; + union bch_replicas_padded replicas; seq = max(seq, journal_last_seq(j)); if (seq >= j->pin.back) -- 2.51.0 From 5ce11d9d1bd5dfd8876d35bd9e61f38f47807c42 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 29 Apr 2025 14:41:37 -0400 Subject: [PATCH 15/16] bcachefs: sysfs trigger_recalc_capacity For bug diagnosis Signed-off-by: Kent Overstreet --- fs/bcachefs/sysfs.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 
deletions(-) diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index dfae5eda7a4c..1d0c0f24a7b9 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -146,8 +146,9 @@ write_attribute(trigger_journal_flush); write_attribute(trigger_journal_writes); write_attribute(trigger_btree_cache_shrink); write_attribute(trigger_btree_key_cache_shrink); -write_attribute(trigger_freelist_wakeup); write_attribute(trigger_btree_updates); +write_attribute(trigger_freelist_wakeup); +write_attribute(trigger_recalc_capacity); read_attribute(gc_gens_pos); read_attribute(uuid); @@ -428,6 +429,12 @@ STORE(bch2_fs) if (attr == &sysfs_trigger_freelist_wakeup) closure_wake_up(&c->freelist_wait); + if (attr == &sysfs_trigger_recalc_capacity) { + down_read(&c->state_lock); + bch2_recalc_capacity(c); + up_read(&c->state_lock); + } + #ifdef CONFIG_BCACHEFS_TESTS if (attr == &sysfs_perf_test) { char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp; @@ -553,8 +560,9 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_trigger_journal_writes, &sysfs_trigger_btree_cache_shrink, &sysfs_trigger_btree_key_cache_shrink, - &sysfs_trigger_freelist_wakeup, &sysfs_trigger_btree_updates, + &sysfs_trigger_freelist_wakeup, + &sysfs_trigger_recalc_capacity, &sysfs_gc_gens_pos, -- 2.51.0 From 8a6b883e78bfed6909e21c2afb6138b603d1ee6c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 30 Apr 2025 22:05:49 -0400 Subject: [PATCH 16/16] bcachefs: Fix setting ca->name in device add Device add doesn't get the device index and attach to the filesystem until after attaching the block device, and setting the device name from the block device name - this needs some minor tweaks.
Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index e89b659514b2..9381644cabee 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1488,7 +1488,9 @@ static void bch2_dev_attach(struct bch_fs *c, struct bch_dev *ca, { ca->dev_idx = dev_idx; __set_bit(ca->dev_idx, ca->self.d); - scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx); + + if (!ca->name[0]) + scnprintf(ca->name, sizeof(ca->name), "dev-%u", dev_idx); ca->fs = c; rcu_assign_pointer(c->devs[ca->dev_idx], ca); @@ -1540,6 +1542,11 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) if (ret) return ret; + struct printbuf name = PRINTBUF; + prt_bdevname(&name, sb->bdev); + strscpy(ca->name, name.buf, sizeof(ca->name)); + printbuf_exit(&name); + /* Commit: */ ca->disk_sb = *sb; memset(sb, 0, sizeof(*sb)); @@ -1581,11 +1588,6 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb) bch2_dev_sysfs_online(c, ca); - struct printbuf name = PRINTBUF; - prt_bdevname(&name, ca->disk_sb.bdev); - strscpy(ca->name, name.buf, sizeof(ca->name)); - printbuf_exit(&name); - bch2_rebalance_wakeup(c); return 0; } -- 2.51.0