From 0499a82b18b5ddee0d97d2cfcae0c0120f858c1f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 21 Apr 2025 12:01:50 -0400 Subject: [PATCH 01/16] bcachefs: Async object debugging Debugging infrastructure for async objs: this lets us easily create fast_lists for various object types so they'll be visible in debugfs. Add new object types to the BCH_ASYNC_OBJS_TYPES() enum, and drop a pretty-printer wrapper in async_objs.c. Signed-off-by: Kent Overstreet --- fs/bcachefs/Kconfig | 4 ++ fs/bcachefs/Makefile | 2 + fs/bcachefs/async_objs.c | 105 +++++++++++++++++++++++++++++++++ fs/bcachefs/async_objs.h | 44 ++++++++++++++ fs/bcachefs/async_objs_types.h | 20 +++++++ fs/bcachefs/bcachefs.h | 7 +++ fs/bcachefs/debug.c | 52 +++++++--------- fs/bcachefs/debug.h | 18 ++++++ fs/bcachefs/errcode.h | 1 + fs/bcachefs/super.c | 3 + 10 files changed, 224 insertions(+), 32 deletions(-) create mode 100644 fs/bcachefs/async_objs.c create mode 100644 fs/bcachefs/async_objs.h create mode 100644 fs/bcachefs/async_objs_types.h diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index a14e4a60b187..8cb2b9d5da96 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -107,6 +107,10 @@ config BCACHEFS_TRANS_KMALLOC_TRACE bool "Trace bch2_trans_kmalloc() calls" depends on BCACHEFS_FS +config BCACHEFS_ASYNC_OBJECT_LISTS + bool "Keep async objects on fast_lists for debugfs visibility" + depends on BCACHEFS_FS && DEBUG_FS + config MEAN_AND_VARIANCE_UNIT_TEST tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index 3be39845e4f6..93c8ee5425c8 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -99,6 +99,8 @@ bcachefs-y := \ varint.o \ xattr.o +bcachefs-$(CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS) += async_objs.o + obj-$(CONFIG_MEAN_AND_VARIANCE_UNIT_TEST) += mean_and_variance_test.o # Silence "note: xyz changed in GCC X.X" messages diff --git a/fs/bcachefs/async_objs.c b/fs/bcachefs/async_objs.c new file mode 100644 index 000000000000..8d78f390a759 --- /dev/null +++ b/fs/bcachefs/async_objs.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Async obj debugging: keep asynchronous objects on (very fast) lists, make + * them visibile in debugfs: + */ + +#include "bcachefs.h" +#include "async_objs.h" +#include "btree_io.h" +#include "debug.h" +#include "io_read.h" + +#include + +static int bch2_async_obj_list_open(struct inode *inode, struct file *file) +{ + struct async_obj_list *list = inode->i_private; + struct dump_iter *i; + + i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL); + if (!i) + return -ENOMEM; + + file->private_data = i; + i->from = POS_MIN; + i->iter = 0; + i->c = container_of(list, struct bch_fs, async_objs[list->idx]); + i->list = list; + i->buf = PRINTBUF; + return 0; +} + +static ssize_t bch2_async_obj_list_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; + struct async_obj_list *list = i->list; + ssize_t ret = 0; + + i->ubuf = buf; + i->size = size; + i->ret = 0; + + struct genradix_iter iter; + void *obj; + fast_list_for_each_from(&list->list, iter, obj, i->iter) { + ret = bch2_debugfs_flush_buf(i); + if (ret) + return ret; + + if (!i->size) + break; + + list->obj_to_text(&i->buf, obj); + } + + if (i->buf.allocation_failure) + ret = -ENOMEM; + else + i->iter = iter.pos; + + if (!ret) + ret = bch2_debugfs_flush_buf(i); + + return ret ?: i->ret; +} + +__maybe_unused +static const struct file_operations async_obj_ops = { + .owner = THIS_MODULE, + .open = bch2_async_obj_list_open, + .release = bch2_dump_release, + .read = bch2_async_obj_list_read, +}; + +void bch2_fs_async_obj_debugfs_init(struct bch_fs *c) +{ + c->async_obj_dir = debugfs_create_dir("async_objs", c->fs_debug_dir); + +#define x(n) debugfs_create_file(#n, 0400, c->async_obj_dir, \ + &c->async_objs[BCH_ASYNC_OBJ_LIST_##n], &async_obj_ops); + BCH_ASYNC_OBJ_LISTS() +#undef x +} + +void bch2_fs_async_obj_exit(struct bch_fs *c) +{ + for (unsigned i = 0; i < ARRAY_SIZE(c->async_objs); i++) + fast_list_exit(&c->async_objs[i].list); +} + +int bch2_fs_async_obj_init(struct bch_fs *c) +{ + for (unsigned i = 0; i < ARRAY_SIZE(c->async_objs); i++) { + if (fast_list_init(&c->async_objs[i].list)) + return -BCH_ERR_ENOMEM_async_obj_init; + c->async_objs[i].idx = i; + } + +#define x(n) c->async_objs[BCH_ASYNC_OBJ_LIST_##n].obj_to_text = n##_obj_to_text; + BCH_ASYNC_OBJ_LISTS() +#undef x + + return 0; +} diff --git a/fs/bcachefs/async_objs.h b/fs/bcachefs/async_objs.h new file mode 100644 index 000000000000..cd6489b8cf76 --- /dev/null +++ b/fs/bcachefs/async_objs.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_ASYNC_OBJS_H +#define _BCACHEFS_ASYNC_OBJS_H + +#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS +static inline void __async_object_list_del(struct fast_list *head, unsigned idx) +{ + fast_list_remove(head, idx); +} + +static inline int __async_object_list_add(struct fast_list *head, void *obj, unsigned *idx) +{ + int ret = fast_list_add(head, obj); + *idx = ret > 0 ? ret : 0; + return ret < 0 ? ret : 0; +} + +#define async_object_list_del(_c, _list, idx) \ + __async_object_list_del(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, idx) + +#define async_object_list_add(_c, _list, obj, idx) \ + __async_object_list_add(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, obj, idx) + +void bch2_fs_async_obj_debugfs_init(struct bch_fs *); +void bch2_fs_async_obj_exit(struct bch_fs *); +int bch2_fs_async_obj_init(struct bch_fs *); + +#else /* CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS */ + +#define async_object_list_del(_c, _n, idx) do {} while (0) + +static inline int __async_object_list_add(void) +{ + return 0; +} +#define async_object_list_add(_c, _n, obj, idx) __async_object_list_add() + +static inline void bch2_fs_async_obj_debugfs_init(struct bch_fs *c) {} +static inline void bch2_fs_async_obj_exit(struct bch_fs *c) {} +static inline int bch2_fs_async_obj_init(struct bch_fs *c) { return 0; } + +#endif /* CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS */ + +#endif /* _BCACHEFS_ASYNC_OBJS_H */ diff --git a/fs/bcachefs/async_objs_types.h b/fs/bcachefs/async_objs_types.h new file mode 100644 index 000000000000..28cb73e3f56d --- /dev/null +++ b/fs/bcachefs/async_objs_types.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_ASYNC_OBJS_TYPES_H +#define _BCACHEFS_ASYNC_OBJS_TYPES_H + +#define BCH_ASYNC_OBJ_LISTS() + +enum bch_async_obj_lists { +#define x(n) BCH_ASYNC_OBJ_LIST_##n, + BCH_ASYNC_OBJ_LISTS() +#undef x + BCH_ASYNC_OBJ_NR +}; + +struct async_obj_list { + struct fast_list list; + void (*obj_to_text)(struct printbuf *, void *); + unsigned idx; +}; + +#endif /* _BCACHEFS_ASYNC_OBJS_TYPES_H */ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 3d18dbe0d6f5..94e3edd932e3 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -209,6 +209,7 @@ #include "btree_journal_iter_types.h" #include "disk_accounting_types.h" #include "errcode.h" +#include "fast_list.h" #include "fifo.h" #include "nocow_locking_types.h" #include "opts.h" @@ -474,6 +475,7 @@ enum bch_time_stats { }; #include "alloc_types.h" +#include "async_objs_types.h" #include "btree_gc_types.h" #include "btree_types.h" #include "btree_node_scan_types.h" @@ -1027,6 +1029,10 @@ struct bch_fs { nocow_locks; struct rhashtable promote_table; +#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS + struct async_obj_list async_objs[BCH_ASYNC_OBJ_NR]; +#endif + mempool_t compression_bounce[2]; mempool_t compress_workspace[BCH_COMPRESSION_OPT_NR]; size_t zstd_workspace_size; @@ -1115,6 +1121,7 @@ struct bch_fs { /* DEBUG JUNK */ struct dentry *fs_debug_dir; struct dentry *btree_debug_dir; + struct dentry *async_obj_dir; struct btree_debug btree_debug[BTREE_ID_NR]; struct btree *verify_data; struct btree_node *verify_ondisk; diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 4cbb19c36fa1..079bc2b359cd 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -8,6 +8,7 @@ #include "bcachefs.h" #include "alloc_foreground.h" +#include "async_objs.h" #include "bkey_methods.h" #include "btree_cache.h" #include "btree_io.h" @@ -16,6 +17,7 @@ #include "btree_update.h" #include "btree_update_interior.h" #include "buckets.h" +#include "data_update.h" #include "debug.h" #include "error.h" #include "extents.h" @@ -306,23 +308,7 @@ out: #ifdef CONFIG_DEBUG_FS -/* XXX: bch_fs refcounting */ - -struct dump_iter { - struct bch_fs *c; - enum btree_id id; - struct bpos from; - struct bpos prev_node; - u64 iter; - - struct printbuf buf; - - char __user *ubuf; /* destination user buffer */ - size_t size; /* size of requested read */ - ssize_t ret; /* bytes read so far */ -}; - -static ssize_t flush_buf(struct dump_iter *i) +ssize_t bch2_debugfs_flush_buf(struct dump_iter *i) { if (i->buf.pos) { size_t bytes = min_t(size_t, i->buf.pos, i->size); @@ -360,7 +346,7 @@ static int bch2_dump_open(struct inode *inode, struct file *file) return 0; } -static int bch2_dump_release(struct inode *inode, struct file *file) +int bch2_dump_release(struct inode *inode, struct file *file) { struct dump_iter *i = file->private_data; @@ -378,7 +364,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, i->size = size; i->ret = 0; - return flush_buf(i) ?: + return bch2_debugfs_flush_buf(i) ?: bch2_trans_run(i->c, for_each_btree_key(trans, iter, i->id, i->from, BTREE_ITER_prefetch| @@ -387,7 +373,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, prt_newline(&i->buf); bch2_trans_unlock(trans); i->from = bpos_successor(iter.pos); - flush_buf(i); + bch2_debugfs_flush_buf(i); }))) ?: i->ret; } @@ -408,7 +394,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, i->size = size; i->ret = 0; - ssize_t ret = flush_buf(i); + ssize_t ret = bch2_debugfs_flush_buf(i); if (ret) return ret; @@ -422,7 +408,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, ? bpos_successor(b->key.k.p) : b->key.k.p; - drop_locks_do(trans, flush_buf(i)); + drop_locks_do(trans, bch2_debugfs_flush_buf(i)); }))) ?: i->ret; } @@ -442,7 +428,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, i->size = size; i->ret = 0; - return flush_buf(i) ?: + return bch2_debugfs_flush_buf(i) ?: bch2_trans_run(i->c, for_each_btree_key(trans, iter, i->id, i->from, BTREE_ITER_prefetch| @@ -460,7 +446,7 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, bch2_bfloat_to_text(&i->buf, l->b, _k); bch2_trans_unlock(trans); i->from = bpos_successor(iter.pos); - flush_buf(i); + bch2_debugfs_flush_buf(i); }))) ?: i->ret; } @@ -521,7 +507,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, struct rhash_head *pos; struct btree *b; - ret = flush_buf(i); + ret = bch2_debugfs_flush_buf(i); if (ret) return ret; @@ -544,7 +530,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, ret = -ENOMEM; if (!ret) - ret = flush_buf(i); + ret = bch2_debugfs_flush_buf(i); return ret ?: i->ret; } @@ -618,7 +604,7 @@ restart: closure_put(&trans->ref); - ret = flush_buf(i); + ret = bch2_debugfs_flush_buf(i); if (ret) goto unlocked; @@ -631,7 +617,7 @@ unlocked: ret = -ENOMEM; if (!ret) - ret = flush_buf(i); + ret = bch2_debugfs_flush_buf(i); return ret ?: i->ret; } @@ -656,7 +642,7 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf, i->ret = 0; while (1) { - err = flush_buf(i); + err = bch2_debugfs_flush_buf(i); if (err) return err; @@ -699,7 +685,7 @@ static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf, i->iter++; } - err = flush_buf(i); + err = bch2_debugfs_flush_buf(i); if (err) return err; @@ -757,7 +743,7 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, while (1) { struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter]; - err = flush_buf(i); + err = bch2_debugfs_flush_buf(i); if (err) return err; @@ -878,7 +864,7 @@ static ssize_t bch2_simple_print(struct file *file, char __user *buf, ret = -ENOMEM; if (!ret) - ret = flush_buf(i); + ret = bch2_debugfs_flush_buf(i); return ret ?: i->ret; } @@ -967,6 +953,8 @@ void bch2_fs_debug_init(struct bch_fs *c) debugfs_create_file("write_points", 0400, c->fs_debug_dir, c->btree_debug, &write_points_ops); + bch2_fs_async_obj_debugfs_init(c); + c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir); if (IS_ERR_OR_NULL(c->btree_debug_dir)) return; diff --git a/fs/bcachefs/debug.h b/fs/bcachefs/debug.h index 2c37143b5fd1..52dbea736709 100644 --- a/fs/bcachefs/debug.h +++ b/fs/bcachefs/debug.h @@ -19,6 +19,24 @@ static inline void bch2_btree_verify(struct bch_fs *c, struct btree *b) } #ifdef CONFIG_DEBUG_FS +struct dump_iter { + struct bch_fs *c; + struct async_obj_list *list; + enum btree_id id; + struct bpos from; + struct bpos prev_node; + u64 iter; + + struct printbuf buf; + + char __user *ubuf; /* destination user buffer */ + size_t size; /* size of requested read */ + ssize_t ret; /* bytes read so far */ +}; + +ssize_t bch2_debugfs_flush_buf(struct dump_iter *); +int bch2_dump_release(struct inode *, struct file *); + void bch2_fs_debug_exit(struct bch_fs *); void bch2_fs_debug_init(struct bch_fs *); #else diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 6a4b3fe9ea99..1a52edc7c8d8 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -53,6 +53,7 @@ x(ENOMEM, ENOMEM_dio_write_bioset_init) \ x(ENOMEM, ENOMEM_nocow_flush_bioset_init) \ x(ENOMEM, ENOMEM_promote_table_init) \ + x(ENOMEM, ENOMEM_async_obj_init) \ x(ENOMEM, ENOMEM_compression_bounce_read_init) \ x(ENOMEM, ENOMEM_compression_bounce_write_init) \ x(ENOMEM, ENOMEM_compression_workspace_init) \ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index bed0f8a80212..f29965469b28 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -10,6 +10,7 @@ #include "bcachefs.h" #include "alloc_background.h" #include "alloc_foreground.h" +#include "async_objs.h" #include "bkey_sort.h" #include "btree_cache.h" #include "btree_gc.h" @@ -579,6 +580,7 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_free_pending_node_rewrites(c); bch2_free_fsck_errs(c); bch2_fs_accounting_exit(c); + bch2_fs_async_obj_exit(c); bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); bch2_fs_snapshots_exit(c); @@ -971,6 +973,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts, } ret = + bch2_fs_async_obj_init(c) ?: bch2_fs_btree_cache_init(c) ?: bch2_fs_btree_iter_init(c) ?: bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?: -- 2.51.0 From 41e51769b8a649dd3db7070370cb6aa127f86307 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 21 Apr 2025 13:02:51 -0400 Subject: [PATCH 02/16] bcachefs: Make various async objs visible in debugfs Add async objs list for - promote_op - bch_read_bio - btree_read_bio - btree_write_bio This gets us introspection on in-flight async ops, and because under the hood it uses fast_lists (percpu slot buffer on top of a radix tree), it'll be fast enough to enable in production. This will be very helpful for debugging "something got stuck" issues, which have been cropping up from time to time (in the CI, especially with folio writeback). Signed-off-by: Kent Overstreet --- fs/bcachefs/async_objs.c | 23 +++++++++++++++++++- fs/bcachefs/async_objs_types.h | 6 +++++- fs/bcachefs/btree_io.c | 12 +++++++++++ fs/bcachefs/btree_io.h | 8 +++++++ fs/bcachefs/data_update.h | 15 +++++++++++++ fs/bcachefs/io_read.c | 39 ++++++++++++++++++++++------------ fs/bcachefs/io_read.h | 12 +++++++++++ 7 files changed, 100 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/async_objs.c b/fs/bcachefs/async_objs.c index 8d78f390a759..57e2fe421461 100644 --- a/fs/bcachefs/async_objs.c +++ b/fs/bcachefs/async_objs.c @@ -12,6 +12,28 @@ #include +static void promote_obj_to_text(struct printbuf *out, void *obj) +{ + bch2_promote_op_to_text(out, obj); +} + +static void rbio_obj_to_text(struct printbuf *out, void *obj) +{ + bch2_read_bio_to_text(out, obj); +} + +static void btree_read_bio_obj_to_text(struct printbuf *out, void *obj) +{ + struct btree_read_bio *rbio = obj; + bch2_btree_read_bio_to_text(out, rbio); +} + +static void btree_write_bio_obj_to_text(struct printbuf *out, void *obj) +{ + struct btree_write_bio *wbio = obj; + bch2_bio_to_text(out, &wbio->wbio.bio); +} + static int bch2_async_obj_list_open(struct inode *inode, struct file *file) { struct async_obj_list *list = inode->i_private; @@ -65,7 +87,6 @@ static ssize_t bch2_async_obj_list_read(struct file *file, char __user *buf, return ret ?: i->ret; } -__maybe_unused static const struct file_operations async_obj_ops = { .owner = THIS_MODULE, .open = bch2_async_obj_list_open, diff --git a/fs/bcachefs/async_objs_types.h b/fs/bcachefs/async_objs_types.h index 28cb73e3f56d..310a4f90f49b 100644 --- a/fs/bcachefs/async_objs_types.h +++ b/fs/bcachefs/async_objs_types.h @@ -2,7 +2,11 @@ #ifndef _BCACHEFS_ASYNC_OBJS_TYPES_H #define _BCACHEFS_ASYNC_OBJS_TYPES_H -#define BCH_ASYNC_OBJ_LISTS() +#define BCH_ASYNC_OBJ_LISTS() \ + x(promote) \ + x(rbio) \ + x(btree_read_bio) \ + x(btree_write_bio) enum bch_async_obj_lists { #define x(n) BCH_ASYNC_OBJ_LIST_##n, diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 8fe9e0fc6629..84dae4c1ec13 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "async_objs.h" #include "bkey_buf.h" #include "bkey_methods.h" #include "bkey_sort.h" @@ -1376,6 +1377,7 @@ start: } } + async_object_list_del(c, btree_read_bio, rb->list_idx); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], rb->start_time); bio_put(&rb->bio); @@ -1416,6 +1418,11 @@ static void btree_node_read_endio(struct bio *bio) queue_work(c->btree_read_complete_wq, &rb->work); } +void bch2_btree_read_bio_to_text(struct printbuf *out, struct btree_read_bio *rbio) +{ + bch2_bio_to_text(out, &rbio->bio); +} + struct btree_node_read_all { struct closure cl; struct bch_fs *c; @@ -1748,6 +1755,8 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, bio->bi_end_io = btree_node_read_endio; bch2_bio_map(bio, b->data, btree_buf_bytes(b)); + async_object_list_add(c, btree_read_bio, rb, &rb->list_idx); + if (rb->have_ioref) { this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree], bio_sectors(bio)); @@ -2121,6 +2130,7 @@ static void btree_node_write_work(struct work_struct *work) goto err; } out: + async_object_list_del(c, btree_write_bio, wbio->list_idx); bio_put(&wbio->wbio.bio); btree_node_write_done(c, b, start_time); return; @@ -2473,6 +2483,8 @@ do_write: atomic64_inc(&c->btree_write_stats[type].nr); atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes); + async_object_list_add(c, btree_write_bio, wbio, &wbio->list_idx); + INIT_WORK(&wbio->work, btree_write_submit); queue_work(c->btree_write_submit_wq, &wbio->work); return; diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index dbf76d22c660..afdb11a9f71c 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -41,6 +41,9 @@ struct btree_read_bio { u64 start_time; unsigned have_ioref:1; unsigned idx:7; +#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS + unsigned list_idx; +#endif struct extent_ptr_decoded pick; struct work_struct work; struct bio bio; @@ -53,6 +56,9 @@ struct btree_write_bio { unsigned data_bytes; unsigned sector_offset; u64 start_time; +#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS + unsigned list_idx; +#endif struct bch_write_bio wbio; }; @@ -133,6 +139,8 @@ void bch2_btree_node_read(struct btree_trans *, struct btree *, bool); int bch2_btree_root_read(struct bch_fs *, enum btree_id, const struct bkey_i *, unsigned); +void bch2_btree_read_bio_to_text(struct printbuf *, struct btree_read_bio *); + int bch2_btree_node_scrub(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, unsigned); diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index ed05125867da..5e14d13568de 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -50,6 +50,21 @@ struct data_update { struct bio_vec *bvecs; }; +struct promote_op { + struct rcu_head rcu; + u64 start_time; +#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS + unsigned list_idx; +#endif + + struct rhash_head hash; + struct bpos pos; + + struct work_struct work; + struct data_update write; + struct bio_vec bi_inline_vecs[]; /* must be last */ +}; + void bch2_data_update_to_text(struct printbuf *, struct data_update *); void bch2_data_update_inflight_to_text(struct printbuf *, struct data_update *); diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index df96e2c8ceda..abfd3a4c1d7d 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -9,6 +9,7 @@ #include "bcachefs.h" #include "alloc_background.h" #include "alloc_foreground.h" +#include "async_objs.h" #include "btree_update.h" #include "buckets.h" #include "checksum.h" @@ -88,18 +89,6 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target) /* Cache promotion on read */ -struct promote_op { - struct rcu_head rcu; - u64 start_time; - - struct rhash_head hash; - struct bpos pos; - - struct work_struct work; - struct data_update write; - struct bio_vec bi_inline_vecs[]; /* must be last */ -}; - static const struct rhashtable_params bch_promote_params = { .head_offset = offsetof(struct promote_op, hash), .key_offset = offsetof(struct promote_op, pos), @@ -177,6 +166,8 @@ static noinline void promote_free(struct bch_read_bio *rbio) bch_promote_params); BUG_ON(ret); + async_object_list_del(c, promote, op->list_idx); + bch2_data_update_exit(&op->write); enumerated_ref_put(&c->writes, BCH_WRITE_REF_promote); @@ -262,6 +253,10 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans, goto err; } + ret = async_object_list_add(c, promote, op, &op->list_idx); + if (ret < 0) + goto err_remove_hash; + ret = bch2_data_update_init(trans, NULL, NULL, &op->write, writepoint_hashed((unsigned long) current), &orig->opts, @@ -273,7 +268,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans, * -BCH_ERR_ENOSPC_disk_reservation: */ if (ret) - goto err_remove_hash; + goto err_remove_list; rbio_init_fragment(&op->write.rbio.bio, orig); op->write.rbio.bounce = true; @@ -281,6 +276,8 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans, op->write.op.end_io = promote_done; return &op->write.rbio; +err_remove_list: + async_object_list_del(c, promote, op->list_idx); err_remove_hash: BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash, bch_promote_params)); @@ -353,6 +350,18 @@ nopromote: return NULL; } +void bch2_promote_op_to_text(struct printbuf *out, struct promote_op *op) +{ + if (!op->write.read_done) { + prt_printf(out, "parent read: %px\n", op->write.rbio.parent); + printbuf_indent_add(out, 2); + bch2_read_bio_to_text(out, op->write.rbio.parent); + printbuf_indent_sub(out, 2); + } + + bch2_data_update_to_text(out, &op->write); +} + /* Read */ static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *out, @@ -421,6 +430,8 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio) else promote_free(rbio); } else { + async_object_list_del(rbio->c, rbio, rbio->list_idx); + if (rbio->bounce) bch2_bio_free_pages_pool(rbio->c, &rbio->bio); @@ -1246,6 +1257,8 @@ retry_pick: rbio->bio.bi_iter.bi_sector = pick.ptr.offset; rbio->bio.bi_end_io = bch2_read_endio; + async_object_list_add(c, rbio, rbio, &rbio->list_idx); + if (rbio->bounce) trace_and_count(c, io_read_bounce, &rbio->bio); diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h index 13bb68eb91c4..c08b9c047b3e 100644 --- a/fs/bcachefs/io_read.h +++ b/fs/bcachefs/io_read.h @@ -4,6 +4,7 @@ #include "bkey_buf.h" #include "btree_iter.h" +#include "extents_types.h" #include "reflink.h" struct bch_read_bio { @@ -48,6 +49,9 @@ struct bch_read_bio { u16 _state; }; s16 ret; +#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS + unsigned list_idx; +#endif struct extent_ptr_decoded pick; @@ -173,6 +177,9 @@ static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio, rbio->split = true; rbio->parent = orig; rbio->opts = orig->opts; +#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS + rbio->list_idx = 0; +#endif return rbio; } @@ -190,9 +197,14 @@ static inline struct bch_read_bio *rbio_init(struct bio *bio, rbio->ret = 0; rbio->opts = opts; rbio->bio.bi_end_io = end_io; +#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS + rbio->list_idx = 0; +#endif return rbio; } +struct promote_op; +void bch2_promote_op_to_text(struct printbuf *, struct promote_op *); void bch2_read_bio_to_text(struct printbuf *, struct bch_read_bio *); void bch2_fs_io_read_exit(struct bch_fs *); -- 2.51.0 From dbc18c97f1f0d336e3c4f6bb50f34c5255216995 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Apr 2025 06:03:33 -0400 Subject: [PATCH 03/16] bcachefs: print_string_as_lines: avoid printing empty line If the final line in in the message to be printed is blang, don't print it. This happens with indented printbufs - after a newline we emit spaces up to the indent level. Signed-off-by: Kent Overstreet --- fs/bcachefs/util.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 7e6ebe8cd9ea..1cff407c8c9d 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -252,6 +252,16 @@ void bch2_prt_u64_base2(struct printbuf *out, u64 v) bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1); } +static bool string_is_spaces(const char *str) +{ + while (*str) { + if (*str != ' ') + return false; + str++; + } + return true; +} + void bch2_print_string_as_lines(const char *prefix, const char *lines, bool nonblocking) { @@ -272,6 +282,9 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines, while (*lines) { p = strchrnul(lines, '\n'); + if (!*p && string_is_spaces(lines)) + break; + printk("%s%.*s\n", prefix, (int) (p - lines), lines); if (!*p) break; -- 2.51.0 From 353b89c6e6df522c221997a527358854b1c826d7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Apr 2025 05:45:48 -0400 Subject: [PATCH 04/16] bcachefs: bch2_io_failures_to_text() Pretty printer for bch_io_failures, to be used for better read error messages. Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 40 ++++++++++++++++++++++++++++++++++++++++ fs/bcachefs/extents.h | 2 ++ 2 files changed, 42 insertions(+) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index ef116c55f0a7..8a881b30fd4c 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -45,6 +45,46 @@ static void bch2_extent_crc_pack(union bch_extent_crc *, struct bch_extent_crc_unpacked, enum bch_extent_entry_type); +void bch2_io_failures_to_text(struct printbuf *out, + struct bch_fs *c, + struct bch_io_failures *failed) +{ + static const char * const error_types[] = { + "io", "checksum", "ec reconstruct", NULL + }; + + for (struct bch_dev_io_failures *f = failed->devs; + f < failed->devs + failed->nr; + f++) { + bch2_printbuf_make_room(out, 1024); + rcu_read_lock(); + out->atomic++; + struct bch_dev *ca = bch2_dev_rcu_noerror(c, f->dev); + if (ca) + prt_str(out, ca->name); + else + prt_printf(out, "(invalid device %u)", f->dev); + --out->atomic; + rcu_read_unlock(); + + prt_char(out, ' '); + + unsigned errflags = + ((!!f->failed_io) << 0) | + ((!!f->failed_csum_nr) << 1) | + ((!!f->failed_ec) << 2); + + if (is_power_of_2(errflags)) { + prt_bitflags(out, error_types, errflags); + prt_str(out, " error"); + } else { + prt_str(out, "errors: "); + prt_bitflags(out, error_types, errflags); + } + prt_newline(out); + } +} + struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *f, unsigned dev) { diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 9fe153183b36..9dd2655a5774 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -392,6 +392,8 @@ out: \ /* utility code common to all keys with pointers: */ +void bch2_io_failures_to_text(struct printbuf *, struct bch_fs *, + struct bch_io_failures *); struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *, unsigned); void bch2_mark_io_failure(struct bch_io_failures *, -- 2.51.0 From 156d9e8341e8aad55b0e79b2dc54003cb14e5077 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Apr 2025 05:49:20 -0400 Subject: [PATCH 05/16] bcachefs: Emit a single log message on data read error Instead of emitting a message immediately when we get an error in the read path, and then another at the end if we successfully retry - emit one single log message before returning from bch2_rbio_retry(). Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 10 +++++ fs/bcachefs/io_read.c | 83 +++++++++++------------------------------- 2 files changed, 31 insertions(+), 62 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 94e3edd932e3..8989ea4a3934 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -295,6 +295,16 @@ do { \ bch2_print(_c, __VA_ARGS__); \ } while (0) +#define bch2_print_str_ratelimited(_c, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + \ + if (__ratelimit(&_rs)) \ + bch2_print_str(_c, __VA_ARGS__); \ +} while (0) + #define bch_info(c, fmt, ...) \ bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) #define bch_info_ratelimited(c, fmt, ...) \ diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index abfd3a4c1d7d..cc708d46557e 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -582,7 +582,6 @@ static void bch2_rbio_retry(struct work_struct *work) .inum = rbio->read_pos.inode, }; struct bch_io_failures failed = { .nr = 0 }; - int orig_error = rbio->ret; struct btree_trans *trans = bch2_trans_get(c); @@ -623,10 +622,11 @@ static void bch2_rbio_retry(struct work_struct *work) if (ret) { rbio->ret = ret; rbio->bio.bi_status = BLK_STS_IOERR; - } else if (orig_error != -BCH_ERR_data_read_retry_csum_err_maybe_userspace && - orig_error != -BCH_ERR_data_read_ptr_stale_race && - !failed.nr) { + } + + if (failed.nr || ret) { struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, &buf, @@ -634,9 +634,22 @@ static void bch2_rbio_retry(struct work_struct *work) read_pos.offset << 9)); if (rbio->data_update) prt_str(&buf, "(internal move) "); - prt_str(&buf, "successful retry"); - bch_err_ratelimited(c, "%s", buf.buf); + prt_str(&buf, "data read error, "); + if (!ret) + prt_str(&buf, "successful retry"); + else + prt_str(&buf, bch2_err_str(ret)); + prt_newline(&buf); + + if (!bkey_deleted(&sk.k->k)) { + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(sk.k)); + prt_newline(&buf); + } + + bch2_io_failures_to_text(&buf, c, &failed); + + bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); printbuf_exit(&buf); } @@ -671,27 +684,6 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, } } -static void bch2_read_io_err(struct work_struct *work) -{ - struct bch_read_bio *rbio = - container_of(work, struct bch_read_bio, work); - struct bio *bio = &rbio->bio; - struct bch_fs *c = rbio->c; - struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; - struct printbuf buf = PRINTBUF; - - bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); - prt_printf(&buf, "data read error: %s", bch2_blk_status_to_str(bio->bi_status)); - - if (ca) - bch_err_ratelimited(ca, "%s", buf.buf); - else - bch_err_ratelimited(c, "%s", buf.buf); - - printbuf_exit(&buf); - bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_io_err, bio->bi_status); -} - static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, struct bch_read_bio *rbio) { @@ -755,31 +747,6 @@ static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) __bch2_rbio_narrow_crcs(trans, rbio)); } -static void bch2_read_csum_err(struct work_struct *work) -{ - struct bch_read_bio *rbio = - container_of(work, struct bch_read_bio, work); - struct bch_fs *c = rbio->c; - struct bio *src = &rbio->bio; - struct bch_extent_crc_unpacked crc = rbio->pick.crc; - struct nonce nonce = extent_nonce(rbio->version, crc); - struct bch_csum csum = bch2_checksum_bio(c, crc.csum_type, nonce, src); - struct printbuf buf = PRINTBUF; - - bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); - prt_str(&buf, "data "); - bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum); - - struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; - if (ca) - bch_err_ratelimited(ca, "%s", buf.buf); - else - bch_err_ratelimited(c, "%s", buf.buf); - - bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err, BLK_STS_IOERR); - printbuf_exit(&buf); -} - static void bch2_read_decompress_err(struct work_struct *work) { struct bch_read_bio *rbio = @@ -940,7 +907,7 @@ out: memalloc_nofs_restore(nofs_flags); return; csum_err: - bch2_rbio_punt(rbio, bch2_read_csum_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); + bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_csum_err, BLK_STS_IOERR); goto out; decompression_err: bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); @@ -966,7 +933,7 @@ static void bch2_read_endio(struct bio *bio) rbio->bio.bi_end_io = rbio->end_io; if (unlikely(bio->bi_status)) { - bch2_rbio_punt(rbio, bch2_read_io_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); + bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_io_err, bio->bi_status); return; } @@ -1292,14 +1259,6 @@ retry_pick: if (likely(!rbio->pick.do_ec_reconstruct)) { if (unlikely(!rbio->have_ioref)) { - struct printbuf buf = PRINTBUF; - bch2_read_err_msg_trans(trans, &buf, rbio, read_pos); - prt_printf(&buf, "no device to read from:\n "); - bch2_bkey_val_to_text(&buf, c, k); - - bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); - bch2_rbio_error(rbio, -BCH_ERR_data_read_retry_device_offline, BLK_STS_IOERR); -- 2.51.0 From b3bbd47f8314997b7a13cfe0b2048a19a5b62fcf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Apr 2025 09:16:49 -0400 Subject: [PATCH 06/16] bcachefs: Kill redundant error message in topology repair The btree node read path already logs btree node read errors, this isn't needed. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 2e72784332ff..fecf88079127 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -371,10 +371,7 @@ again: prt_char(&buf, ' '); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k)); - if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), - trans, btree_node_read_error, - "Topology repair: unreadable btree node at\n%s", - buf.buf)) { + if (bch2_err_matches(ret, EIO)) { bch2_btree_node_evict(trans, cur_k.k); cur = NULL; ret = bch2_journal_key_delete(c, b->c.btree_id, -- 2.51.0 From 3be132f93cff2586be482cb81807ff83899f572e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 24 Apr 2025 09:09:56 -0400 Subject: [PATCH 07/16] bcachefs: bch2_btree_lost_data() now handles snapshots tree We have a consolidated places for "this btree lost data, run this repair", so use it. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 4 ++++ fs/bcachefs/snapshot.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 2a8bcb9b1dd2..8f45d9e3a47e 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -95,6 +95,10 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) case BTREE_ID_accounting: ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; goto out; + case BTREE_ID_snapshots: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + goto out; default: ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 14ea09ccee37..94cf60f76b64 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1743,10 +1743,6 @@ int bch2_snapshots_read(struct bch_fs *c) BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) && test_bit(BCH_FS_may_go_rw, &c->flags)); - if (bch2_err_matches(ret, EIO) || - (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots))) - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots); - return ret; } -- 2.51.0 From 3aecbb01a168bf6396955e5da0533f6e5f000441 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 24 Apr 2025 09:13:28 -0400 Subject: [PATCH 08/16] bcachefs: Remove redundant calls to btree_lost_data() The btree node read path calls this before returning the read error. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 7 ------- fs/bcachefs/recovery.c | 3 --- 2 files changed, 10 deletions(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index fecf88079127..92ae31737a24 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -378,10 +378,6 @@ again: b->c.level, cur_k.k->k.p); if (ret) break; - - ret = bch2_btree_lost_data(c, b->c.btree_id); - if (ret) - break; continue; } @@ -543,9 +539,6 @@ int bch2_check_topology(struct bch_fs *c) bch2_btree_id_to_text(&buf, i); if (r->error) { - ret = bch2_btree_lost_data(c, i); - if (ret) - break; reconstruct_root: bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 8f45d9e3a47e..a0b42cca86fb 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -587,9 +587,6 @@ static int read_btree_roots(struct bch_fs *c) buf.buf, bch2_err_str(ret))) { if (btree_id_is_alloc(i)) r->error = 0; - - ret = bch2_btree_lost_data(c, i); - BUG_ON(ret); } } -- 2.51.0 From 300904700f14e4e05db2a16cf8e3890c8e856cf8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 24 Apr 2025 09:28:56 -0400 Subject: [PATCH 09/16] bcachefs: kill bch2_run_explicit_recovery_pass_persistent() No longer has users, so we can kill it and rename bch2_run_explicit_recovery_pass_persistent_locked(). Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 30 +++++++++++++++--------------- fs/bcachefs/recovery_passes.c | 19 +------------------ fs/bcachefs/recovery_passes.h | 1 - 3 files changed, 16 insertions(+), 34 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index a0b42cca86fb..b1afbe446d9e 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -50,24 +50,24 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) } /* Once we have runtime self healing for topology errors we won't need this: */ - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_topology) ?: ret; /* Btree node accounting will be off: */ __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; #ifdef CONFIG_BCACHEFS_DEBUG /* * These are much more minor, and don't need to be corrected right away, * but in debug mode we want the next fsck run to be clean: */ - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_lrus) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_lrus) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; #endif switch (btree) { case BTREE_ID_alloc: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); @@ -77,30 +77,30 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); goto out; case BTREE_ID_backpointers: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; goto out; case BTREE_ID_need_discard: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_freespace: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_bucket_gens: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_lru: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_accounting: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; goto out; case BTREE_ID_snapshots: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; default: - ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; } out: diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 87150dd30f4b..9be715a49454 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -193,7 +193,7 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, return ret; } -int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c, +int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, enum bch_recovery_pass pass) { lockdep_assert_held(&c->sb_lock); @@ -204,23 +204,6 @@ int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c, return bch2_run_explicit_recovery_pass(c, pass); } -int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, - enum bch_recovery_pass pass) -{ - enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass); - - mutex_lock(&c->sb_lock); - struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); - - if (!test_bit_le64(s, ext->recovery_passes_required)) { - __set_bit_le64(s, ext->recovery_passes_required); - bch2_write_super(c); - } - mutex_unlock(&c->sb_lock); - - return bch2_run_explicit_recovery_pass(c, pass); -} - static void bch2_clear_recovery_pass_required(struct bch_fs *c, enum bch_recovery_pass pass) { diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h index e19a8aaba2f8..62957e268a66 100644 --- a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h @@ -12,7 +12,6 @@ int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *, struct printbuf *, enum bch_recovery_pass); int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); -int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *, enum bch_recovery_pass); int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass); int bch2_run_online_recovery_passes(struct bch_fs *); -- 2.51.0 From 600a9207c8def056b4681fde8158c463576d5aca Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 24 Apr 2025 09:27:10 -0400 Subject: [PATCH 10/16] bcachefs: Plumb printbuf through bch2_btree_lost_data() Part of the ongoing project to improve error messages by building them up in printbufs and emitting them all at once, so that we can easily see what events are related in the log. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 28 +++++++++++++++-------- fs/bcachefs/recovery.c | 42 ++++++++++++++++++----------------- fs/bcachefs/recovery.h | 2 +- fs/bcachefs/recovery_passes.c | 9 ++++---- fs/bcachefs/recovery_passes.h | 4 +++- 5 files changed, 50 insertions(+), 35 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 84dae4c1ec13..41df1035ba2f 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1304,7 +1304,6 @@ fsck_err: retry_read = 1; } else { set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); } goto out; } @@ -1372,15 +1371,16 @@ start: if (!can_retry) { set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); break; } } - - async_object_list_del(c, btree_read_bio, rb->list_idx); - bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], - rb->start_time); - bio_put(&rb->bio); + if (btree_node_read_error(b)) { + struct printbuf buf = PRINTBUF; + bch2_btree_lost_data(c, &buf, b->c.btree_id); + if (buf.pos) + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); + } if ((saw_error || btree_node_need_rewrite(b)) && @@ -1398,6 +1398,10 @@ start: bch2_btree_node_rewrite_async(c, b); } + async_object_list_del(c, btree_read_bio, rb->list_idx); + bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], + rb->start_time); + bio_put(&rb->bio); printbuf_exit(&buf); clear_btree_node_read_in_flight(b); smp_mb__after_atomic(); @@ -1587,7 +1591,12 @@ fsck_err: if (ret) { set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); + + struct printbuf buf = PRINTBUF; + bch2_btree_lost_data(c, &buf, b->c.btree_id); + if (buf.pos) + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); } else if (*saw_error) bch2_btree_node_rewrite_async(c, b); @@ -1721,6 +1730,8 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, prt_str(&buf, "btree node read error: no device to read from\n at "); bch2_btree_pos_to_text(&buf, c, b); + prt_newline(&buf); + bch2_btree_lost_data(c, &buf, b->c.btree_id); bch_err_ratelimited(c, "%s", buf.buf); if (c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology) && @@ -1728,7 +1739,6 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, bch2_fatal_error(c); set_btree_node_read_error(b); - bch2_btree_lost_data(c, b->c.btree_id); clear_btree_node_read_in_flight(b); smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index b1afbe446d9e..d13a6df289c7 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -33,7 +33,9 @@ #include #include -int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) +int bch2_btree_lost_data(struct bch_fs *c, + struct printbuf *msg, + enum btree_id btree) { u64 b = BIT_ULL(btree); int ret = 0; @@ -42,32 +44,32 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); if (!(c->sb.btrees_lost_data & b)) { - struct printbuf buf = PRINTBUF; - bch2_btree_id_to_text(&buf, btree); - bch_err(c, "flagging btree %s lost data", buf.buf); - printbuf_exit(&buf); + prt_printf(msg, "flagging btree "); + bch2_btree_id_to_text(msg, btree); + prt_printf(msg, " lost data\n"); + ext->btrees_lost_data |= cpu_to_le64(b); } /* Once we have runtime self healing for topology errors we won't need this: */ - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_topology) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_topology) ?: ret; /* Btree node accounting will be off: */ __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; #ifdef CONFIG_BCACHEFS_DEBUG /* * These are much more minor, and don't need to be corrected right away, * but in debug mode we want the next fsck run to be clean: */ - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_lrus) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_lrus) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; #endif switch (btree) { case BTREE_ID_alloc: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); @@ -77,30 +79,30 @@ int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); goto out; case BTREE_ID_backpointers: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; goto out; case BTREE_ID_need_discard: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_freespace: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_bucket_gens: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_lru: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_accounting: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; goto out; case BTREE_ID_snapshots: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; default: - ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; } out: diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index d858ba674eaa..c023f52fc2d6 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_RECOVERY_H #define _BCACHEFS_RECOVERY_H -int bch2_btree_lost_data(struct bch_fs *, enum btree_id); +int bch2_btree_lost_data(struct bch_fs *, struct printbuf *, enum btree_id); void bch2_reconstruct_alloc(struct bch_fs *); int bch2_journal_replay(struct bch_fs *); diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 9be715a49454..347e17fe7901 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -141,13 +141,13 @@ static int __bch2_run_explicit_recovery_pass(struct printbuf *out, if (pass < BCH_RECOVERY_PASS_set_may_go_rw && c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) { if (print) - prt_printf(out, "need recovery pass %s (%u), but already rw", + prt_printf(out, "need recovery pass %s (%u), but already rw\n", bch2_recovery_passes[pass], pass); return -BCH_ERR_cannot_rewind_recovery; } if (print) - prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)", + prt_printf(out, "running explicit recovery pass %s (%u), currently at %s (%u)\n", bch2_recovery_passes[pass], pass, bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); @@ -194,14 +194,15 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, } int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, - enum bch_recovery_pass pass) + struct printbuf *out, + enum bch_recovery_pass pass) { lockdep_assert_held(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required); - return bch2_run_explicit_recovery_pass(c, pass); + return bch2_run_explicit_recovery_pass_printbuf(c, out, pass); } static void bch2_clear_recovery_pass_required(struct bch_fs *c, diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h index 62957e268a66..1f91be4258c5 100644 --- a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h @@ -12,7 +12,9 @@ int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *, struct printbuf *, enum bch_recovery_pass); int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); -int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass); + +int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *, + enum bch_recovery_pass); int bch2_run_online_recovery_passes(struct bch_fs *); int bch2_run_recovery_passes(struct bch_fs *); -- 2.51.0 From d31f155964aee6e6141967fc392a9a99b221e117 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Apr 2025 09:02:15 -0400 Subject: [PATCH 11/16] bcachefs: bch2_fsck_err_opt() Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 1 + fs/bcachefs/error.c | 42 ++++++++++++++++++++++++++++++++++++++++++ fs/bcachefs/error.h | 4 ++++ 3 files changed, 47 insertions(+) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 1a52edc7c8d8..4aac0182cbed 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -175,6 +175,7 @@ x(0, backpointer_to_overwritten_btree_node) \ x(0, journal_reclaim_would_deadlock) \ x(EINVAL, fsck) \ + x(BCH_ERR_fsck, fsck_ask) \ x(BCH_ERR_fsck, fsck_fix) \ x(BCH_ERR_fsck, fsck_delete_bkey) \ x(BCH_ERR_fsck, fsck_ignore) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 20495062d6e1..731733e12e6b 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -393,6 +393,48 @@ bool __bch2_count_fsck_err(struct bch_fs *c, return print && !repeat; } +int bch2_fsck_err_opt(struct bch_fs *c, + enum bch_fsck_flags flags, + enum bch_sb_error_id err) +{ + if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) + flags |= fsck_flags_extra[err]; + + if (test_bit(BCH_FS_fsck_running, &c->flags)) { + if (!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) + return -BCH_ERR_fsck_repair_unimplemented; + + switch (c->opts.fix_errors) { + case FSCK_FIX_exit: + return -BCH_ERR_fsck_errors_not_fixed; + case FSCK_FIX_yes: + if (flags & FSCK_CAN_FIX) + return -BCH_ERR_fsck_fix; + fallthrough; + case FSCK_FIX_no: + if (flags & FSCK_CAN_IGNORE) + return -BCH_ERR_fsck_ignore; + return -BCH_ERR_fsck_errors_not_fixed; + case FSCK_FIX_ask: + if (flags & FSCK_AUTOFIX) + return -BCH_ERR_fsck_fix; + return -BCH_ERR_fsck_ask; + default: + BUG(); + } + } else { + if ((flags & FSCK_AUTOFIX) && + (c->opts.errors == BCH_ON_ERROR_continue || + c->opts.errors == BCH_ON_ERROR_fix_safe)) + return -BCH_ERR_fsck_fix; + + if (c->opts.errors == BCH_ON_ERROR_continue && + (flags & FSCK_CAN_IGNORE)) + return -BCH_ERR_fsck_ignore; + return -BCH_ERR_fsck_errors_not_fixed; + } +} + int __bch2_fsck_err(struct bch_fs *c, struct btree_trans *trans, enum bch_fsck_flags flags, diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 0b3ede1c2015..d89dd270b2e5 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -80,6 +80,10 @@ bool __bch2_count_fsck_err(struct bch_fs *, enum bch_sb_error_id, struct printbu #define bch2_count_fsck_err(_c, _err, ...) \ __bch2_count_fsck_err(_c, BCH_FSCK_ERR_##_err, __VA_ARGS__) +int bch2_fsck_err_opt(struct bch_fs *, + enum bch_fsck_flags, + enum bch_sb_error_id); + __printf(5, 6) __cold int __bch2_fsck_err(struct bch_fs *, struct btree_trans *, enum bch_fsck_flags, -- 2.51.0 From 9c2472658be20d04c6dc34d5314a7e99cc4fed25 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Apr 2025 20:38:50 -0400 Subject: [PATCH 12/16] bcachefs: bch2_mark_btree_validate_failure() Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 31 ++++++++++++++++++++++++++----- fs/bcachefs/extents.h | 1 + fs/bcachefs/extents_types.h | 1 + 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 8a881b30fd4c..c4fe4ffd41f1 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -56,6 +56,14 @@ void bch2_io_failures_to_text(struct printbuf *out, for (struct bch_dev_io_failures *f = failed->devs; f < failed->devs + failed->nr; f++) { + unsigned errflags = + ((!!f->failed_io) << 0) | + ((!!f->failed_csum_nr) << 1) | + ((!!f->failed_ec) << 2); + + if (!errflags) + continue; + bch2_printbuf_make_room(out, 1024); rcu_read_lock(); out->atomic++; @@ -69,11 +77,6 @@ void bch2_io_failures_to_text(struct printbuf *out, prt_char(out, ' '); - unsigned errflags = - ((!!f->failed_io) << 0) | - ((!!f->failed_csum_nr) << 1) | - ((!!f->failed_ec) << 2); - if (is_power_of_2(errflags)) { prt_bitflags(out, error_types, errflags); prt_str(out, " error"); @@ -119,6 +122,22 @@ void bch2_mark_io_failure(struct bch_io_failures *failed, f->failed_csum_nr++; } +void bch2_mark_btree_validate_failure(struct bch_io_failures *failed, + unsigned dev) +{ + struct bch_dev_io_failures *f = bch2_dev_io_failures(failed, dev); + + if (!f) { + BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs)); + + f = &failed->devs[failed->nr++]; + memset(f, 0, sizeof(*f)); + f->dev = dev; + } + + f->failed_btree_validate = true; +} + static inline u64 dev_latency(struct bch_dev *ca) { return ca ? atomic64_read(&ca->cur_latency[READ]) : S64_MAX; @@ -219,6 +238,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, if (ca && ca->mi.state != BCH_MEMBER_STATE_failed) { have_io_errors |= f->failed_io; + have_io_errors |= f->failed_btree_validate; have_io_errors |= f->failed_ec; } have_csum_errors |= !!f->failed_csum_nr; @@ -226,6 +246,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, if (p.has_ec && (f->failed_io || f->failed_csum_nr)) p.do_ec_reconstruct = true; else if (f->failed_io || + f->failed_btree_validate || f->failed_csum_nr > c->opts.checksum_err_retry_nr) continue; } diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 9dd2655a5774..b8590e51b76e 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -398,6 +398,7 @@ struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *, unsigned); void bch2_mark_io_failure(struct bch_io_failures *, struct extent_ptr_decoded *, bool); +void bch2_mark_btree_validate_failure(struct bch_io_failures *, unsigned); int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, struct bch_io_failures *, struct extent_ptr_decoded *, int); diff --git a/fs/bcachefs/extents_types.h b/fs/bcachefs/extents_types.h index e51529dca4c2..b23ce4a373c0 100644 --- a/fs/bcachefs/extents_types.h +++ b/fs/bcachefs/extents_types.h @@ -34,6 +34,7 @@ struct bch_io_failures { u8 dev; unsigned failed_csum_nr:6, failed_io:1, + failed_btree_validate:1, failed_ec:1; } devs[BCH_REPLICAS_MAX + 1]; }; -- 2.51.0 From cd3cdb1ef706a1ac725194d81858d58375739b25 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 22 Apr 2025 09:14:19 -0400 Subject: [PATCH 13/16] bcachefs: Single err message for btree node reads Like we just did with the data read path, emit a single error message per btree node reads, nicely formatted, with all the actions we took grouped together. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 254 +++++++++++++++++++++++------------------ fs/bcachefs/btree_io.h | 4 +- fs/bcachefs/debug.c | 4 +- 3 files changed, 151 insertions(+), 111 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 41df1035ba2f..e079e12adf86 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -516,19 +516,23 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) static void btree_err_msg(struct printbuf *out, struct bch_fs *c, struct bch_dev *ca, + bool print_pos, struct btree *b, struct bset *i, struct bkey_packed *k, - unsigned offset, int write) + unsigned offset, int rw) { - prt_printf(out, bch2_log_msg(c, "%s"), - write == READ - ? "error validating btree node " - : "corrupt btree node before write "); + if (print_pos) { + prt_str(out, rw == READ + ? "error validating btree node " + : "corrupt btree node before write "); + prt_printf(out, "at btree "); + bch2_btree_pos_to_text(out, c, b); + prt_newline(out); + } + if (ca) - prt_printf(out, "on %s ", ca->name); - prt_printf(out, "at btree "); - bch2_btree_pos_to_text(out, c, b); + prt_printf(out, "%s ", ca->name); - prt_printf(out, "\nnode offset %u/%u", + prt_printf(out, "node offset %u/%u", b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key))); if (i) prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s)); @@ -539,75 +543,110 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, prt_str(out, ": "); } -__printf(10, 11) +__printf(11, 12) static int __btree_err(int ret, struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, struct bkey_packed *k, - int write, - bool have_retry, + int rw, enum bch_sb_error_id err_type, + struct bch_io_failures *failed, + struct printbuf *err_msg, const char *fmt, ...) { - bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes; + if (c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes) + return -BCH_ERR_fsck_fix; + + bool have_retry = false; + int ret2; + + if (ca) { + bch2_mark_btree_validate_failure(failed, ca->dev_idx); + + struct extent_ptr_decoded pick; + have_retry = !bch2_bkey_pick_read_device(c, + bkey_i_to_s_c(&b->key), + failed, &pick, -1); + } if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry) ret = -BCH_ERR_btree_node_read_err_fixable; if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry) ret = -BCH_ERR_btree_node_read_err_bad_node; - if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable) - bch2_sb_error_count(c, err_type); + bch2_sb_error_count(c, err_type); + + bool print_deferred = err_msg && + rw == READ && + !(test_bit(BCH_FS_fsck_running, &c->flags) && + c->opts.fix_errors == FSCK_FIX_ask); struct printbuf out = PRINTBUF; - if (write != WRITE && ret != -BCH_ERR_btree_node_read_err_fixable) { - printbuf_indent_add_nextline(&out, 2); -#ifdef BCACHEFS_LOG_PREFIX - prt_printf(&out, bch2_log_msg(c, "")); -#endif - } + bch2_log_msg_start(c, &out); + + if (!print_deferred) + err_msg = &out; - btree_err_msg(&out, c, ca, b, i, k, b->written, write); + btree_err_msg(err_msg, c, ca, !print_deferred, b, i, k, b->written, rw); va_list args; va_start(args, fmt); - prt_vprintf(&out, fmt, args); + prt_vprintf(err_msg, fmt, args); va_end(args); - if (write == WRITE) { + if (print_deferred) { + prt_newline(err_msg); + + switch (ret) { + case -BCH_ERR_btree_node_read_err_fixable: + ret2 = bch2_fsck_err_opt(c, FSCK_CAN_FIX, err_type); + if (ret2 != -BCH_ERR_fsck_fix && + ret2 != -BCH_ERR_fsck_ignore) { + ret = ret2; + goto fsck_err; + } + + if (!have_retry) + ret = -BCH_ERR_fsck_fix; + goto out; + case -BCH_ERR_btree_node_read_err_bad_node: + prt_str(&out, ", "); + ret = __bch2_topology_error(c, &out); + break; + } + + goto out; + } + + if (rw == WRITE) { prt_str(&out, ", "); ret = __bch2_inconsistent_error(c, &out) ? -BCH_ERR_fsck_errors_not_fixed : 0; - silent = false; + goto print; } switch (ret) { case -BCH_ERR_btree_node_read_err_fixable: - ret = !silent - ? __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf) - : -BCH_ERR_fsck_fix; - if (ret != -BCH_ERR_fsck_fix && - ret != -BCH_ERR_fsck_ignore) + ret2 = __bch2_fsck_err(c, NULL, FSCK_CAN_FIX, err_type, "%s", out.buf); + if (ret2 != -BCH_ERR_fsck_fix && + ret2 != -BCH_ERR_fsck_ignore) { + ret = ret2; goto fsck_err; - ret = -BCH_ERR_fsck_fix; + } + + if (!have_retry) + ret = -BCH_ERR_fsck_fix; goto out; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); ret = __bch2_topology_error(c, &out); - if (ret) - silent = false; - break; - case -BCH_ERR_btree_node_read_err_incompatible: - ret = -BCH_ERR_fsck_errors_not_fixed; - silent = false; break; } - - if (!silent) - bch2_print_str(c, KERN_ERR, out.buf); +print: + bch2_print_str(c, KERN_ERR, out.buf); out: fsck_err: printbuf_exit(&out); @@ -616,8 +655,9 @@ fsck_err: #define btree_err(type, c, ca, b, i, k, _err_type, msg, ...) \ ({ \ - int _ret = __btree_err(type, c, ca, b, i, k, write, have_retry, \ + int _ret = __btree_err(type, c, ca, b, i, k, write, \ BCH_FSCK_ERR_##_err_type, \ + failed, err_msg, \ msg, ##__VA_ARGS__); \ \ if (_ret != -BCH_ERR_fsck_fix) { \ @@ -625,7 +665,7 @@ fsck_err: goto fsck_err; \ } \ \ - *saw_error = true; \ + true; \ }) #define btree_err_on(cond, ...) ((cond) ? btree_err(__VA_ARGS__) : false) @@ -683,8 +723,9 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b) static int validate_bset(struct bch_fs *c, struct bch_dev *ca, struct btree *b, struct bset *i, - unsigned offset, unsigned sectors, - int write, bool have_retry, bool *saw_error) + unsigned offset, unsigned sectors, int write, + struct bch_io_failures *failed, + struct printbuf *err_msg) { unsigned version = le16_to_cpu(i->version); unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); @@ -897,7 +938,8 @@ static inline int btree_node_read_bkey_cmp(const struct btree *b, static int validate_bset_keys(struct bch_fs *c, struct btree *b, struct bset *i, int write, - bool have_retry, bool *saw_error) + struct bch_io_failures *failed, + struct printbuf *err_msg) { unsigned version = le16_to_cpu(i->version); struct bkey_packed *k, *prev = NULL; @@ -1010,7 +1052,9 @@ fsck_err: } int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, - struct btree *b, bool have_retry, bool *saw_error) + struct btree *b, + struct bch_io_failures *failed, + struct printbuf *err_msg) { struct btree_node_entry *bne; struct sort_iter *iter; @@ -1023,7 +1067,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); u64 max_journal_seq = 0; struct printbuf buf = PRINTBUF; - int ret = 0, retry_read = 0, write = READ; + int ret = 0, write = READ; u64 start_time = local_clock(); b->version_ondisk = U16_MAX; @@ -1157,15 +1201,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, b->version_ondisk = min(b->version_ondisk, le16_to_cpu(i->version)); - ret = validate_bset(c, ca, b, i, b->written, sectors, - READ, have_retry, saw_error); + ret = validate_bset(c, ca, b, i, b->written, sectors, READ, failed, err_msg); if (ret) goto fsck_err; if (!b->written) btree_node_set_format(b, b->data->format); - ret = validate_bset_keys(c, b, i, READ, have_retry, saw_error); + ret = validate_bset_keys(c, b, i, READ, failed, err_msg); if (ret) goto fsck_err; @@ -1293,19 +1336,11 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, if (!ptr_written) set_btree_node_need_rewrite(b); -out: +fsck_err: mempool_free(iter, &c->fill_iter); printbuf_exit(&buf); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time); - return retry_read; -fsck_err: - if (ret == -BCH_ERR_btree_node_read_err_want_retry || - ret == -BCH_ERR_btree_node_read_err_must_retry) { - retry_read = 1; - } else { - set_btree_node_read_error(b); - } - goto out; + return ret; } static void btree_node_read_work(struct work_struct *work) @@ -1317,15 +1352,25 @@ static void btree_node_read_work(struct work_struct *work) struct btree *b = rb->b; struct bio *bio = &rb->bio; struct bch_io_failures failed = { .nr = 0 }; + int ret = 0; + struct printbuf buf = PRINTBUF; - bool saw_error = false; - bool retry = false; - bool can_retry; + bch2_log_msg_start(c, &buf); + + prt_printf(&buf, "btree node read error at btree "); + bch2_btree_pos_to_text(&buf, c, b); + prt_newline(&buf); goto start; while (1) { - retry = true; - bch_info(c, "retrying read"); + ret = bch2_bkey_pick_read_device(c, + bkey_i_to_s_c(&b->key), + &failed, &rb->pick, -1); + if (ret) { + set_btree_node_read_error(b); + break; + } + ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read); rb->have_ioref = ca != NULL; rb->start_time = local_clock(); @@ -1343,60 +1388,54 @@ static void btree_node_read_work(struct work_struct *work) bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, rb->start_time, !bio->bi_status); start: - printbuf_reset(&buf); - bch2_btree_pos_to_text(&buf, c, b); - - if (ca && bio->bi_status) - bch_err_dev_ratelimited(ca, - "btree read error %s for %s", - bch2_blk_status_to_str(bio->bi_status), buf.buf); if (rb->have_ioref) enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_read); rb->have_ioref = false; - bch2_mark_io_failure(&failed, &rb->pick, false); - - can_retry = bch2_bkey_pick_read_device(c, - bkey_i_to_s_c(&b->key), - &failed, &rb->pick, -1) > 0; - - if (!bio->bi_status && - !bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) { - if (retry) - bch_info(c, "retry success"); - break; + if (bio->bi_status) { + bch2_mark_io_failure(&failed, &rb->pick, false); + continue; } - saw_error = true; + ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf); + if (ret == -BCH_ERR_btree_node_read_err_want_retry || + ret == -BCH_ERR_btree_node_read_err_must_retry) + continue; - if (!can_retry) { + if (ret) set_btree_node_read_error(b); - break; - } + + break; } - if (btree_node_read_error(b)) { - struct printbuf buf = PRINTBUF; + + bch2_io_failures_to_text(&buf, c, &failed); + + if (btree_node_read_error(b)) bch2_btree_lost_data(c, &buf, b->c.btree_id); - if (buf.pos) - bch_err(c, "%s", buf.buf); - printbuf_exit(&buf); + + /* + * only print retry success if we read from a replica with no errors + */ + if (btree_node_read_error(b)) + prt_printf(&buf, "ret %s", bch2_err_str(ret)); + else if (failed.nr) { + if (!bch2_dev_io_failures(&failed, rb->pick.ptr.dev)) + prt_printf(&buf, "retry success"); + else + prt_printf(&buf, "repair success"); } - if ((saw_error || + if ((failed.nr || btree_node_need_rewrite(b)) && !btree_node_read_error(b) && c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { - if (saw_error) { - printbuf_reset(&buf); - bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); - prt_str(&buf, " "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s", - __func__, buf.buf); - } - + prt_printf(&buf, " (rewriting node)"); bch2_btree_node_rewrite_async(c, b); } + prt_newline(&buf); + + if (failed.nr) + bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); async_object_list_del(c, btree_read_bio, rb->list_idx); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], @@ -1486,12 +1525,13 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) struct btree *b = ra->b; struct printbuf buf = PRINTBUF; bool dump_bset_maps = false; - bool have_retry = false; int ret = 0, best = -1, write = READ; unsigned i, written = 0, written2 = 0; __le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2 ? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0; bool _saw_error = false, *saw_error = &_saw_error; + struct printbuf *err_msg = NULL; + struct bch_io_failures *failed = NULL; for (i = 0; i < ra->nr; i++) { struct btree_node *bn = ra->buf[i]; @@ -1584,7 +1624,7 @@ fsck_err: if (best >= 0) { memcpy(b->data, ra->buf[best], btree_buf_bytes(b)); - ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error); + ret = bch2_btree_node_read_done(c, NULL, b, NULL, NULL); } else { ret = -1; } @@ -2211,8 +2251,6 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - bool saw_error; - int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), (struct bkey_validate_context) { .from = BKEY_VALIDATE_btree_node, @@ -2225,8 +2263,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, return ret; } - ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?: - validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error); + ret = validate_bset_keys(c, b, i, WRITE, NULL, NULL) ?: + validate_bset(c, NULL, b, i, b->written, sectors, WRITE, NULL, NULL); if (ret) { bch2_inconsistent_error(c); dump_stack(); diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index afdb11a9f71c..30a5180532c8 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -134,7 +134,9 @@ void bch2_btree_build_aux_trees(struct btree *); void bch2_btree_init_next(struct btree_trans *, struct btree *); int bch2_btree_node_read_done(struct bch_fs *, struct bch_dev *, - struct btree *, bool, bool *); + struct btree *, + struct bch_io_failures *, + struct printbuf *); void bch2_btree_node_read(struct btree_trans *, struct btree *, bool); int bch2_btree_root_read(struct bch_fs *, enum btree_id, const struct bkey_i *, unsigned); diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 079bc2b359cd..4ee5d486b305 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -42,7 +42,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, struct btree_node *n_sorted = c->verify_data->data; struct bset *sorted, *inmemory = &b->data->keys; struct bio *bio; - bool failed = false, saw_error = false; + bool failed = false; struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_verify_replicas); @@ -66,7 +66,7 @@ static bool bch2_btree_verify_replica(struct bch_fs *c, struct btree *b, memcpy(n_ondisk, n_sorted, btree_buf_bytes(b)); v->written = 0; - if (bch2_btree_node_read_done(c, ca, v, false, &saw_error) || saw_error) + if (bch2_btree_node_read_done(c, ca, v, NULL, NULL)) return false; n_sorted = c->verify_data->data; -- 2.51.0 From c21f41f6905be4fc5059a10a5bba94105ba87269 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 24 Apr 2025 17:55:20 -0400 Subject: [PATCH 14/16] bcachefs: bch2_dirent_to_text() shows casefolded dirents Signed-off-by: Kent Overstreet --- fs/bcachefs/dirent.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index a51195088227..d198001838f3 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -212,12 +212,19 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); struct qstr d_name = bch2_dirent_get_name(d); - prt_printf(out, "%.*s -> ", d_name.len, d_name.name); + prt_printf(out, "%.*s", d_name.len, d_name.name); + + if (d.v->d_casefold) { + struct qstr d_name = bch2_dirent_get_lookup_name(d); + prt_printf(out, " (casefold %.*s)", d_name.len, d_name.name); + } + + prt_str(out, " ->"); if (d.v->d_type != DT_SUBVOL) - prt_printf(out, "%llu", le64_to_cpu(d.v->d_inum)); + prt_printf(out, " %llu", le64_to_cpu(d.v->d_inum)); else - prt_printf(out, "%u -> %u", + prt_printf(out, " %u -> %u", le32_to_cpu(d.v->d_parent_subvol), le32_to_cpu(d.v->d_child_subvol)); -- 2.51.0 From aff2b6a7fc285287f7ffc6691aca333a63b18230 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 12:38:53 -0400 Subject: [PATCH 15/16] bcachefs: provide unlocked version of run_explicit_recovery_pass_persistent Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/recovery.c | 30 +++++++++++++++--------------- fs/bcachefs/recovery_passes.c | 20 +++++++++++++++++--- fs/bcachefs/recovery_passes.h | 2 ++ fs/bcachefs/super-io.c | 3 +++ 5 files changed, 38 insertions(+), 18 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 8989ea4a3934..0369dd656d32 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -844,6 +844,7 @@ struct bch_fs { unsigned nsec_per_time_unit; u64 features; u64 compat; + u64 recovery_passes_required; unsigned long errors_silent[BITS_TO_LONGS(BCH_FSCK_ERR_MAX)]; u64 btrees_lost_data; } sb; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d13a6df289c7..375111b56029 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -52,24 +52,24 @@ int bch2_btree_lost_data(struct bch_fs *c, } /* Once we have runtime self healing for topology errors we won't need this: */ - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_topology) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_topology) ?: ret; /* Btree node accounting will be off: */ __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; #ifdef CONFIG_BCACHEFS_DEBUG /* * These are much more minor, and don't need to be corrected right away, * but in debug mode we want the next fsck run to be clean: */ - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_lrus) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_lrus) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; #endif switch (btree) { case BTREE_ID_alloc: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); @@ -79,30 +79,30 @@ int bch2_btree_lost_data(struct bch_fs *c, __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); goto out; case BTREE_ID_backpointers: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; goto out; case BTREE_ID_need_discard: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_freespace: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_bucket_gens: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_lru: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; goto out; case BTREE_ID_accounting: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_check_allocations) ?: ret; goto out; case BTREE_ID_snapshots: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; default: - ret = bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = __bch2_run_explicit_recovery_pass_persistent(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; goto out; } out: diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 347e17fe7901..97af1e0629eb 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -193,9 +193,9 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, return ret; } -int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, - struct printbuf *out, - enum bch_recovery_pass pass) +int __bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, + struct printbuf *out, + enum bch_recovery_pass pass) { lockdep_assert_held(&c->sb_lock); @@ -205,6 +205,20 @@ int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, return bch2_run_explicit_recovery_pass_printbuf(c, out, pass); } +int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, + struct printbuf *out, + enum bch_recovery_pass pass) +{ + if (c->sb.recovery_passes_required & BIT_ULL(pass)) + return 0; + + mutex_lock(&c->sb_lock); + int ret = __bch2_run_explicit_recovery_pass_persistent(c, out, pass); + mutex_unlock(&c->sb_lock); + + return ret; +} + static void bch2_clear_recovery_pass_required(struct bch_fs *c, enum bch_recovery_pass pass) { diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h index 1f91be4258c5..94fbc64e9b7e 100644 --- a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h @@ -13,6 +13,8 @@ int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *, enum bch_recovery_pass); int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); +int __bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *, + enum bch_recovery_pass); int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *, enum bch_recovery_pass); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index d53cbc5f9925..8730d2e78d1d 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -623,6 +623,9 @@ static void bch2_sb_update(struct bch_fs *c) struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext); if (ext) { + c->sb.recovery_passes_required = + bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent, sizeof(c->sb.errors_silent) * 8); c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data); -- 2.51.0 From 7677859a47a464f1c5603077809d4bc13f2d549f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 26 Apr 2025 12:39:17 -0400 Subject: [PATCH 16/16] bcachefs: Run most explicit recovery passes persistent If we detect an error that requires running a recovery pass, and we're not in recovery, we won't be able to fix it until the next mount - make sure we're noting in the superblock that it needs to run. Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 4 ++-- fs/bcachefs/error.c | 2 +- fs/bcachefs/recovery_passes.c | 2 +- fs/bcachefs/recovery_passes.h | 3 --- fs/bcachefs/sb-members.c | 2 +- fs/bcachefs/subvolume.c | 2 +- 6 files changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 3ec33a7e9d92..596edc7bba2f 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -399,7 +399,7 @@ static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf bool print = __bch2_count_fsck_err(c, id, buf); - int ret = bch2_run_explicit_recovery_pass_printbuf(c, buf, + int ret = bch2_run_explicit_recovery_pass_persistent(c, buf, BCH_RECOVERY_PASS_check_allocations); if (insert) { @@ -972,7 +972,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, bool print = bch2_count_fsck_err(c, bucket_metadata_type_mismatch, &buf); - bch2_run_explicit_recovery_pass_printbuf(c, &buf, + bch2_run_explicit_recovery_pass_persistent(c, &buf, BCH_RECOVERY_PASS_check_allocations); if (print) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 731733e12e6b..d7bc70fd7762 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -104,7 +104,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) __bch2_inconsistent_error(c, out); return -BCH_ERR_btree_need_topology_repair; } else { - return bch2_run_explicit_recovery_pass_printbuf(c, out, BCH_RECOVERY_PASS_check_topology) ?: + return bch2_run_explicit_recovery_pass_persistent(c, out, BCH_RECOVERY_PASS_check_topology) ?: -BCH_ERR_btree_node_read_validate_error; } } diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 97af1e0629eb..e14aca00cb7d 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -162,7 +162,7 @@ static int __bch2_run_explicit_recovery_pass(struct printbuf *out, } } -int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *c, +static int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *c, struct printbuf *out, enum bch_recovery_pass pass) { diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h index 94fbc64e9b7e..f33dd005beb4 100644 --- a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h @@ -8,9 +8,6 @@ u64 bch2_recovery_passes_from_stable(u64 v); u64 bch2_fsck_recovery_passes(void); -int bch2_run_explicit_recovery_pass_printbuf(struct bch_fs *, - struct printbuf *, - enum bch_recovery_pass); int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); int __bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *, diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 462a2c21a9de..9ab4d9a4b421 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -20,7 +20,7 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) bool print = bch2_count_fsck_err(c, ptr_to_invalid_device, &buf); - int ret = bch2_run_explicit_recovery_pass_printbuf(c, &buf, + int ret = bch2_run_explicit_recovery_pass_persistent(c, &buf, BCH_RECOVERY_PASS_check_allocations); if (print) diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index ff20ce98a476..51ab2ee10706 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -23,7 +23,7 @@ static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid) prt_printf(&buf, "missing subvolume %u", subvolid); bool print = bch2_count_fsck_err(c, subvol_missing, &buf); - int ret = bch2_run_explicit_recovery_pass_printbuf(c, &buf, + int ret = bch2_run_explicit_recovery_pass_persistent(c, &buf, BCH_RECOVERY_PASS_check_inodes); if (print) bch2_print_str(c, KERN_ERR, buf.buf); -- 2.51.0