From f7f196170dcd7c629126ee9d37be5dbdb6d4f941 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Nov 2024 17:57:55 -0500 Subject: [PATCH 01/16] bcachefs: cryptographic MACs on superblock are not (yet?) supported We should add support for cryptographic macs on the superblock - and it won't be hard, but it'll need an incompatible feature bit (and we have a new incompatible feature versioning scheme coming). For now, just add a guard to avoid a dull ptr deref in gen_poly_key(). Reported-by: syzbot+dd3d9835055dacb66f35@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/super-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 4c29f8215d54..6a086c1c4b14 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -677,7 +677,8 @@ reread: } enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb); - if (csum_type >= BCH_CSUM_NR) { + if (csum_type >= BCH_CSUM_NR || + bch2_csum_type_is_encryption(csum_type)) { prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb)); return -BCH_ERR_invalid_sb_csum_type; } -- 2.51.0 From ff1dd05f82338cc0be15285035b55b517b4c64a2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Nov 2024 18:05:06 -0500 Subject: [PATCH 02/16] bcachefs: bch2_trans_relock() is trylock for lockdep fix some spurious lockdep splats Reported-by: syzbot+e088be3c2d5c05aaac35@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 8 ++++---- fs/bcachefs/btree_locking.c | 2 +- fs/bcachefs/btree_locking.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 80c3b55ce763..9c54891c737a 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1007,7 +1007,7 @@ retry_all: bch2_trans_unlock(trans); cond_resched(); - trans_set_locked(trans); + trans_set_locked(trans, false); if (unlikely(trans->memory_allocation_failure)) { struct closure cl; @@ -3248,7 +3248,7 @@ u32 bch2_trans_begin(struct btree_trans *trans) trans->last_begin_ip = _RET_IP_; - trans_set_locked(trans); + trans_set_locked(trans, false); if (trans->restarted) { bch2_btree_path_traverse_all(trans); @@ -3354,7 +3354,7 @@ got_trans: trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); trans->srcu_lock_time = jiffies; trans->srcu_held = true; - trans_set_locked(trans); + trans_set_locked(trans, false); closure_init_stack_release(&trans->ref); return trans; @@ -3622,7 +3622,7 @@ int bch2_fs_btree_iter_init(struct bch_fs *c) #ifdef CONFIG_LOCKDEP fs_reclaim_acquire(GFP_KERNEL); struct btree_trans *trans = bch2_trans_get(c); - trans_set_locked(trans); + trans_set_locked(trans, false); bch2_trans_put(trans); fs_reclaim_release(GFP_KERNEL); #endif diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index efe2a007b482..d343df9f0ad2 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -782,7 +782,7 @@ static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace) return bch2_trans_relock_fail(trans, path, &f, trace); } - trans_set_locked(trans); + trans_set_locked(trans, true); out: bch2_trans_verify_locks(trans); return 0; diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index ca4aeefd631e..7474ab6ce019 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -188,10 +188,10 @@ int bch2_six_check_for_deadlock(struct six_lock *lock, void *p); /* lock: */ -static inline void trans_set_locked(struct btree_trans *trans) +static inline void trans_set_locked(struct btree_trans *trans, bool try) { if (!trans->locked) { - lock_acquire_exclusive(&trans->dep_map, 0, 0, NULL, _THIS_IP_); + lock_acquire_exclusive(&trans->dep_map, 0, try, NULL, _THIS_IP_); trans->locked = true; trans->last_unlock_ip = 0; -- 2.51.0 From 9bdb3b73e73203546107eb11a4d8bb3ad3f48851 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Nov 2024 19:02:18 -0500 Subject: [PATCH 03/16] bcachefs: Check for extent crc uncompressed/compressed size mismatch When not compressed, these must be equal - this fixes an assertion pop in bch2_rechecksum_bio(). Reported-by: syzbot+50d3544c9b8db9c99fd2@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 22 +++++++++++++--------- fs/bcachefs/sb-errors_format.h | 5 +++-- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index aa3b88291814..2fc9ace5533c 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1323,9 +1323,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, case BCH_EXTENT_ENTRY_crc128: crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, - c, ptr_crc_uncompressed_size_too_small, - "checksum offset + key size > uncompressed size"); bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, ptr_crc_csum_type_unknown, "invalid checksum type"); @@ -1333,6 +1330,19 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, c, ptr_crc_compression_type_unknown, "invalid compression type"); + bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, + c, ptr_crc_uncompressed_size_too_small, + "checksum offset + key size > uncompressed size"); + bkey_fsck_err_on(crc_is_encoded(crc) && + (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && + (from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), + c, ptr_crc_uncompressed_size_too_big, + "too large encoded extent"); + bkey_fsck_err_on(!crc_is_compressed(crc) && + crc.compressed_size != crc.uncompressed_size, + c, ptr_crc_uncompressed_size_mismatch, + "not compressed but compressed != uncompressed size"); + if (bch2_csum_type_is_encryption(crc.csum_type)) { if (nonce == UINT_MAX) nonce = crc.offset + crc.nonce; @@ -1346,12 +1356,6 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, "redundant crc entry"); crc_since_last_ptr = true; - bkey_fsck_err_on(crc_is_encoded(crc) && - (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && - (from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), - c, ptr_crc_uncompressed_size_too_big, - "too large encoded extent"); - size_ondisk = crc.compressed_size; break; case BCH_EXTENT_ENTRY_stripe_ptr: diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 342eda8ab69f..3bbda181f314 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -172,10 +172,11 @@ enum bch_fsck_flags { x(ptr_bucket_data_type_mismatch, 155, 0) \ x(ptr_cached_and_erasure_coded, 156, 0) \ x(ptr_crc_uncompressed_size_too_small, 157, 0) \ + x(ptr_crc_uncompressed_size_too_big, 161, 0) \ + x(ptr_crc_uncompressed_size_mismatch, 300, 0) \ x(ptr_crc_csum_type_unknown, 158, 0) \ x(ptr_crc_compression_type_unknown, 159, 0) \ x(ptr_crc_redundant, 160, 0) \ - x(ptr_crc_uncompressed_size_too_big, 161, 0) \ x(ptr_crc_nonce_mismatch, 162, 0) \ x(ptr_stripe_redundant, 163, 0) \ x(reservation_key_nr_replicas_invalid, 164, 0) \ @@ -310,7 +311,7 @@ enum bch_fsck_flags { x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \ x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \ - x(MAX, 300, 0) + x(MAX, 301, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, -- 2.51.0 From 2cd85fea49d850629404d4668e104466114598e3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 28 Nov 2024 19:30:23 -0500 Subject: [PATCH 04/16] bcachefs: Don't recurse in check_discard_freespace_key When calling check_discard_freeespace_key from the allocator, we can't repair without recursing - run it asynchronously instead. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 72 ++++++++++++++++++++++++++++++---- fs/bcachefs/alloc_background.h | 2 +- fs/bcachefs/alloc_foreground.c | 2 +- fs/bcachefs/bcachefs.h | 1 + 4 files changed, 67 insertions(+), 10 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index e8c246e5803c..b2d570453351 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1338,7 +1338,40 @@ fsck_err: return ret; } -int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen) +struct check_discard_freespace_key_async { + struct work_struct work; + struct bch_fs *c; + struct bbpos pos; +}; + +static int bch2_recheck_discard_freespace_key(struct btree_trans *trans, struct bbpos pos) +{ + struct btree_iter iter; + struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, pos.btree, pos.pos, 0); + int ret = bkey_err(k); + if (ret) + return ret; + + u8 gen; + ret = k.k->type != KEY_TYPE_set + ? bch2_check_discard_freespace_key(trans, &iter, &gen, false) + : 0; + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +static void check_discard_freespace_key_work(struct work_struct *work) +{ + struct check_discard_freespace_key_async *w = + container_of(work, struct check_discard_freespace_key_async, work); + + bch2_trans_do(w->c, bch2_recheck_discard_freespace_key(trans, w->pos)); + bch2_write_ref_put(w->c, BCH_WRITE_REF_check_discard_freespace_key); + kfree(w); +} + +int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen, + bool async_repair) { struct bch_fs *c = trans->c; enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard @@ -1351,7 +1384,8 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite u64 genbits = iter->pos.offset & (~0ULL << 56); struct btree_iter alloc_iter; - struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, BTREE_ITER_cached); + struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, + BTREE_ID_alloc, bucket, BTREE_ITER_cached); int ret = bkey_err(alloc_k); if (ret) return ret; @@ -1392,17 +1426,39 @@ fsck_err: printbuf_exit(&buf); return ret; delete: - ret = bch2_btree_bit_mod_iter(trans, iter, false) ?: - bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc) ?: - -BCH_ERR_transaction_restart_commit; - goto out; + if (!async_repair) { + ret = bch2_btree_bit_mod_iter(trans, iter, false) ?: + bch2_trans_commit(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc) ?: + -BCH_ERR_transaction_restart_commit; + goto out; + } else { + /* + * We can't repair here when called from the allocator path: the + * commit will recurse back into the allocator + */ + struct check_discard_freespace_key_async *w = + kzalloc(sizeof(*w), GFP_KERNEL); + if (!w) + goto out; + + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_check_discard_freespace_key)) { + kfree(w); + goto out; + } + + INIT_WORK(&w->work, check_discard_freespace_key_work); + w->c = c; + w->pos = BBPOS(iter->btree_id, iter->pos); + queue_work(c->write_ref_wq, &w->work); + goto out; + } } static int bch2_check_discard_freespace_key_fsck(struct btree_trans *trans, struct btree_iter *iter) { u8 gen; - int ret = bch2_check_discard_freespace_key(trans, iter, &gen); + int ret = bch2_check_discard_freespace_key(trans, iter, &gen, false); return ret < 0 ? ret : 0; } diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 8cacddd188f4..de25ba4ee94b 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -310,7 +310,7 @@ int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); -int bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *); +int bch2_check_discard_freespace_key(struct btree_trans *, struct btree_iter *, u8 *, bool); int bch2_check_alloc_info(struct bch_fs *); int bch2_check_alloc_to_lru_refs(struct bch_fs *); void bch2_dev_do_discards(struct bch_dev *); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 4d1ff7f1f302..c40a76df76b8 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -281,7 +281,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc u64 b = freespace_iter->pos.offset & ~(~0ULL << 56); u8 gen; - int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen); + int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen, true); if (ret < 0) return ERR_PTR(ret); if (ret) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index d88129503bc5..c16937e54734 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -680,6 +680,7 @@ struct btree_trans_buf { x(dio_write) \ x(discard) \ x(discard_fast) \ + x(check_discard_freespace_key) \ x(invalidate) \ x(delete_dead_snapshots) \ x(gc_gens) \ -- 2.51.0 From 2f8d5edf556a7499a188c8b38a650bed5aebcdbf Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 29 Nov 2024 14:38:27 +0800 Subject: [PATCH 05/16] bcachefs: Add missing parameter description to bch2_bucket_alloc_trans() The function bch2_bucket_alloc_trans() lacked a description for the nowait parameter in its documentation comment block. This patch adds the missing description to ensure all parameters are properly documented. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=12179 Signed-off-by: Yang Li Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index c40a76df76b8..095bfe7c53bd 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -505,6 +505,7 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca, * @watermark: how important is this allocation? * @data_type: BCH_DATA_journal, btree, user... * @cl: if not NULL, closure to be used to wait if buckets not available + * @nowait: if true, do not wait for buckets to become available * @usage: for secondarily also returning the current device usage * * Returns: an open_bucket on success, or an ERR_PTR() on failure. -- 2.51.0 From c9b9afe78c436bb6d64ac0d3fa255c9cc232f2a9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Nov 2024 21:12:47 -0500 Subject: [PATCH 06/16] bcachefs: Fix fsck.c build in userspace Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index f2174528ee5f..cc15ff135cd6 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -3206,6 +3206,8 @@ int bch2_fix_reflink_p(struct bch_fs *c) return ret; } +#ifndef NO_BCACHEFS_CHARDEV + struct fsck_thread { struct thread_with_stdio thr; struct bch_fs *c; @@ -3421,3 +3423,5 @@ err: } return ret; } + +#endif /* NO_BCACHEFS_CHARDEV */ -- 2.51.0 From f7727a6767277a9d939f9ab3720eb327973a262e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 28 Sep 2024 15:40:49 -0400 Subject: [PATCH 07/16] bcachefs: bch2_inum_to_path() Add a function for walking backpointers to find a path from a given inode number, and convert various error messages to use it. Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 1 + fs/bcachefs/error.c | 34 +++++++ fs/bcachefs/error.h | 6 ++ fs/bcachefs/fs-common.c | 81 +++++++++++++++ fs/bcachefs/fs-common.h | 2 + fs/bcachefs/fs-io-buffered.c | 10 +- fs/bcachefs/fsck.c | 12 ++- fs/bcachefs/io_misc.c | 12 ++- fs/bcachefs/io_read.c | 185 +++++++++++++++++++++++++---------- 9 files changed, 279 insertions(+), 64 deletions(-) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index a0cfc0f286f4..47387f7d6202 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -116,6 +116,7 @@ x(ENOENT, ENOENT_dirent_doesnt_match_inode) \ x(ENOENT, ENOENT_dev_not_found) \ x(ENOENT, ENOENT_dev_idx_not_found) \ + x(ENOENT, ENOENT_inode_no_backpointer) \ x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \ x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \ x(EEXIST, EEXIST_str_hash_set) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 0517782ca57a..abaa9570cd62 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -3,6 +3,7 @@ #include "btree_cache.h" #include "btree_iter.h" #include "error.h" +#include "fs-common.h" #include "journal.h" #include "recovery_passes.h" #include "super.h" @@ -515,3 +516,36 @@ void bch2_flush_fsck_errs(struct bch_fs *c) mutex_unlock(&c->fsck_error_msgs_lock); } + +int bch2_inum_err_msg_trans(struct btree_trans *trans, struct printbuf *out, subvol_inum inum) +{ + u32 restart_count = trans->restart_count; + int ret = 0; + + /* XXX: we don't yet attempt to print paths when we don't know the subvol */ + if (inum.subvol) + ret = lockrestart_do(trans, bch2_inum_to_path(trans, inum, out)); + if (!inum.subvol || ret) + prt_printf(out, "inum %llu:%llu", inum.subvol, inum.inum); + + return trans_was_restarted(trans, restart_count); +} + +int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, + subvol_inum inum, u64 offset) +{ + int ret = bch2_inum_err_msg_trans(trans, out, inum); + prt_printf(out, " offset %llu: ", offset); + return ret; +} + +void bch2_inum_err_msg(struct bch_fs *c, struct printbuf *out, subvol_inum inum) +{ + bch2_trans_run(c, bch2_inum_err_msg_trans(trans, out, inum)); +} + +void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out, + subvol_inum inum, u64 offset) +{ + bch2_trans_run(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset)); +} diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 12ca5287e20a..7acf2a27ca28 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -238,4 +238,10 @@ void bch2_io_error(struct bch_dev *, enum bch_member_error_type); _ret; \ }) +int bch2_inum_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum); +int bch2_inum_offset_err_msg_trans(struct btree_trans *, struct printbuf *, subvol_inum, u64); + +void bch2_inum_err_msg(struct bch_fs *, struct printbuf *, subvol_inum); +void bch2_inum_offset_err_msg(struct bch_fs *, struct printbuf *, subvol_inum, u64); + #endif /* _BCACHEFS_ERROR_H */ diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index 7e10a9ddcfd9..dcaa47f68f31 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -548,3 +548,84 @@ err: bch2_trans_iter_exit(trans, &src_dir_iter); return ret; } + +static inline void prt_bytes_reversed(struct printbuf *out, const void *b, unsigned n) +{ + bch2_printbuf_make_room(out, n); + + unsigned can_print = min(n, printbuf_remaining(out)); + + b += n; + + for (unsigned i = 0; i < can_print; i++) + out->buf[out->pos++] = *((char *) --b); + + printbuf_nul_terminate(out); +} + +static inline void reverse_bytes(void *b, size_t n) +{ + char *e = b + n, *s = b; + + while (s < e) { + --e; + swap(*s, *e); + s++; + } +} + +/* XXX: we don't yet attempt to print paths when we don't know the subvol */ +int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printbuf *path) +{ + unsigned orig_pos = path->pos; + int ret = 0; + + while (!(inum.subvol == BCACHEFS_ROOT_SUBVOL && + inum.inum == BCACHEFS_ROOT_INO)) { + struct bch_inode_unpacked inode; + ret = bch2_inode_find_by_inum_trans(trans, inum, &inode); + if (ret) + goto err; + + if (!inode.bi_dir && !inode.bi_dir_offset) { + ret = -BCH_ERR_ENOENT_inode_no_backpointer; + goto err; + } + + u32 snapshot; + ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); + if (ret) + goto err; + + struct btree_iter d_iter; + struct bkey_s_c_dirent d = bch2_bkey_get_iter_typed(trans, &d_iter, + BTREE_ID_dirents, SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot), + 0, dirent); + ret = bkey_err(d.s_c); + if (ret) + goto err; + + struct qstr dirent_name = bch2_dirent_get_name(d); + prt_bytes_reversed(path, dirent_name.name, dirent_name.len); + + prt_char(path, '/'); + + if (d.v->d_type == DT_SUBVOL) + inum.subvol = le32_to_cpu(d.v->d_parent_subvol); + inum.inum = d.k->p.inode; + + bch2_trans_iter_exit(trans, &d_iter); + } + + if (orig_pos == path->pos) + prt_char(path, '/'); + + ret = path->allocation_failure ? -ENOMEM : 0; + if (ret) + goto err; + + reverse_bytes(path->buf + orig_pos, path->pos - orig_pos); + return 0; +err: + return ret; +} diff --git a/fs/bcachefs/fs-common.h b/fs/bcachefs/fs-common.h index c934e807b380..2b59210bb5e8 100644 --- a/fs/bcachefs/fs-common.h +++ b/fs/bcachefs/fs-common.h @@ -42,4 +42,6 @@ int bch2_rename_trans(struct btree_trans *, bool bch2_reinherit_attrs(struct bch_inode_unpacked *, struct bch_inode_unpacked *); +int bch2_inum_to_path(struct btree_trans *, subvol_inum, struct printbuf *); + #endif /* _BCACHEFS_FS_COMMON_H */ diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index d55e215e8aa6..ff8b8df50bf3 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -231,10 +231,12 @@ err: bch2_trans_iter_exit(trans, &iter); if (ret) { - bch_err_inum_offset_ratelimited(c, - iter.pos.inode, - iter.pos.offset << 9, - "read error %i from btree lookup", ret); + struct printbuf buf = PRINTBUF; + bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9); + prt_printf(&buf, "read error %i from btree lookup", ret); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); + rbio->bio.bi_status = BLK_STS_IOERR; bio_endio(&rbio->bio); } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index cc15ff135cd6..1a5a07112779 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -212,6 +212,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, { struct bch_fs *c = trans->c; struct qstr lostfound_str = QSTR("lost+found"); + struct btree_iter lostfound_iter = { NULL }; u64 inum = 0; unsigned d_type = 0; int ret; @@ -290,11 +291,16 @@ create_lostfound: * XXX: we could have a nicer log message here if we had a nice way to * walk backpointers to print a path */ - bch_notice(c, "creating lost+found in subvol %llu snapshot %u", - root_inum.subvol, le32_to_cpu(st.root_snapshot)); + struct printbuf path = PRINTBUF; + ret = bch2_inum_to_path(trans, root_inum, &path); + if (ret) + goto err; + + bch_notice(c, "creating %s/lost+found in subvol %llu snapshot %u", + path.buf, root_inum.subvol, snapshot); + printbuf_exit(&path); u64 now = bch2_current_time(c); - struct btree_iter lostfound_iter = { NULL }; u64 cpu = raw_smp_processor_id(); bch2_inode_init_early(c, lostfound); diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 524e31e7411b..5353979117b0 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -113,11 +113,13 @@ int bch2_extent_fallocate(struct btree_trans *trans, err: if (!ret && sectors_allocated) bch2_increment_clock(c, sectors_allocated, WRITE); - if (should_print_err(ret)) - bch_err_inum_offset_ratelimited(c, - inum.inum, - iter->pos.offset << 9, - "%s(): error: %s", __func__, bch2_err_str(ret)); + if (should_print_err(ret)) { + struct printbuf buf = PRINTBUF; + bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9); + prt_printf(&buf, "fallocate error: %s", bch2_err_str(ret)); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); + } err_noprint: bch2_open_buckets_put(c, &open_buckets); bch2_disk_reservation_put(c, &disk_res); diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 4b6b6d25725b..34a3569d085a 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -322,6 +322,20 @@ nopromote: /* Read */ +static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *out, + struct bch_read_bio *rbio, struct bpos read_pos) +{ + return bch2_inum_offset_err_msg_trans(trans, out, + (subvol_inum) { rbio->subvol, read_pos.inode }, + read_pos.offset << 9); +} + +static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out, + struct bch_read_bio *rbio, struct bpos read_pos) +{ + bch2_trans_run(c, bch2_read_err_msg_trans(trans, out, rbio, read_pos)); +} + #define READ_RETRY_AVOID 1 #define READ_RETRY 2 #define READ_ERR 3 @@ -500,6 +514,29 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, int retry, } } +static void bch2_read_io_err(struct work_struct *work) +{ + struct bch_read_bio *rbio = + container_of(work, struct bch_read_bio, work); + struct bio *bio = &rbio->bio; + struct bch_fs *c = rbio->c; + struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; + struct printbuf buf = PRINTBUF; + + bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); + prt_printf(&buf, "data read error: %s", bch2_blk_status_to_str(bio->bi_status)); + + if (ca) { + bch2_io_error(ca, BCH_MEMBER_ERROR_read); + bch_err_ratelimited(ca, "%s", buf.buf); + } else { + bch_err_ratelimited(c, "%s", buf.buf); + } + + printbuf_exit(&buf); + bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); +} + static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, struct bch_read_bio *rbio) { @@ -563,6 +600,73 @@ static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) __bch2_rbio_narrow_crcs(trans, rbio)); } +static void bch2_read_csum_err(struct work_struct *work) +{ + struct bch_read_bio *rbio = + container_of(work, struct bch_read_bio, work); + struct bch_fs *c = rbio->c; + struct bio *src = &rbio->bio; + struct bch_extent_crc_unpacked crc = rbio->pick.crc; + struct nonce nonce = extent_nonce(rbio->version, crc); + struct bch_csum csum = bch2_checksum_bio(c, crc.csum_type, nonce, src); + struct printbuf buf = PRINTBUF; + + bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); + prt_str(&buf, "data "); + bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum); + + struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; + if (ca) { + bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); + bch_err_ratelimited(ca, "%s", buf.buf); + } else { + bch_err_ratelimited(c, "%s", buf.buf); + } + + bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); + printbuf_exit(&buf); +} + +static void bch2_read_decompress_err(struct work_struct *work) +{ + struct bch_read_bio *rbio = + container_of(work, struct bch_read_bio, work); + struct bch_fs *c = rbio->c; + struct printbuf buf = PRINTBUF; + + bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); + prt_str(&buf, "decompression error"); + + struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; + if (ca) + bch_err_ratelimited(ca, "%s", buf.buf); + else + bch_err_ratelimited(c, "%s", buf.buf); + + bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); + printbuf_exit(&buf); +} + +static void bch2_read_decrypt_err(struct work_struct *work) +{ + struct bch_read_bio *rbio = + container_of(work, struct bch_read_bio, work); + struct bch_fs *c = rbio->c; + struct printbuf buf = PRINTBUF; + + bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); + prt_str(&buf, "decrypt error"); + + struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; + if (ca) + bch_err_ratelimited(ca, "%s", buf.buf); + else + bch_err_ratelimited(c, "%s", buf.buf); + + bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); + printbuf_exit(&buf); +} + /* Inner part that may run in process context */ static void __bch2_read_endio(struct work_struct *work) { @@ -669,33 +773,13 @@ csum_err: goto out; } - struct printbuf buf = PRINTBUF; - buf.atomic++; - prt_str(&buf, "data "); - bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum); - - struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL; - if (ca) { - bch_err_inum_offset_ratelimited(ca, - rbio->read_pos.inode, - rbio->read_pos.offset << 9, - "data %s", buf.buf); - bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); - } - printbuf_exit(&buf); - bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); + bch2_rbio_punt(rbio, bch2_read_csum_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); goto out; decompression_err: - bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, - rbio->read_pos.offset << 9, - "decompression error"); - bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); + bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); goto out; decrypt_err: - bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode, - rbio->read_pos.offset << 9, - "decrypt error"); - bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR); + bch2_rbio_punt(rbio, bch2_read_decrypt_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); goto out; } @@ -716,16 +800,8 @@ static void bch2_read_endio(struct bio *bio) if (!rbio->split) rbio->bio.bi_end_io = rbio->end_io; - if (bio->bi_status) { - if (ca) { - bch_err_inum_offset_ratelimited(ca, - rbio->read_pos.inode, - rbio->read_pos.offset, - "data read error: %s", - bch2_blk_status_to_str(bio->bi_status)); - bch2_io_error(ca, BCH_MEMBER_ERROR_read); - } - bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status); + if (unlikely(bio->bi_status)) { + bch2_rbio_punt(rbio, bch2_read_io_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq); return; } @@ -832,25 +908,22 @@ retry_pick: if (unlikely(pick_ret < 0)) { struct printbuf buf = PRINTBUF; + bch2_read_err_msg_trans(trans, &buf, orig, read_pos); + prt_printf(&buf, "no device to read from: %s\n ", bch2_err_str(pick_ret)); bch2_bkey_val_to_text(&buf, c, k); - bch_err_inum_offset_ratelimited(c, - read_pos.inode, read_pos.offset << 9, - "no device to read from: %s\n %s", - bch2_err_str(pick_ret), - buf.buf); + bch_err_ratelimited(c, "%s", buf.buf); printbuf_exit(&buf); goto err; } if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20) { struct printbuf buf = PRINTBUF; + bch2_read_err_msg_trans(trans, &buf, orig, read_pos); + prt_printf(&buf, "attempting to read encrypted data without encryption key\n "); bch2_bkey_val_to_text(&buf, c, k); - bch_err_inum_offset_ratelimited(c, - read_pos.inode, read_pos.offset << 9, - "attempting to read encrypted data without encryption key\n %s", - buf.buf); + bch_err_ratelimited(c, "%s", buf.buf); printbuf_exit(&buf); goto err; } @@ -1036,11 +1109,15 @@ get_bio: } if (!rbio->pick.idx) { - if (!rbio->have_ioref) { - bch_err_inum_offset_ratelimited(c, - read_pos.inode, - read_pos.offset << 9, - "no device to read from"); + if (unlikely(!rbio->have_ioref)) { + struct printbuf buf = PRINTBUF; + bch2_read_err_msg_trans(trans, &buf, rbio, read_pos); + prt_printf(&buf, "no device to read from:\n "); + bch2_bkey_val_to_text(&buf, c, k); + + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); + bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR); goto out; } @@ -1202,16 +1279,20 @@ err: } bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); - bch2_bkey_buf_exit(&sk, c); if (ret) { - bch_err_inum_offset_ratelimited(c, inum.inum, - bvec_iter.bi_sector << 9, - "read error %i from btree lookup", ret); + struct printbuf buf = PRINTBUF; + bch2_inum_offset_err_msg_trans(trans, &buf, inum, bvec_iter.bi_sector << 9); + prt_printf(&buf, "read error %i from btree lookup", ret); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); + rbio->bio.bi_status = BLK_STS_IOERR; bch2_rbio_done(rbio); } + + bch2_trans_put(trans); + bch2_bkey_buf_exit(&sk, c); } void bch2_fs_io_read_exit(struct bch_fs *c) -- 2.51.0 From 097cc9d0d60a3671fd1adcda9b7c0324908e3fd7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 13 Nov 2024 23:08:57 -0500 Subject: [PATCH 08/16] bcachefs: Convert write path errors to inum_to_path() Signed-off-by: Kent Overstreet --- fs/bcachefs/io_write.c | 91 +++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 36 deletions(-) diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index f97ebb30f6c0..bae045e76055 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -396,6 +396,21 @@ static int bch2_write_index_default(struct bch_write_op *op) /* Writes */ +static void __bch2_write_op_error(struct printbuf *out, struct bch_write_op *op, + u64 offset) +{ + bch2_inum_offset_err_msg(op->c, out, + (subvol_inum) { op->subvol, op->pos.inode, }, + offset << 9); + prt_printf(out, "write error%s: ", + op->flags & BCH_WRITE_MOVE ? "(internal move)" : ""); +} + +static void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op) +{ + __bch2_write_op_error(out, op, op->pos.offset); +} + void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, enum bch_data_type type, const struct bkey_i *k, @@ -532,14 +547,14 @@ static void __bch2_write_index(struct bch_write_op *op) op->written += sectors_start - keylist_sectors(keys); - if (ret && !bch2_err_matches(ret, EROFS)) { + if (unlikely(ret && !bch2_err_matches(ret, EROFS))) { struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); - bch_err_inum_offset_ratelimited(c, - insert->k.p.inode, insert->k.p.offset << 9, - "%s write error while doing btree update: %s", - op->flags & BCH_WRITE_MOVE ? "move" : "user", - bch2_err_str(ret)); + struct printbuf buf = PRINTBUF; + __bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k)); + prt_printf(&buf, "btree update error: %s", bch2_err_str(ret)); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); } if (ret) @@ -1081,11 +1096,14 @@ do_write: *_dst = dst; return more; csum_err: - bch_err_inum_offset_ratelimited(c, - op->pos.inode, - op->pos.offset << 9, - "%s write error: error verifying existing checksum while rewriting existing data (memory corruption?)", - op->flags & BCH_WRITE_MOVE ? "move" : "user"); + { + struct printbuf buf = PRINTBUF; + bch2_write_op_error(&buf, op); + prt_printf(&buf, "error verifying existing checksum while rewriting existing data (memory corruption?)"); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); + } + ret = -EIO; err: if (to_wbio(dst)->bounce) @@ -1176,11 +1194,11 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) if (ret && !bch2_err_matches(ret, EROFS)) { struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); - bch_err_inum_offset_ratelimited(c, - insert->k.p.inode, insert->k.p.offset << 9, - "%s write error while doing btree update: %s", - op->flags & BCH_WRITE_MOVE ? "move" : "user", - bch2_err_str(ret)); + struct printbuf buf = PRINTBUF; + __bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k)); + prt_printf(&buf, "btree update error: %s", bch2_err_str(ret)); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); } if (ret) { @@ -1340,17 +1358,19 @@ err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; + bch2_trans_put(trans); + darray_exit(&buckets); + if (ret) { - bch_err_inum_offset_ratelimited(c, - op->pos.inode, op->pos.offset << 9, - "%s: btree lookup error %s", __func__, bch2_err_str(ret)); + struct printbuf buf = PRINTBUF; + bch2_write_op_error(&buf, op); + prt_printf(&buf, "%s(): btree lookup error: %s", __func__, bch2_err_str(ret)); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); op->error = ret; op->flags |= BCH_WRITE_SUBMITTED; } - bch2_trans_put(trans); - darray_exit(&buckets); - /* fallback to cow write path? */ if (!(op->flags & BCH_WRITE_SUBMITTED)) { closure_sync(&op->cl); @@ -1463,14 +1483,14 @@ err: if (ret <= 0) { op->flags |= BCH_WRITE_SUBMITTED; - if (ret < 0) { - if (!(op->flags & BCH_WRITE_ALLOC_NOWAIT)) - bch_err_inum_offset_ratelimited(c, - op->pos.inode, - op->pos.offset << 9, - "%s(): %s error: %s", __func__, - op->flags & BCH_WRITE_MOVE ? "move" : "user", - bch2_err_str(ret)); + if (unlikely(ret < 0)) { + if (!(op->flags & BCH_WRITE_ALLOC_NOWAIT)) { + struct printbuf buf = PRINTBUF; + bch2_write_op_error(&buf, op); + prt_printf(&buf, "%s(): %s", __func__, bch2_err_str(ret)); + bch_err_ratelimited(c, "%s", buf.buf); + printbuf_exit(&buf); + } op->error = ret; break; } @@ -1596,12 +1616,11 @@ CLOSURE_CALLBACK(bch2_write) bch2_keylist_init(&op->insert_keys, op->inline_keys); wbio_init(bio)->put_bio = false; - if (bio->bi_iter.bi_size & (c->opts.block_size - 1)) { - bch_err_inum_offset_ratelimited(c, - op->pos.inode, - op->pos.offset << 9, - "%s write error: misaligned write", - op->flags & BCH_WRITE_MOVE ? "move" : "user"); + if (unlikely(bio->bi_iter.bi_size & (c->opts.block_size - 1))) { + struct printbuf buf = PRINTBUF; + bch2_write_op_error(&buf, op); + prt_printf(&buf, "misaligned write"); + printbuf_exit(&buf); op->error = -EIO; goto err; } -- 2.51.0 From 7807b5b07de1d009275e00b7fa51db31071d57a4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Nov 2024 19:13:54 -0500 Subject: [PATCH 09/16] bcachefs: list_pop_entry() Signed-off-by: Kent Overstreet --- fs/bcachefs/ec.c | 6 ++---- fs/bcachefs/io_write.c | 4 +--- fs/bcachefs/util.h | 13 +++++++++++++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index eaca4c39d703..df9c0e453391 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -2456,11 +2456,9 @@ void bch2_fs_ec_exit(struct bch_fs *c) while (1) { mutex_lock(&c->ec_stripe_head_lock); - h = list_first_entry_or_null(&c->ec_stripe_head_list, - struct ec_stripe_head, list); - if (h) - list_del(&h->list); + h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list); mutex_unlock(&c->ec_stripe_head_lock); + if (!h) break; diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index bae045e76055..3e71860f66b9 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -637,9 +637,7 @@ void bch2_write_point_do_index_updates(struct work_struct *work) while (1) { spin_lock_irq(&wp->writes_lock); - op = list_first_entry_or_null(&wp->writes, struct bch_write_op, wp_list); - if (op) - list_del(&op->wp_list); + op = list_pop_entry(&wp->writes, struct bch_write_op, wp_list); wp_update_state(wp, op != NULL); spin_unlock_irq(&wp->writes_lock); diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index fb02c1c36004..5e4820c8fa44 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -317,6 +317,19 @@ do { \ _ptr ? container_of(_ptr, type, member) : NULL; \ }) +static inline struct list_head *list_pop(struct list_head *head) +{ + if (list_empty(head)) + return NULL; + + struct list_head *ret = head->next; + list_del_init(ret); + return ret; +} + +#define list_pop_entry(head, type, member) \ + container_of_or_null(list_pop(head), type, member) + /* Does linear interpolation between powers of two */ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits) { -- 2.51.0 From 1302eeb7c5db1b9ac9db9d29c39e6a46bda718a6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Nov 2024 18:20:42 -0500 Subject: [PATCH 10/16] bcachefs: bkey_fsck_err now respects errors_silent Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index abaa9570cd62..9e34374960f3 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -476,11 +476,16 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, return -BCH_ERR_fsck_delete_bkey; unsigned fsck_flags = 0; - if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) + if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) { + if (test_bit(err, c->sb.errors_silent)) + return -BCH_ERR_fsck_delete_bkey; + fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX; + } + if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) + fsck_flags |= fsck_flags_extra[err]; struct printbuf buf = PRINTBUF; - va_list args; prt_printf(&buf, "invalid bkey in %s btree=", bch2_bkey_validate_contexts[from.from]); @@ -489,9 +494,12 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, bch2_bkey_val_to_text(&buf, c, k); prt_str(&buf, "\n "); + + va_list args; va_start(args, fmt); prt_vprintf(&buf, fmt, args); va_end(args); + prt_str(&buf, ": delete?"); int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s", buf.buf); -- 2.51.0 From b29769c72d0b6f842ae7a1e10e9cfb9a8fcc87fa Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Nov 2024 18:17:00 -0500 Subject: [PATCH 11/16] bcachefs: If we did repair on a btree node, make sure we rewrite it Ensure that "invalid bkey" repair gets persisted, so that it doesn't repeatedly spam the logs. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index eedcb2445b99..9df9fc1c5e2b 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -997,6 +997,7 @@ drop_this_key: got_good_key: le16_add_cpu(&i->u64s, -next_good_key); memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); + set_btree_node_need_rewrite(b); } fsck_err: printbuf_exit(&buf); @@ -1259,6 +1260,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); set_btree_bset_end(b, b->set); + set_btree_node_need_rewrite(b); continue; } if (ret) @@ -1372,15 +1374,18 @@ start: rb->start_time); bio_put(&rb->bio); - if (saw_error && + if ((saw_error || + btree_node_need_rewrite(b)) && !btree_node_read_error(b) && c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { - printbuf_reset(&buf); - bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); - prt_str(&buf, " "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s", - __func__, buf.buf); + if (saw_error) { + printbuf_reset(&buf); + bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); + prt_str(&buf, " "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s", + __func__, buf.buf); + } bch2_btree_node_rewrite_async(c, b); } -- 2.51.0 From c1f618f4f7cc7b8360e7362d3d18f3e244ded364 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Nov 2024 18:53:26 -0500 Subject: [PATCH 12/16] bcachefs: bch2_async_btree_node_rewrites_flush() Add a method to flush btree node rewrites at the end of recovery, to ensure that corrected errors are persisted. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 7 +- fs/bcachefs/btree_update_interior.c | 153 ++++++++++++++++------------ fs/bcachefs/btree_update_interior.h | 1 + fs/bcachefs/recovery.c | 2 + 4 files changed, 97 insertions(+), 66 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index c16937e54734..b12c9c78beec 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -829,9 +829,10 @@ struct bch_fs { struct work_struct btree_interior_update_work; struct workqueue_struct *btree_node_rewrite_worker; - - struct list_head pending_node_rewrites; - struct mutex pending_node_rewrites_lock; + struct list_head btree_node_rewrites; + struct list_head btree_node_rewrites_pending; + spinlock_t btree_node_rewrites_lock; + struct closure_waitlist btree_node_rewrites_wait; /* btree_io.c: */ spinlock_t btree_write_error_lock; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 5eabd532e388..f2a1d5d3d8d5 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2206,42 +2206,50 @@ struct async_btree_rewrite { struct list_head list; enum btree_id btree_id; unsigned level; - struct bpos pos; - __le64 seq; + struct bkey_buf key; }; static int async_btree_node_rewrite_trans(struct btree_trans *trans, struct async_btree_rewrite *a) { - struct bch_fs *c = trans->c; struct btree_iter iter; - struct btree *b; - int ret; - - bch2_trans_node_iter_init(trans, &iter, a->btree_id, a->pos, + bch2_trans_node_iter_init(trans, &iter, + a->btree_id, a->key.k->k.p, BTREE_MAX_DEPTH, a->level, 0); - b = bch2_btree_iter_peek_node(&iter); - ret = PTR_ERR_OR_ZERO(b); + struct btree *b = bch2_btree_iter_peek_node(&iter); + int ret = PTR_ERR_OR_ZERO(b); if (ret) goto out; - if (!b || b->data->keys.seq != a->seq) { + bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(a->key.k); + ret = found + ? bch2_btree_node_rewrite(trans, &iter, b, 0) + : -ENOENT; + +#if 0 + /* Tracepoint... */ + if (!ret || ret == -ENOENT) { + struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; - if (b) - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - else - prt_str(&buf, "(null"); - bch_info(c, "%s: node to rewrite not found:, searching for seq %llu, got\n%s", - __func__, a->seq, buf.buf); + if (!ret) { + prt_printf(&buf, "rewrite node:\n "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k)); + } else { + prt_printf(&buf, "node to rewrite not found:\n want: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k)); + prt_printf(&buf, "\n got: "); + if (b) + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + else + prt_str(&buf, "(null)"); + } + bch_info(c, "%s", buf.buf); printbuf_exit(&buf); - goto out; } - - ret = bch2_btree_node_rewrite(trans, &iter, b, 0); +#endif out: bch2_trans_iter_exit(trans, &iter); - return ret; } @@ -2252,81 +2260,96 @@ static void async_btree_node_rewrite_work(struct work_struct *work) struct bch_fs *c = a->c; int ret = bch2_trans_do(c, async_btree_node_rewrite_trans(trans, a)); - bch_err_fn_ratelimited(c, ret); + if (ret != -ENOENT) + bch_err_fn_ratelimited(c, ret); + + spin_lock(&c->btree_node_rewrites_lock); + list_del(&a->list); + spin_unlock(&c->btree_node_rewrites_lock); + + closure_wake_up(&c->btree_node_rewrites_wait); + + bch2_bkey_buf_exit(&a->key, c); bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite); kfree(a); } void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) { - struct async_btree_rewrite *a; - int ret; - - a = kmalloc(sizeof(*a), GFP_NOFS); - if (!a) { - bch_err(c, "%s: error allocating memory", __func__); + struct async_btree_rewrite *a = kmalloc(sizeof(*a), GFP_NOFS); + if (!a) return; - } a->c = c; a->btree_id = b->c.btree_id; a->level = b->c.level; - a->pos = b->key.k.p; - a->seq = b->data->keys.seq; INIT_WORK(&a->work, async_btree_node_rewrite_work); - if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { - mutex_lock(&c->pending_node_rewrites_lock); - list_add(&a->list, &c->pending_node_rewrites); - mutex_unlock(&c->pending_node_rewrites_lock); - return; - } + bch2_bkey_buf_init(&a->key); + bch2_bkey_buf_copy(&a->key, c, &b->key); - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { - if (test_bit(BCH_FS_started, &c->flags)) { - bch_err(c, "%s: error getting c->writes ref", __func__); - kfree(a); - return; - } + bool now = false, pending = false; - ret = bch2_fs_read_write_early(c); - bch_err_msg(c, ret, "going read-write"); - if (ret) { - kfree(a); - return; - } + spin_lock(&c->btree_node_rewrites_lock); + if (bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { + list_add(&a->list, &c->btree_node_rewrites); + now = true; + } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) { + list_add(&a->list, &c->btree_node_rewrites_pending); + pending = true; + } + spin_unlock(&c->btree_node_rewrites_lock); - bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite); + if (now) { + queue_work(c->btree_node_rewrite_worker, &a->work); + } else if (pending) { + /* bch2_do_pending_node_rewrites will execute */ + } else { + bch2_bkey_buf_exit(&a->key, c); + kfree(a); } +} - queue_work(c->btree_node_rewrite_worker, &a->work); +void bch2_async_btree_node_rewrites_flush(struct bch_fs *c) +{ + closure_wait_event(&c->btree_node_rewrites_wait, + list_empty(&c->btree_node_rewrites)); } void bch2_do_pending_node_rewrites(struct bch_fs *c) { - struct async_btree_rewrite *a, *n; - - mutex_lock(&c->pending_node_rewrites_lock); - list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) { - list_del(&a->list); + while (1) { + spin_lock(&c->btree_node_rewrites_lock); + struct async_btree_rewrite *a = + list_pop_entry(&c->btree_node_rewrites_pending, + struct async_btree_rewrite, list); + if (a) + list_add(&a->list, &c->btree_node_rewrites); + spin_unlock(&c->btree_node_rewrites_lock); + + if (!a) + break; bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite); queue_work(c->btree_node_rewrite_worker, &a->work); } - mutex_unlock(&c->pending_node_rewrites_lock); } void bch2_free_pending_node_rewrites(struct bch_fs *c) { - struct async_btree_rewrite *a, *n; + while (1) { + spin_lock(&c->btree_node_rewrites_lock); + struct async_btree_rewrite *a = + list_pop_entry(&c->btree_node_rewrites_pending, + struct async_btree_rewrite, list); + spin_unlock(&c->btree_node_rewrites_lock); - mutex_lock(&c->pending_node_rewrites_lock); - list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) { - list_del(&a->list); + if (!a) + break; + bch2_bkey_buf_exit(&a->key, c); kfree(a); } - mutex_unlock(&c->pending_node_rewrites_lock); } static int __bch2_btree_node_update_key(struct btree_trans *trans, @@ -2683,6 +2706,9 @@ void bch2_btree_reserve_cache_to_text(struct printbuf *out, struct bch_fs *c) void bch2_fs_btree_interior_update_exit(struct bch_fs *c) { + WARN_ON(!list_empty(&c->btree_node_rewrites)); + WARN_ON(!list_empty(&c->btree_node_rewrites_pending)); + if (c->btree_node_rewrite_worker) destroy_workqueue(c->btree_node_rewrite_worker); if (c->btree_interior_update_worker) @@ -2698,8 +2724,9 @@ void bch2_fs_btree_interior_update_init_early(struct bch_fs *c) mutex_init(&c->btree_interior_update_lock); INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work); - INIT_LIST_HEAD(&c->pending_node_rewrites); - mutex_init(&c->pending_node_rewrites_lock); + INIT_LIST_HEAD(&c->btree_node_rewrites); + INIT_LIST_HEAD(&c->btree_node_rewrites_pending); + spin_lock_init(&c->btree_node_rewrites_lock); } int bch2_fs_btree_interior_update_init(struct bch_fs *c) diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 1c6cf3e2e6a9..7930ffea3075 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -334,6 +334,7 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *, struct jset_entry *); struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *, struct jset_entry *, unsigned long); +void bch2_async_btree_node_rewrites_flush(struct bch_fs *); void bch2_do_pending_node_rewrites(struct bch_fs *); void bch2_free_pending_node_rewrites(struct bch_fs *); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index c50dede64785..a342744fd275 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -931,6 +931,8 @@ use_clean: /* in case we don't run journal replay, i.e. norecovery mode */ set_bit(BCH_FS_accounting_replay_done, &c->flags); + bch2_async_btree_node_rewrites_flush(c); + /* fsync if we fixed errors */ if (test_bit(BCH_FS_errors_fixed, &c->flags)) { bch2_journal_flush_all_pins(&c->journal); -- 2.51.0 From 511ddcdb2d5e0bdb73c7968e4215268f4572a984 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 30 Nov 2024 23:27:45 -0500 Subject: [PATCH 13/16] bcachefs: fix bch2_journal_key_insert_take() seq Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_journal_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index de3db161d6ab..6d25e3f85ce8 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -259,7 +259,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, * Ensure these keys are done last by journal replay, to unblock * journal reclaim: */ - .journal_seq = U32_MAX, + .journal_seq = U64_MAX, }; struct journal_keys *keys = &c->journal_keys; size_t idx = bch2_journal_key_search(keys, id, level, k->k.p); -- 2.51.0 From 5cdaec193a85e32235e7dccb95c085acc50b8dbd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 1 Dec 2024 16:39:54 -0500 Subject: [PATCH 14/16] bcachefs: Improve "unable to allocate journal write" message Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 1627f3e16517..bb69d80886b5 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -2036,8 +2036,9 @@ CLOSURE_CALLBACK(bch2_journal_write) struct printbuf buf = PRINTBUF; buf.atomic++; - prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu: %s"), + prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"), le64_to_cpu(w->data->seq), + vstruct_sectors(w->data, c->block_bits), bch2_err_str(ret)); __bch2_journal_debug_to_text(&buf, j); spin_unlock(&j->lock); -- 2.51.0 From 9c22dd02ae8b80bf662ab409091731cfb9a09348 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 2 Dec 2024 23:36:38 -0500 Subject: [PATCH 15/16] bcachefs: Fix allocating too big journal entry The "journal space available" calculations didn't take into account mismatched bucket sizes; we need to take the minimum space available out of our devices. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_reclaim.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 1aabbbe328d9..b7936ad3ae7f 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -140,6 +140,7 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne struct bch_fs *c = container_of(j, struct bch_fs, journal); unsigned pos, nr_devs = 0; struct journal_space space, dev_space[BCH_SB_MEMBERS_MAX]; + unsigned min_bucket_size = U32_MAX; BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space)); @@ -148,6 +149,8 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne if (!ca->journal.nr) continue; + min_bucket_size = min(min_bucket_size, ca->mi.bucket_size); + space = journal_dev_space_available(j, ca, from); if (!space.next_entry) continue; @@ -167,7 +170,9 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne * We sorted largest to smallest, and we want the smallest out of the * @nr_devs_want largest devices: */ - return dev_space[nr_devs_want - 1]; + space = dev_space[nr_devs_want - 1]; + space.next_entry = min(space.next_entry, min_bucket_size); + return space; } void bch2_journal_space_available(struct journal *j) -- 2.51.0 From d36b3e74b65f4ec68a38bdb717d94b32a81a355f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Dec 2024 17:40:10 +0100 Subject: [PATCH 16/16] bcachefs: BCACHEFS_PATH_TRACEPOINTS should depend on TRACING When tracing is disabled, there is no point in asking the user about enabling extra btree_path tracepoints in bcachefs. Fixes: 32ed4a620c5405be ("bcachefs: Btree path tracepoints") Signed-off-by: Geert Uytterhoeven Signed-off-by: Kent Overstreet --- fs/bcachefs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index ab6c95b895b3..464b927e4fff 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -90,7 +90,7 @@ config BCACHEFS_SIX_OPTIMISTIC_SPIN config BCACHEFS_PATH_TRACEPOINTS bool "Extra btree_path tracepoints" - depends on BCACHEFS_FS + depends on BCACHEFS_FS && TRACING help Enable extra tracepoints for debugging btree_path operations; we don't normally want these enabled because they happen at very high rates. -- 2.51.0