From a55e2d78eac840cf156445492403ea3ac0a1b1eb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 22 Sep 2024 01:11:36 -0400 Subject: [PATCH 01/16] bcachefs: rcu_pending: don't invoke __call_rcu() under lock In userspace we don't (yet) have an SRCU implementation, so call_srcu() recurses. But we don't want to be invoking it under the lock anyways. Signed-off-by: Kent Overstreet --- fs/bcachefs/rcu_pending.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/rcu_pending.c b/fs/bcachefs/rcu_pending.c index 40a20192eee8..67522aa344a7 100644 --- a/fs/bcachefs/rcu_pending.c +++ b/fs/bcachefs/rcu_pending.c @@ -478,7 +478,9 @@ start_gp: */ if (!p->cb_armed) { p->cb_armed = true; + spin_unlock_irqrestore(&p->lock, flags); __call_rcu(pending->srcu, &p->cb, rcu_pending_rcu_cb); + goto free_node; } else { __start_poll_synchronize_rcu(pending->srcu); } -- 2.51.0 From 179cdecf225dfe2ad88ca1fbcf776d7e6fc10c26 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 28 Sep 2024 23:10:48 -0400 Subject: [PATCH 02/16] bcachefs: bch_verbose_ratelimited ratelimit "deleting unlinked inode" messages Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 8 ++++++++ fs/bcachefs/inode.c | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index e94a83b8113e..7db81e182c3c 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -293,6 +293,8 @@ do { \ #define bch_info(c, fmt, ...) \ bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) +#define bch_info_ratelimited(c, fmt, ...) \ + bch2_print_ratelimited(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__) #define bch_notice(c, fmt, ...) \ bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__) #define bch_warn(c, fmt, ...) \ @@ -352,6 +354,12 @@ do { \ bch_info(c, fmt, ##__VA_ARGS__); \ } while (0) +#define bch_verbose_ratelimited(c, fmt, ...) 
\ +do { \ + if ((c)->opts.verbose) \ + bch_info_ratelimited(c, fmt, ##__VA_ARGS__); \ +} while (0) + #define pr_verbose_init(opts, fmt, ...) \ do { \ if (opt_get(opts, verbose)) \ diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 039cb7a22244..43653cf050e9 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -1380,7 +1380,8 @@ again: NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ ret = may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass); if (ret > 0) { - bch_verbose(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); + bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u", + k.k->p.offset, k.k->p.snapshot); ret = bch2_inode_rm_snapshot(trans, k.k->p.offset, k.k->p.snapshot); /* -- 2.51.0 From c07beca44ff181bad5928abccff6358ca9d9590b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Oct 2024 16:59:08 -0400 Subject: [PATCH 03/16] bcachefs: Pull disk accounting hooks out of trans_commit.c Also, fix a minor bug in the revert path, where we weren't checking the journal entry type correctly. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 35 +++++------------------------ fs/bcachefs/disk_accounting.h | 38 ++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 29 deletions(-) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 9bf471fa4361..3d951846a1be 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -609,14 +609,6 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) return 0; } -static struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset) -{ - return (struct bversion) { - .hi = res->seq >> 32, - .lo = (res->seq << 32) | (res->offset + offset), - }; -} - static inline int bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, struct btree_insert_entry **stopped_at, @@ -701,25 +693,14 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, struct jset_entry *entry = trans->journal_entries; percpu_down_read(&c->mark_lock); - for (entry = trans->journal_entries; entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); entry = vstruct_next(entry)) if (entry->type == BCH_JSET_ENTRY_write_buffer_keys && entry->start->k.type == KEY_TYPE_accounting) { - BUG_ON(!trans->journal_res.ref); - - struct bkey_i_accounting *a = bkey_i_to_accounting(entry->start); - - a->k.bversion = journal_pos_to_bversion(&trans->journal_res, - (u64 *) entry - (u64 *) trans->journal_entries); - BUG_ON(bversion_zero(a->k.bversion)); - - if (likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply))) { - ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal); - if (ret) - goto revert_fs_usage; - } + ret = bch2_accounting_trans_commit_hook(trans, bkey_i_to_accounting(entry->start), flags); + if (ret) + goto revert_fs_usage; } percpu_up_read(&c->mark_lock); @@ -833,13 +814,9 @@ revert_fs_usage: entry2 != entry; entry2 = 
vstruct_next(entry2)) if (entry2->type == BCH_JSET_ENTRY_write_buffer_keys && - entry2->start->k.type == KEY_TYPE_accounting) { - struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); - - bch2_accounting_neg(a); - bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal); - bch2_accounting_neg(a); - } + entry2->start->k.type == KEY_TYPE_accounting) + bch2_accounting_trans_commit_revert(trans, + bkey_i_to_accounting(entry2->start), flags); percpu_up_read(&c->mark_lock); return ret; } diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 4ea6c8a092bc..6639535dc91c 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -2,6 +2,7 @@ #ifndef _BCACHEFS_DISK_ACCOUNTING_H #define _BCACHEFS_DISK_ACCOUNTING_H +#include "btree_update.h" #include "eytzinger.h" #include "sb-members.h" @@ -204,6 +205,43 @@ static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p, bch2_accounting_mem_read_counters(acc, idx, v, nr, false); } +static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset) +{ + EBUG_ON(!res->ref); + + return (struct bversion) { + .hi = res->seq >> 32, + .lo = (res->seq << 32) | (res->offset + offset), + }; +} + +static inline int bch2_accounting_trans_commit_hook(struct btree_trans *trans, + struct bkey_i_accounting *a, + unsigned commit_flags) +{ + a->k.bversion = journal_pos_to_bversion(&trans->journal_res, + (u64 *) a - (u64 *) trans->journal_entries); + + EBUG_ON(bversion_zero(a->k.bversion)); + + return likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply)) + ? 
bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), BCH_ACCOUNTING_normal) + : 0; +} + +static inline void bch2_accounting_trans_commit_revert(struct btree_trans *trans, + struct bkey_i_accounting *a_i, + unsigned commit_flags) +{ + if (likely(!(commit_flags & BCH_TRANS_COMMIT_skip_accounting_apply))) { + struct bkey_s_accounting a = accounting_i_to_s(a_i); + + bch2_accounting_neg(a); + bch2_accounting_mem_mod_locked(trans, a.c, BCH_ACCOUNTING_normal); + bch2_accounting_neg(a); + } +} + int bch2_fs_replicas_usage_read(struct bch_fs *, darray_char *); int bch2_fs_accounting_read(struct bch_fs *, darray_char *, unsigned); void bch2_fs_accounting_to_text(struct printbuf *, struct bch_fs *); -- 2.51.0 From bf4e42d158baff9b67ef8f7bd3caa0801bee6374 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Fri, 27 Sep 2024 22:26:53 +0800 Subject: [PATCH 04/16] bcachefs: Delete dead code lock_fail_root_changed has not been used since commit 0d7009d7ca99 ("bcachefs: Delete old deadlock avoidance code") Remove it. 
Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 2 -- fs/bcachefs/errcode.h | 1 - 2 files changed, 3 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index eef9b89c561d..01152fd5ac57 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -748,8 +748,6 @@ static inline int btree_path_lock_root(struct btree_trans *trans, ret = btree_node_lock(trans, path, &b->c, path->level, lock_type, trace_ip); if (unlikely(ret)) { - if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed)) - continue; if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ret; BUG(); diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 9c4fe5cdbfb7..e3b0ec7a0f73 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -164,7 +164,6 @@ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_res) \ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_reclaim) \ x(0, backpointer_to_overwritten_btree_node) \ - x(0, lock_fail_root_changed) \ x(0, journal_reclaim_would_deadlock) \ x(EINVAL, fsck) \ x(BCH_ERR_fsck, fsck_fix) \ -- 2.51.0 From fe818d2039e74fac314e4032b51f057a7f313ad0 Mon Sep 17 00:00:00 2001 From: Thomas Bertschinger Date: Fri, 13 Sep 2024 18:11:22 -0600 Subject: [PATCH 05/16] bcachefs: move bch2_xattr_handlers to .rodata A series posted previously moved all of the `struct xattr_handler` tables to .rodata for each filesystem [1]. However, this appears to have been done shortly before bcachefs was merged, so bcachefs was missed at that time. 
Link: https://lkml.kernel.org/r/20230930050033.41174-1-wedsonaf@gmail.com [1] Cc: Wedson Almeida Filho Signed-off-by: Thomas Bertschinger Signed-off-by: Kent Overstreet --- fs/bcachefs/xattr.c | 2 +- fs/bcachefs/xattr.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 952aca400faf..bf3c6bb50495 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -609,7 +609,7 @@ static const struct xattr_handler bch_xattr_bcachefs_effective_handler = { #endif /* NO_BCACHEFS_FS */ -const struct xattr_handler *bch2_xattr_handlers[] = { +const struct xattr_handler * const bch2_xattr_handlers[] = { &bch_xattr_user_handler, &bch_xattr_trusted_handler, &bch_xattr_security_handler, diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h index c188a5ad64ce..2c96de051f3e 100644 --- a/fs/bcachefs/xattr.h +++ b/fs/bcachefs/xattr.h @@ -44,6 +44,6 @@ int bch2_xattr_set(struct btree_trans *, subvol_inum, ssize_t bch2_xattr_list(struct dentry *, char *, size_t); -extern const struct xattr_handler *bch2_xattr_handlers[]; +extern const struct xattr_handler * const bch2_xattr_handlers[]; #endif /* _BCACHEFS_XATTR_H */ -- 2.51.0 From d6cf895847f60af83bad62b15f1da14abd331fae Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 24 Sep 2024 05:08:39 -0400 Subject: [PATCH 06/16] bcachefs: Remove unnecessary peek_slot() hash_lookup() used to return an errorcode, and a peek_slot() call was required to get the key it looked up. But we're adding fault injection for transaction restarts, so fix this old unconverted code. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 6b2ddbabe3e7..c96025b8b65d 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -170,7 +170,7 @@ static int lookup_dirent_in_snapshot(struct btree_trans *trans, if (ret) return ret; - struct bkey_s_c_dirent d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter)); + struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); *target = le64_to_cpu(d.v->d_inum); *type = d.v->d_type; bch2_trans_iter_exit(trans, &iter); -- 2.51.0 From 4e1c6ac05a2348be6b74db63b406c10dd553f1ae Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 22:11:41 -0400 Subject: [PATCH 07/16] bcachefs: kill btree_trans_restart_nounlock() Redundant, the normal btree_trans_restart() doesn't unlock. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.h | 7 +++---- fs/bcachefs/btree_trans_commit.c | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 0bda054f80d7..24406f723283 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -341,21 +341,20 @@ static inline void bch2_trans_verify_not_unlocked(struct btree_trans *trans) } __always_inline -static int btree_trans_restart_nounlock(struct btree_trans *trans, int err) +static int btree_trans_restart_ip(struct btree_trans *trans, int err, unsigned long ip) { BUG_ON(err <= 0); BUG_ON(!bch2_err_matches(-err, BCH_ERR_transaction_restart)); trans->restarted = err; - trans->last_restarted_ip = _THIS_IP_; + trans->last_restarted_ip = ip; return -err; } __always_inline static int btree_trans_restart(struct btree_trans *trans, int err) { - btree_trans_restart_nounlock(trans, err); - return -err; + return btree_trans_restart_ip(trans, err, _THIS_IP_); } bool bch2_btree_node_upgrade(struct btree_trans *, diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 
3d951846a1be..b47f11881fe4 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -624,7 +624,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, if (race_fault()) { trace_and_count(c, trans_restart_fault_inject, trans, trace_ip); - return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject); + return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject); } /* -- 2.51.0 From 71008e5d6f097794d188d91e7c83c13f777b45ce Mon Sep 17 00:00:00 2001 From: Dennis Lam Date: Wed, 11 Sep 2024 21:16:28 -0400 Subject: [PATCH 08/16] docs: filesystems: bcachefs: fixed some spelling mistakes in the bcachefs coding style page Specifically, fixed spelling of "commit" and pluralization of last sentence. Signed-off-by: Dennis Lam Signed-off-by: Kent Overstreet --- Documentation/filesystems/bcachefs/CodingStyle.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/filesystems/bcachefs/CodingStyle.rst b/Documentation/filesystems/bcachefs/CodingStyle.rst index 01de555e21d8..b29562a6bf55 100644 --- a/Documentation/filesystems/bcachefs/CodingStyle.rst +++ b/Documentation/filesystems/bcachefs/CodingStyle.rst @@ -183,4 +183,4 @@ even better as a code comment. A good code comment is wonderful, but even better is the comment that didn't need to exist because the code was so straightforward as to be obvious; organized into small clean and tidy modules, with clear and descriptive names -for functions and variable, where every line of code has a clear purpose. +for functions and variables, where every line of code has a clear purpose. -- 2.51.0 From 03525de50638ac0801e5296826e3cdebe4cb553f Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 23 Sep 2024 16:20:29 +0200 Subject: [PATCH 09/16] bcachefs: Remove duplicate included headers The header files dirent_format.h and disk_groups_format.h are included twice. 
Remove the redundant includes and the following warnings reported by make includecheck: disk_groups_format.h is included more than once dirent_format.h is included more than once Reviewed-by: Hongbo Li Signed-off-by: Thorsten Blum Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 5004f6ba997c..6a67df2a2fcd 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -499,8 +499,6 @@ struct bch_sb_field { #include "disk_groups_format.h" #include "extents_format.h" #include "ec_format.h" -#include "dirent_format.h" -#include "disk_groups_format.h" #include "inode_format.h" #include "journal_seq_blacklist_format.h" #include "logged_ops_format.h" -- 2.51.0 From 55f524b706b48229685a61e8d5349b484f683e34 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 23 Sep 2024 16:44:53 +0200 Subject: [PATCH 10/16] bcachefs: Use FOREACH_ACL_ENTRY() macro to iterate over acl entries Use the existing FOREACH_ACL_ENTRY() macro to iterate over POSIX acl entries and remove the custom acl_for_each_entry() macro. Signed-off-by: Thorsten Blum Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 87f1be9d4db4..99487727ae64 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -184,11 +184,6 @@ invalid: return ERR_PTR(-EINVAL); } -#define acl_for_each_entry(acl, acl_e) \ - for (acl_e = acl->a_entries; \ - acl_e < acl->a_entries + acl->a_count; \ - acl_e++) - /* * Convert from in-memory to filesystem representation. 
*/ @@ -199,11 +194,11 @@ bch2_acl_to_xattr(struct btree_trans *trans, { struct bkey_i_xattr *xattr; bch_acl_header *acl_header; - const struct posix_acl_entry *acl_e; + const struct posix_acl_entry *acl_e, *pe; void *outptr; unsigned nr_short = 0, nr_long = 0, acl_len, u64s; - acl_for_each_entry(acl, acl_e) { + FOREACH_ACL_ENTRY(acl_e, acl, pe) { switch (acl_e->e_tag) { case ACL_USER: case ACL_GROUP: @@ -241,7 +236,7 @@ bch2_acl_to_xattr(struct btree_trans *trans, outptr = (void *) acl_header + sizeof(*acl_header); - acl_for_each_entry(acl, acl_e) { + FOREACH_ACL_ENTRY(acl_e, acl, pe) { bch_acl_entry *entry = outptr; entry->e_tag = cpu_to_le16(acl_e->e_tag); -- 2.51.0 From eba3d7e57d2a4e0c1f28b5c2e3bb691279ab6eaf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 18:11:07 -0400 Subject: [PATCH 11/16] bcachefs: add more path idx debug asserts Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 24406f723283..550db3654f2c 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -23,6 +23,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path { unsigned idx = path - trans->paths; + EBUG_ON(idx >= trans->nr_paths); EBUG_ON(!test_bit(idx, trans->paths_allocated)); if (unlikely(path->ref == U8_MAX)) { bch2_dump_trans_paths_updates(trans); @@ -36,6 +37,7 @@ static inline void __btree_path_get(struct btree_trans *trans, struct btree_path static inline bool __btree_path_put(struct btree_trans *trans, struct btree_path *path, bool intent) { + EBUG_ON(path - trans->paths >= trans->nr_paths); EBUG_ON(!test_bit(path - trans->paths, trans->paths_allocated)); EBUG_ON(!path->ref); EBUG_ON(!path->intent_ref && intent); -- 2.51.0 From 18f5b84a04707565b926b3dcdfbc6f88ef53988a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 21 Sep 2024 20:21:18 -0400 Subject: [PATCH 12/16] bcachefs: 
bch2_run_explicit_recovery_pass() returns different error when not in recovery if we're not in recovery then there's no way to rewind recovery - give this a different errcode so that any error messages will give us a better idea of what happened. Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 4 +++- fs/bcachefs/recovery_passes.c | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index e3b0ec7a0f73..40bf1e5775a9 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -172,7 +172,9 @@ x(BCH_ERR_fsck, fsck_errors_not_fixed) \ x(BCH_ERR_fsck, fsck_repair_unimplemented) \ x(BCH_ERR_fsck, fsck_repair_impossible) \ - x(0, restart_recovery) \ + x(EINVAL, restart_recovery) \ + x(EINVAL, not_in_recovery) \ + x(EINVAL, cannot_rewind_recovery) \ x(0, data_update_done) \ x(EINVAL, device_state_not_allowed) \ x(EINVAL, member_info_missing) \ diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index dff589ddc984..1cc010bf1695 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -106,6 +106,9 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, if (c->opts.recovery_passes & BIT_ULL(pass)) return 0; + if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns)) + return -BCH_ERR_not_in_recovery; + bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", bch2_recovery_passes[pass], pass, bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); -- 2.51.0 From 26c79fdc580d27c08c050789c523ce89e9a0da44 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 21 Sep 2024 23:22:48 -0400 Subject: [PATCH 13/16] bcachefs: lru, accounting are alloc btrees They can be regenerated by fsck and don't require a btree node scan, like other alloc btrees. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 6a67df2a2fcd..79a80a78c2d8 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1359,6 +1359,8 @@ static inline bool btree_id_is_alloc(enum btree_id id) case BTREE_ID_need_discard: case BTREE_ID_freespace: case BTREE_ID_bucket_gens: + case BTREE_ID_lru: + case BTREE_ID_accounting: return true; default: return false; -- 2.51.0 From d65d126c0256cf2349e118a3e8627d8281295eee Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 21 Sep 2024 23:27:59 -0400 Subject: [PATCH 14/16] bcachefs: Add locking for bch_fs.curr_recovery_pass Recovery can rewind in certain situations - when we discover we need to run a pass that doesn't normally run. This can happen from another thread for btree node read errors, so we need a bit of locking. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/recovery_passes.c | 76 ++++++++++++++++++++++++++--------- fs/bcachefs/super.c | 1 + 3 files changed, 59 insertions(+), 19 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 7db81e182c3c..fbd89f91625d 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -1060,6 +1060,7 @@ struct bch_fs { u64 recovery_passes_complete; /* never rewinds version of curr_recovery_pass */ enum bch_recovery_pass recovery_pass_done; + spinlock_t recovery_pass_lock; struct semaphore online_fsck_mutex; /* DEBUG JUNK */ diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 1cc010bf1695..5e7722cc0879 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -100,8 +100,8 @@ u64 bch2_recovery_passes_from_stable(u64 v) /* * For when we need to rewind recovery passes and run a pass we skipped: */ -int bch2_run_explicit_recovery_pass(struct bch_fs *c, - enum bch_recovery_pass pass) +static int 
__bch2_run_explicit_recovery_pass(struct bch_fs *c, + enum bch_recovery_pass pass) { if (c->opts.recovery_passes & BIT_ULL(pass)) return 0; @@ -109,6 +109,13 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, if (c->curr_recovery_pass == ARRAY_SIZE(recovery_pass_fns)) return -BCH_ERR_not_in_recovery; + if (pass < BCH_RECOVERY_PASS_set_may_go_rw && + c->curr_recovery_pass >= BCH_RECOVERY_PASS_set_may_go_rw) { + bch_info(c, "need recovery pass %s (%u), but already rw", + bch2_recovery_passes[pass], pass); + return -BCH_ERR_cannot_rewind_recovery; + } + bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)", bch2_recovery_passes[pass], pass, bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass); @@ -124,6 +131,16 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, } } +int bch2_run_explicit_recovery_pass(struct bch_fs *c, + enum bch_recovery_pass pass) +{ + unsigned long flags; + spin_lock_irqsave(&c->recovery_pass_lock, flags); + int ret = __bch2_run_explicit_recovery_pass(c, pass); + spin_unlock_irqrestore(&c->recovery_pass_lock, flags); + return ret; +} + int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, enum bch_recovery_pass pass) { @@ -237,30 +254,51 @@ int bch2_run_recovery_passes(struct bch_fs *c) c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw; while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) { + spin_lock_irq(&c->recovery_pass_lock); + unsigned pass = c->curr_recovery_pass; + if (c->opts.recovery_pass_last && - c->curr_recovery_pass > c->opts.recovery_pass_last) + c->curr_recovery_pass > c->opts.recovery_pass_last) { + spin_unlock_irq(&c->recovery_pass_lock); break; + } - if (should_run_recovery_pass(c, c->curr_recovery_pass)) { - unsigned pass = c->curr_recovery_pass; - - ret = bch2_run_recovery_pass(c, c->curr_recovery_pass) ?: - bch2_journal_flush(&c->journal); - if (bch2_err_matches(ret, BCH_ERR_restart_recovery) || - (ret && c->curr_recovery_pass < pass)) - 
continue; - if (ret) - break; - - c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass); + if (!should_run_recovery_pass(c, pass)) { + c->curr_recovery_pass++; + c->recovery_pass_done = max(c->recovery_pass_done, pass); + spin_unlock_irq(&c->recovery_pass_lock); + continue; + } + spin_unlock_irq(&c->recovery_pass_lock); + + ret = bch2_run_recovery_pass(c, pass) ?: + bch2_journal_flush(&c->journal); + + spin_lock_irq(&c->recovery_pass_lock); + if (c->curr_recovery_pass < pass) { + /* + * bch2_run_explicit_recovery_pass() was called: we + * can't always catch -BCH_ERR_restart_recovery because + * it may have been called from another thread (btree + * node read completion) + */ + spin_unlock_irq(&c->recovery_pass_lock); + continue; + } else if (c->curr_recovery_pass == pass) { + c->curr_recovery_pass++; + } else { + BUG(); } + spin_unlock_irq(&c->recovery_pass_lock); - c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass); + if (ret) + break; - if (!test_bit(BCH_FS_error, &c->flags)) - bch2_clear_recovery_pass_required(c, c->curr_recovery_pass); + c->recovery_passes_complete |= BIT_ULL(pass); + c->recovery_pass_done = max(c->recovery_pass_done, pass); - c->curr_recovery_pass++; + if (!test_bit(BCH_FS_error, &c->flags)) + bch2_clear_recovery_pass_required(c, pass); } return ret; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 17442df7326d..d6411324cd3f 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -766,6 +766,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) refcount_set(&c->ro_ref, 1); init_waitqueue_head(&c->ro_ref_wait); + spin_lock_init(&c->recovery_pass_lock); sema_init(&c->online_fsck_mutex, 1); init_rwsem(&c->gc_lock); -- 2.51.0 From e3c43dbe8e5ff64e77b6f927b32f489bccc7d75e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 21 Sep 2024 23:40:01 -0400 Subject: [PATCH 15/16] bcachefs: bch2_btree_lost_data() now uses run_explicit_recovery_pass_persistent() Also get a bit
more fine grained about which passes to run for which btrees. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 63 +++++++++++++++++++++++------------ fs/bcachefs/recovery.h | 2 +- fs/bcachefs/recovery_passes.c | 11 ++++++ fs/bcachefs/recovery_passes.h | 1 + 4 files changed, 54 insertions(+), 23 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 3c7f941dde39..b1c83e72c0d8 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -34,21 +34,52 @@ #define QSTR(n) { { { .len = strlen(n) } }, .name = n } -void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) +int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) { - if (btree >= BTREE_ID_NR_MAX) - return; - u64 b = BIT_ULL(btree); + int ret = 0; + + mutex_lock(&c->sb_lock); if (!(c->sb.btrees_lost_data & b)) { bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree)); - - mutex_lock(&c->sb_lock); bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); } + + switch (btree) { + case BTREE_ID_alloc: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_backpointers: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; + goto out; + case BTREE_ID_need_discard: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_freespace: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_bucket_gens: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, 
BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_lru: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_accounting: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + goto out; + default: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret; + goto out; + } +out: + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + + return ret; } /* for -o reconstruct_alloc: */ @@ -524,22 +555,10 @@ static int read_btree_roots(struct bch_fs *c) c, btree_root_read_error, "error reading btree root %s l=%u: %s", bch2_btree_id_str(i), r->level, bch2_err_str(ret))) { - if (btree_id_is_alloc(i)) { - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs); - c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); + if (btree_id_is_alloc(i)) r->error = 0; - } else if (!(c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) { - bch_info(c, "will run btree node scan"); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); - } - ret = 0; - bch2_btree_lost_data(c, i); + ret = bch2_btree_lost_data(c, i); } } diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index 4bf818de1f2f..b0d55754b21b 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_RECOVERY_H #define _BCACHEFS_RECOVERY_H -void 
bch2_btree_lost_data(struct bch_fs *, enum btree_id); +int bch2_btree_lost_data(struct bch_fs *, enum btree_id); int bch2_journal_replay(struct bch_fs *); diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 5e7722cc0879..1240c5c19fea 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -141,6 +141,17 @@ int bch2_run_explicit_recovery_pass(struct bch_fs *c, return ret; } +int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *c, + enum bch_recovery_pass pass) +{ + lockdep_assert_held(&c->sb_lock); + + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + __set_bit_le64(bch2_recovery_pass_to_stable(pass), ext->recovery_passes_required); + + return bch2_run_explicit_recovery_pass(c, pass); +} + int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c, enum bch_recovery_pass pass) { diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h index 99b464e127b8..7d7339c8fa29 100644 --- a/fs/bcachefs/recovery_passes.h +++ b/fs/bcachefs/recovery_passes.h @@ -9,6 +9,7 @@ u64 bch2_recovery_passes_from_stable(u64 v); u64 bch2_fsck_recovery_passes(void); int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass); +int bch2_run_explicit_recovery_pass_persistent_locked(struct bch_fs *, enum bch_recovery_pass); int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass); int bch2_run_online_recovery_passes(struct bch_fs *); -- 2.51.0 From 0269e27ce3f7be2bd1e565cc17a88e4074facad1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 9 Oct 2024 21:26:05 -0400 Subject: [PATCH 16/16] bcachefs: improved bkey_val_copy() Factor out some common code, add typechecking. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.h | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 550db3654f2c..dda07a320488 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -594,13 +594,18 @@ static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans, bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \ _btree_id, _pos, _flags, KEY_TYPE_##_type)) +static inline void __bkey_val_copy(void *dst_v, unsigned dst_size, struct bkey_s_c src_k) +{ + unsigned b = min_t(unsigned, dst_size, bkey_val_bytes(src_k.k)); + memcpy(dst_v, src_k.v, b); + if (unlikely(b < dst_size)) + memset(dst_v + b, 0, dst_size - b); +} + #define bkey_val_copy(_dst_v, _src_k) \ do { \ - unsigned b = min_t(unsigned, sizeof(*_dst_v), \ - bkey_val_bytes(_src_k.k)); \ - memcpy(_dst_v, _src_k.v, b); \ - if (b < sizeof(*_dst_v)) \ - memset((void *) (_dst_v) + b, 0, sizeof(*_dst_v) - b); \ + BUILD_BUG_ON(!__typecheck(*_dst_v, *_src_k.v)); \ + __bkey_val_copy(_dst_v, sizeof(*_dst_v), _src_k.s_c); \ } while (0) static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, @@ -609,17 +614,10 @@ static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, unsigned val_size, void *val) { struct btree_iter iter; - struct bkey_s_c k; - int ret; - - k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type); - ret = bkey_err(k); + struct bkey_s_c k = __bch2_bkey_get_iter(trans, &iter, btree_id, pos, flags, type); + int ret = bkey_err(k); if (!ret) { - unsigned b = min_t(unsigned, bkey_val_bytes(k.k), val_size); - - memcpy(val, k.v, b); - if (unlikely(b < sizeof(*val))) - memset((void *) val + b, 0, sizeof(*val) - b); + __bkey_val_copy(val, val_size, k); bch2_trans_iter_exit(trans, &iter); } -- 2.51.0