From 1302eeb7c5db1b9ac9db9d29c39e6a46bda718a6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Nov 2024 18:20:42 -0500 Subject: [PATCH 01/16] bcachefs: bkey_fsck_err now respects errors_silent Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index abaa9570cd62..9e34374960f3 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -476,11 +476,16 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, return -BCH_ERR_fsck_delete_bkey; unsigned fsck_flags = 0; - if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) + if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) { + if (test_bit(err, c->sb.errors_silent)) + return -BCH_ERR_fsck_delete_bkey; + fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX; + } + if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) + fsck_flags |= fsck_flags_extra[err]; struct printbuf buf = PRINTBUF; - va_list args; prt_printf(&buf, "invalid bkey in %s btree=", bch2_bkey_validate_contexts[from.from]); @@ -489,9 +494,12 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, bch2_bkey_val_to_text(&buf, c, k); prt_str(&buf, "\n "); + + va_list args; va_start(args, fmt); prt_vprintf(&buf, fmt, args); va_end(args); + prt_str(&buf, ": delete?"); int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s", buf.buf); -- 2.51.0 From b29769c72d0b6f842ae7a1e10e9cfb9a8fcc87fa Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Nov 2024 18:17:00 -0500 Subject: [PATCH 02/16] bcachefs: If we did repair on a btree node, make sure we rewrite it Ensure that "invalid bkey" repair gets persisted, so that it doesn't repeatedly spam the logs. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index eedcb2445b99..9df9fc1c5e2b 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -997,6 +997,7 @@ drop_this_key: got_good_key: le16_add_cpu(&i->u64s, -next_good_key); memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); + set_btree_node_need_rewrite(b); } fsck_err: printbuf_exit(&buf); @@ -1259,6 +1260,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); set_btree_bset_end(b, b->set); + set_btree_node_need_rewrite(b); continue; } if (ret) @@ -1372,15 +1374,18 @@ start: rb->start_time); bio_put(&rb->bio); - if (saw_error && + if ((saw_error || + btree_node_need_rewrite(b)) && !btree_node_read_error(b) && c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { - printbuf_reset(&buf); - bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); - prt_str(&buf, " "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s", - __func__, buf.buf); + if (saw_error) { + printbuf_reset(&buf); + bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); + prt_str(&buf, " "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + bch_err_ratelimited(c, "%s: rewriting btree node at due to error\n %s", + __func__, buf.buf); + } bch2_btree_node_rewrite_async(c, b); } -- 2.51.0 From c1f618f4f7cc7b8360e7362d3d18f3e244ded364 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 29 Nov 2024 18:53:26 -0500 Subject: [PATCH 03/16] bcachefs: bch2_async_btree_node_rewrites_flush() Add a method to flush btree node 
rewrites at the end of recovery, to ensure that corrected errors are persisted. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 7 +- fs/bcachefs/btree_update_interior.c | 153 ++++++++++++++++------------ fs/bcachefs/btree_update_interior.h | 1 + fs/bcachefs/recovery.c | 2 + 4 files changed, 97 insertions(+), 66 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index c16937e54734..b12c9c78beec 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -829,9 +829,10 @@ struct bch_fs { struct work_struct btree_interior_update_work; struct workqueue_struct *btree_node_rewrite_worker; - - struct list_head pending_node_rewrites; - struct mutex pending_node_rewrites_lock; + struct list_head btree_node_rewrites; + struct list_head btree_node_rewrites_pending; + spinlock_t btree_node_rewrites_lock; + struct closure_waitlist btree_node_rewrites_wait; /* btree_io.c: */ spinlock_t btree_write_error_lock; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 5eabd532e388..f2a1d5d3d8d5 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2206,42 +2206,50 @@ struct async_btree_rewrite { struct list_head list; enum btree_id btree_id; unsigned level; - struct bpos pos; - __le64 seq; + struct bkey_buf key; }; static int async_btree_node_rewrite_trans(struct btree_trans *trans, struct async_btree_rewrite *a) { - struct bch_fs *c = trans->c; struct btree_iter iter; - struct btree *b; - int ret; - - bch2_trans_node_iter_init(trans, &iter, a->btree_id, a->pos, + bch2_trans_node_iter_init(trans, &iter, + a->btree_id, a->key.k->k.p, BTREE_MAX_DEPTH, a->level, 0); - b = bch2_btree_iter_peek_node(&iter); - ret = PTR_ERR_OR_ZERO(b); + struct btree *b = bch2_btree_iter_peek_node(&iter); + int ret = PTR_ERR_OR_ZERO(b); if (ret) goto out; - if (!b || b->data->keys.seq != a->seq) { + bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(a->key.k); + ret = found + ? bch2_btree_node_rewrite(trans, &iter, b, 0) + : -ENOENT; + +#if 0 + /* Tracepoint... 
*/ + if (!ret || ret == -ENOENT) { + struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; - if (b) - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - else - prt_str(&buf, "(null"); - bch_info(c, "%s: node to rewrite not found:, searching for seq %llu, got\n%s", - __func__, a->seq, buf.buf); + if (!ret) { + prt_printf(&buf, "rewrite node:\n "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k)); + } else { + prt_printf(&buf, "node to rewrite not found:\n want: "); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k)); + prt_printf(&buf, "\n got: "); + if (b) + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + else + prt_str(&buf, "(null)"); + } + bch_info(c, "%s", buf.buf); printbuf_exit(&buf); - goto out; } - - ret = bch2_btree_node_rewrite(trans, &iter, b, 0); +#endif out: bch2_trans_iter_exit(trans, &iter); - return ret; } @@ -2252,81 +2260,96 @@ static void async_btree_node_rewrite_work(struct work_struct *work) struct bch_fs *c = a->c; int ret = bch2_trans_do(c, async_btree_node_rewrite_trans(trans, a)); - bch_err_fn_ratelimited(c, ret); + if (ret != -ENOENT) + bch_err_fn_ratelimited(c, ret); + + spin_lock(&c->btree_node_rewrites_lock); + list_del(&a->list); + spin_unlock(&c->btree_node_rewrites_lock); + + closure_wake_up(&c->btree_node_rewrites_wait); + + bch2_bkey_buf_exit(&a->key, c); bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite); kfree(a); } void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) { - struct async_btree_rewrite *a; - int ret; - - a = kmalloc(sizeof(*a), GFP_NOFS); - if (!a) { - bch_err(c, "%s: error allocating memory", __func__); + struct async_btree_rewrite *a = kmalloc(sizeof(*a), GFP_NOFS); + if (!a) return; - } a->c = c; a->btree_id = b->c.btree_id; a->level = b->c.level; - a->pos = b->key.k.p; - a->seq = b->data->keys.seq; INIT_WORK(&a->work, async_btree_node_rewrite_work); - if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { - mutex_lock(&c->pending_node_rewrites_lock); - list_add(&a->list, &c->pending_node_rewrites); - mutex_unlock(&c->pending_node_rewrites_lock); - return; - } + bch2_bkey_buf_init(&a->key); + bch2_bkey_buf_copy(&a->key, c, &b->key); - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { - if (test_bit(BCH_FS_started, &c->flags)) { - bch_err(c, "%s: error getting c->writes ref", __func__); - kfree(a); - return; - } + bool now = false, pending = false; - ret = bch2_fs_read_write_early(c); - bch_err_msg(c, ret, "going read-write"); - if (ret) { - kfree(a); - return; - } + spin_lock(&c->btree_node_rewrites_lock); + if (bch2_write_ref_tryget(c, BCH_WRITE_REF_node_rewrite)) { + list_add(&a->list, &c->btree_node_rewrites); + now = true; + } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) { + list_add(&a->list, &c->btree_node_rewrites_pending); + pending = true; + } + spin_unlock(&c->btree_node_rewrites_lock); - bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite); + if (now) { + queue_work(c->btree_node_rewrite_worker, &a->work); + } else if (pending) { + /* bch2_do_pending_node_rewrites will execute */ + } else { + bch2_bkey_buf_exit(&a->key, c); + kfree(a); } +} - queue_work(c->btree_node_rewrite_worker, &a->work); +void bch2_async_btree_node_rewrites_flush(struct bch_fs *c) +{ + closure_wait_event(&c->btree_node_rewrites_wait, + list_empty(&c->btree_node_rewrites)); } void bch2_do_pending_node_rewrites(struct bch_fs *c) { - struct async_btree_rewrite *a, *n; - - mutex_lock(&c->pending_node_rewrites_lock); - list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) { - 
list_del(&a->list); + while (1) { + spin_lock(&c->btree_node_rewrites_lock); + struct async_btree_rewrite *a = + list_pop_entry(&c->btree_node_rewrites_pending, + struct async_btree_rewrite, list); + if (a) + list_add(&a->list, &c->btree_node_rewrites); + spin_unlock(&c->btree_node_rewrites_lock); + + if (!a) + break; bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite); queue_work(c->btree_node_rewrite_worker, &a->work); } - mutex_unlock(&c->pending_node_rewrites_lock); } void bch2_free_pending_node_rewrites(struct bch_fs *c) { - struct async_btree_rewrite *a, *n; + while (1) { + spin_lock(&c->btree_node_rewrites_lock); + struct async_btree_rewrite *a = + list_pop_entry(&c->btree_node_rewrites_pending, + struct async_btree_rewrite, list); + spin_unlock(&c->btree_node_rewrites_lock); - mutex_lock(&c->pending_node_rewrites_lock); - list_for_each_entry_safe(a, n, &c->pending_node_rewrites, list) { - list_del(&a->list); + if (!a) + break; + bch2_bkey_buf_exit(&a->key, c); kfree(a); } - mutex_unlock(&c->pending_node_rewrites_lock); } static int __bch2_btree_node_update_key(struct btree_trans *trans, @@ -2683,6 +2706,9 @@ void bch2_btree_reserve_cache_to_text(struct printbuf *out, struct bch_fs *c) void bch2_fs_btree_interior_update_exit(struct bch_fs *c) { + WARN_ON(!list_empty(&c->btree_node_rewrites)); + WARN_ON(!list_empty(&c->btree_node_rewrites_pending)); + if (c->btree_node_rewrite_worker) destroy_workqueue(c->btree_node_rewrite_worker); if (c->btree_interior_update_worker) @@ -2698,8 +2724,9 @@ void bch2_fs_btree_interior_update_init_early(struct bch_fs *c) mutex_init(&c->btree_interior_update_lock); INIT_WORK(&c->btree_interior_update_work, btree_interior_update_work); - INIT_LIST_HEAD(&c->pending_node_rewrites); - mutex_init(&c->pending_node_rewrites_lock); + INIT_LIST_HEAD(&c->btree_node_rewrites); + INIT_LIST_HEAD(&c->btree_node_rewrites_pending); + spin_lock_init(&c->btree_node_rewrites_lock); } int bch2_fs_btree_interior_update_init(struct bch_fs *c) diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 1c6cf3e2e6a9..7930ffea3075 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -334,6 +334,7 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *, struct jset_entry *); struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *, struct jset_entry *, unsigned long); +void bch2_async_btree_node_rewrites_flush(struct bch_fs *); void bch2_do_pending_node_rewrites(struct bch_fs *); void bch2_free_pending_node_rewrites(struct bch_fs *); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index c50dede64785..a342744fd275 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -931,6 +931,8 @@ use_clean: /* in case we don't run journal replay, i.e. 
norecovery mode */ set_bit(BCH_FS_accounting_replay_done, &c->flags); + bch2_async_btree_node_rewrites_flush(c); + /* fsync if we fixed errors */ if (test_bit(BCH_FS_errors_fixed, &c->flags)) { bch2_journal_flush_all_pins(&c->journal); -- 2.51.0 From 511ddcdb2d5e0bdb73c7968e4215268f4572a984 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 30 Nov 2024 23:27:45 -0500 Subject: [PATCH 04/16] bcachefs: fix bch2_journal_key_insert_take() seq Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_journal_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index de3db161d6ab..6d25e3f85ce8 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -259,7 +259,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, * Ensure these keys are done last by journal replay, to unblock * journal reclaim: */ - .journal_seq = U32_MAX, + .journal_seq = U64_MAX, }; struct journal_keys *keys = &c->journal_keys; size_t idx = bch2_journal_key_search(keys, id, level, k->k.p); -- 2.51.0 From 5cdaec193a85e32235e7dccb95c085acc50b8dbd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 1 Dec 2024 16:39:54 -0500 Subject: [PATCH 05/16] bcachefs: Improve "unable to allocate journal write" message Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 1627f3e16517..bb69d80886b5 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -2036,8 +2036,9 @@ CLOSURE_CALLBACK(bch2_journal_write) struct printbuf buf = PRINTBUF; buf.atomic++; - prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu: %s"), + prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"), le64_to_cpu(w->data->seq), + vstruct_sectors(w->data, c->block_bits), bch2_err_str(ret)); __bch2_journal_debug_to_text(&buf, j); spin_unlock(&j->lock); -- 2.51.0 From 9c22dd02ae8b80bf662ab409091731cfb9a09348 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 2 Dec 2024 23:36:38 -0500 Subject: [PATCH 06/16] bcachefs: Fix allocating too big journal entry The "journal space available" calculations didn't take into account mismatched bucket sizes; we need to take the minimum space available out of our devices. 
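For example (illustrative numbers, not from a real filesystem): with one journal device using 2048-sector buckets and another using 512-sector buckets, taking the smallest of the @nr_devs_want largest per-device results can still yield a next_entry of up to 2048 sectors, but a journal entry has to fit inside a single bucket on every device it is written to, so anything over 512 sectors can never be written to the second device. Clamping next_entry to the minimum bucket size across all journal devices rules that out.
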
Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_reclaim.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 1aabbbe328d9..b7936ad3ae7f 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -140,6 +140,7 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne struct bch_fs *c = container_of(j, struct bch_fs, journal); unsigned pos, nr_devs = 0; struct journal_space space, dev_space[BCH_SB_MEMBERS_MAX]; + unsigned min_bucket_size = U32_MAX; BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space)); @@ -148,6 +149,8 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne if (!ca->journal.nr) continue; + min_bucket_size = min(min_bucket_size, ca->mi.bucket_size); + space = journal_dev_space_available(j, ca, from); if (!space.next_entry) continue; @@ -167,7 +170,9 @@ static struct journal_space __journal_space_available(struct journal *j, unsigne * We sorted largest to smallest, and we want the smallest out of the * @nr_devs_want largest devices: */ - return dev_space[nr_devs_want - 1]; + space = dev_space[nr_devs_want - 1]; + space.next_entry = min(space.next_entry, min_bucket_size); + return space; } void bch2_journal_space_available(struct journal *j) -- 2.51.0 From d36b3e74b65f4ec68a38bdb717d94b32a81a355f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Dec 2024 17:40:10 +0100 Subject: [PATCH 07/16] bcachefs: BCACHEFS_PATH_TRACEPOINTS should depend on TRACING When tracing is disabled, there is no point in asking the user about enabling extra btree_path tracepoints in bcachefs. Fixes: 32ed4a620c5405be ("bcachefs: Btree path tracepoints") Signed-off-by: Geert Uytterhoeven Signed-off-by: Kent Overstreet --- fs/bcachefs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index ab6c95b895b3..464b927e4fff 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -90,7 +90,7 @@ config BCACHEFS_SIX_OPTIMISTIC_SPIN config BCACHEFS_PATH_TRACEPOINTS bool "Extra btree_path tracepoints" - depends on BCACHEFS_FS + depends on BCACHEFS_FS && TRACING help Enable extra tracepoints for debugging btree_path operations; we don't normally want these enabled because they happen at very high rates. -- 2.51.0 From ad0b2544ec827e03b75143bed83338bda7f6fe21 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 3 Dec 2024 21:22:26 -0500 Subject: [PATCH 08/16] bcachefs: rcu_pending now works in userspace Introduce a typedef to handle the difference between unsigned long/struct urcu_gp_poll_state. 
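For reference, a sketch of the two polling APIs being bridged; the kernel signatures are real, the userspace ones are liburcu's as understood here, so treat the exact names as an assumption of this note rather than something the patch defines:

	/* kernel: grace period cookies are scalars, compared with == */
	unsigned long cookie = start_poll_synchronize_rcu();
	bool done = poll_state_synchronize_rcu(cookie);

	/* userspace (liburcu): cookies are structs, so == doesn't compile;
	 * compare the grace_period_id member instead */
	struct urcu_gp_poll_state cookie = start_poll_synchronize_rcu();
	bool done = poll_state_synchronize_rcu(cookie);

rcu_gp_poll_state_t plus rcu_gp_poll_cookie_eq() let the rest of the file be written once against either representation.
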
Signed-off-by: Kent Overstreet --- fs/bcachefs/rcu_pending.c | 40 ++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/rcu_pending.c b/fs/bcachefs/rcu_pending.c index 67522aa344a7..bef2aa1b8bcd 100644 --- a/fs/bcachefs/rcu_pending.c +++ b/fs/bcachefs/rcu_pending.c @@ -25,21 +25,37 @@ enum rcu_pending_special { #define RCU_PENDING_KVFREE_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_KVFREE) #define RCU_PENDING_CALL_RCU_FN ((rcu_pending_process_fn) (ulong) RCU_PENDING_CALL_RCU) -static inline unsigned long __get_state_synchronize_rcu(struct srcu_struct *ssp) +#ifdef __KERNEL__ +typedef unsigned long rcu_gp_poll_state_t; + +static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r) +{ + return l == r; +} +#else +typedef struct urcu_gp_poll_state rcu_gp_poll_state_t; + +static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r) +{ + return l.grace_period_id == r.grace_period_id; +} +#endif + +static inline rcu_gp_poll_state_t __get_state_synchronize_rcu(struct srcu_struct *ssp) { return ssp ? get_state_synchronize_srcu(ssp) : get_state_synchronize_rcu(); } -static inline unsigned long __start_poll_synchronize_rcu(struct srcu_struct *ssp) +static inline rcu_gp_poll_state_t __start_poll_synchronize_rcu(struct srcu_struct *ssp) { return ssp ? start_poll_synchronize_srcu(ssp) : start_poll_synchronize_rcu(); } -static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, unsigned long cookie) +static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, rcu_gp_poll_state_t cookie) { return ssp ? poll_state_synchronize_srcu(ssp, cookie) @@ -71,13 +87,13 @@ struct rcu_pending_seq { GENRADIX(struct rcu_head *) objs; size_t nr; struct rcu_head **cursor; - unsigned long seq; + rcu_gp_poll_state_t seq; }; struct rcu_pending_list { struct rcu_head *head; struct rcu_head *tail; - unsigned long seq; + rcu_gp_poll_state_t seq; }; struct rcu_pending_pcpu { @@ -316,10 +332,10 @@ static void rcu_pending_rcu_cb(struct rcu_head *rcu) } static __always_inline struct rcu_pending_seq * -get_object_radix(struct rcu_pending_pcpu *p, unsigned long seq) +get_object_radix(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq) { darray_for_each_reverse(p->objs, objs) - if (objs->seq == seq) + if (rcu_gp_poll_cookie_eq(objs->seq, seq)) return objs; if (darray_push_gfp(&p->objs, ((struct rcu_pending_seq) { .seq = seq }), GFP_ATOMIC)) @@ -329,7 +345,7 @@ get_object_radix(struct rcu_pending_pcpu *p, unsigned long seq) } static noinline bool -rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, unsigned long seq, +rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq, struct rcu_head *head, void *ptr, unsigned long *flags) { @@ -364,7 +380,7 @@ rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, unsigned long seq, again: for (struct rcu_pending_list *i = p->lists; i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) { - if (i->seq == seq) { + if (rcu_gp_poll_cookie_eq(i->seq, seq)) { rcu_pending_list_add(i, head); return false; } @@ -408,7 +424,7 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, struct rcu_pending_pcpu *p; struct rcu_pending_seq *objs; struct genradix_node *new_node = NULL; - unsigned long seq, flags; + unsigned long flags; bool start_gp = false; BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN)); @@ -416,7 +432,7 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, 
local_irq_save(flags); p = this_cpu_ptr(pending->p); spin_lock(&p->lock); - seq = __get_state_synchronize_rcu(pending->srcu); + rcu_gp_poll_state_t seq = __get_state_synchronize_rcu(pending->srcu); restart: if (may_sleep && unlikely(process_finished_items(pending, p, flags))) @@ -478,9 +494,7 @@ start_gp: */ if (!p->cb_armed) { p->cb_armed = true; - spin_unlock_irqrestore(&p->lock, flags); __call_rcu(pending->srcu, &p->cb, rcu_pending_rcu_cb); - goto free_node; } else { __start_poll_synchronize_rcu(pending->srcu); } -- 2.51.0 From f78760dede23affb50a6fe62b1230849e1a5d15f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 1 Dec 2024 21:35:11 -0500 Subject: [PATCH 09/16] bcachefs: logged ops only use inum 0 of logged ops btree we wish to use the logged ops btree for other items that aren't strictly logged ops: cursors for inode allocation There's no reason to create another cached btree for inode allocator cursors - so reserve different parts of the keyspace for different purposes. Older versions will ignore or delete the cursors. Signed-off-by: Kent Overstreet --- fs/bcachefs/logged_ops.c | 10 +++++----- fs/bcachefs/logged_ops_format.h | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c index 60e00702d1a4..1ac51af16299 100644 --- a/fs/bcachefs/logged_ops.c +++ b/fs/bcachefs/logged_ops.c @@ -63,8 +63,9 @@ fsck_err: int bch2_resume_logged_ops(struct bch_fs *c) { int ret = bch2_trans_run(c, - for_each_btree_key(trans, iter, - BTREE_ID_logged_ops, POS_MIN, + for_each_btree_key_max(trans, iter, + BTREE_ID_logged_ops, + POS(LOGGED_OPS_INUM, 0), POS(LOGGED_OPS_INUM, U64_MAX), BTREE_ITER_prefetch, k, resume_logged_op(trans, &iter, k))); bch_err_fn(c, ret); @@ -74,9 +75,8 @@ int bch2_resume_logged_ops(struct bch_fs *c) static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) { struct btree_iter iter; - int ret; - - ret = bch2_bkey_get_empty_slot(trans, &iter, BTREE_ID_logged_ops, POS_MAX); + int ret = bch2_bkey_get_empty_slot(trans, &iter, + BTREE_ID_logged_ops, POS(LOGGED_OPS_INUM, U64_MAX)); if (ret) return ret; diff --git a/fs/bcachefs/logged_ops_format.h b/fs/bcachefs/logged_ops_format.h index 6a4bf7129dba..0b370a963ac6 100644 --- a/fs/bcachefs/logged_ops_format.h +++ b/fs/bcachefs/logged_ops_format.h @@ -2,6 +2,8 @@ #ifndef _BCACHEFS_LOGGED_OPS_FORMAT_H #define _BCACHEFS_LOGGED_OPS_FORMAT_H +#define LOGGED_OPS_INUM 0 + struct bch_logged_op_truncate { struct bch_val v; __le32 subvol; -- 2.51.0 From 8dabb19ff4b802131ebfc1024de132b601c3c23d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 3 Dec 2024 22:03:18 -0500 Subject: [PATCH 10/16] bcachefs: Simplify disk accounting validate late The validate late path was iterating over accounting entries in eytzinger order, which is unnecessarily tricky when we may have to remove entries. 
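Iterating the darray in reverse makes removal safe with no index bookkeeping: darray_remove_item() only shifts down the elements after the removed one, which a reverse walk has already visited, and the eytzinger sort then only has to run once after the loop rather than after every removal. A minimal sketch of the pattern, with should_remove() standing in for the real zeroed-counters check:

	darray_for_each_reverse(d, i)
		if (should_remove(i))
			darray_remove_item(&d, i);
	/* re-sort once, after the loop */
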
Signed-off-by: Kent Overstreet --- fs/bcachefs/darray.h | 2 +- fs/bcachefs/disk_accounting.c | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index 8f4c3f0665c4..c6151495985f 100644 --- a/fs/bcachefs/darray.h +++ b/fs/bcachefs/darray.h @@ -83,7 +83,7 @@ int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t); for (typeof(&(_d).data[0]) _i = (_d).data; _i < (_d).data + (_d).nr; _i++) #define darray_for_each_reverse(_d, _i) \ - for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data; --_i) + for (typeof(&(_d).data[0]) _i = (_d).data + (_d).nr - 1; _i >= (_d).data && (_d).nr; --_i) #define darray_init(_d) \ do { \ diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 71c49a7ee2fe..a915d9dc8de4 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -765,15 +765,16 @@ int bch2_accounting_read(struct bch_fs *c) keys->gap = keys->nr = dst - keys->data; percpu_down_write(&c->mark_lock); - unsigned i = 0; - while (i < acc->k.nr) { - unsigned idx = inorder_to_eytzinger0(i, acc->k.nr); + darray_for_each_reverse(acc->k, i) { struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, acc->k.data[idx].pos); + bpos_to_disk_accounting_pos(&acc_k, i->pos); u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; - bch2_accounting_mem_read_counters(acc, idx, v, ARRAY_SIZE(v), false); + memset(v, 0, sizeof(v)); + + for (unsigned j = 0; j < i->nr_counters; j++) + v[j] = percpu_u64_get(i->v[0] + j); /* * If the entry counters are zeroed, it should be treated as @@ -782,26 +783,25 @@ int bch2_accounting_read(struct bch_fs *c) * Remove it, so that if it's re-added it gets re-marked in the * superblock: */ - ret = bch2_is_zero(v, sizeof(v[0]) * acc->k.data[idx].nr_counters) + ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters) ? -BCH_ERR_remove_disk_accounting_entry - : bch2_disk_accounting_validate_late(trans, acc_k, - v, acc->k.data[idx].nr_counters); + : bch2_disk_accounting_validate_late(trans, acc_k, v, i->nr_counters); if (ret == -BCH_ERR_remove_disk_accounting_entry) { - free_percpu(acc->k.data[idx].v[0]); - free_percpu(acc->k.data[idx].v[1]); - darray_remove_item(&acc->k, &acc->k.data[idx]); - eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, NULL); + free_percpu(i->v[0]); + free_percpu(i->v[1]); + darray_remove_item(&acc->k, i); ret = 0; continue; } if (ret) goto fsck_err; - i++; } + eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, NULL); + preempt_disable(); struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); -- 2.51.0 From e3474394eb1a0e4ebf4a5e0e2531671fa96add16 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 4 Dec 2024 01:19:28 -0500 Subject: [PATCH 11/16] bcachefs: Advance to next bp on BCH_ERR_backpointer_to_overwritten_btree_node Don't spin. 
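Previously, on BCH_ERR_backpointer_to_overwritten_btree_node the loop would continue without advancing the iterator, so it re-read the same backpointer and hit the same error forever; goto next advances past the stale backpointer before retrying.
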
Fixes: de95cc201a97 ("bcachefs: Kill bch2_get_next_backpointer()") Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 460175464762..6f21e36d89f7 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -785,7 +785,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, b = bch2_backpointer_get_node(trans, bp, &iter); ret = PTR_ERR_OR_ZERO(b); if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) - continue; + goto next; if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) -- 2.51.0 From 400af9a398186851103e27d848ef42be8870072b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 4 Dec 2024 17:44:25 -0500 Subject: [PATCH 12/16] bcachefs: trace_accounting_mem_insert Add a tracepoint for inserting new accounting entries: we're seeing odd spinning behaviour in accounting read. Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 8 ++++++++ fs/bcachefs/trace.h | 24 ++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index a915d9dc8de4..a0061bcf9159 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -324,6 +324,14 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, NULL); + + if (trace_accounting_mem_insert_enabled()) { + struct printbuf buf = PRINTBUF; + + bch2_accounting_to_text(&buf, c, a.s_c); + trace_accounting_mem_insert(c, buf.buf); + printbuf_exit(&buf); + } return 0; err: free_percpu(n.v[1]); diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 2d5932d2881e..7baf66beee22 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -199,6 +199,30 @@ DECLARE_EVENT_CLASS(bio, (unsigned long long)__entry->sector, __entry->nr_sector) ); +/* disk_accounting.c */ + +TRACE_EVENT(accounting_mem_insert, + TP_PROTO(struct bch_fs *c, const char *acc), + TP_ARGS(c, acc), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(unsigned, new_nr ) + __string(acc, acc ) + ), + + TP_fast_assign( + __entry->dev = c->dev; + __entry->new_nr = c->accounting.k.nr; + __assign_str(acc); + ), + + TP_printk("%d,%d entries %u added %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->new_nr, + __get_str(acc)) +); + /* fs.c: */ TRACE_EVENT(bch2_sync_fs, TP_PROTO(struct super_block *sb, int wait), -- 2.51.0 From 3f1cf04ff9877bf043795d05bb6704d0a85bcd80 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 4 Dec 2024 17:48:06 -0500 Subject: [PATCH 13/16] bcachefs: Silence "unable to allocate journal write" if we're already RO Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index bb69d80886b5..e7a43400a587 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -2032,7 +2032,7 @@ CLOSURE_CALLBACK(bch2_journal_write) bch2_journal_do_discards(j); } - if (ret) { + if (ret && !bch2_journal_error(j)) { struct printbuf buf = PRINTBUF; buf.atomic++; @@ -2044,8 +2044,9 @@ CLOSURE_CALLBACK(bch2_journal_write) spin_unlock(&j->lock); bch2_print_string_as_lines(KERN_ERR, buf.buf); printbuf_exit(&buf); - goto err; } + if (ret) + goto err; /* * write is allocated, no longer need to account for it in -- 2.51.0 From 6728f8f829cf68ae25cc664d3b1ba7034bc81fd4 Mon Sep 17 00:00:00 
2001 From: Kent Overstreet Date: Wed, 4 Dec 2024 17:53:38 -0500 Subject: [PATCH 14/16] bcachefs: BCH_ERR_insufficient_journal_devices kill another standard error code use Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 1 + fs/bcachefs/journal_io.c | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 47387f7d6202..5e4dd85ac669 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -263,6 +263,7 @@ x(EIO, missing_indirect_extent) \ x(EIO, invalidate_stripe_to_dev) \ x(EIO, no_encryption_key) \ + x(EIO, insufficient_journal_devices) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_fixable) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_want_retry) \ x(BCH_ERR_btree_node_read_err, btree_node_read_err_must_retry) \ diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index e7a43400a587..e5fce5e497f2 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1503,8 +1503,7 @@ retry: devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs); - __journal_write_alloc(j, w, &devs_sorted, - sectors, &replicas, replicas_want); + __journal_write_alloc(j, w, &devs_sorted, sectors, &replicas, replicas_want); if (replicas >= replicas_want) goto done; @@ -1544,7 +1543,7 @@ done: BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX); - return replicas >= replicas_need ? 0 : -EROFS; + return replicas >= replicas_need ? 0 : -BCH_ERR_insufficient_journal_devices; } static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) -- 2.51.0 From 49833ce27ed2eed91915a4c25690d82aae5b6a0b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 4 Dec 2024 18:16:25 -0500 Subject: [PATCH 15/16] bcachefs: Fix failure to allocate journal write on discard retry When allocating a journal write fails, then retries after doing discards, we were failing to count already allocated replicas. Signed-off-by: Kent Overstreet --- fs/bcachefs/journal_io.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index e5fce5e497f2..d7dfea5f0181 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1498,6 +1498,15 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) READ_ONCE(c->opts.metadata_replicas_required)); rcu_read_lock(); + + /* We might run more than once if we have to stop and do discards: */ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&w->key)); + bkey_for_each_ptr(ptrs, p) { + struct bch_dev *ca = bch2_dev_rcu_noerror(c, p->dev); + if (ca) + replicas += ca->mi.durability; + } + retry: devs = target_rw_devs(c, BCH_DATA_journal, target); -- 2.51.0 From 47d6ee766f8033563aff333f326378cd4b36a170 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 4 Dec 2024 19:21:22 -0500 Subject: [PATCH 16/16] bcachefs: dev_alloc_list.devs -> dev_alloc_list.data This lets us use darray macros on dev_alloc_list (and it will become a darray eventually, when we increase the maximum number of devices). 
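The darray iteration macros are field-name based: darray_for_each(d, i) walks d.data[0] through d.data[d.nr - 1], so any struct with data and nr members can use them whether or not it is actually a darray. For reference, the existing macro from fs/bcachefs/darray.h:

	#define darray_for_each(_d, _i) \
		for (typeof(&(_d).data[0]) _i = (_d).data; _i < (_d).data + (_d).nr; _i++)
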
Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 60 ++++++++++++++-------------------- fs/bcachefs/alloc_foreground.h | 2 +- fs/bcachefs/journal_io.c | 21 +++++------- 3 files changed, 34 insertions(+), 49 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 095bfe7c53bd..49c9275465f9 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -626,9 +626,9 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c, unsigned i; for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX) - ret.devs[ret.nr++] = i; + ret.data[ret.nr++] = i; - bubble_sort(ret.devs, ret.nr, dev_stripe_cmp); + bubble_sort(ret.data, ret.nr, dev_stripe_cmp); return ret; } @@ -700,18 +700,13 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, struct closure *cl) { struct bch_fs *c = trans->c; - struct dev_alloc_list devs_sorted = - bch2_dev_alloc_list(c, stripe, devs_may_alloc); int ret = -BCH_ERR_insufficient_devices; BUG_ON(*nr_effective >= nr_replicas); - for (unsigned i = 0; i < devs_sorted.nr; i++) { - struct bch_dev_usage usage; - struct open_bucket *ob; - - unsigned dev = devs_sorted.devs[i]; - struct bch_dev *ca = bch2_dev_tryget_noerror(c, dev); + struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, stripe, devs_may_alloc); + darray_for_each(devs_sorted, i) { + struct bch_dev *ca = bch2_dev_tryget_noerror(c, *i); if (!ca) continue; @@ -720,8 +715,9 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, continue; } - ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type, - cl, flags & BCH_WRITE_ALLOC_NOWAIT, &usage); + struct bch_dev_usage usage; + struct open_bucket *ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type, + cl, flags & BCH_WRITE_ALLOC_NOWAIT, &usage); if (!IS_ERR(ob)) bch2_dev_stripe_increment_inlined(ca, stripe, &usage); bch2_dev_put(ca); @@ -765,10 +761,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, struct closure *cl) { struct bch_fs *c = trans->c; - struct dev_alloc_list devs_sorted; - struct ec_stripe_head *h; - struct open_bucket *ob; - unsigned i, ec_idx; int ret = 0; if (nr_replicas < 2) @@ -777,34 +769,32 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans, if (ec_open_bucket(c, ptrs)) return 0; - h = bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl); + struct ec_stripe_head *h = + bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl); if (IS_ERR(h)) return PTR_ERR(h); if (!h) return 0; - devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); - - for (i = 0; i < devs_sorted.nr; i++) - for (ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) { + struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc); + darray_for_each(devs_sorted, i) + for (unsigned ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) { if (!h->s->blocks[ec_idx]) continue; - ob = c->open_buckets + h->s->blocks[ec_idx]; - if (ob->dev == devs_sorted.devs[i] && - !test_and_set_bit(ec_idx, h->s->blocks_allocated)) - goto got_bucket; + struct open_bucket *ob = c->open_buckets + h->s->blocks[ec_idx]; + if (ob->dev == *i && !test_and_set_bit(ec_idx, h->s->blocks_allocated)) { + ob->ec_idx = ec_idx; + ob->ec = h->s; + ec_stripe_new_get(h->s, STRIPE_REF_io); + + ret = add_new_bucket(c, ptrs, devs_may_alloc, + nr_replicas, nr_effective, + have_cache, ob); + goto out; + } } - goto out_put_head; -got_bucket: - ob->ec_idx = ec_idx; - ob->ec = h->s; - ec_stripe_new_get(h->s, STRIPE_REF_io); - - ret = 
add_new_bucket(c, ptrs, devs_may_alloc, - nr_replicas, nr_effective, - have_cache, ob); -out_put_head: +out: bch2_ec_stripe_head_put(c, h); return ret; } diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 4f87745df97e..f25481a0d1a0 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -20,7 +20,7 @@ void bch2_reset_alloc_cursors(struct bch_fs *); struct dev_alloc_list { unsigned nr; - u8 devs[BCH_SB_MEMBERS_MAX]; + u8 data[BCH_SB_MEMBERS_MAX]; }; struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *, diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index d7dfea5f0181..9a1647297d11 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1422,25 +1422,22 @@ fsck_err: static void __journal_write_alloc(struct journal *j, struct journal_buf *w, - struct dev_alloc_list *devs_sorted, + struct dev_alloc_list *devs, unsigned sectors, unsigned *replicas, unsigned replicas_want) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct journal_device *ja; - struct bch_dev *ca; - unsigned i; if (*replicas >= replicas_want) return; - for (i = 0; i < devs_sorted->nr; i++) { - ca = rcu_dereference(c->devs[devs_sorted->devs[i]]); + darray_for_each(*devs, i) { + struct bch_dev *ca = rcu_dereference(c->devs[*i]); if (!ca) continue; - ja = &ca->journal; + struct journal_device *ja = &ca->journal; /* * Check that we can use this device, and aren't already using @@ -1486,13 +1483,11 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) { struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_devs_mask devs; - struct journal_device *ja; - struct bch_dev *ca; struct dev_alloc_list devs_sorted; unsigned sectors = vstruct_sectors(w->data, c->block_bits); unsigned target = c->opts.metadata_target ?: c->opts.foreground_target; - unsigned i, replicas = 0, replicas_want = + unsigned replicas = 0, replicas_want = READ_ONCE(c->opts.metadata_replicas); unsigned replicas_need = min_t(unsigned, replicas_want, READ_ONCE(c->opts.metadata_replicas_required)); @@ -1517,12 +1512,12 @@ retry: if (replicas >= replicas_want) goto done; - for (i = 0; i < devs_sorted.nr; i++) { - ca = rcu_dereference(c->devs[devs_sorted.devs[i]]); + darray_for_each(devs_sorted, i) { + struct bch_dev *ca = rcu_dereference(c->devs[*i]); if (!ca) continue; - ja = &ca->journal; + struct journal_device *ja = &ca->journal; if (sectors > ja->sectors_free && sectors <= ca->mi.bucket_size && -- 2.51.0