From 3bcde88d381a336ff252d67867c186ee602e6656 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 12 Mar 2025 17:21:31 -0400 Subject: [PATCH 01/16] bcachefs: fix tiny leak in bch2_dev_add() Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 6d97d412fed9..0459c875e189 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1811,7 +1811,11 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err_late; up_write(&c->state_lock); - return 0; +out: + printbuf_exit(&label); + printbuf_exit(&errbuf); + bch_err_fn(c, ret); + return ret; err_unlock: mutex_unlock(&c->sb_lock); @@ -1820,10 +1824,7 @@ err: if (ca) bch2_dev_free(ca); bch2_free_super(&sb); - printbuf_exit(&label); - printbuf_exit(&errbuf); - bch_err_fn(c, ret); - return ret; + goto out; err_late: up_write(&c->state_lock); ca = NULL; -- 2.51.0 From 69a5a13a22b1def29dce62b5b7c86e6098c20c68 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 13 Mar 2025 09:56:07 -0400 Subject: [PATCH 02/16] bcachefs: target_congested -> get_random_u32_below() get_random_u32_below() has a better algorithm than bch2_rand_range(), it just didn't exist at the time. Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 726da68073e2..aa91fcf51eec 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -59,7 +59,7 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target) } rcu_read_unlock(); - return bch2_rand_range(nr * CONGESTED_MAX) < total; + return get_random_u32_below(nr * CONGESTED_MAX) < total; } #else -- 2.51.0 From 9c18ea7ffee090b47afaa7dc41903fb1b436d7bd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 13 Mar 2025 11:16:28 -0400 Subject: [PATCH 03/16] bcachefs: bch2_get_random_u64_below() steal the (clever) algorithm from get_random_u32_below() this fixes a bug where we were passing roundup_pow_of_two() a 64 bit number - we're squaring device latencies now: [ +1.681698] ------------[ cut here ]------------ [ +0.000010] UBSAN: shift-out-of-bounds in ./include/linux/log2.h:57:13 [ +0.000011] shift exponent 64 is too large for 64-bit type 'long unsigned int' [ +0.000011] CPU: 1 UID: 0 PID: 196 Comm: kworker/u32:13 Not tainted 6.14.0-rc6-dave+ #10 [ +0.000012] Hardware name: ASUS System Product Name/PRIME B460I-PLUS, BIOS 1301 07/13/2021 [ +0.000005] Workqueue: events_unbound __bch2_read_endio [bcachefs] [ +0.000354] Call Trace: [ +0.000005] [ +0.000007] dump_stack_lvl+0x5d/0x80 [ +0.000018] ubsan_epilogue+0x5/0x30 [ +0.000008] __ubsan_handle_shift_out_of_bounds.cold+0x61/0xe6 [ +0.000011] bch2_rand_range.cold+0x17/0x20 [bcachefs] [ +0.000231] bch2_bkey_pick_read_device+0x547/0x920 [bcachefs] [ +0.000229] __bch2_read_extent+0x1e4/0x18e0 [bcachefs] [ +0.000241] ? bch2_btree_iter_peek_slot+0x3df/0x800 [bcachefs] [ +0.000180] ? bch2_read_retry_nodecode+0x270/0x330 [bcachefs] [ +0.000230] bch2_read_retry_nodecode+0x270/0x330 [bcachefs] [ +0.000230] bch2_rbio_retry+0x1fa/0x600 [bcachefs] [ +0.000224] ? bch2_printbuf_make_room+0x71/0xb0 [bcachefs] [ +0.000243] ? bch2_read_csum_err+0x4a4/0x610 [bcachefs] [ +0.000278] bch2_read_csum_err+0x4a4/0x610 [bcachefs] [ +0.000227] ? __bch2_read_endio+0x58b/0x870 [bcachefs] [ +0.000220] __bch2_read_endio+0x58b/0x870 [bcachefs] [ +0.000268] ? try_to_wake_up+0x31c/0x7f0 [ +0.000011] ? process_one_work+0x176/0x330 [ +0.000008] process_one_work+0x176/0x330 [ +0.000008] worker_thread+0x252/0x390 [ +0.000008] ? __pfx_worker_thread+0x10/0x10 [ +0.000006] kthread+0xec/0x230 [ +0.000011] ? __pfx_kthread+0x10/0x10 [ +0.000009] ret_from_fork+0x31/0x50 [ +0.000009] ? __pfx_kthread+0x10/0x10 [ +0.000008] ret_from_fork_asm+0x1a/0x30 [ +0.000012] [ +0.000046] ---[ end trace ]--- Reported-by: Roland Vet Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 2 +- fs/bcachefs/util.c | 23 ++++++++++++++--------- fs/bcachefs/util.h | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 05d5f71a7ca9..2d8042f853dc 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -99,7 +99,7 @@ static inline bool ptr_better(struct bch_fs *c, /* Pick at random, biased in favor of the faster device: */ - return bch2_rand_range(l1 + l2) > l1; + return bch2_get_random_u64_below(l1 + l2) > l1; } if (bch2_force_reconstruct_read) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index e0a876cbaa6b..8e3ab4bf79a9 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -653,19 +653,24 @@ int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) return 0; } -size_t bch2_rand_range(size_t max) +u64 bch2_get_random_u64_below(u64 ceil) { - size_t rand; + if (ceil <= U32_MAX) + return __get_random_u32_below(ceil); - if (!max) - return 0; + /* this is the same (clever) algorithm as in __get_random_u32_below() */ + u64 rand = get_random_u64(); + u64 mult = ceil * rand; - do { - rand = get_random_long(); - rand &= roundup_pow_of_two(max) - 1; - } while (rand >= max); + if (unlikely(mult < ceil)) { + u64 bound = -ceil % ceil; + while (unlikely(mult < bound)) { + rand = get_random_u64(); + mult = ceil * rand; + } + } - return rand; + return mul_u64_u64_shr(ceil, rand, 64); } void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src) diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index e7c3541b38f3..f4a4783219d9 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -401,7 +401,7 @@ do { \ _ret; \ }) -size_t bch2_rand_range(size_t); +u64 bch2_get_random_u64_below(u64); void memcpy_to_bio(struct bio *, struct bvec_iter, const void *); void memcpy_from_bio(void *, struct bio *, struct bvec_iter); -- 2.51.0 From 90fd9ad5b0c981693c8512d9da01f14fb6596e9d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 14 Mar 2025 09:54:43 -0400 Subject: [PATCH 04/16] bcachefs: Change btree wb assert to runtime error We just had a report of the assert for "btree in write buffer for non-write buffer btree" popping during the 6.14 upgrade. - 150TB filesystem, after a reboot the upgrade was able to continue from where it left off, so no major damage. But with 6.14 about to come out we want to get this tracked down asap, and need more data if other users hit this. Convert the BUG_ON() to an emergency read-only, and print out btree, the key itself, and stack trace from the original write buffer update (which did not have this check before). Reported-by: Stijn Tintel Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update.h | 8 ++++++++ fs/bcachefs/btree_write_buffer.c | 21 ++++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 8f22ef9a7651..47d8690f01bf 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -126,10 +126,18 @@ bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s) int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *); +int bch2_btree_write_buffer_insert_err(struct btree_trans *, + enum btree_id, struct bkey_i *); + static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans, enum btree_id btree, struct bkey_i *k) { + if (unlikely(!btree_type_uses_write_buffer(btree))) { + int ret = bch2_btree_write_buffer_insert_err(trans, btree, k); + dump_stack(); + return ret; + } /* * Most updates skip the btree write buffer until journal replay is * finished because synchronization with journal replay relies on having diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index b56c4987b8c9..2c09d19dd621 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -264,6 +264,22 @@ out: BUG_ON(wb->sorted.size < wb->flushing.keys.nr); } +int bch2_btree_write_buffer_insert_err(struct btree_trans *trans, + enum btree_id btree, struct bkey_i *k) +{ + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + + prt_printf(&buf, "attempting to do write buffer update on non wb btree="); + bch2_btree_id_to_text(&buf, btree); + prt_str(&buf, "\n"); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); + + bch2_fs_inconsistent(c, "%s", buf.buf); + printbuf_exit(&buf); + return -EROFS; +} + static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) { struct bch_fs *c = trans->c; @@ -312,7 +328,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) darray_for_each(wb->sorted, i) { struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx]; - BUG_ON(!btree_type_uses_write_buffer(k->btree)); + if (unlikely(!btree_type_uses_write_buffer(k->btree))) { + ret = bch2_btree_write_buffer_insert_err(trans, k->btree, &k->k); + goto err; + } for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++) prefetch(&wb->flushing.keys.data[n->idx]); -- 2.51.0 From 1a2b74d0a2a46c219b25fdb0efcf9cd7f55cfe5e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 14 Mar 2025 18:20:20 -0400 Subject: [PATCH 05/16] bcachefs: fix build on 32 bit in get_random_u64_below() bare 64 bit divides not allowed, whoops arm-linux-gnueabi-ld: drivers/char/random.o: in function `__get_random_u64_below': drivers/char/random.c:602:(.text+0xc70): undefined reference to `__aeabi_uldivmod' Signed-off-by: Kent Overstreet --- fs/bcachefs/util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 8e3ab4bf79a9..da2cd11b3025 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -663,7 +663,8 @@ u64 bch2_get_random_u64_below(u64 ceil) u64 mult = ceil * rand; if (unlikely(mult < ceil)) { - u64 bound = -ceil % ceil; + u64 bound; + div64_u64_rem(-ceil, ceil, &bound); while (unlikely(mult < bound)) { rand = get_random_u64(); mult = ceil * rand; -- 2.51.0 From be212d86b19c0d83bceeb1bec0805fd69cebf95c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 20 Feb 2025 15:03:38 -0500 Subject: [PATCH 06/16] bcachefs: bs > ps support bcachefs removed most PAGE_SIZE references long ago, so this is easy; only readpage_bio_extend() has to be tweaked to respect the minimum order. Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-io-buffered.c | 12 +++++++++++- fs/bcachefs/fs.c | 3 ++- fs/bcachefs/super-io.c | 9 --------- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index ab1d5db2fa56..d9a360782946 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -110,11 +110,21 @@ static int readpage_bio_extend(struct btree_trans *trans, if (!get_more) break; + unsigned sectors_remaining = sectors_this_extent - bio_sectors(bio); + + if (sectors_remaining < PAGE_SECTORS << mapping_min_folio_order(iter->mapping)) + break; + + unsigned order = ilog2(rounddown_pow_of_two(sectors_remaining) / PAGE_SECTORS); + + /* ensure proper alignment */ + order = min(order, __ffs(folio_offset|BIT(31))); + folio = xa_load(&iter->mapping->i_pages, folio_offset); if (folio && !xa_is_value(folio)) break; - folio = filemap_alloc_folio(readahead_gfp_mask(iter->mapping), 0); + folio = filemap_alloc_folio(readahead_gfp_mask(iter->mapping), order); if (!folio) break; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 90ade8f648d9..5d910f1c671c 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1802,7 +1802,8 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, break; } - mapping_set_large_folios(inode->v.i_mapping); + mapping_set_folio_min_order(inode->v.i_mapping, + get_order(trans->c->opts.block_size)); } static void bch2_free_inode(struct inode *vinode) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index a81a7b6c0989..5bd7bb90ee48 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -372,7 +372,6 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct bch_sb *sb = disk_sb->sb; struct bch_sb_field_members_v1 *mi; enum bch_opt_id opt_id; - u16 block_size; int ret; ret = bch2_sb_compatible(sb, out); @@ -391,14 +390,6 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, return -BCH_ERR_invalid_sb_features; } - block_size = le16_to_cpu(sb->block_size); - - if (block_size > PAGE_SECTORS) { - prt_printf(out, "Block size too big (got %u, max %u)", - block_size, PAGE_SECTORS); - return -BCH_ERR_invalid_sb_block_size; - } - if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) { prt_printf(out, "Bad user UUID (got zeroes)"); return -BCH_ERR_invalid_sb_uuid; -- 2.51.0 From 2deae558043392e76ec64642e8fd0db3cce987a3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 25 Feb 2025 15:04:58 -0500 Subject: [PATCH 07/16] bcachefs: btree_node_(rewrite|update_key) cleanup Factor out get_iter_to_node() and use it for btree_node_rewrite_get_iter(), to be used for fixing btree node write error behaviour. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 112 ++++++++++++++-------------- fs/bcachefs/btree_update_interior.h | 3 + fs/bcachefs/errcode.h | 1 + 3 files changed, 61 insertions(+), 55 deletions(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index e4e7c804625e..05aa9e32adf4 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2126,6 +2126,31 @@ err_free_update: goto out; } +static int get_iter_to_node(struct btree_trans *trans, struct btree_iter *iter, + struct btree *b) +{ + bch2_trans_node_iter_init(trans, iter, b->c.btree_id, b->key.k.p, + BTREE_MAX_DEPTH, b->c.level, + BTREE_ITER_intent); + int ret = bch2_btree_iter_traverse(iter); + if (ret) + goto err; + + /* has node been freed? */ + if (btree_iter_path(trans, iter)->l[b->c.level].b != b) { + /* node has been freed: */ + BUG_ON(!btree_node_dying(b)); + ret = -BCH_ERR_btree_node_dying; + goto err; + } + + BUG_ON(!btree_node_hashed(b)); + return 0; +err: + bch2_trans_iter_exit(trans, iter); + return ret; +} + int bch2_btree_node_rewrite(struct btree_trans *trans, struct btree_iter *iter, struct btree *b, @@ -2191,66 +2216,58 @@ err: goto out; } -struct async_btree_rewrite { - struct bch_fs *c; - struct work_struct work; - struct list_head list; - enum btree_id btree_id; - unsigned level; - struct bkey_buf key; -}; - -static int async_btree_node_rewrite_trans(struct btree_trans *trans, - struct async_btree_rewrite *a) +static int bch2_btree_node_rewrite_key(struct btree_trans *trans, + enum btree_id btree, unsigned level, + struct bkey_i *k, unsigned flags) { struct btree_iter iter; bch2_trans_node_iter_init(trans, &iter, - a->btree_id, a->key.k->k.p, - BTREE_MAX_DEPTH, a->level, 0); + btree, k->k.p, + BTREE_MAX_DEPTH, level, 0); struct btree *b = bch2_btree_iter_peek_node(&iter); int ret = PTR_ERR_OR_ZERO(b); if (ret) goto out; - bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(a->key.k); + bool found = b && btree_ptr_hash_val(&b->key) == btree_ptr_hash_val(k); ret = found - ? bch2_btree_node_rewrite(trans, &iter, b, 0) + ? bch2_btree_node_rewrite(trans, &iter, b, flags) : -ENOENT; +out: + bch2_trans_iter_exit(trans, &iter); + return ret; +} -#if 0 - /* Tracepoint... */ - if (!ret || ret == -ENOENT) { - struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; +int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *trans, + struct btree *b, unsigned flags) +{ + struct btree_iter iter; + int ret = get_iter_to_node(trans, &iter, b); + if (ret) + return ret == -BCH_ERR_btree_node_dying ? 0 : ret; - if (!ret) { - prt_printf(&buf, "rewrite node:\n "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k)); - } else { - prt_printf(&buf, "node to rewrite not found:\n want: "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(a->key.k)); - prt_printf(&buf, "\n got: "); - if (b) - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); - else - prt_str(&buf, "(null)"); - } - bch_info(c, "%s", buf.buf); - printbuf_exit(&buf); - } -#endif -out: + ret = bch2_btree_node_rewrite(trans, &iter, b, flags); bch2_trans_iter_exit(trans, &iter); return ret; } +struct async_btree_rewrite { + struct bch_fs *c; + struct work_struct work; + struct list_head list; + enum btree_id btree_id; + unsigned level; + struct bkey_buf key; +}; + static void async_btree_node_rewrite_work(struct work_struct *work) { struct async_btree_rewrite *a = container_of(work, struct async_btree_rewrite, work); struct bch_fs *c = a->c; - int ret = bch2_trans_do(c, async_btree_node_rewrite_trans(trans, a)); + int ret = bch2_trans_do(c, bch2_btree_node_rewrite_key(trans, + a->btree_id, a->level, a->key.k, 0)); if (ret != -ENOENT) bch_err_fn_ratelimited(c, ret); @@ -2494,30 +2511,15 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *trans, unsigned commit_flags, bool skip_triggers) { struct btree_iter iter; - int ret; - - bch2_trans_node_iter_init(trans, &iter, b->c.btree_id, b->key.k.p, - BTREE_MAX_DEPTH, b->c.level, - BTREE_ITER_intent); - ret = bch2_btree_iter_traverse(&iter); + int ret = get_iter_to_node(trans, &iter, b); if (ret) - goto out; - - /* has node been freed? */ - if (btree_iter_path(trans, &iter)->l[b->c.level].b != b) { - /* node has been freed: */ - BUG_ON(!btree_node_dying(b)); - goto out; - } - - BUG_ON(!btree_node_hashed(b)); + return ret == -BCH_ERR_btree_node_dying ? 0 : ret; bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr, !bch2_bkey_has_device(bkey_i_to_s(&b->key), ptr->dev)); ret = bch2_btree_node_update_key(trans, &iter, b, new_key, commit_flags, skip_triggers); -out: bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 26d646e1275c..9261a9a341fb 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -169,6 +169,9 @@ static inline int bch2_foreground_maybe_merge(struct btree_trans *trans, int bch2_btree_node_rewrite(struct btree_trans *, struct btree_iter *, struct btree *, unsigned); +int bch2_btree_node_rewrite_key_get_iter(struct btree_trans *, + struct btree *, unsigned); + void bch2_btree_node_rewrite_async(struct bch_fs *, struct btree *); int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *, struct btree *, struct bkey_i *, diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 4590cd0c7c90..712877036612 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -119,6 +119,7 @@ x(ENOENT, ENOENT_dev_idx_not_found) \ x(ENOENT, ENOENT_inode_no_backpointer) \ x(ENOENT, ENOENT_no_snapshot_tree_subvol) \ + x(ENOENT, btree_node_dying) \ x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \ x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \ x(EEXIST, EEXIST_str_hash_set) \ -- 2.51.0 From c3c9957c818f1cb2de2865f223fad80afa28ffad Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 11 Feb 2025 13:33:08 -0500 Subject: [PATCH 08/16] bcachefs: check_bp_exists() check for backpointers for stale pointers Early version of 'bcachefs_metadata_version_cached_backpointers' was creating backpointers for stale cached pointers - whoops. Now we have to repair those. Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index ebeb6a5ff9d2..1d30066e63dc 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -514,6 +514,22 @@ check_existing_bp: if (!other_extent.k) goto missing; + rcu_read_lock(); + struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp->k.p.inode); + if (ca) { + struct bkey_ptrs_c other_extent_ptrs = bch2_bkey_ptrs_c(other_extent); + bkey_for_each_ptr(other_extent_ptrs, ptr) + if (ptr->dev == bp->k.p.inode && + dev_ptr_stale_rcu(ca, ptr)) { + ret = drop_dev_and_update(trans, other_bp.v->btree_id, + other_extent, bp->k.p.inode); + if (ret) + goto err; + goto out; + } + } + rcu_read_unlock(); + if (bch2_extents_match(orig_k, other_extent)) { printbuf_reset(&buf); prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n "); -- 2.51.0 From e5a63ad343cc19c64875f2496ce5f7b992ef0c32 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 29 Jan 2025 15:51:37 -0500 Subject: [PATCH 09/16] bcachefs: Fix missing increment of move_extent_write counter Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 642fbc60ecab..e2050256136e 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -436,6 +436,8 @@ void bch2_data_update_read_done(struct data_update *m, m->op.crc = crc; m->op.wbio.bio.bi_iter.bi_size = crc.compressed_size << 9; + this_cpu_add(m->op.c->counters[BCH_COUNTER_move_extent_write], m->k.k->k.size); + closure_call(&m->op.cl, bch2_write, NULL, NULL); } -- 2.51.0 From 55a132c37acded02391664c108fcea5006c72fd5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 30 Jan 2025 03:41:31 -0500 Subject: [PATCH 10/16] bcachefs: Don't inc io_(read|write) counters for moves This makes 'bcachefs fs top' more useful; we can now see at a glance whether the IO to the device is being done for user reads/writes, or copygc/rebalance. Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 3 ++- fs/bcachefs/io_write.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index aa91fcf51eec..b2b0280d8365 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -1091,7 +1091,8 @@ get_bio: if (rbio->bounce) trace_and_count(c, read_bounce, &rbio->bio); - this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio)); + if (!(flags & BCH_READ_NODECODE)) + this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio)); bch2_increment_clock(c, bio_sectors(&rbio->bio), READ); /* diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 03892388832b..970f3f0959a4 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1644,7 +1644,8 @@ CLOSURE_CALLBACK(bch2_write) goto err; } - this_cpu_add(c->counters[BCH_COUNTER_io_write], bio_sectors(bio)); + if (!(op->flags & BCH_WRITE_MOVE)) + this_cpu_add(c->counters[BCH_COUNTER_io_write], bio_sectors(bio)); bch2_increment_clock(c, bio_sectors(bio), WRITE); data_len = min_t(u64, bio->bi_iter.bi_size, -- 2.51.0 From 78c9c6f6cd25c2aabc29aa8129996031fdfeb31b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 22 Jan 2025 12:07:54 -0500 Subject: [PATCH 11/16] bcachefs: Move write_points to debugfs this was hitting the sysfs 4k limit Signed-off-by: Kent Overstreet --- fs/bcachefs/debug.c | 32 +++++++++++++++++++++++++++++--- fs/bcachefs/sysfs.c | 5 ----- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 55333e82d1fe..03a3b62d19a9 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -7,6 +7,7 @@ */ #include "bcachefs.h" +#include "alloc_foreground.h" #include "bkey_methods.h" #include "btree_cache.h" #include "btree_io.h" @@ -844,8 +845,11 @@ restart: seqmutex_unlock(&c->btree_trans_lock); } -static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, - size_t size, loff_t *ppos) +typedef void (*fs_to_text_fn)(struct printbuf *, struct bch_fs *); + +static ssize_t bch2_simple_print(struct file *file, char __user *buf, + size_t size, loff_t *ppos, + fs_to_text_fn fn) { struct dump_iter *i = file->private_data; struct bch_fs *c = i->c; @@ -856,7 +860,7 @@ static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, i->ret = 0; if (!i->iter) { - btree_deadlock_to_text(&i->buf, c); + fn(&i->buf, c); i->iter++; } @@ -869,6 +873,12 @@ static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, return ret ?: i->ret; } +static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + return bch2_simple_print(file, buf, size, ppos, btree_deadlock_to_text); +} + static const struct file_operations btree_deadlock_ops = { .owner = THIS_MODULE, .open = bch2_dump_open, @@ -876,6 +886,19 @@ static const struct file_operations btree_deadlock_ops = { .read = bch2_btree_deadlock_read, }; +static ssize_t bch2_write_points_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + return bch2_simple_print(file, buf, size, ppos, bch2_write_points_to_text); +} + +static const struct file_operations write_points_ops = { + .owner = THIS_MODULE, + .open = bch2_dump_open, + .release = bch2_dump_release, + .read = bch2_write_points_read, +}; + void bch2_fs_debug_exit(struct bch_fs *c) { if (!IS_ERR_OR_NULL(c->fs_debug_dir)) @@ -927,6 +950,9 @@ void bch2_fs_debug_init(struct bch_fs *c) debugfs_create_file("btree_deadlock", 0400, c->fs_debug_dir, c->btree_debug, &btree_deadlock_ops); + debugfs_create_file("write_points", 0400, c->fs_debug_dir, + c->btree_debug, &write_points_ops); + c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir); if (IS_ERR_OR_NULL(c->btree_debug_dir)) return; diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index a7eb1f511484..b3f2c651c1f8 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -176,7 +176,6 @@ read_attribute(btree_reserve_cache); read_attribute(stripes_heap); read_attribute(open_buckets); read_attribute(open_buckets_partial); -read_attribute(write_points); read_attribute(nocow_lock_table); #ifdef BCH_WRITE_REF_DEBUG @@ -364,9 +363,6 @@ SHOW(bch2_fs) if (attr == &sysfs_open_buckets_partial) bch2_open_buckets_partial_to_text(out, c); - if (attr == &sysfs_write_points) - bch2_write_points_to_text(out, c); - if (attr == &sysfs_compression_stats) bch2_compression_stats_to_text(out, c); @@ -569,7 +565,6 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_stripes_heap, &sysfs_open_buckets, &sysfs_open_buckets_partial, - &sysfs_write_points, #ifdef BCH_WRITE_REF_DEBUG &sysfs_write_refs, #endif -- 2.51.0 From 999cc1bb6888c484fbd5dd9c8397c200772bd243 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 22 Jan 2025 14:38:59 -0500 Subject: [PATCH 12/16] bcachefs: Separate running/runnable in wp stats We've got per-writepoint statistics to see how well the writepoint index update threads are pipelining; this separates running vs. runnable so we can see at a glance if they're blocking. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_types.h | 2 ++ fs/bcachefs/io_write.c | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h index 4aa8ee026cb8..8f79f46c2a78 100644 --- a/fs/bcachefs/alloc_types.h +++ b/fs/bcachefs/alloc_types.h @@ -90,6 +90,7 @@ struct dev_stripe_state { x(stopped) \ x(waiting_io) \ x(waiting_work) \ + x(runnable) \ x(running) enum write_point_state { @@ -125,6 +126,7 @@ struct write_point { enum write_point_state state; u64 last_state_change; u64 time[WRITE_POINT_STATE_NR]; + u64 last_runtime; } __aligned(SMP_CACHE_BYTES); }; diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 970f3f0959a4..a903f39caa3e 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -587,7 +587,15 @@ err: static inline void __wp_update_state(struct write_point *wp, enum write_point_state state) { if (state != wp->state) { + struct task_struct *p = current; u64 now = ktime_get_ns(); + u64 runtime = p->se.sum_exec_runtime + + (now - p->se.exec_start); + + if (state == WRITE_POINT_runnable) + wp->last_runtime = runtime; + else if (wp->state == WRITE_POINT_runnable) + wp->time[WRITE_POINT_running] += runtime - wp->last_runtime; if (wp->last_state_change && time_after64(now, wp->last_state_change)) @@ -601,7 +609,7 @@ static inline void wp_update_state(struct write_point *wp, bool running) { enum write_point_state state; - state = running ? WRITE_POINT_running : + state = running ? WRITE_POINT_runnable: !list_empty(&wp->writes) ? WRITE_POINT_waiting_io : WRITE_POINT_stopped; -- 2.51.0 From bbd804f2ad3034ac1053b9188e6bb3d705570e25 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 30 Jan 2025 03:28:27 -0500 Subject: [PATCH 13/16] bcachefs: enum bch_persistent_counters_stable Persistent counters, like recovery passes, include a stable enum in their definition - but this was never correctly plumbed. This allows us to add new counters and properly organize them with a non-stable "presentation order", which can also be used in userspace by the new 'bcachefs fs top' tool. Fortunatel, since we haven't yet added any new counters where presentation order ID doesn't match stable ID, this won't cause any reordering issues. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-counters.c | 51 ++++++++++++++++++-------------- fs/bcachefs/sb-counters_format.h | 7 +++++ 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/fs/bcachefs/sb-counters.c b/fs/bcachefs/sb-counters.c index 6992e7469112..5153a47ec7d4 100644 --- a/fs/bcachefs/sb-counters.c +++ b/fs/bcachefs/sb-counters.c @@ -5,11 +5,10 @@ /* BCH_SB_FIELD_counters */ -static const char * const bch2_counter_names[] = { -#define x(t, n, ...) (#t), +static const u8 counters_to_stable_map[] = { +#define x(n, id, ...) [BCH_COUNTER_##n] = BCH_COUNTER_STABLE_##n, BCH_PERSISTENT_COUNTERS() #undef x - NULL }; static size_t bch2_sb_counter_nr_entries(struct bch_sb_field_counters *ctrs) @@ -18,13 +17,13 @@ static size_t bch2_sb_counter_nr_entries(struct bch_sb_field_counters *ctrs) return 0; return (__le64 *) vstruct_end(&ctrs->field) - &ctrs->d[0]; -}; +} static int bch2_sb_counters_validate(struct bch_sb *sb, struct bch_sb_field *f, enum bch_validate_flags flags, struct printbuf *err) { return 0; -}; +} static void bch2_sb_counters_to_text(struct printbuf *out, struct bch_sb *sb, struct bch_sb_field *f) @@ -32,50 +31,56 @@ static void bch2_sb_counters_to_text(struct printbuf *out, struct bch_sb *sb, struct bch_sb_field_counters *ctrs = field_to_type(f, counters); unsigned int nr = bch2_sb_counter_nr_entries(ctrs); - for (unsigned i = 0; i < nr; i++) - prt_printf(out, "%s \t%llu\n", - i < BCH_COUNTER_NR ? bch2_counter_names[i] : "(unknown)", - le64_to_cpu(ctrs->d[i])); -}; + for (unsigned i = 0; i < BCH_COUNTER_NR; i++) { + unsigned stable = counters_to_stable_map[i]; + if (stable < nr) + prt_printf(out, "%s \t%llu\n", + bch2_counter_names[i], + le64_to_cpu(ctrs->d[stable])); + } +} int bch2_sb_counters_to_cpu(struct bch_fs *c) { struct bch_sb_field_counters *ctrs = bch2_sb_field_get(c->disk_sb.sb, counters); - unsigned int i; unsigned int nr = bch2_sb_counter_nr_entries(ctrs); - u64 val = 0; - for (i = 0; i < BCH_COUNTER_NR; i++) + for (unsigned i = 0; i < BCH_COUNTER_NR; i++) c->counters_on_mount[i] = 0; - for (i = 0; i < min_t(unsigned int, nr, BCH_COUNTER_NR); i++) { - val = le64_to_cpu(ctrs->d[i]); - percpu_u64_set(&c->counters[i], val); - c->counters_on_mount[i] = val; + for (unsigned i = 0; i < BCH_COUNTER_NR; i++) { + unsigned stable = counters_to_stable_map[i]; + if (stable < nr) { + u64 v = le64_to_cpu(ctrs->d[stable]); + percpu_u64_set(&c->counters[i], v); + c->counters_on_mount[i] = v; + } } + return 0; -}; +} int bch2_sb_counters_from_cpu(struct bch_fs *c) { struct bch_sb_field_counters *ctrs = bch2_sb_field_get(c->disk_sb.sb, counters); struct bch_sb_field_counters *ret; - unsigned int i; unsigned int nr = bch2_sb_counter_nr_entries(ctrs); if (nr < BCH_COUNTER_NR) { ret = bch2_sb_field_resize(&c->disk_sb, counters, - sizeof(*ctrs) / sizeof(u64) + BCH_COUNTER_NR); - + sizeof(*ctrs) / sizeof(u64) + BCH_COUNTER_NR); if (ret) { ctrs = ret; nr = bch2_sb_counter_nr_entries(ctrs); } } + for (unsigned i = 0; i < BCH_COUNTER_NR; i++) { + unsigned stable = counters_to_stable_map[i]; + if (stable < nr) + ctrs->d[stable] = cpu_to_le64(percpu_u64_get(&c->counters[i])); + } - for (i = 0; i < min_t(unsigned int, nr, BCH_COUNTER_NR); i++) - ctrs->d[i] = cpu_to_le64(percpu_u64_get(&c->counters[i])); return 0; } diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h index fdcf598f08b1..cb44d9ee1ac5 100644 --- a/fs/bcachefs/sb-counters_format.h +++ b/fs/bcachefs/sb-counters_format.h @@ -95,6 +95,13 @@ enum bch_persistent_counters { BCH_COUNTER_NR }; +enum bch_persistent_counters_stable { +#define x(t, n, ...) BCH_COUNTER_STABLE_##t = n, + BCH_PERSISTENT_COUNTERS() +#undef x + BCH_COUNTER_STABLE_NR +}; + struct bch_sb_field_counters { struct bch_sb_field field; __le64 d[]; -- 2.51.0 From 5ee760f667e09b95a19beea265077eb2f69cd12b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 30 Jan 2025 03:33:16 -0500 Subject: [PATCH 14/16] bcachefs: BCH_COUNTER_bucket_discard_fast Add a separate counter for fastpath bucket discards, which don't require a journal flush. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 5 ++++- fs/bcachefs/sb-counters_format.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 3ea809990ef1..43c29b0d2d20 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1897,7 +1897,10 @@ commit: if (ret) goto out; - count_event(c, bucket_discard); + if (!fastpath) + count_event(c, bucket_discard); + else + count_event(c, bucket_discard_fast); out: fsck_err: if (discard_locked) diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h index cb44d9ee1ac5..d0391c5d4c48 100644 --- a/fs/bcachefs/sb-counters_format.h +++ b/fs/bcachefs/sb-counters_format.h @@ -13,6 +13,7 @@ enum counters_flags { x(io_move, 2, TYPE_SECTORS) \ x(bucket_invalidate, 3, TYPE_COUNTER) \ x(bucket_discard, 4, TYPE_COUNTER) \ + x(bucket_discard_fast, 79, TYPE_COUNTER) \ x(bucket_alloc, 5, TYPE_COUNTER) \ x(bucket_alloc_fail, 6, TYPE_COUNTER) \ x(btree_cache_scan, 7, TYPE_COUNTER) \ -- 2.51.0 From 50ca857457e0a983bc6f881fcb1c47f8322a2c48 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 26 Jan 2025 22:05:02 -0500 Subject: [PATCH 15/16] bcachefs: BCH_IOCTL_QUERY_COUNTERS Add an ioctl for querying counters, the same ones provided in /sys/fs/bcachefs//counters/, but more suitable for a 'bcachefs top' command. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_ioctl.h | 10 +++++++++ fs/bcachefs/chardev.c | 3 +++ fs/bcachefs/sb-counters.c | 43 ++++++++++++++++++++++++++++++++++++ fs/bcachefs/sb-counters.h | 4 ++++ 4 files changed, 60 insertions(+) diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h index 3c23bdf788ce..f1b746fac007 100644 --- a/fs/bcachefs/bcachefs_ioctl.h +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -87,6 +87,7 @@ struct bch_ioctl_incremental { #define BCH_IOCTL_FSCK_OFFLINE _IOW(0xbc, 19, struct bch_ioctl_fsck_offline) #define BCH_IOCTL_FSCK_ONLINE _IOW(0xbc, 20, struct bch_ioctl_fsck_online) #define BCH_IOCTL_QUERY_ACCOUNTING _IOW(0xbc, 21, struct bch_ioctl_query_accounting) +#define BCH_IOCTL_QUERY_COUNTERS _IOW(0xbc, 21, struct bch_ioctl_query_counters) /* ioctl below act on a particular file, not the filesystem as a whole: */ @@ -443,4 +444,13 @@ struct bch_ioctl_query_accounting { struct bkey_i_accounting accounting[]; }; +#define BCH_IOCTL_QUERY_COUNTERS_MOUNT (1 << 0) + +struct bch_ioctl_query_counters { + __u16 nr; + __u16 flags; + __u32 pad; + __u64 d[]; +}; + #endif /* _BCACHEFS_IOCTL_H */ diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 46e9e32105a9..bab49d5ee598 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -11,6 +11,7 @@ #include "move.h" #include "recovery_passes.h" #include "replicas.h" +#include "sb-counters.h" #include "super-io.h" #include "thread_with_file.h" @@ -710,6 +711,8 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) BCH_IOCTL(fsck_online, struct bch_ioctl_fsck_online); case BCH_IOCTL_QUERY_ACCOUNTING: return bch2_ioctl_query_accounting(c, arg); + case BCH_IOCTL_QUERY_COUNTERS: + return bch2_ioctl_query_counters(c, arg); default: return -ENOTTY; } diff --git a/fs/bcachefs/sb-counters.c b/fs/bcachefs/sb-counters.c index 5153a47ec7d4..2b4b8445d418 100644 --- a/fs/bcachefs/sb-counters.c +++ b/fs/bcachefs/sb-counters.c @@ -11,6 +11,13 @@ static const u8 counters_to_stable_map[] = { #undef x }; +const char * const bch2_counter_names[] = { +#define x(t, n, ...) (#t), + BCH_PERSISTENT_COUNTERS() +#undef x + NULL +}; + static size_t bch2_sb_counter_nr_entries(struct bch_sb_field_counters *ctrs) { if (!ctrs) @@ -102,3 +109,39 @@ const struct bch_sb_field_ops bch_sb_field_ops_counters = { .validate = bch2_sb_counters_validate, .to_text = bch2_sb_counters_to_text, }; + +#ifndef NO_BCACHEFS_CHARDEV +long bch2_ioctl_query_counters(struct bch_fs *c, + struct bch_ioctl_query_counters __user *user_arg) +{ + struct bch_ioctl_query_counters arg; + int ret = copy_from_user_errcode(&arg, user_arg, sizeof(arg)); + if (ret) + return ret; + + if ((arg.flags & ~BCH_IOCTL_QUERY_COUNTERS_MOUNT) || + arg.pad) + return -EINVAL; + + arg.nr = min(arg.nr, BCH_COUNTER_NR); + ret = put_user(arg.nr, &user_arg->nr); + if (ret) + return ret; + + for (unsigned i = 0; i < BCH_COUNTER_NR; i++) { + unsigned stable = counters_to_stable_map[i]; + + if (stable < arg.nr) { + u64 v = !(arg.flags & BCH_IOCTL_QUERY_COUNTERS_MOUNT) + ? percpu_u64_get(&c->counters[i]) + : c->counters_on_mount[i]; + + ret = put_user(v, &user_arg->d[stable]); + if (ret) + return ret; + } + } + + return 0; +} +#endif diff --git a/fs/bcachefs/sb-counters.h b/fs/bcachefs/sb-counters.h index 81f8aec9fcb1..a4329ad8dd1b 100644 --- a/fs/bcachefs/sb-counters.h +++ b/fs/bcachefs/sb-counters.h @@ -11,6 +11,10 @@ int bch2_sb_counters_from_cpu(struct bch_fs *); void bch2_fs_counters_exit(struct bch_fs *); int bch2_fs_counters_init(struct bch_fs *); +extern const char * const bch2_counter_names[]; extern const struct bch_sb_field_ops bch_sb_field_ops_counters; +long bch2_ioctl_query_counters(struct bch_fs *, + struct bch_ioctl_query_counters __user *); + #endif // _BCACHEFS_SB_COUNTERS_H -- 2.51.0 From 3075e68d268844b50b7a8a078510d1c960a1325f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 31 Dec 2024 18:16:17 -0500 Subject: [PATCH 16/16] bcachefs: bch2_data_update_inflight_to_text() Add a new helper for bch2_moving_ctxt_to_text(), which may be used to debug if moving_ios are getting stuck. Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 17 ++++++++++++++++- fs/bcachefs/data_update.h | 2 ++ fs/bcachefs/move.c | 5 ++--- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index e2050256136e..1cfb86823fdf 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -430,6 +430,8 @@ int bch2_data_update_index_update(struct bch_write_op *op) void bch2_data_update_read_done(struct data_update *m, struct bch_extent_crc_unpacked crc) { + m->read_done = true; + /* write bio must own pages: */ BUG_ON(!m->op.wbio.bio.bi_vcnt); @@ -541,7 +543,8 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { - printbuf_tabstop_push(out, 20); + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 20); prt_str_indented(out, "rewrite ptrs:\t"); bch2_prt_u64_base2(out, data_opts->rewrite_ptrs); @@ -565,6 +568,7 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, prt_str_indented(out, "extra replicas:\t"); prt_u64(out, data_opts->extra_replicas); + prt_newline(out); } void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) @@ -576,6 +580,17 @@ void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); } +void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update *m) +{ + bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k)); + prt_newline(out); + printbuf_indent_add(out, 2); + bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); + prt_printf(out, "read_done:\t\%u\n", m->read_done); + bch2_write_op_to_text(out, &m->op); + printbuf_indent_sub(out, 2); +} + int bch2_extent_drop_ptrs(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index e4b50723428e..7a200e6b770b 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -22,6 +22,7 @@ void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *, struct data_update { /* extent being updated: */ + bool read_done; enum btree_id btree_id; struct bkey_buf k; struct data_update_opts data_opts; @@ -31,6 +32,7 @@ struct data_update { }; void bch2_data_update_to_text(struct printbuf *, struct data_update *); +void bch2_data_update_inflight_to_text(struct printbuf *, struct data_update *); int bch2_data_update_index_update(struct bch_write_op *); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 160b4374160a..2e09f980f3f5 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -88,13 +88,12 @@ static void move_free(struct moving_io *io) if (io->b) atomic_dec(&io->b->count); - bch2_data_update_exit(&io->write); - mutex_lock(&ctxt->lock); list_del(&io->io_list); wake_up(&ctxt->wait); mutex_unlock(&ctxt->lock); + bch2_data_update_exit(&io->write); kfree(io); } @@ -1216,7 +1215,7 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str mutex_lock(&ctxt->lock); list_for_each_entry(io, &ctxt->ios, io_list) - bch2_write_op_to_text(out, &io->write.op); + bch2_data_update_inflight_to_text(out, &io->write); mutex_unlock(&ctxt->lock); printbuf_indent_sub(out, 4); -- 2.51.0