From d02755b8c5f38e737037e0ff0820eb66ab63c4f5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Apr 2025 14:29:31 -0400 Subject: [PATCH] bcachefs: trace bch2_trans_kmalloc() We're occasionally seeing the WARN_ON() for bump allocator usage exceeding BTREE_TRANS_MEM_MAX; add some tracing so we can see what's going on. Signed-off-by: Kent Overstreet --- fs/bcachefs/Kconfig | 4 +++ fs/bcachefs/bcachefs.h | 3 ++ fs/bcachefs/btree_iter.c | 51 ++++++++++++++++++++++++++++++++-- fs/bcachefs/btree_iter.h | 56 +++++++++++++++++++++++++++++--------- fs/bcachefs/btree_types.h | 9 ++++++ fs/bcachefs/btree_update.h | 4 +-- fs/bcachefs/debug.c | 6 ++++ 7 files changed, 115 insertions(+), 18 deletions(-) diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index 07709b0d7688..a14e4a60b187 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -103,6 +103,10 @@ config BCACHEFS_PATH_TRACEPOINTS Enable extra tracepoints for debugging btree_path operations; we don't normally want these enabled because they happen at very high rates. +config BCACHEFS_TRANS_KMALLOC_TRACE + bool "Trace bch2_trans_kmalloc() calls" + depends on BCACHEFS_FS + config MEAN_AND_VARIANCE_UNIT_TEST tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 75f7408da173..24eed2b3be4d 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -650,6 +650,9 @@ struct btree_transaction_stats { unsigned nr_max_paths; unsigned journal_entries_size; unsigned max_mem; +#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE + darray_trans_kmalloc_trace trans_kmalloc_trace; +#endif char *max_paths_text; }; diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index ac5f2046550d..cfd6363dfc39 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -3095,7 +3095,19 @@ void bch2_trans_copy_iter(struct btree_trans *trans, dst->key_cache_path = 0; } -void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) +#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE +void bch2_trans_kmalloc_trace_to_text(struct printbuf *out, + darray_trans_kmalloc_trace *trace) +{ + printbuf_tabstops_reset(out); + printbuf_tabstop_push(out, 60); + + darray_for_each(*trace, i) + prt_printf(out, "%pS\t%zu\n", (void *) i->ip, i->bytes); +} +#endif + +void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long ip) { struct bch_fs *c = trans->c; unsigned new_top = trans->mem_top + size; @@ -3105,14 +3117,35 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) void *new_mem; void *p; - WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX); + if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) { +#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE + struct printbuf buf = PRINTBUF; + bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace); + bch2_print_string_as_lines(KERN_ERR, buf.buf); + printbuf_exit(&buf); +#endif + } ret = trans_maybe_inject_restart(trans, _RET_IP_); if (ret) return ERR_PTR(ret); struct btree_transaction_stats *s = btree_trans_stats(trans); - s->max_mem = max(s->max_mem, new_bytes); + if (new_bytes > s->max_mem) { + mutex_lock(&s->lock); +#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE + darray_resize(&s->trans_kmalloc_trace, trans->trans_kmalloc_trace.nr); + s->trans_kmalloc_trace.nr = min(s->trans_kmalloc_trace.size, + trans->trans_kmalloc_trace.nr); + + memcpy(s->trans_kmalloc_trace.data, + trans->trans_kmalloc_trace.data, + sizeof(s->trans_kmalloc_trace.data[0]) * + s->trans_kmalloc_trace.nr); +#endif + s->max_mem = new_bytes; + mutex_unlock(&s->lock); + } if (trans->used_mempool) { if (trans->mem_bytes >= new_bytes) @@ -3172,6 +3205,8 @@ out_new_mem: BCH_ERR_transaction_restart_mem_realloced, _RET_IP_)); } out_change_top: + bch2_trans_kmalloc_trace(trans, size, ip); + p = trans->mem + trans->mem_top; trans->mem_top += size; memset(p, 0, size); @@ -3285,6 +3320,10 @@ u32 bch2_trans_begin(struct btree_trans *trans) } #endif +#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE + trans->trans_kmalloc_trace.nr = 0; +#endif + trans_set_locked(trans, false); if (trans->restarted) { @@ -3454,6 +3493,9 @@ void bch2_trans_put(struct btree_trans *trans) #ifdef CONFIG_BCACHEFS_DEBUG darray_exit(&trans->last_restarted_trace); #endif +#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE + darray_exit(&trans->trans_kmalloc_trace); +#endif unsigned long *paths_allocated = trans->paths_allocated; trans->paths_allocated = NULL; @@ -3608,6 +3650,9 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c) for (s = c->btree_transaction_stats; s < c->btree_transaction_stats + ARRAY_SIZE(c->btree_transaction_stats); s++) { +#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE + darray_exit(&s->trans_kmalloc_trace); +#endif kfree(s->max_paths_text); bch2_time_stats_exit(&s->lock_hold_times); } diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 9d2cccf5d21a..78a805a89860 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -543,43 +543,73 @@ void bch2_trans_copy_iter(struct btree_trans *, struct btree_iter *, struct btre void bch2_set_btree_iter_dontneed(struct btree_trans *, struct btree_iter *); -void *__bch2_trans_kmalloc(struct btree_trans *, size_t); +#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE +void bch2_trans_kmalloc_trace_to_text(struct printbuf *, + darray_trans_kmalloc_trace *); +#endif -/** - * bch2_trans_kmalloc - allocate memory for use by the current transaction - * - * Must be called after bch2_trans_begin, which on second and further calls - * frees all memory allocated in this transaction - */ -static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) +void *__bch2_trans_kmalloc(struct btree_trans *, size_t, unsigned long); + +static inline void bch2_trans_kmalloc_trace(struct btree_trans *trans, size_t size, + unsigned long ip) +{ +#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE + darray_push(&trans->trans_kmalloc_trace, + ((struct trans_kmalloc_trace) { .ip = ip, .bytes = size })); +#endif +} + +static __always_inline void *bch2_trans_kmalloc_nomemzero_ip(struct btree_trans *trans, size_t size, + unsigned long ip) { size = roundup(size, 8); + bch2_trans_kmalloc_trace(trans, size, ip); + if (likely(trans->mem_top + size <= trans->mem_bytes)) { void *p = trans->mem + trans->mem_top; trans->mem_top += size; - memset(p, 0, size); return p; } else { - return __bch2_trans_kmalloc(trans, size); + return __bch2_trans_kmalloc(trans, size, ip); } } -static inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size) +static __always_inline void *bch2_trans_kmalloc_ip(struct btree_trans *trans, size_t size, + unsigned long ip) { - size = round_up(size, 8); + size = roundup(size, 8); + + bch2_trans_kmalloc_trace(trans, size, ip); if (likely(trans->mem_top + size <= trans->mem_bytes)) { void *p = trans->mem + trans->mem_top; trans->mem_top += size; + memset(p, 0, size); return p; } else { - return __bch2_trans_kmalloc(trans, size); + return __bch2_trans_kmalloc(trans, size, ip); } } +/** + * bch2_trans_kmalloc - allocate memory for use by the current transaction + * + * Must be called after bch2_trans_begin, which on second and further calls + * frees all memory allocated in this transaction + */ +static __always_inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size) +{ + return bch2_trans_kmalloc_ip(trans, size, _THIS_IP_); +} + +static __always_inline void *bch2_trans_kmalloc_nomemzero(struct btree_trans *trans, size_t size) +{ + return bch2_trans_kmalloc_nomemzero_ip(trans, size, _THIS_IP_); +} + static inline struct bkey_s_c __bch2_bkey_get_iter(struct btree_trans *trans, struct btree_iter *iter, unsigned btree_id, struct bpos pos, diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 023c472dc9ee..81175c1344d2 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -477,6 +477,12 @@ struct btree_trans_paths { struct btree_path paths[]; }; +struct trans_kmalloc_trace { + unsigned long ip; + size_t bytes; +}; +typedef DARRAY(struct trans_kmalloc_trace) darray_trans_kmalloc_trace; + struct btree_trans { struct bch_fs *c; @@ -488,6 +494,9 @@ struct btree_trans { void *mem; unsigned mem_top; unsigned mem_bytes; +#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE + darray_trans_kmalloc_trace trans_kmalloc_trace; +#endif btree_path_idx_t nr_sorted; btree_path_idx_t nr_paths; diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 568e56c91190..e674419c299e 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -222,7 +222,7 @@ static inline void bch2_trans_reset_updates(struct btree_trans *trans) trans->extra_disk_res = 0; } -static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k, +static __always_inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k, unsigned type, unsigned min_bytes) { unsigned bytes = max_t(unsigned, min_bytes, bkey_bytes(k.k)); @@ -245,7 +245,7 @@ static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *t return mut; } -static inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k) +static __always_inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k) { return __bch2_bkey_make_mut_noupdate(trans, k, 0, 0); } diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 5a8bc7013512..2c52a2c6502b 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -770,6 +770,12 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, mutex_lock(&s->lock); prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem); +#ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE + printbuf_indent_add(&i->buf, 2); + bch2_trans_kmalloc_trace_to_text(&i->buf, &s->trans_kmalloc_trace); + printbuf_indent_sub(&i->buf, 2); +#endif + prt_printf(&i->buf, "Transaction duration:\n"); printbuf_indent_add(&i->buf, 2); -- 2.50.1