From c73e680d1f84059e1b1ea82a537f6ccc1c563eb4 Mon Sep 17 00:00:00 2001 From: Sungjong Seo Date: Thu, 27 Mar 2025 00:01:16 +0900 Subject: [PATCH 01/16] exfat: call bh_read in get_block only when necessary With commit 11a347fb6cef ("exfat: change to get file size from DataLength"), exfat_get_block() can now handle valid_size. However, most partial unwritten blocks that could be mapped with other blocks are being inefficiently processed separately as individual blocks. Except for partial unwritten blocks that require independent processing, let's handle them simply as before. Signed-off-by: Sungjong Seo Reviewed-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/inode.c | 159 +++++++++++++++++++++++------------------------ 1 file changed, 77 insertions(+), 82 deletions(-) diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index a23677de4544..b22c02d6000f 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -274,9 +274,11 @@ static int exfat_get_block(struct inode *inode, sector_t iblock, sector_t last_block; sector_t phys = 0; sector_t valid_blks; + loff_t i_size; mutex_lock(&sbi->s_lock); - last_block = EXFAT_B_TO_BLK_ROUND_UP(i_size_read(inode), sb); + i_size = i_size_read(inode); + last_block = EXFAT_B_TO_BLK_ROUND_UP(i_size, sb); if (iblock >= last_block && !create) goto done; @@ -305,102 +307,95 @@ static int exfat_get_block(struct inode *inode, sector_t iblock, if (buffer_delay(bh_result)) clear_buffer_delay(bh_result); - if (create) { + /* + * In most cases, we just need to set bh_result to mapped, unmapped + * or new status as follows: + * 1. i_size == valid_size + * 2. write case (create == 1) + * 3. direct_read (!bh_result->b_folio) + * -> the unwritten part will be zeroed in exfat_direct_IO() + * + * Otherwise, in the case of buffered read, it is necessary to take + * care the last nested block if valid_size is not equal to i_size. 
+ */ + if (i_size == ei->valid_size || create || !bh_result->b_folio) valid_blks = EXFAT_B_TO_BLK_ROUND_UP(ei->valid_size, sb); + else + valid_blks = EXFAT_B_TO_BLK(ei->valid_size, sb); - if (iblock + max_blocks < valid_blks) { - /* The range has been written, map it */ - goto done; - } else if (iblock < valid_blks) { - /* - * The range has been partially written, - * map the written part. - */ - max_blocks = valid_blks - iblock; - goto done; - } + /* The range has been fully written, map it */ + if (iblock + max_blocks < valid_blks) + goto done; - /* The area has not been written, map and mark as new. */ - set_buffer_new(bh_result); + /* The range has been partially written, map the written part */ + if (iblock < valid_blks) { + max_blocks = valid_blks - iblock; + goto done; + } + /* The area has not been written, map and mark as new for create case */ + if (create) { + set_buffer_new(bh_result); ei->valid_size = EXFAT_BLK_TO_B(iblock + max_blocks, sb); mark_inode_dirty(inode); - } else { - valid_blks = EXFAT_B_TO_BLK(ei->valid_size, sb); + goto done; + } - if (iblock + max_blocks < valid_blks) { - /* The range has been written, map it */ - goto done; - } else if (iblock < valid_blks) { - /* - * The area has been partially written, - * map the written part. - */ - max_blocks = valid_blks - iblock; + /* + * The area has just one block partially written. + * In that case, we should read and fill the unwritten part of + * a block with zero. + */ + if (bh_result->b_folio && iblock == valid_blks && + (ei->valid_size & (sb->s_blocksize - 1))) { + loff_t size, pos; + void *addr; + + max_blocks = 1; + + /* + * No buffer_head is allocated. + * (1) bmap: It's enough to set blocknr without I/O. + * (2) read: The unwritten part should be filled with zero. + * If a folio does not have any buffers, + * let's returns -EAGAIN to fallback to + * block_read_full_folio() for per-bh IO. 
+ */ + if (!folio_buffers(bh_result->b_folio)) { + err = -EAGAIN; goto done; - } else if (iblock == valid_blks && - (ei->valid_size & (sb->s_blocksize - 1))) { - /* - * The block has been partially written, - * zero the unwritten part and map the block. - */ - loff_t size, pos; - void *addr; - - max_blocks = 1; - - /* - * For direct read, the unwritten part will be zeroed in - * exfat_direct_IO() - */ - if (!bh_result->b_folio) - goto done; - - /* - * No buffer_head is allocated. - * (1) bmap: It's enough to fill bh_result without I/O. - * (2) read: The unwritten part should be filled with 0 - * If a folio does not have any buffers, - * let's returns -EAGAIN to fallback to - * per-bh IO like block_read_full_folio(). - */ - if (!folio_buffers(bh_result->b_folio)) { - err = -EAGAIN; - goto done; - } + } - pos = EXFAT_BLK_TO_B(iblock, sb); - size = ei->valid_size - pos; - addr = folio_address(bh_result->b_folio) + - offset_in_folio(bh_result->b_folio, pos); + pos = EXFAT_BLK_TO_B(iblock, sb); + size = ei->valid_size - pos; + addr = folio_address(bh_result->b_folio) + + offset_in_folio(bh_result->b_folio, pos); - /* Check if bh->b_data points to proper addr in folio */ - if (bh_result->b_data != addr) { - exfat_fs_error_ratelimit(sb, + /* Check if bh->b_data points to proper addr in folio */ + if (bh_result->b_data != addr) { + exfat_fs_error_ratelimit(sb, "b_data(%p) != folio_addr(%p)", bh_result->b_data, addr); - err = -EINVAL; - goto done; - } - - /* Read a block */ - err = bh_read(bh_result, 0); - if (err < 0) - goto done; + err = -EINVAL; + goto done; + } - /* Zero unwritten part of a block */ - memset(bh_result->b_data + size, 0, - bh_result->b_size - size); + /* Read a block */ + err = bh_read(bh_result, 0); + if (err < 0) + goto done; - err = 0; - } else { - /* - * The range has not been written, clear the mapped flag - * to only zero the cache and do not read from disk. 
- */ - clear_buffer_mapped(bh_result); - } + /* Zero unwritten part of a block */ + memset(bh_result->b_data + size, 0, bh_result->b_size - size); + err = 0; + goto done; } + + /* + * The area has not been written, clear mapped for read/bmap cases. + * If so, it will be filled with zero without reading from disk. + */ + clear_buffer_mapped(bh_result); done: bh_result->b_size = EXFAT_BLK_TO_B(max_blocks, sb); if (err < 0) -- 2.51.0 From 7337f9f14e0e2dbd2da50ade0cd7e58df6c7af6d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 28 Mar 2025 12:15:32 -0400 Subject: [PATCH 02/16] bcachefs: bch2_count_fsck_err() Factor out a helper from __bch2_fsck_err(), for counting the error in the superblock and deciding whether to print or ratelimit - will be used to replace some log_fsck_err() calls, where we want to lift out printing the error message. Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 96 ++++++++++++++++++++++++++++----------------- fs/bcachefs/error.h | 8 +++- fs/bcachefs/fsck.c | 2 + 3 files changed, 68 insertions(+), 38 deletions(-) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index d5f00aa252f6..db0f0fb547e5 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -34,6 +34,7 @@ bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out) journal_cur_seq(&c->journal)); return true; case BCH_ON_ERROR_panic: + bch2_print_string_as_lines(KERN_ERR, out->buf); panic(bch2_fmt(c, "panic after error")); return true; default: @@ -268,7 +269,8 @@ static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c, struct btree_trans *trans) #endif -static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) +static struct fsck_err_state *fsck_err_get(struct bch_fs *c, + enum bch_sb_error_id id) { struct fsck_err_state *s; @@ -276,7 +278,7 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) return NULL; list_for_each_entry(s, &c->fsck_error_msgs, list) - if (s->fmt == fmt) { + if (s->id == id) { /* * 
move it to the head of the list: repeated fsck errors * are common @@ -294,7 +296,7 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt) } INIT_LIST_HEAD(&s->list); - s->fmt = fmt; + s->id = id; list_add(&s->list, &c->fsck_error_msgs); return s; } @@ -344,15 +346,59 @@ static int do_fsck_ask_yn(struct bch_fs *c, return ask; } +static struct fsck_err_state *count_fsck_err_locked(struct bch_fs *c, + enum bch_sb_error_id id, const char *msg, + bool *repeat, bool *print, bool *suppress) +{ + bch2_sb_error_count(c, id); + + struct fsck_err_state *s = fsck_err_get(c, id); + if (s) { + /* + * We may be called multiple times for the same error on + * transaction restart - this memoizes instead of asking the user + * multiple times for the same error: + */ + if (s->last_msg && !strcmp(msg, s->last_msg)) { + *repeat = true; + *print = false; + return s; + } + + kfree(s->last_msg); + s->last_msg = kstrdup(msg, GFP_KERNEL); + + if (c->opts.ratelimit_errors && + s->nr >= FSCK_ERR_RATELIMIT_NR) { + if (s->nr == FSCK_ERR_RATELIMIT_NR) + *suppress = true; + else + *print = false; + } + + s->nr++; + } + return s; +} + +void __bch2_count_fsck_err(struct bch_fs *c, + enum bch_sb_error_id id, const char *msg, + bool *repeat, bool *print, bool *suppress) +{ + bch2_sb_error_count(c, id); + + mutex_lock(&c->fsck_error_msgs_lock); + count_fsck_err_locked(c, id, msg, repeat, print, suppress); + mutex_unlock(&c->fsck_error_msgs_lock); +} + int __bch2_fsck_err(struct bch_fs *c, struct btree_trans *trans, enum bch_fsck_flags flags, enum bch_sb_error_id err, const char *fmt, ...) { - struct fsck_err_state *s = NULL; va_list args; - bool print = true, suppressing = false, inconsistent = false, exiting = false; struct printbuf buf = PRINTBUF, *out = &buf; int ret = -BCH_ERR_fsck_ignore; const char *action_orig = "fix?", *action = action_orig; @@ -387,8 +433,6 @@ int __bch2_fsck_err(struct bch_fs *c, ? 
-BCH_ERR_fsck_fix : -BCH_ERR_fsck_ignore; - bch2_sb_error_count(c, err); - printbuf_indent_add_nextline(out, 2); #ifdef BCACHEFS_LOG_PREFIX @@ -414,35 +458,13 @@ int __bch2_fsck_err(struct bch_fs *c, } mutex_lock(&c->fsck_error_msgs_lock); - s = fsck_err_get(c, fmt); - if (s) { - /* - * We may be called multiple times for the same error on - * transaction restart - this memoizes instead of asking the user - * multiple times for the same error: - */ - if (s->last_msg && !strcmp(buf.buf, s->last_msg)) { - ret = s->ret; - goto err_unlock; - } - - kfree(s->last_msg); - s->last_msg = kstrdup(buf.buf, GFP_KERNEL); - if (!s->last_msg) { - ret = -ENOMEM; - goto err_unlock; - } - - if (c->opts.ratelimit_errors && - !(flags & FSCK_NO_RATELIMIT) && - s->nr >= FSCK_ERR_RATELIMIT_NR) { - if (s->nr == FSCK_ERR_RATELIMIT_NR) - suppressing = true; - else - print = false; - } - - s->nr++; + bool repeat = false, print = true, suppress = false; + bool inconsistent = false, exiting = false; + struct fsck_err_state *s = + count_fsck_err_locked(c, err, buf.buf, &repeat, &print, &suppress); + if (repeat) { + ret = s->ret; + goto err_unlock; } if ((flags & FSCK_AUTOFIX) && @@ -528,7 +550,7 @@ print: __bch2_inconsistent_error(c, out); else if (exiting) prt_printf(out, "Unable to continue, halting\n"); - else if (suppressing) + else if (suppress) prt_printf(out, "Ratelimiting new instances of previous error\n"); if (print) { diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index e3f72e26abdd..542bdbb8d8be 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -67,7 +67,7 @@ int bch2_fs_topology_error(struct bch_fs *, const char *, ...); struct fsck_err_state { struct list_head list; - const char *fmt; + enum bch_sb_error_id id; u64 nr; bool ratelimited; int ret; @@ -77,6 +77,12 @@ struct fsck_err_state { #define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err) +void __bch2_count_fsck_err(struct bch_fs *, + enum bch_sb_error_id, const char *, + bool *, bool *, 
bool *); +#define bch2_count_fsck_err(_c, _err, ...) \ + __bch2_count_fsck_err(_c, BCH_FSCK_ERR_##_err, __VA_ARGS__) + __printf(5, 6) __cold int __bch2_fsck_err(struct bch_fs *, struct btree_trans *, enum bch_fsck_flags, diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index d6a9430d479b..52320295dcf6 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1632,6 +1632,8 @@ static int overlapping_extents_found(struct btree_trans *trans, bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc); bch2_disk_reservation_put(c, &res); + bch_info(c, "repair ret %s", bch2_err_str(ret)); + if (ret) goto err; -- 2.51.0 From 6d77ce4a273b319f6e9e8d2b6b2415a13bdea66d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 26 Mar 2025 10:41:33 -0400 Subject: [PATCH 03/16] bcachefs: Better printing of inconsistency errors Build up and emit the error message for an inconsistency error all at once, instead of spread over multiple printk calls, so they're not jumbled in the dmesg log. Also, add better indenting. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 15 ++- fs/bcachefs/btree_io.c | 59 ++++++------ fs/bcachefs/btree_iter.c | 10 -- fs/bcachefs/btree_iter.h | 1 - fs/bcachefs/btree_node_scan.c | 10 +- fs/bcachefs/btree_update_interior.c | 55 ++++++----- fs/bcachefs/buckets.c | 137 ++++++++++++++++------------ fs/bcachefs/error.c | 13 +-- fs/bcachefs/error.h | 1 - fs/bcachefs/sb-errors_format.h | 3 +- 10 files changed, 153 insertions(+), 151 deletions(-) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 4da67ba8b7ab..21d1d86d5008 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -96,6 +96,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; + int ret = 0; if (insert) { prt_printf(&buf, "existing backpointer found when inserting "); @@ -125,17 +126,15 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, prt_printf(&buf, "for "); bch2_bkey_val_to_text(&buf, c, orig_k); - - bch_err(c, "%s", buf.buf); } - printbuf_exit(&buf); + if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers && + __bch2_inconsistent_error(c, &buf)) + ret = -BCH_ERR_erofs_unfixed_errors; - if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { - return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0; - } else { - return 0; - } + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); + return ret; } int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index c3224a920758..1d94a2bf706d 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -548,32 +548,39 @@ static int __btree_err(int ret, enum bch_sb_error_id err_type, const char *fmt, ...) 
{ - struct printbuf out = PRINTBUF; bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes; - va_list args; + + if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry) + ret = -BCH_ERR_btree_node_read_err_fixable; + if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry) + ret = -BCH_ERR_btree_node_read_err_bad_node; + + if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable) + bch2_sb_error_count(c, err_type); + + struct printbuf out = PRINTBUF; + if (write != WRITE && ret != -BCH_ERR_btree_node_read_err_fixable) { + printbuf_indent_add_nextline(&out, 2); +#ifdef BCACHEFS_LOG_PREFIX + prt_printf(&out, bch2_log_msg(c, "")); +#endif + } btree_err_msg(&out, c, ca, b, i, k, b->written, write); + va_list args; va_start(args, fmt); prt_vprintf(&out, fmt, args); va_end(args); if (write == WRITE) { - bch2_print_string_as_lines(KERN_ERR, out.buf); - ret = c->opts.errors == BCH_ON_ERROR_continue - ? 0 - : -BCH_ERR_fsck_errors_not_fixed; - goto out; + prt_str(&out, ", "); + ret = __bch2_inconsistent_error(c, &out) + ? 
-BCH_ERR_fsck_errors_not_fixed + : 0; + silent = false; } - if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry) - ret = -BCH_ERR_btree_node_read_err_fixable; - if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry) - ret = -BCH_ERR_btree_node_read_err_bad_node; - - if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable) - bch2_sb_error_count(c, err_type); - switch (ret) { case -BCH_ERR_btree_node_read_err_fixable: ret = !silent @@ -583,25 +590,21 @@ static int __btree_err(int ret, ret != -BCH_ERR_fsck_ignore) goto fsck_err; ret = -BCH_ERR_fsck_fix; - break; - case -BCH_ERR_btree_node_read_err_want_retry: - case -BCH_ERR_btree_node_read_err_must_retry: - if (!silent) - bch2_print_string_as_lines(KERN_ERR, out.buf); - break; + goto out; case -BCH_ERR_btree_node_read_err_bad_node: - if (!silent) - bch2_print_string_as_lines(KERN_ERR, out.buf); - ret = bch2_topology_error(c); + prt_str(&out, ", "); + ret = __bch2_topology_error(c, &out); + if (ret) + silent = false; break; case -BCH_ERR_btree_node_read_err_incompatible: - if (!silent) - bch2_print_string_as_lines(KERN_ERR, out.buf); ret = -BCH_ERR_fsck_errors_not_fixed; + silent = false; break; - default: - BUG(); } + + if (!silent) + bch2_print_string_as_lines(KERN_ERR, out.buf); out: fsck_err: printbuf_exit(&out); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 1a1e8e61535e..a9c110b846b5 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1495,16 +1495,6 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans) printbuf_indent_sub(buf, 2); } -noinline __cold -void bch2_dump_trans_updates(struct btree_trans *trans) -{ - struct printbuf buf = PRINTBUF; - - bch2_trans_updates_to_text(&buf, trans); - bch2_print_str(trans->c, buf.buf); - printbuf_exit(&buf); -} - static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx) { struct btree_path *path = trans->paths + 
path_idx; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 8823eec6b284..e6f51a3b8187 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -9,7 +9,6 @@ void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *); void bch2_btree_path_to_text(struct printbuf *, struct btree_trans *, btree_path_idx_t); void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *); -void bch2_dump_trans_updates(struct btree_trans *); void bch2_dump_trans_paths_updates(struct btree_trans *); static inline int __bkey_err(const struct bkey *k) diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index de02c0e378c9..25d54b77cdc2 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -579,10 +579,12 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, found_btree_node_to_key(&tmp.k, &n); - struct printbuf buf = PRINTBUF; - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k)); - bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); - printbuf_exit(&buf); + if (c->opts.verbose) { + struct printbuf buf = PRINTBUF; + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k)); + bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); + printbuf_exit(&buf); + } BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), (struct bkey_validate_context) { diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 4c6ade8c10a2..37c4ba529e3a 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -54,6 +54,8 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) struct bkey_buf prev; int ret = 0; + printbuf_indent_add_nextline(&buf, 2); + BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 && !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key, b->data->min_key)); @@ -64,19 +66,20 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) if (b == btree_node_root(c, 
b)) { if (!bpos_eq(b->data->min_key, POS_MIN)) { - printbuf_reset(&buf); + ret = __bch2_topology_error(c, &buf); + bch2_bpos_to_text(&buf, b->data->min_key); log_fsck_err(trans, btree_root_bad_min_key, "btree root with incorrect min_key: %s", buf.buf); - goto topology_repair; + goto out; } if (!bpos_eq(b->data->max_key, SPOS_MAX)) { - printbuf_reset(&buf); + ret = __bch2_topology_error(c, &buf); bch2_bpos_to_text(&buf, b->data->max_key); log_fsck_err(trans, btree_root_bad_max_key, "btree root with incorrect max_key: %s", buf.buf); - goto topology_repair; + goto out; } } @@ -94,9 +97,8 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) : bpos_successor(prev.k->k.p); if (!bpos_eq(expected_min, bp.v->min_key)) { - bch2_topology_error(c); + ret = __bch2_topology_error(c, &buf); - printbuf_reset(&buf); prt_str(&buf, "end of prev node doesn't match start of next node\nin "); bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); prt_str(&buf, " node "); @@ -107,7 +109,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) bch2_bkey_val_to_text(&buf, c, k); log_fsck_err(trans, btree_node_topology_bad_min_key, "%s", buf.buf); - goto topology_repair; + goto out; } bch2_bkey_buf_reassemble(&prev, c, k); @@ -115,20 +117,17 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) } if (bkey_deleted(&prev.k->k)) { - bch2_topology_error(c); + ret = __bch2_topology_error(c, &buf); - printbuf_reset(&buf); prt_str(&buf, "empty interior node\nin "); bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); prt_str(&buf, " node "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); log_fsck_err(trans, btree_node_topology_empty_interior_node, "%s", buf.buf); - goto topology_repair; } else if (!bpos_eq(prev.k->k.p, b->key.k.p)) { - bch2_topology_error(c); + ret = __bch2_topology_error(c, &buf); - printbuf_reset(&buf); prt_str(&buf, "last child node doesn't end at end of parent node\nin 
"); bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); prt_str(&buf, " node "); @@ -137,7 +136,6 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k)); log_fsck_err(trans, btree_node_topology_bad_max_key, "%s", buf.buf); - goto topology_repair; } out: fsck_err: @@ -145,9 +143,6 @@ fsck_err: bch2_bkey_buf_exit(&prev, c); printbuf_exit(&buf); return ret; -topology_repair: - ret = bch2_topology_error(c); - goto out; } /* Calculate ideal packed bkey format for new btree nodes: */ @@ -2007,18 +2002,22 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, } if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) { - struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF; - - bch2_bpos_to_text(&buf1, prev->data->max_key); - bch2_bpos_to_text(&buf2, next->data->min_key); - bch_err(c, - "%s(): btree topology error:\n" - " prev ends at %s\n" - " next starts at %s", - __func__, buf1.buf, buf2.buf); - printbuf_exit(&buf1); - printbuf_exit(&buf2); - ret = bch2_topology_error(c); + struct printbuf buf = PRINTBUF; + + printbuf_indent_add_nextline(&buf, 2); + prt_printf(&buf, "%s(): ", __func__); + ret = __bch2_topology_error(c, &buf); + prt_newline(&buf); + + prt_printf(&buf, "prev ends at "); + bch2_bpos_to_text(&buf, prev->data->max_key); + prt_newline(&buf); + + prt_printf(&buf, "next starts at "); + bch2_bpos_to_text(&buf, next->data->min_key); + + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); goto err; } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index e56ef623ebc1..1c6fc45e1449 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -381,6 +381,36 @@ err: return ret; } +static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf, + struct bkey_s_c k, bool insert, enum bch_sb_error_id id) +{ + struct bch_fs *c = trans->c; + bool repeat = false, print = true, suppress = false; + + prt_printf(buf, "\nwhile marking 
"); + bch2_bkey_val_to_text(buf, c, k); + prt_newline(buf); + + __bch2_count_fsck_err(c, id, buf->buf, &repeat, &print, &suppress); + + int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); + + if (insert) { + print = true; + suppress = false; + + bch2_trans_updates_to_text(buf, trans); + __bch2_inconsistent_error(c, buf); + ret = -BCH_ERR_bucket_ref_update; + } + + if (suppress) + prt_printf(buf, "Ratelimiting new instances of previous error\n"); + if (print) + bch2_print_string_as_lines(KERN_ERR, buf->buf); + return ret; +} + int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, struct bkey_s_c k, const struct bch_extent_ptr *ptr, @@ -396,32 +426,29 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, BUG_ON(!sectors); - if (gen_after(ptr->gen, b_gen)) { - bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); - log_fsck_err(trans, ptr_gen_newer_than_bucket_gen, - "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" - "while marking %s", + if (unlikely(gen_after(ptr->gen, b_gen))) { + bch2_log_msg_start(c, &buf); + prt_printf(&buf, + "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen", ptr->dev, bucket_nr, b_gen, bch2_data_type_str(bucket_data_type ?: ptr_data_type), - ptr->gen, - (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (inserting) - goto err; + ptr->gen); + + ret = bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen); goto out; } - if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { - bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); - log_fsck_err(trans, ptr_too_stale, - "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" - "while marking %s", + if (unlikely(gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX)) { + bch2_log_msg_start(c, &buf); + prt_printf(&buf, + "bucket %u:%zu gen %u data type %s: ptr gen %u too stale", ptr->dev, bucket_nr, b_gen, 
bch2_data_type_str(bucket_data_type ?: ptr_data_type), - ptr->gen, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (inserting) - goto err; + ptr->gen); + + ret = bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_ptr_too_stale); goto out; } @@ -430,62 +457,50 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, goto out; } - if (b_gen != ptr->gen) { - bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); - log_fsck_err(trans, stale_dirty_ptr, - "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n" - "while marking %s", + if (unlikely(b_gen != ptr->gen)) { + bch2_log_msg_start(c, &buf); + prt_printf(&buf, + "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)", ptr->dev, bucket_nr, b_gen, bucket_gen_get(ca, bucket_nr), bch2_data_type_str(bucket_data_type ?: ptr_data_type), - ptr->gen, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (inserting) - goto err; + ptr->gen); + + ret = bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_stale_dirty_ptr); goto out; } - if (bucket_data_type_mismatch(bucket_data_type, ptr_data_type)) { - bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); - log_fsck_err(trans, ptr_bucket_data_type_mismatch, - "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" - "while marking %s", - ptr->dev, bucket_nr, b_gen, - bch2_data_type_str(bucket_data_type), - bch2_data_type_str(ptr_data_type), - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (inserting) - goto err; + if (unlikely(bucket_data_type_mismatch(bucket_data_type, ptr_data_type))) { + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "bucket %u:%zu gen %u different types of data in same bucket: %s, %s", + ptr->dev, bucket_nr, b_gen, + bch2_data_type_str(bucket_data_type), + bch2_data_type_str(ptr_data_type)); + + ret = bucket_ref_update_err(trans, &buf, k, 
inserting, + BCH_FSCK_ERR_ptr_bucket_data_type_mismatch); goto out; } - if ((u64) *bucket_sectors + sectors > U32_MAX) { - bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations); - log_fsck_err(trans, bucket_sector_count_overflow, - "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n" - "while marking %s", + if (unlikely((u64) *bucket_sectors + sectors > U32_MAX)) { + bch2_log_msg_start(c, &buf); + prt_printf(&buf, + "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX", ptr->dev, bucket_nr, b_gen, bch2_data_type_str(bucket_data_type ?: ptr_data_type), - *bucket_sectors, sectors, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (inserting) - goto err; + *bucket_sectors, sectors); + + ret = bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_bucket_sector_count_overflow); sectors = -*bucket_sectors; + goto out; } *bucket_sectors += sectors; out: printbuf_exit(&buf); return ret; -err: -fsck_err: - bch2_dump_trans_updates(trans); - bch2_inconsistent_error(c); - ret = -BCH_ERR_bucket_ref_update; - goto out; } void bch2_trans_account_disk_usage_change(struct btree_trans *trans) @@ -677,11 +692,13 @@ err: if (!m || !m->alive) { gc_stripe_unlock(m); struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "pointer to nonexistent stripe %llu\n while marking ", + (u64) p.ec.idx); bch2_bkey_val_to_text(&buf, c, k); - bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n while marking %s", - (u64) p.ec.idx, buf.buf); + __bch2_inconsistent_error(c, &buf); + bch2_print_string_as_lines(KERN_ERR, buf.buf); printbuf_exit(&buf); - bch2_inconsistent_error(c); return -BCH_ERR_trigger_stripe_pointer; } diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index db0f0fb547e5..d4dfd13a8076 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -92,11 +92,13 @@ bool bch2_trans_inconsistent(struct btree_trans *trans, const char *fmt, ...) 
return ret; } -int bch2_topology_error(struct bch_fs *c) +int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) { + prt_printf(out, "btree topology error: "); + set_bit(BCH_FS_topology_error, &c->flags); if (!test_bit(BCH_FS_recovery_running, &c->flags)) { - bch2_inconsistent_error(c); + __bch2_inconsistent_error(c, out); return -BCH_ERR_btree_need_topology_repair; } else { return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: @@ -104,13 +106,6 @@ int bch2_topology_error(struct bch_fs *c) } } -int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) -{ - prt_printf(out, "btree topology error: "); - - return bch2_topology_error(c); -} - int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...) { struct printbuf buf = PRINTBUF; diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 542bdbb8d8be..d0d024dc714b 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -55,7 +55,6 @@ bool bch2_trans_inconsistent(struct btree_trans *, const char *, ...); _ret; \ }) -int bch2_topology_error(struct bch_fs *); int __bch2_topology_error(struct bch_fs *, struct printbuf *); __printf(2, 3) int bch2_fs_topology_error(struct bch_fs *, const char *, ...); diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 1736abea9ed1..5d43e3504386 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -5,8 +5,7 @@ enum bch_fsck_flags { FSCK_CAN_FIX = 1 << 0, FSCK_CAN_IGNORE = 1 << 1, - FSCK_NO_RATELIMIT = 1 << 2, - FSCK_AUTOFIX = 1 << 3, + FSCK_AUTOFIX = 1 << 2, }; #define BCH_SB_ERRS() \ -- 2.51.0 From 63c3b8f616cc95bb1fcc6101c92485d41c535d7c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 29 Mar 2025 14:22:29 -0400 Subject: [PATCH 04/16] bcachefs: Change btree_insert_node() assertion to error Debug for https://github.com/koverstreet/bcachefs/issues/843 Print useful debug info and go emergency read-only. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 37c4ba529e3a..987fdfc043b8 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -35,6 +35,8 @@ static const char * const bch2_btree_update_modes[] = { NULL }; +static void bch2_btree_update_to_text(struct printbuf *, struct btree_update *); + static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *, btree_path_idx_t, struct btree *, struct keylist *); static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *); @@ -1777,11 +1779,24 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t int ret; lockdep_assert_held(&c->gc_lock); - BUG_ON(!btree_node_intent_locked(path, b->c.level)); BUG_ON(!b->c.level); BUG_ON(!as || as->b); bch2_verify_keylist_sorted(keys); + if (!btree_node_intent_locked(path, b->c.level)) { + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + prt_printf(&buf, "%s(): node not locked at level %u\n", + __func__, b->c.level); + bch2_btree_update_to_text(&buf, as); + bch2_btree_path_to_text(&buf, trans, path_idx); + + bch2_print_string_as_lines(KERN_ERR, buf.buf); + printbuf_exit(&buf); + bch2_fs_emergency_read_only(c); + return -EIO; + } + ret = bch2_btree_node_lock_write(trans, path, &b->c); if (ret) return ret; -- 2.51.0 From b3981564ca8fa341a57c16dcbed1a9bd7f4e3be1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 29 Mar 2025 17:59:50 -0400 Subject: [PATCH 05/16] bcachefs: Clear fs_path_parent on subvolume unlink This fixes recursive subvolume removal. 
Subvolume deletion is asynchronous; fs_path_parent, and thus the entry in the subvolume_children btree, need to be cleared when the subvolume is unlinked from the fs hierarchy - else we'll spuriously think a subvolume has children and deletion will fail. Signed-off-by: Kent Overstreet --- fs/bcachefs/subvolume.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index b7b96283c316..cd0d8e5e44e7 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -561,6 +561,7 @@ int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid) } SET_BCH_SUBVOLUME_UNLINKED(&n->v, true); + n->v.fs_path_parent = 0; bch2_trans_iter_exit(trans, &iter); return ret; } -- 2.51.0 From 707549600c4a012ed71c0204a7992a679880bf33 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 29 Mar 2025 19:01:09 -0400 Subject: [PATCH 06/16] bcachefs: bch2_ioctl_subvolume_destroy() fixes bch2_evict_subvolume_inodes() was getting stuck - due to incorrectly pruning the dcache. Also, fix missing permissions checks.
Reported-by: Alexander Viro Signed-off-by: Kent Overstreet --- fs/bcachefs/fs-ioctl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index e3a3230fc652..0273130f18dc 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -541,10 +541,12 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, ret = -ENOENT; goto err; } - ret = __bch2_unlink(dir, victim, true); + + ret = inode_permission(file_mnt_idmap(filp), d_inode(victim), MAY_WRITE) ?: + __bch2_unlink(dir, victim, true); if (!ret) { fsnotify_rmdir(dir, victim); - d_delete(victim); + d_invalidate(victim); } err: inode_unlock(dir); -- 2.51.0 From 458e2ef882d2e2ac4748ca802227a5e050d6aba1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 29 Mar 2025 19:29:33 -0400 Subject: [PATCH 07/16] bcachefs: fix units in rebalance_status Signed-off-by: Kent Overstreet --- fs/bcachefs/rebalance.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 29a569384146..a480c3cb6712 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -605,7 +605,7 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c) bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1); prt_printf(out, "pending work:\t"); - prt_human_readable_u64(out, v); + prt_human_readable_u64(out, v << 9); prt_printf(out, "\n\n"); prt_str(out, bch2_rebalance_state_strs[r->state]); -- 2.51.0 From f548db4d312a4d71e4f65cc43c724cfd46784ab8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 29 Mar 2025 20:02:44 -0400 Subject: [PATCH 08/16] bcachefs: Silence errors after emergency shutdown We don't care about errors from asynchronous ops that were because we did an emergency shutdown; silence them. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 7 +++++-- fs/bcachefs/journal_io.c | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 987fdfc043b8..bf7e1dac7f46 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1268,7 +1268,8 @@ err: bch2_btree_update_free(as, trans); if (!bch2_err_matches(ret, ENOSPC) && !bch2_err_matches(ret, EROFS) && - ret != -BCH_ERR_journal_reclaim_would_deadlock) + ret != -BCH_ERR_journal_reclaim_would_deadlock && + ret != -BCH_ERR_journal_shutdown) bch_err_fn_ratelimited(c, ret); return ERR_PTR(ret); } @@ -2302,7 +2303,9 @@ static void async_btree_node_rewrite_work(struct work_struct *work) int ret = bch2_trans_do(c, bch2_btree_node_rewrite_key(trans, a->btree_id, a->level, a->key.k, 0)); - if (ret != -ENOENT) + if (ret != -ENOENT && + !bch2_err_matches(ret, EROFS) && + ret != -BCH_ERR_journal_shutdown) bch_err_fn_ratelimited(c, ret); spin_lock(&c->btree_node_rewrites_lock); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 24eaec1d406c..f461cb06df90 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1623,7 +1623,8 @@ static CLOSURE_CALLBACK(journal_write_done) : j->noflush_write_time, j->write_start_time); if (!w->devs_written.nr) { - bch_err(c, "unable to write journal to sufficient devices"); + if (!bch2_journal_error(j)) + bch_err(c, "unable to write journal to sufficient devices"); err = -BCH_ERR_journal_write_err; } else { bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, -- 2.51.0 From 393a05a7413aa325a15c6d3b35867843f91f1646 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 29 Mar 2025 20:58:32 -0400 Subject: [PATCH 09/16] bcachefs: Don't use designated initializers for disk_accounting_pos Not all compilers fully initialize these - they're not guaranteed to because of the union shenanigans. 
Fixes: https://github.com/koverstreet/bcachefs/issues/844 Signed-off-by: Kent Overstreet --- fs/bcachefs/buckets.c | 24 ++++++++++++------------ fs/bcachefs/chardev.c | 6 ++---- fs/bcachefs/disk_accounting.c | 25 +++++++++++++------------ fs/bcachefs/ec.c | 20 +++++++++++--------- fs/bcachefs/progress.c | 6 ++---- fs/bcachefs/rebalance.c | 3 ++- fs/bcachefs/sysfs.c | 6 ++---- 7 files changed, 44 insertions(+), 46 deletions(-) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 1c6fc45e1449..0903311cc71e 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -666,9 +666,9 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans, stripe_blockcount_get(&s->v, p.ec.block) + sectors); - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_replicas, - }; + struct disk_accounting_pos acc; + memset(&acc, 0, sizeof(acc)); + acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i)); acc.replicas.data_type = data_type; ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false); @@ -704,9 +704,9 @@ err: m->block_sectors[p.ec.block] += sectors; - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_replicas, - }; + struct disk_accounting_pos acc; + memset(&acc, 0, sizeof(acc)); + acc.type = BCH_DISK_ACCOUNTING_replicas; memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e)); gc_stripe_unlock(m); @@ -734,12 +734,12 @@ static int __trigger_extent(struct btree_trans *trans, : BCH_DATA_user; int ret = 0; - struct disk_accounting_pos acc_replicas_key = { - .type = BCH_DISK_ACCOUNTING_replicas, - .replicas.data_type = data_type, - .replicas.nr_devs = 0, - .replicas.nr_required = 1, - }; + struct disk_accounting_pos acc_replicas_key; + memset(&acc_replicas_key, 0, sizeof(acc_replicas_key)); + acc_replicas_key.type = BCH_DISK_ACCOUNTING_replicas; + acc_replicas_key.replicas.data_type = data_type; + acc_replicas_key.replicas.nr_devs = 0; + acc_replicas_key.replicas.nr_required = 1;
unsigned cur_compression_type = 0; u64 compression_acct[3] = { 1, 0, 0 }; diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 57d55b3ddc71..584f4a3eb670 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -426,10 +426,8 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c, arg.replica_entries_bytes = replicas.nr; for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) { - struct disk_accounting_pos k = { - .type = BCH_DISK_ACCOUNTING_persistent_reserved, - .persistent_reserved.nr_replicas = i, - }; + struct disk_accounting_pos k; + disk_accounting_key_init(k, persistent_reserved, .nr_replicas = i); bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&k), diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 651e1b2521a6..a59f6c12529b 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -114,10 +114,9 @@ int bch2_mod_dev_cached_sectors(struct btree_trans *trans, unsigned dev, s64 sectors, bool gc) { - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_replicas, - }; - + struct disk_accounting_pos acc; + memset(&acc, 0, sizeof(acc)); + acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_replicas_entry_cached(&acc.replicas, dev); return bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc); @@ -737,7 +736,9 @@ int bch2_accounting_read(struct bch_fs *c) break; if (!bch2_accounting_is_mem(acc_k)) { - struct disk_accounting_pos next = { .type = acc_k.type + 1 }; + struct disk_accounting_pos next; + memset(&next, 0, sizeof(next)); + next.type = acc_k.type + 1; bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); continue; } @@ -893,15 +894,13 @@ int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev) int bch2_dev_usage_init(struct bch_dev *ca, bool gc) { struct bch_fs *c = ca->fs; - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_dev_data_type, - .dev_data_type.dev = ca->dev_idx, - .dev_data_type.data_type = BCH_DATA_free, - }; u64 v[3] = { ca->mi.nbuckets
- ca->mi.first_bucket, 0, 0 }; int ret = bch2_trans_do(c, ({ - bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), gc) ?: + bch2_disk_accounting_mod2(trans, gc, + v, dev_data_type, + .dev = ca->dev_idx, + .data_type = BCH_DATA_free) ?: (!gc ? bch2_trans_commit(trans, NULL, NULL, 0) : 0); })); bch_err_fn(c, ret); @@ -928,7 +927,9 @@ void bch2_verify_accounting_clean(struct bch_fs *c) break; if (!bch2_accounting_is_mem(acc_k)) { - struct disk_accounting_pos next = { .type = acc_k.type + 1 }; + struct disk_accounting_pos next; + memset(&next, 0, sizeof(next)); + next.type = acc_k.type + 1; bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next)); continue; } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 0c23d749621a..6faeda7ad03d 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -453,9 +453,9 @@ int bch2_trigger_stripe(struct btree_trans *trans, if (new_s) { s64 sectors = (u64) le16_to_cpu(new_s->sectors) * new_s->nr_redundant; - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_replicas, - }; + struct disk_accounting_pos acc; + memset(&acc, 0, sizeof(acc)); + acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, new); int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc); if (ret) @@ -468,9 +468,9 @@ int bch2_trigger_stripe(struct btree_trans *trans, if (old_s) { s64 sectors = -((s64) le16_to_cpu(old_s->sectors)) * old_s->nr_redundant; - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_replicas, - }; + struct disk_accounting_pos acc; + memset(&acc, 0, sizeof(acc)); + acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, old); int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc); if (ret) @@ -2110,14 +2110,14 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_ if (ret) return ret; - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_replicas, - }; + struct disk_accounting_pos acc; s64 sectors = 0; for
(unsigned i = 0; i < s->v.nr_blocks; i++) sectors -= stripe_blockcount_get(&s->v, i); + memset(&acc, 0, sizeof(acc)); + acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i)); acc.replicas.data_type = BCH_DATA_user; ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false); @@ -2131,6 +2131,8 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_ sectors = -sectors; + memset(&acc, 0, sizeof(acc)); + acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i)); acc.replicas.data_type = BCH_DATA_user; ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false); diff --git a/fs/bcachefs/progress.c b/fs/bcachefs/progress.c index bafd1c91a802..d09898566abe 100644 --- a/fs/bcachefs/progress.c +++ b/fs/bcachefs/progress.c @@ -16,10 +16,8 @@ void bch2_progress_init(struct progress_indicator_state *s, if (!(btree_id_mask & BIT_ULL(i))) continue; - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_btree, - .btree.id = i, - }; + struct disk_accounting_pos acc; + disk_accounting_key_init(acc, btree, .id = i); u64 v; bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1); diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index a480c3cb6712..b9bde04b66c0 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -600,7 +600,8 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c) struct bch_fs_rebalance *r = &c->rebalance; /* print pending work */ - struct disk_accounting_pos acc = { .type = BCH_DISK_ACCOUNTING_rebalance_work, }; + struct disk_accounting_pos acc; + disk_accounting_key_init(acc, rebalance_work); u64 v; bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 74c186d65d1f..e5f003c29369 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -257,10 +257,8 @@ static int
bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c prt_printf(out, "type\tcompressed\runcompressed\raverage extent size\r\n"); for (unsigned i = 1; i < BCH_COMPRESSION_TYPE_NR; i++) { - struct disk_accounting_pos a = { - .type = BCH_DISK_ACCOUNTING_compression, - .compression.type = i, - }; + struct disk_accounting_pos a; + disk_accounting_key_init(a, compression, .type = i); struct bpos p = disk_accounting_pos_to_bpos(&a); u64 v[3]; bch2_accounting_mem_read(c, p, v, ARRAY_SIZE(v)); -- 2.51.0 From 2b47102b933a5f28a08f4811835cc3a7cdb1b324 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 30 Mar 2025 09:30:04 -0400 Subject: [PATCH 10/16] bcachefs: Reorder error messages that include journal debug Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 10 +++++----- fs/bcachefs/journal_io.c | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index ecb97d435f6a..8a36d5536668 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -168,11 +168,11 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) return stuck; } j->err_seq = journal_cur_seq(j); - spin_unlock(&j->lock); - bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)", - bch2_err_str(error)); - bch2_journal_debug_to_text(&buf, j); + __bch2_journal_debug_to_text(&buf, j); + spin_unlock(&j->lock); + prt_printf(&buf, bch2_fmt(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)"), + bch2_err_str(error)); bch2_print_string_as_lines(KERN_ERR, buf.buf); printbuf_reset(&buf); @@ -727,10 +727,10 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, remaining_wait)) return ret; - bch_err(c, "Journal stuck? Waited for 10 seconds, err %s", bch2_err_str(ret)); struct printbuf buf = PRINTBUF; bch2_journal_debug_to_text(&buf, j); bch2_print_string_as_lines(KERN_ERR, buf.buf); + prt_printf(&buf, bch2_fmt(c, "Journal stuck? 
Waited for 10 seconds, err %s"), bch2_err_str(ret)); printbuf_exit(&buf); closure_wait_event(&j->async_wait, diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index f461cb06df90..228e531921b7 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -2082,12 +2082,12 @@ CLOSURE_CALLBACK(bch2_journal_write) struct printbuf buf = PRINTBUF; buf.atomic++; + __bch2_journal_debug_to_text(&buf, j); + spin_unlock(&j->lock); prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"), le64_to_cpu(w->data->seq), vstruct_sectors(w->data, c->block_bits), bch2_err_str(ret)); - __bch2_journal_debug_to_text(&buf, j); - spin_unlock(&j->lock); bch2_print_string_as_lines(KERN_ERR, buf.buf); printbuf_exit(&buf); } -- 2.51.0 From edaed8ee8cb3fdb6b9fcde65ff31e99e4db59cab Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 30 Mar 2025 16:50:59 -0400 Subject: [PATCH 11/16] bcachefs: BCH_JSET_ENTRY_log_bkey Add a journal entry type for logging - but logging a bkey, not a string; to be used for data move path debugging. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 ++- fs/bcachefs/btree_update.c | 13 +++++++++++++ fs/bcachefs/btree_update.h | 2 ++ fs/bcachefs/journal_io.c | 17 +++++++++++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index e96d87767020..a3db328dee31 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1143,7 +1143,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb) x(log, 9) \ x(overwrite, 10) \ x(write_buffer_keys, 11) \ - x(datetime, 12) + x(datetime, 12) \ + x(log_bkey, 13) enum bch_jset_entry_type { #define x(f, nr) BCH_JSET_ENTRY_##f = nr, diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index bd2eb42edb24..c05394f56424 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -846,6 +846,19 @@ int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf) return 0; } +int bch2_trans_log_bkey(struct btree_trans *trans, enum btree_id btree, + unsigned level, struct bkey_i *k) +{ + struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s)); + int ret = PTR_ERR_OR_ZERO(e); + if (ret) + return ret; + + journal_entry_init(e, BCH_JSET_ENTRY_log_bkey, btree, level, k->k.u64s); + bkey_copy(e->start, k); + return 0; +} + __printf(3, 0) static int __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index d2e1c04353f6..568e56c91190 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -170,6 +170,8 @@ void bch2_trans_commit_hook(struct btree_trans *, int __bch2_trans_commit(struct btree_trans *, unsigned); int bch2_trans_log_msg(struct btree_trans *, struct printbuf *); +int bch2_trans_log_bkey(struct btree_trans *, enum btree_id, unsigned, struct bkey_i *); + __printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...); __printf(2, 3) int 
bch2_journal_log_msg(struct bch_fs *, const char *, ...); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 228e531921b7..2debc213e47c 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -764,6 +764,23 @@ static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs journal_entry_btree_keys_to_text(out, c, entry); } +static int journal_entry_log_bkey_validate(struct bch_fs *c, + struct jset *jset, + struct jset_entry *entry, + unsigned version, int big_endian, + struct bkey_validate_context from) +{ + from.flags = 0; + return journal_entry_btree_keys_validate(c, jset, entry, + version, big_endian, from); +} + +static void journal_entry_log_bkey_to_text(struct printbuf *out, struct bch_fs *c, + struct jset_entry *entry) +{ + journal_entry_btree_keys_to_text(out, c, entry); +} + static int journal_entry_write_buffer_keys_validate(struct bch_fs *c, struct jset *jset, struct jset_entry *entry, -- 2.51.0 From 7fdc3fa3cb5fb561f5945b4de418d48d1a726a8d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 30 Mar 2025 16:57:21 -0400 Subject: [PATCH 12/16] bcachefs: Log original key being moved in data updates There's something going on with the data move path; log the original key being moved for debugging. Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 22 +++++++++++++++++++++- fs/bcachefs/data_update.h | 12 ++++++++++++ fs/bcachefs/io_read.c | 1 + 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 0ec273daccb7..fe400dfc5d76 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -22,6 +22,13 @@ #include +static const char * const bch2_data_update_type_strs[] = { +#define x(t, n, ...) 
[n] = #t, + BCH_DATA_UPDATE_TYPES() +#undef x + NULL +}; + static void bkey_put_dev_refs(struct bch_fs *c, struct bkey_s_c k) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); @@ -181,6 +188,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, container_of(op, struct data_update, op); struct keylist *keys = &op->insert_keys; struct bkey_buf _new, _insert; + struct printbuf journal_msg = PRINTBUF; int ret = 0; bch2_bkey_buf_init(&_new); @@ -372,7 +380,12 @@ restart_drop_extra_replicas: printbuf_exit(&buf); } - ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id, + printbuf_reset(&journal_msg); + prt_str(&journal_msg, bch2_data_update_type_strs[m->type]); + + ret = bch2_trans_log_msg(trans, &journal_msg) ?: + bch2_trans_log_bkey(trans, m->btree_id, 0, m->k.k) ?: + bch2_insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, bkey_start_pos(&insert->k)) ?: bch2_insert_snapshot_whiteouts(trans, m->btree_id, k.k->p, insert->k.p) ?: @@ -417,6 +430,7 @@ nowork: goto next; } out: + printbuf_exit(&journal_msg); bch2_trans_iter_exit(trans, &iter); bch2_bkey_buf_exit(&_insert, c); bch2_bkey_buf_exit(&_new, c); @@ -577,6 +591,9 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) { + prt_str(out, bch2_data_update_type_strs[m->type]); + prt_newline(out); + bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts); prt_newline(out); @@ -738,6 +755,9 @@ int bch2_data_update_init(struct btree_trans *trans, bch2_bkey_buf_init(&m->k); bch2_bkey_buf_reassemble(&m->k, c, k); + m->type = data_opts.btree_insert_flags & BCH_WATERMARK_copygc + ? 
BCH_DATA_UPDATE_copygc + : BCH_DATA_UPDATE_rebalance; m->btree_id = btree_id; m->data_opts = data_opts; m->ctxt = ctxt; diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index c194cbbf5b51..ed05125867da 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -24,7 +24,19 @@ struct data_update_opts { void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *, struct bch_io_opts *, struct data_update_opts *); +#define BCH_DATA_UPDATE_TYPES() \ + x(copygc, 0) \ + x(rebalance, 1) \ + x(promote, 2) + +enum bch_data_update_types { +#define x(n, id) BCH_DATA_UPDATE_##n = id, + BCH_DATA_UPDATE_TYPES() +#undef x +}; + struct data_update { + enum bch_data_update_types type; /* extent being updated: */ bool read_done; enum btree_id btree_id; diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index fafd00a3d6c9..fd01e67b3e84 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -259,6 +259,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans, &orig->opts, update_opts, btree_id, k); + op->write.type = BCH_DATA_UPDATE_promote; /* * possible errors: -BCH_ERR_nocow_lock_blocked, * -BCH_ERR_ENOSPC_disk_reservation: -- 2.51.0 From 650f5353dcc9b6e690a1c763754fa1e98d217bfc Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 30 Mar 2025 20:04:16 -0400 Subject: [PATCH 13/16] bcachefs: fix bch2_write_point_to_text() units Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 0cac65347a5d..da0d72928b5b 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1560,7 +1560,7 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c, unsigned i; prt_printf(out, "%lu: ", wp->write_point); - prt_human_readable_u64(out, wp->sectors_allocated); + prt_human_readable_u64(out, wp->sectors_allocated << 9); 
prt_printf(out, " last wrote: "); bch2_pr_time_units(out, sched_clock() - wp->last_used); -- 2.51.0 From c1f4534b213d7be41b5d8b815a42d201a8f2978f Mon Sep 17 00:00:00 2001 From: Andrei Lalaev Date: Mon, 31 Mar 2025 06:17:52 +0000 Subject: [PATCH 14/16] scripts: generate_rust_analyzer: fix pin-init name in kernel deps Because of different crate names ("pin-init" and "pin_init") passed to "append_crate" and "append_crate_with_generated", the script fails with "KeyError: 'pin-init'". To overcome the issue, pass the same name to both functions. Signed-off-by: Andrei Lalaev Link: https://lore.kernel.org/r/AM9PR03MB7074692E5D24C288D2BBC801C8AD2@AM9PR03MB7074.eurprd03.prod.outlook.com Fixes: 4e82c87058f4 ("Merge tag 'rust-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/ojeda/linux") [ Made author match the Signed-off-by one. Added newline. - Miguel ] Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index b0d7dc1e9267..cd41bc906fbd 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -133,7 +133,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs): append_crate_with_generated("bindings", ["core"]) append_crate_with_generated("uapi", ["core"]) - append_crate_with_generated("kernel", ["core", "macros", "build_error", "pin-init", "bindings", "uapi"]) + append_crate_with_generated("kernel", ["core", "macros", "build_error", "pin_init", "bindings", "uapi"]) def is_root_crate(build_file, target): try: -- 2.51.0 From 1e7857b28020ba57ca7fdafae7ac855ba326c697 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 31 Mar 2025 14:19:55 -0700 Subject: [PATCH 15/16] x86: don't re-generate cpufeaturemasks.h so eagerly It turns out the code to generate the x86 cpufeaturemasks.h header was way too aggressive, and would re-generate it whenever the timestamp on the kernel 
config file changed. Now, the regular 'make *config' tools are fairly careful to not rewrite the kernel config file unless the contents change, but other usecases aren't that careful. Michael Kelley reports that 'make-kpkg' ends up doing "make syncconfig" multiple times in prepping to build, and will modify the config file in the process (and then modify it back, but by then the timestamps have changed). Jakub Kicinski reports that the netdev CI does something similar in how it generates the config file in multiple steps. In both cases, the config file timestamp updates then cause the cpufeaturemasks.h file to be regenerated, and that in turn then causes lots of unnecessary rebuilds due to all the normal dependencies. Fix it by using our 'filechk' infrastructure in the Makefile to generate the header file. That will only write a new version of the file if the contents of the file have actually changed. Fixes: 841326332bcb ("x86/cpufeatures: Generate the header based on build config") Reported-by: Michael Kelley Reported-by: Jakub Kicinski Link: https://lore.kernel.org/all/SN6PR02MB415756D1829740F6E8AC11D1D4D82@SN6PR02MB4157.namprd02.prod.outlook.com/ Link: https://lore.kernel.org/all/20250328162311.08134fa6@kernel.org/ Cc: Peter Anvin Signed-off-by: Linus Torvalds --- arch/x86/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 0fc7e8fd1a2e..27efe2dc2aa8 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -277,12 +277,11 @@ cpufeaturemasks.hdr := arch/x86/include/generated/asm/cpufeaturemasks.h cpufeaturemasks.awk := $(srctree)/arch/x86/tools/cpufeaturemasks.awk cpufeatures_hdr := $(srctree)/arch/x86/include/asm/cpufeatures.h targets += $(cpufeaturemasks.hdr) -quiet_cmd_gen_featuremasks = GEN $@ - cmd_gen_featuremasks = $(AWK) -f $(cpufeaturemasks.awk) $(cpufeatures_hdr) $(KCONFIG_CONFIG) > $@ + filechk_gen_featuremasks = $(AWK) -f $(cpufeaturemasks.awk) $(cpufeatures_hdr) 
$(KCONFIG_CONFIG) $(cpufeaturemasks.hdr): $(cpufeaturemasks.awk) $(cpufeatures_hdr) $(KCONFIG_CONFIG) FORCE $(shell mkdir -p $(dir $@)) - $(call if_changed,gen_featuremasks) + $(call filechk,gen_featuremasks) archprepare: $(cpufeaturemasks.hdr) ### -- 2.51.0 From e3e68311ead15d8be61e8e1a8d2f0d1773a7ba9c Mon Sep 17 00:00:00 2001 From: Nitesh Shetty Date: Tue, 1 Apr 2025 10:13:47 +0530 Subject: [PATCH 16/16] block: remove unused nseg parameter We are no longer using nr_segs, after blk_mq_attempt_bio_merge was moved out of blk_mq_get_new_request. Signed-off-by: Nitesh Shetty Link: https://lore.kernel.org/r/20250401044348.15588-1-nj.shetty@samsung.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index ae8494d88897..0cfd1a149f64 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2965,8 +2965,7 @@ static bool blk_mq_attempt_bio_merge(struct request_queue *q, static struct request *blk_mq_get_new_requests(struct request_queue *q, struct blk_plug *plug, - struct bio *bio, - unsigned int nsegs) + struct bio *bio) { struct blk_mq_alloc_data data = { .q = q, @@ -3125,7 +3124,7 @@ new_request: if (rq) { blk_mq_use_cached_rq(rq, plug, bio); } else { - rq = blk_mq_get_new_requests(q, plug, bio, nr_segs); + rq = blk_mq_get_new_requests(q, plug, bio); if (unlikely(!rq)) { if (bio->bi_opf & REQ_NOWAIT) bio_wouldblock_error(bio); -- 2.51.0