From e038213658f09bc775a67cf8e18f0aec4b0f7679 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 31 Mar 2025 17:13:22 -0400 Subject: [PATCH 01/16] bcachefs: alloc_request.ca Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 54 ++++++++++++++++++---------------- fs/bcachefs/alloc_foreground.h | 2 ++ 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 3712e8722f3d..642d22643558 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -206,11 +206,13 @@ static inline bool may_alloc_bucket(struct bch_fs *c, return true; } -static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca, +static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct alloc_request *req, u64 bucket, u8 gen, struct closure *cl) { + struct bch_dev *ca = req->ca; + if (unlikely(is_superblock_bucket(c, ca, bucket))) return NULL; @@ -257,7 +259,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev * return ob; } -static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca, +static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct alloc_request *req, struct btree_iter *freespace_iter, struct closure *cl) @@ -265,7 +267,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc struct bch_fs *c = trans->c; u64 b = freespace_iter->pos.offset & ~(~0ULL << 56); - if (!may_alloc_bucket(c, req, POS(ca->dev_idx, b))) + if (!may_alloc_bucket(c, req, POS(req->ca->dev_idx, b))) return NULL; u8 gen; @@ -275,7 +277,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc if (ret) return NULL; - return __try_alloc_bucket(c, ca, req, b, gen, cl); + return __try_alloc_bucket(c, req, b, gen, cl); } /* @@ -283,11 +285,11 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc */ static 
noinline struct open_bucket * bch2_bucket_alloc_early(struct btree_trans *trans, - struct bch_dev *ca, struct alloc_request *req, struct closure *cl) { struct bch_fs *c = trans->c; + struct bch_dev *ca = req->ca; struct btree_iter iter, citer; struct bkey_s_c k, ck; struct open_bucket *ob = NULL; @@ -348,7 +350,7 @@ again: req->counters.buckets_seen++; ob = may_alloc_bucket(c, req, k.k->p) - ? __try_alloc_bucket(c, ca, req, k.k->p.offset, a->gen, cl) + ? __try_alloc_bucket(c, req, k.k->p.offset, a->gen, cl) : NULL; next: bch2_set_btree_iter_dontneed(trans, &citer); @@ -374,10 +376,10 @@ next: } static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, - struct bch_dev *ca, - struct alloc_request *req, - struct closure *cl) + struct alloc_request *req, + struct closure *cl) { + struct bch_dev *ca = req->ca; struct btree_iter iter; struct bkey_s_c k; struct open_bucket *ob = NULL; @@ -417,7 +419,7 @@ again: goto next; } - ob = try_alloc_bucket(trans, ca, req, &iter, cl); + ob = try_alloc_bucket(trans, req, &iter, cl); if (ob) { if (!IS_ERR(ob)) *dev_alloc_cursor = iter.pos.offset; @@ -448,7 +450,7 @@ fail: return ob; } -static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca, +static noinline void trace_bucket_alloc2(struct bch_fs *c, struct alloc_request *req, struct closure *cl, struct open_bucket *ob) @@ -457,12 +459,12 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca, printbuf_tabstop_push(&buf, 24); - prt_printf(&buf, "dev\t%s (%u)\n", ca->name, ca->dev_idx); + prt_printf(&buf, "dev\t%s (%u)\n", req->ca->name, req->ca->dev_idx); prt_printf(&buf, "watermark\t%s\n", bch2_watermarks[req->watermark]); prt_printf(&buf, "data type\t%s\n", __bch2_data_types[req->data_type]); prt_printf(&buf, "blocking\t%u\n", cl != NULL); prt_printf(&buf, "free\t%llu\n", req->usage.buckets[BCH_DATA_free]); - prt_printf(&buf, "avail\t%llu\n", dev_buckets_free(ca, req->usage, req->watermark)); + prt_printf(&buf, 
"avail\t%llu\n", dev_buckets_free(req->ca, req->usage, req->watermark)); prt_printf(&buf, "copygc_wait\t%lu/%lli\n", bch2_copygc_wait_amount(c), c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now)); @@ -488,7 +490,6 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca, * bch2_bucket_alloc_trans - allocate a single bucket from a specific device * @trans: transaction object * @req: state for the entire allocation - * @ca: device to allocate from * @cl: if not NULL, closure to be used to wait if buckets not available * @nowait: if true, do not wait for buckets to become available * @@ -496,11 +497,11 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca, */ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, struct alloc_request *req, - struct bch_dev *ca, struct closure *cl, bool nowait) { struct bch_fs *c = trans->c; + struct bch_dev *ca = req->ca; struct open_bucket *ob = NULL; bool freespace = READ_ONCE(ca->mi.freespace_initialized); u64 avail; @@ -542,8 +543,8 @@ again: closure_wake_up(&c->freelist_wait); alloc: ob = likely(freespace) - ? bch2_bucket_alloc_freelist(trans, ca, req, cl) - : bch2_bucket_alloc_early(trans, ca, req, cl); + ? bch2_bucket_alloc_freelist(trans, req, cl) + : bch2_bucket_alloc_early(trans, req, cl); if (req->counters.need_journal_commit * 2 > avail) bch2_journal_flush_async(&c->journal, NULL); @@ -572,7 +573,7 @@ err: if (!IS_ERR(ob) ? 
trace_bucket_alloc_enabled() : trace_bucket_alloc_fail_enabled()) - trace_bucket_alloc2(c, ca, req, cl, ob); + trace_bucket_alloc2(c, req, cl, ob); return ob; } @@ -586,10 +587,11 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, struct alloc_request req = { .watermark = watermark, .data_type = data_type, + .ca = ca, }; bch2_trans_do(c, - PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, &req, ca, cl, false))); + PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, &req, cl, false))); return ob; } @@ -715,20 +717,20 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans, struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, stripe, &req->devs_may_alloc); darray_for_each(devs_sorted, i) { - struct bch_dev *ca = bch2_dev_tryget_noerror(c, *i); - if (!ca) + req->ca = bch2_dev_tryget_noerror(c, *i); + if (!req->ca) continue; - if (!ca->mi.durability && req->have_cache) { - bch2_dev_put(ca); + if (!req->ca->mi.durability && req->have_cache) { + bch2_dev_put(req->ca); continue; } - struct open_bucket *ob = bch2_bucket_alloc_trans(trans, req, ca, cl, + struct open_bucket *ob = bch2_bucket_alloc_trans(trans, req, cl, req->flags & BCH_WRITE_alloc_nowait); if (!IS_ERR(ob)) - bch2_dev_stripe_increment_inlined(ca, stripe, &req->usage); - bch2_dev_put(ca); + bch2_dev_stripe_increment_inlined(req->ca, stripe, &req->usage); + bch2_dev_put(req->ca); if (IS_ERR(ob)) { ret = PTR_ERR(ob); diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 5d311a41d65f..7117e1e5c6d9 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -44,6 +44,8 @@ struct alloc_request { struct bch_dev_usage usage; /* bch2_bucket_alloc_trans(): */ + struct bch_dev *ca; + enum { BTREE_BITMAP_NO, BTREE_BITMAP_YES, -- 2.51.0 From 95f2315af7536c220301421eff6291c80ec321e5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 31 Mar 2025 17:57:06 -0400 Subject: [PATCH 02/16] bcachefs: alloc_request.ptrs2 
Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 14 ++++++++------ fs/bcachefs/alloc_foreground.h | 1 + 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 642d22643558..f546b4fcd58f 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -825,18 +825,19 @@ static bool want_bucket(struct bch_fs *c, static int bucket_alloc_set_writepoint(struct bch_fs *c, struct alloc_request *req) { - struct open_buckets ptrs_skip = { .nr = 0 }; struct open_bucket *ob; unsigned i; int ret = 0; + req->ptrs2.nr = 0; + open_bucket_for_each(c, &req->wp->ptrs, ob, i) { if (!ret && want_bucket(c, req, ob)) ret = add_new_bucket(c, req, ob); else - ob_push(c, &ptrs_skip, ob); + ob_push(c, &req->ptrs2, ob); } - req->wp->ptrs = ptrs_skip; + req->wp->ptrs = req->ptrs2; return ret; } @@ -1209,11 +1210,12 @@ static noinline void deallocate_extra_replicas(struct bch_fs *c, struct alloc_request *req) { - struct open_buckets ptrs2 = { 0 }; struct open_bucket *ob; unsigned extra_replicas = req->nr_effective - req->nr_replicas; unsigned i; + req->ptrs2.nr = 0; + open_bucket_for_each(c, &req->ptrs, ob, i) { unsigned d = ob_dev(c, ob)->mi.durability; @@ -1221,11 +1223,11 @@ deallocate_extra_replicas(struct bch_fs *c, extra_replicas -= d; ob_push(c, &req->wp->ptrs, ob); } else { - ob_push(c, &ptrs2, ob); + ob_push(c, &req->ptrs2, ob); } } - req->ptrs = ptrs2; + req->ptrs = req->ptrs2; } /* diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 7117e1e5c6d9..ae8ca3b7786b 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -36,6 +36,7 @@ struct alloc_request { /* These fields are used primarily by open_bucket_add_buckets */ struct open_buckets ptrs; + struct open_buckets ptrs2; unsigned nr_effective; /* sum of @ptrs durability */ bool have_cache; /* have we allocated from a 0 durability dev */ struct bch_devs_mask 
devs_may_alloc; -- 2.51.0 From a0b0b9bb9e3cb896a5585701fa6b340d2ada6f63 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 31 Mar 2025 17:50:52 -0400 Subject: [PATCH 03/16] bcachefs: alloc_request no longer on stack Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 84 +++++++++++++++++----------------- 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index f546b4fcd58f..f68e5f6849b0 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1248,49 +1248,51 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, struct bch_fs *c = trans->c; struct open_bucket *ob; unsigned write_points_nr; - int ret; int i; - struct alloc_request req = { - .nr_replicas = nr_replicas, - .target = target, - .ec = erasure_code, - .watermark = watermark, - .flags = flags, - .devs_have = devs_have, - }; + struct alloc_request *req = bch2_trans_kmalloc_nomemzero(trans, sizeof(*req)); + int ret = PTR_ERR_OR_ZERO(req); + if (unlikely(ret)) + return ret; + + req->nr_replicas = nr_replicas; + req->target = target; + req->ec = erasure_code; + req->watermark = watermark; + req->flags = flags; + req->devs_have = devs_have; if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING)) erasure_code = false; BUG_ON(!nr_replicas || !nr_replicas_required); retry: - req.ptrs.nr = 0; - req.nr_effective = 0; - req.have_cache = false; + req->ptrs.nr = 0; + req->nr_effective = 0; + req->have_cache = false; write_points_nr = c->write_points_nr; - *wp_ret = req.wp = writepoint_find(trans, write_point.v); + *wp_ret = req->wp = writepoint_find(trans, write_point.v); - req.data_type = req.wp->data_type; + req->data_type = req->wp->data_type; ret = bch2_trans_relock(trans); if (ret) goto err; /* metadata may not allocate on cache devices: */ - if (req.data_type != BCH_DATA_user) - req.have_cache = true; + if (req->data_type != BCH_DATA_user) + req->have_cache = true; if (target && 
!(flags & BCH_WRITE_only_specified_devs)) { - ret = open_bucket_add_buckets(trans, &req, NULL); + ret = open_bucket_add_buckets(trans, req, NULL); if (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto alloc_done; /* Don't retry from all devices if we're out of open buckets: */ if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) { - int ret2 = open_bucket_add_buckets(trans, &req, cl); + int ret2 = open_bucket_add_buckets(trans, req, cl); if (!ret2 || bch2_err_matches(ret2, BCH_ERR_transaction_restart) || bch2_err_matches(ret2, BCH_ERR_open_buckets_empty)) { @@ -1303,38 +1305,38 @@ retry: * Only try to allocate cache (durability = 0 devices) from the * specified target: */ - req.have_cache = true; - req.target = 0; + req->have_cache = true; + req->target = 0; - ret = open_bucket_add_buckets(trans, &req, cl); + ret = open_bucket_add_buckets(trans, req, cl); } else { - ret = open_bucket_add_buckets(trans, &req, cl); + ret = open_bucket_add_buckets(trans, req, cl); } alloc_done: - BUG_ON(!ret && req.nr_effective < req.nr_replicas); + BUG_ON(!ret && req->nr_effective < req->nr_replicas); - if (erasure_code && !ec_open_bucket(c, &req.ptrs)) + if (erasure_code && !ec_open_bucket(c, &req->ptrs)) pr_debug("failed to get ec bucket: ret %u", ret); if (ret == -BCH_ERR_insufficient_devices && - req.nr_effective >= nr_replicas_required) + req->nr_effective >= nr_replicas_required) ret = 0; if (ret) goto err; - if (req.nr_effective > req.nr_replicas) - deallocate_extra_replicas(c, &req); + if (req->nr_effective > req->nr_replicas) + deallocate_extra_replicas(c, req); /* Free buckets we didn't use: */ - open_bucket_for_each(c, &req.wp->ptrs, ob, i) + open_bucket_for_each(c, &req->wp->ptrs, ob, i) open_bucket_free_unused(c, ob); - req.wp->ptrs = req.ptrs; + req->wp->ptrs = req->ptrs; - req.wp->sectors_free = UINT_MAX; + req->wp->sectors_free = UINT_MAX; - open_bucket_for_each(c, &req.wp->ptrs, ob, i) { + open_bucket_for_each(c, &req->wp->ptrs, ob, i) { /* * Ensure 
proper write alignment - either due to misaligned * bucket sizes (from buggy bcachefs-tools), or writes that mix @@ -1348,29 +1350,29 @@ alloc_done: ob->sectors_free = max_t(int, 0, ob->sectors_free - align); - req.wp->sectors_free = min(req.wp->sectors_free, ob->sectors_free); + req->wp->sectors_free = min(req->wp->sectors_free, ob->sectors_free); } - req.wp->sectors_free = rounddown(req.wp->sectors_free, block_sectors(c)); + req->wp->sectors_free = rounddown(req->wp->sectors_free, block_sectors(c)); /* Did alignment use up space in an open_bucket? */ - if (unlikely(!req.wp->sectors_free)) { - bch2_alloc_sectors_done(c, req.wp); + if (unlikely(!req->wp->sectors_free)) { + bch2_alloc_sectors_done(c, req->wp); goto retry; } - BUG_ON(!req.wp->sectors_free || req.wp->sectors_free == UINT_MAX); + BUG_ON(!req->wp->sectors_free || req->wp->sectors_free == UINT_MAX); return 0; err: - open_bucket_for_each(c, &req.wp->ptrs, ob, i) - if (req.ptrs.nr < ARRAY_SIZE(req.ptrs.v)) - ob_push(c, &req.ptrs, ob); + open_bucket_for_each(c, &req->wp->ptrs, ob, i) + if (req->ptrs.nr < ARRAY_SIZE(req->ptrs.v)) + ob_push(c, &req->ptrs, ob); else open_bucket_free_unused(c, ob); - req.wp->ptrs = req.ptrs; + req->wp->ptrs = req->ptrs; - mutex_unlock(&req.wp->lock); + mutex_unlock(&req->wp->lock); if (bch2_err_matches(ret, BCH_ERR_freelist_empty) && try_decrease_writepoints(trans, write_points_nr)) -- 2.51.0 From 2a81bd454c45c89b167b6c2bd3ba7b5a489b0830 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 2 Apr 2025 17:23:22 -0400 Subject: [PATCH 04/16] bcachefs: reduce new_stripe_alloc_buckets() stack usage Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 12 ++++++------ fs/bcachefs/alloc_foreground.h | 8 +++++++- fs/bcachefs/ec.c | 24 ++++++++++++------------ 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index f68e5f6849b0..31d2207a071b 100644 --- a/fs/bcachefs/alloc_foreground.c +++ 
b/fs/bcachefs/alloc_foreground.c @@ -829,15 +829,15 @@ static int bucket_alloc_set_writepoint(struct bch_fs *c, unsigned i; int ret = 0; - req->ptrs2.nr = 0; + req->scratch_ptrs.nr = 0; open_bucket_for_each(c, &req->wp->ptrs, ob, i) { if (!ret && want_bucket(c, req, ob)) ret = add_new_bucket(c, req, ob); else - ob_push(c, &req->ptrs2, ob); + ob_push(c, &req->scratch_ptrs, ob); } - req->wp->ptrs = req->ptrs2; + req->wp->ptrs = req->scratch_ptrs; return ret; } @@ -1214,7 +1214,7 @@ deallocate_extra_replicas(struct bch_fs *c, unsigned extra_replicas = req->nr_effective - req->nr_replicas; unsigned i; - req->ptrs2.nr = 0; + req->scratch_ptrs.nr = 0; open_bucket_for_each(c, &req->ptrs, ob, i) { unsigned d = ob_dev(c, ob)->mi.durability; @@ -1223,11 +1223,11 @@ deallocate_extra_replicas(struct bch_fs *c, extra_replicas -= d; ob_push(c, &req->wp->ptrs, ob); } else { - ob_push(c, &req->ptrs2, ob); + ob_push(c, &req->scratch_ptrs, ob); } } - req->ptrs = req->ptrs2; + req->ptrs = req->scratch_ptrs; } /* diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index ae8ca3b7786b..192203410d4e 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -36,7 +36,6 @@ struct alloc_request { /* These fields are used primarily by open_bucket_add_buckets */ struct open_buckets ptrs; - struct open_buckets ptrs2; unsigned nr_effective; /* sum of @ptrs durability */ bool have_cache; /* have we allocated from a 0 durability dev */ struct bch_devs_mask devs_may_alloc; @@ -62,6 +61,13 @@ struct alloc_request { u64 skipped_nouse; u64 skipped_mi_btree_bitmap; } counters; + + unsigned scratch_nr_replicas; + unsigned scratch_nr_effective; + bool scratch_have_cache; + enum bch_data_type scratch_data_type; + struct open_buckets scratch_ptrs; + struct bch_devs_mask scratch_devs_may_alloc; }; struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *, diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 11f46dccc14f..37e63137041c 100644 --- 
a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1720,12 +1720,12 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, unsigned i, j, nr_have_parity = 0, nr_have_data = 0; int ret = 0; - enum bch_data_type saved_data_type = req->data_type; - struct open_buckets saved_ptrs = req->ptrs; - unsigned saved_nr_replicas = req->nr_replicas; - unsigned saved_nr_effective = req->nr_effective; - bool saved_have_cache = req->have_cache; - struct bch_devs_mask saved_devs_may_alloc = req->devs_may_alloc; + req->scratch_data_type = req->data_type; + req->scratch_ptrs = req->ptrs; + req->scratch_nr_replicas = req->nr_replicas; + req->scratch_nr_effective = req->nr_effective; + req->scratch_have_cache = req->have_cache; + req->scratch_devs_may_alloc = req->devs_may_alloc; req->devs_may_alloc = h->devs; req->have_cache = true; @@ -1801,12 +1801,12 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, goto err; } err: - req->data_type = saved_data_type; - req->ptrs = saved_ptrs; - req->nr_replicas = saved_nr_replicas; - req->nr_effective = saved_nr_effective; - req->have_cache = saved_have_cache; - req->devs_may_alloc = saved_devs_may_alloc; + req->data_type = req->scratch_data_type; + req->ptrs = req->scratch_ptrs; + req->nr_replicas = req->scratch_nr_replicas; + req->nr_effective = req->scratch_nr_effective; + req->have_cache = req->scratch_have_cache; + req->devs_may_alloc = req->scratch_devs_may_alloc; return ret; } -- 2.51.0 From ea27e8ca5d8e117b17a3d76c39404d206c5ebdeb Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 2 Apr 2025 11:59:39 -0400 Subject: [PATCH 05/16] bcachefs: darray: provide typedefs for primitive types Signed-off-by: Kent Overstreet --- fs/bcachefs/darray.h | 12 +++++++++++- fs/bcachefs/fsck.c | 2 -- fs/bcachefs/journal_types.h | 2 -- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index c6151495985f..88f0ca3f0af5 100644 --- a/fs/bcachefs/darray.h +++ 
b/fs/bcachefs/darray.h @@ -20,7 +20,17 @@ struct { \ #define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0) typedef DARRAY(char) darray_char; -typedef DARRAY(char *) darray_str; +typedef DARRAY(char *) darray_str; + +typedef DARRAY(u8) darray_u8; +typedef DARRAY(u16) darray_u16; +typedef DARRAY(u32) darray_u32; +typedef DARRAY(u64) darray_u64; + +typedef DARRAY(s8) darray_s8; +typedef DARRAY(s16) darray_s16; +typedef DARRAY(s32) darray_s32; +typedef DARRAY(s64) darray_s64; int __bch2_darray_resize_noprof(darray_char *, size_t, size_t, gfp_t); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index aaf187085276..d927fdafd43a 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2445,8 +2445,6 @@ int bch2_check_root(struct bch_fs *c) return ret; } -typedef DARRAY(u32) darray_u32; - static bool darray_u32_has(darray_u32 *d, u32 v) { darray_for_each(*d, i) diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 8e0eba776b9d..51104bbb99da 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -151,8 +151,6 @@ enum journal_flags { #undef x }; -typedef DARRAY(u64) darray_u64; - struct journal_bio { struct bch_dev *ca; unsigned buf_idx; -- 2.51.0 From b974357c63d0b26606210942dc5659d755089d4e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 2 Apr 2025 14:40:06 -0400 Subject: [PATCH 06/16] bcachefs: bch2_snapshot_table_make_room() Add a better helper for check_snapshot_exists(). create_snapids() can't be changed to use this, unfortunately, because the transaction that creates new snapshot will also be inserting other keys (e.g. root inode) that reference that snapshot ID, and they expect the snapshot table to already be updated. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/snapshot.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index fec569c7deb1..2eede851572c 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -281,6 +281,16 @@ fsck_err: return ret; } +static int bch2_snapshot_table_make_room(struct bch_fs *c, u32 id) +{ + mutex_lock(&c->snapshot_table_lock); + int ret = snapshot_t_mut(c, id) + ? 0 + : -BCH_ERR_ENOMEM_mark_snapshot; + mutex_unlock(&c->snapshot_table_lock); + return ret; +} + static int __bch2_mark_snapshot(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bkey_s_c old, struct bkey_s_c new, @@ -887,9 +897,8 @@ static int check_snapshot_exists(struct btree_trans *trans, u32 id) } bch2_trans_iter_exit(trans, &iter); - return bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?: - bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, - bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0); + return bch2_snapshot_table_make_room(c, id) ?: + bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0); } /* Figure out which snapshot nodes belong in the same tree: */ -- 2.51.0 From bcaea61adc1c19094cafbf0269fe99227b2ac89c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 3 Apr 2025 12:18:39 -0400 Subject: [PATCH 07/16] bcachefs: add missing include Hygiene, and fix build in userspace. 
Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 751a9679d7e5..e5b3e987d7bb 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -25,6 +25,7 @@ #include "subvolume.h" #include "trace.h" +#include #include #include -- 2.51.0 From c9b5d9cd26bde01a0591cd8eeed8847da997f576 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 3 Apr 2025 22:30:39 -0400 Subject: [PATCH 08/16] bcachefs: bch2_kvmalloc() mem alloc profiling Signed-off-by: Kent Overstreet --- fs/bcachefs/util.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 3e52c7f8ddd2..ccc1cf699c4b 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -55,15 +55,16 @@ static inline size_t buf_pages(void *p, size_t len) PAGE_SIZE); } -static inline void *bch2_kvmalloc(size_t n, gfp_t flags) +static inline void *bch2_kvmalloc_noprof(size_t n, gfp_t flags) { void *p = unlikely(n >= INT_MAX) - ? vmalloc(n) - : kvmalloc(n, flags & ~__GFP_ZERO); + ? vmalloc_noprof(n) + : kvmalloc_noprof(n, flags & ~__GFP_ZERO); if (p && (flags & __GFP_ZERO)) memset(p, 0, n); return p; } +#define bch2_kvmalloc(...) 
alloc_hooks(bch2_kvmalloc_noprof(__VA_ARGS__)) #define init_heap(heap, _size, gfp) \ ({ \ -- 2.51.0 From 2767f4f258b8d034a99830fafdb46a8c52910bce Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 5 Apr 2025 19:23:52 -0400 Subject: [PATCH 09/16] bcachefs: btree_io_complete_wq -> btree_write_complete_wq Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 2 +- fs/bcachefs/btree_io.c | 2 +- fs/bcachefs/super.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 24eed2b3be4d..09df91f10c20 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -877,7 +877,7 @@ struct bch_fs { struct btree_write_buffer btree_write_buffer; struct workqueue_struct *btree_update_wq; - struct workqueue_struct *btree_io_complete_wq; + struct workqueue_struct *btree_write_complete_wq; /* copygc needs its own workqueue for index updates.. */ struct workqueue_struct *copygc_wq; /* diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 60782f3e5aec..69b207502381 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -2184,7 +2184,7 @@ static void btree_node_write_endio(struct bio *bio) smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner); INIT_WORK(&wb->work, btree_node_write_work); - queue_work(c->btree_io_complete_wq, &wb->work); + queue_work(c->btree_write_complete_wq, &wb->work); } static int validate_bset_for_write(struct bch_fs *c, struct btree *b, diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 84a37d971ffd..cb3195a4fdb0 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -601,8 +601,8 @@ static void __bch2_fs_free(struct bch_fs *c) destroy_workqueue(c->btree_read_complete_wq); if (c->copygc_wq) destroy_workqueue(c->copygc_wq); - if (c->btree_io_complete_wq) - destroy_workqueue(c->btree_io_complete_wq); + if (c->btree_write_complete_wq) + destroy_workqueue(c->btree_write_complete_wq); if (c->btree_update_wq) 
destroy_workqueue(c->btree_update_wq); @@ -876,7 +876,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (!(c->btree_update_wq = alloc_workqueue("bcachefs", WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) || - !(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io", + !(c->btree_write_complete_wq = alloc_workqueue("bcachefs_btree_write", WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) || !(c->copygc_wq = alloc_workqueue("bcachefs_copygc", WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || -- 2.51.0 From 25ee021c7fc22797ac34b9b9fb9b24921b647901 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 5 Apr 2025 19:26:19 -0400 Subject: [PATCH 10/16] bcachefs: simplify journal pin initialization Signed-off-by: Kent Overstreet --- fs/bcachefs/journal.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index bb45d3634194..e10f9b930aa6 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1429,13 +1429,11 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) */ nr += nr / 4; - if (nr + 1 > j->pin.size) { - free_fifo(&j->pin); - init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL); - if (!j->pin.data) { - bch_err(c, "error reallocating journal fifo (%llu open entries)", nr); - return -BCH_ERR_ENOMEM_journal_pin_fifo; - } + nr = max(nr, JOURNAL_PIN); + init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL); + if (!j->pin.data) { + bch_err(c, "error reallocating journal fifo (%llu open entries)", nr); + return -BCH_ERR_ENOMEM_journal_pin_fifo; } j->replay_journal_seq = last_seq; @@ -1610,9 +1608,6 @@ int bch2_fs_journal_init(struct journal *j) ((union journal_res_state) { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); - if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL))) - return -BCH_ERR_ENOMEM_journal_pin_fifo; - j->free_buf_size = j->buf_size_want = JOURNAL_ENTRY_SIZE_MIN; j->free_buf = 
kvmalloc(j->free_buf_size, GFP_KERNEL); if (!j->free_buf) @@ -1621,8 +1616,6 @@ int bch2_fs_journal_init(struct journal *j) for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++) j->buf[i].idx = i; - j->pin.front = j->pin.back = 1; - j->wq = alloc_workqueue("bcachefs_journal", WQ_HIGHPRI|WQ_FREEZABLE|WQ_UNBOUND|WQ_MEM_RECLAIM, 512); if (!j->wq) -- 2.51.0 From 31813dcf379d7fc513530e3a9cf7b60cd2aa2a9d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 5 Apr 2025 19:41:35 -0400 Subject: [PATCH 11/16] bcachefs: alphabetize init function calls Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 50 ++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index cb3195a4fdb0..93ba6fef40b8 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -780,17 +780,18 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) for (i = 0; i < BCH_TIME_STAT_NR; i++) bch2_time_stats_init(&c->times[i]); - bch2_fs_copygc_init(c); - bch2_fs_btree_key_cache_init_early(&c->btree_key_cache); - bch2_fs_btree_iter_init_early(c); - bch2_fs_btree_interior_update_init_early(c); - bch2_fs_journal_keys_init(c); bch2_fs_allocator_background_init(c); bch2_fs_allocator_foreground_init(c); - bch2_fs_rebalance_init(c); - bch2_fs_quota_init(c); + bch2_fs_btree_cache_init_early(&c->btree_cache); + bch2_fs_btree_interior_update_init_early(c); + bch2_fs_btree_iter_init_early(c); + bch2_fs_btree_key_cache_init_early(&c->btree_key_cache); + bch2_fs_copygc_init(c); bch2_fs_ec_init_early(c); + bch2_fs_journal_keys_init(c); bch2_fs_move_init(c); + bch2_fs_quota_init(c); + bch2_fs_rebalance_init(c); bch2_fs_sb_errors_init_early(c); INIT_LIST_HEAD(&c->list); @@ -817,8 +818,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) c->journal.noflush_write_time = &c->times[BCH_TIME_journal_noflush_write]; c->journal.flush_seq_time = 
&c->times[BCH_TIME_journal_flush_seq]; - bch2_fs_btree_cache_init_early(&c->btree_cache); - mutex_init(&c->sectors_available_lock); ret = percpu_init_rwsem(&c->mark_lock); @@ -905,29 +904,30 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) goto err; } - ret = bch2_fs_counters_init(c) ?: - bch2_fs_sb_errors_init(c) ?: - bch2_io_clock_init(&c->io_clock[READ]) ?: - bch2_io_clock_init(&c->io_clock[WRITE]) ?: - bch2_fs_journal_init(&c->journal) ?: - bch2_fs_btree_iter_init(c) ?: + ret = bch2_fs_btree_cache_init(c) ?: - bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?: - bch2_fs_btree_interior_update_init(c) ?: bch2_fs_btree_gc_init(c) ?: - bch2_fs_buckets_waiting_for_journal_init(c) ?: + bch2_fs_btree_iter_init(c) ?: + bch2_fs_btree_interior_update_init(c) ?: + bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?: bch2_fs_btree_write_buffer_init(c) ?: - bch2_fs_subvolumes_init(c) ?: - bch2_fs_io_read_init(c) ?: - bch2_fs_io_write_init(c) ?: - bch2_fs_nocow_locking_init(c) ?: - bch2_fs_encryption_init(c) ?: + bch2_fs_buckets_waiting_for_journal_init(c) ?: + bch2_io_clock_init(&c->io_clock[READ]) ?: + bch2_io_clock_init(&c->io_clock[WRITE]) ?: bch2_fs_compress_init(c) ?: + bch2_fs_counters_init(c) ?: bch2_fs_ec_init(c) ?: - bch2_fs_vfs_init(c) ?: + bch2_fs_encryption_init(c) ?: bch2_fs_fsio_init(c) ?: bch2_fs_fs_io_buffered_init(c) ?: - bch2_fs_fs_io_direct_init(c); + bch2_fs_fs_io_direct_init(c) ?: + bch2_fs_io_read_init(c) ?: + bch2_fs_io_write_init(c) ?: + bch2_fs_journal_init(&c->journal) ?: + bch2_fs_nocow_locking_init(c) ?: + bch2_fs_sb_errors_init(c) ?: + bch2_fs_subvolumes_init(c) ?: + bch2_fs_vfs_init(c); if (ret) goto err; -- 2.51.0 From a17e985be9831bf866795fe5e3da219d2061ce6c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 5 Apr 2025 19:30:43 -0400 Subject: [PATCH 12/16] bcachefs: Move various init code to _init_early() _init_early() is for initialization that cannot fail, and often must happen for teardown partway 
through initialization to work. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 7 +------ fs/bcachefs/btree_gc.h | 3 +-- fs/bcachefs/btree_write_buffer.c | 7 ++++++- fs/bcachefs/btree_write_buffer.h | 1 + fs/bcachefs/journal.c | 5 ++++- fs/bcachefs/journal.h | 1 + fs/bcachefs/nocow_locking.c | 4 +--- fs/bcachefs/nocow_locking.h | 2 +- fs/bcachefs/subvolume.c | 3 +-- fs/bcachefs/subvolume.h | 2 +- fs/bcachefs/super.c | 9 +++++---- 11 files changed, 23 insertions(+), 21 deletions(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 37b69d89341f..1f02d28c175c 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1266,16 +1266,11 @@ void bch2_gc_gens_async(struct bch_fs *c) bch2_write_ref_put(c, BCH_WRITE_REF_gc_gens); } -void bch2_fs_btree_gc_exit(struct bch_fs *c) -{ -} - -int bch2_fs_btree_gc_init(struct bch_fs *c) +void bch2_fs_btree_gc_init_early(struct bch_fs *c) { seqcount_init(&c->gc_pos_lock); INIT_WORK(&c->gc_gens_work, bch2_gc_gens_work); init_rwsem(&c->gc_lock); mutex_init(&c->gc_gens_lock); - return 0; } diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h index 9693a90a48a2..ec77662369a2 100644 --- a/fs/bcachefs/btree_gc.h +++ b/fs/bcachefs/btree_gc.h @@ -83,7 +83,6 @@ void bch2_gc_pos_to_text(struct printbuf *, struct gc_pos *); int bch2_gc_gens(struct bch_fs *); void bch2_gc_gens_async(struct bch_fs *); -void bch2_fs_btree_gc_exit(struct bch_fs *); -int bch2_fs_btree_gc_init(struct bch_fs *); +void bch2_fs_btree_gc_init_early(struct bch_fs *); #endif /* _BCACHEFS_BTREE_GC_H */ diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 0941fb2c026d..68ab48af40f0 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -866,13 +866,18 @@ void bch2_fs_btree_write_buffer_exit(struct bch_fs *c) darray_exit(&wb->inc.keys); } -int bch2_fs_btree_write_buffer_init(struct bch_fs *c) +void bch2_fs_btree_write_buffer_init_early(struct bch_fs *c) { struct 
btree_write_buffer *wb = &c->btree_write_buffer; mutex_init(&wb->inc.lock); mutex_init(&wb->flushing.lock); INIT_WORK(&wb->flush_work, bch2_btree_write_buffer_flush_work); +} + +int bch2_fs_btree_write_buffer_init(struct bch_fs *c) +{ + struct btree_write_buffer *wb = &c->btree_write_buffer; /* Will be resized by journal as needed: */ unsigned initial_size = 1 << 16; diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index d535cea28bde..05f56fd1eed0 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -101,6 +101,7 @@ int bch2_journal_keys_to_write_buffer_end(struct bch_fs *, struct journal_keys_t int bch2_btree_write_buffer_resize(struct bch_fs *, size_t); void bch2_fs_btree_write_buffer_exit(struct bch_fs *); +void bch2_fs_btree_write_buffer_init_early(struct bch_fs *); int bch2_fs_btree_write_buffer_init(struct bch_fs *); #endif /* _BCACHEFS_BTREE_WRITE_BUFFER_H */ diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index e10f9b930aa6..7522a618b9c9 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1588,7 +1588,7 @@ void bch2_fs_journal_exit(struct journal *j) free_fifo(&j->pin); } -int bch2_fs_journal_init(struct journal *j) +void bch2_fs_journal_init_early(struct journal *j) { static struct lock_class_key res_key; @@ -1607,7 +1607,10 @@ int bch2_fs_journal_init(struct journal *j) atomic64_set(&j->reservations.counter, ((union journal_res_state) { .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v); +} +int bch2_fs_journal_init(struct journal *j) +{ j->free_buf_size = j->buf_size_want = JOURNAL_ENTRY_SIZE_MIN; j->free_buf = kvmalloc(j->free_buf_size, GFP_KERNEL); if (!j->free_buf) diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 641e20c05a14..886ffd9c0db6 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -458,6 +458,7 @@ void bch2_journal_set_replay_done(struct journal *); void bch2_dev_journal_exit(struct bch_dev *); int 
bch2_dev_journal_init(struct bch_dev *, struct bch_sb *); void bch2_fs_journal_exit(struct journal *); +void bch2_fs_journal_init_early(struct journal *); int bch2_fs_journal_init(struct journal *); #endif /* _BCACHEFS_JOURNAL_H */ diff --git a/fs/bcachefs/nocow_locking.c b/fs/bcachefs/nocow_locking.c index 3c21981a4a1c..962218fa68ec 100644 --- a/fs/bcachefs/nocow_locking.c +++ b/fs/bcachefs/nocow_locking.c @@ -133,12 +133,10 @@ void bch2_fs_nocow_locking_exit(struct bch_fs *c) BUG_ON(atomic_read(&l->l[j])); } -int bch2_fs_nocow_locking_init(struct bch_fs *c) +void bch2_fs_nocow_locking_init_early(struct bch_fs *c) { struct bucket_nocow_lock_table *t = &c->nocow_locks; for (struct nocow_lock_bucket *l = t->l; l < t->l + ARRAY_SIZE(t->l); l++) spin_lock_init(&l->lock); - - return 0; } diff --git a/fs/bcachefs/nocow_locking.h b/fs/bcachefs/nocow_locking.h index f9d6a426a960..48b8a003c0d2 100644 --- a/fs/bcachefs/nocow_locking.h +++ b/fs/bcachefs/nocow_locking.h @@ -45,6 +45,6 @@ static inline bool bch2_bucket_nocow_trylock(struct bucket_nocow_lock_table *t, void bch2_nocow_locks_to_text(struct printbuf *, struct bucket_nocow_lock_table *); void bch2_fs_nocow_locking_exit(struct bch_fs *); -int bch2_fs_nocow_locking_init(struct bch_fs *); +void bch2_fs_nocow_locking_init_early(struct bch_fs *); #endif /* _BCACHEFS_NOCOW_LOCKING_H */ diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 239ea783698c..0421ffc1128f 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -714,11 +714,10 @@ int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) return ret; } -int bch2_fs_subvolumes_init(struct bch_fs *c) +void bch2_fs_subvolumes_init_early(struct bch_fs *c) { INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work); INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work, bch2_subvolume_wait_for_pagecache_and_delete); mutex_init(&c->snapshots_unlinked_lock); - return 0; } diff --git a/fs/bcachefs/subvolume.h 
b/fs/bcachefs/subvolume.h index f640c1e3d639..ee5e4e5a0fc8 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -86,6 +86,6 @@ int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, boo int bch2_initialize_subvolumes(struct bch_fs *); int bch2_fs_upgrade_for_subvolumes(struct bch_fs *); -int bch2_fs_subvolumes_init(struct bch_fs *); +void bch2_fs_subvolumes_init_early(struct bch_fs *); #endif /* _BCACHEFS_SUBVOLUME_H */ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 93ba6fef40b8..9cff32bde7a4 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -567,7 +567,6 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_io_clock_exit(&c->io_clock[WRITE]); bch2_io_clock_exit(&c->io_clock[READ]); bch2_fs_compress_exit(c); - bch2_fs_btree_gc_exit(c); bch2_journal_keys_put_initial(c); bch2_find_btree_nodes_exit(&c->found_btree_nodes); BUG_ON(atomic_read(&c->journal_keys.ref)); @@ -783,16 +782,21 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_fs_allocator_background_init(c); bch2_fs_allocator_foreground_init(c); bch2_fs_btree_cache_init_early(&c->btree_cache); + bch2_fs_btree_gc_init_early(c); bch2_fs_btree_interior_update_init_early(c); bch2_fs_btree_iter_init_early(c); bch2_fs_btree_key_cache_init_early(&c->btree_key_cache); + bch2_fs_btree_write_buffer_init_early(c); bch2_fs_copygc_init(c); bch2_fs_ec_init_early(c); + bch2_fs_journal_init_early(&c->journal); bch2_fs_journal_keys_init(c); bch2_fs_move_init(c); + bch2_fs_nocow_locking_init_early(c); bch2_fs_quota_init(c); bch2_fs_rebalance_init(c); bch2_fs_sb_errors_init_early(c); + bch2_fs_subvolumes_init_early(c); INIT_LIST_HEAD(&c->list); @@ -906,7 +910,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) ret = bch2_fs_btree_cache_init(c) ?: - bch2_fs_btree_gc_init(c) ?: bch2_fs_btree_iter_init(c) ?: bch2_fs_btree_interior_update_init(c) ?: bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?: @@ 
-924,9 +927,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_fs_io_read_init(c) ?: bch2_fs_io_write_init(c) ?: bch2_fs_journal_init(&c->journal) ?: - bch2_fs_nocow_locking_init(c) ?: bch2_fs_sb_errors_init(c) ?: - bch2_fs_subvolumes_init(c) ?: bch2_fs_vfs_init(c); if (ret) goto err; -- 2.51.0 From d4d71b58e5139afc5f9bda0139b99404eb216d8a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 5 Apr 2025 17:36:04 -0400 Subject: [PATCH 13/16] bcachefs: RO mounts now use less memory Defer memory allocations only needed in RW mode until we actually go RW. This is part of improved support for RO images. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/io_read.c | 8 +++++++ fs/bcachefs/io_write.c | 8 ------- fs/bcachefs/super.c | 51 +++++++++++++++++++++++++++++------------- 4 files changed, 44 insertions(+), 24 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 09df91f10c20..1e40ad2a7bce 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -614,6 +614,7 @@ struct bch_dev { x(accounting_replay_done) \ x(may_go_rw) \ x(rw) \ + x(rw_init_done) \ x(was_rw) \ x(stopping) \ x(emergency_ro) \ diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index e5b3e987d7bb..e490f136d63d 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -1490,10 +1490,18 @@ void bch2_fs_io_read_exit(struct bch_fs *c) rhashtable_destroy(&c->promote_table); bioset_exit(&c->bio_read_split); bioset_exit(&c->bio_read); + mempool_exit(&c->bio_bounce_pages); } int bch2_fs_io_read_init(struct bch_fs *c) { + if (mempool_init_page_pool(&c->bio_bounce_pages, + max_t(unsigned, + c->opts.btree_node_size, + c->opts.encoded_extent_max) / + PAGE_SIZE, 0)) + return -BCH_ERR_ENOMEM_bio_bounce_pages_init; + if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio), BIOSET_NEED_BVECS)) return -BCH_ERR_ENOMEM_bio_read_init; diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c 
index c1237da079ed..401347e135b7 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1744,7 +1744,6 @@ void bch2_write_op_to_text(struct printbuf *out, struct bch_write_op *op) void bch2_fs_io_write_exit(struct bch_fs *c) { - mempool_exit(&c->bio_bounce_pages); bioset_exit(&c->replica_set); bioset_exit(&c->bio_write); } @@ -1755,12 +1754,5 @@ int bch2_fs_io_write_init(struct bch_fs *c) bioset_init(&c->replica_set, 4, offsetof(struct bch_write_bio, bio), 0)) return -BCH_ERR_ENOMEM_bio_write_init; - if (mempool_init_page_pool(&c->bio_bounce_pages, - max_t(unsigned, - c->opts.btree_node_size, - c->opts.encoded_extent_max) / - PAGE_SIZE, 0)) - return -BCH_ERR_ENOMEM_bio_bounce_pages_init; - return 0; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 9cff32bde7a4..834ba091e84f 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -183,6 +183,7 @@ static int bch2_dev_alloc(struct bch_fs *, unsigned); static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *); static void bch2_dev_io_ref_stop(struct bch_dev *, int); static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *); +static int bch2_fs_init_rw(struct bch_fs *); struct bch_fs *bch2_dev_to_fs(dev_t dev) { @@ -439,6 +440,10 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) bch_info(c, "going read-write"); + ret = bch2_fs_init_rw(c); + if (ret) + goto err; + ret = bch2_sb_members_v2_init(c); if (ret) goto err; @@ -736,6 +741,35 @@ err: return ret; } +static int bch2_fs_init_rw(struct bch_fs *c) +{ + if (test_bit(BCH_FS_rw_init_done, &c->flags)) + return 0; + + if (!(c->btree_update_wq = alloc_workqueue("bcachefs", + WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) || + !(c->btree_write_complete_wq = alloc_workqueue("bcachefs_btree_write_complete", + WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) || + !(c->copygc_wq = alloc_workqueue("bcachefs_copygc", + WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || + 
!(c->btree_write_submit_wq = alloc_workqueue("bcachefs_btree_write_sumit", + WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) || + !(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref", + WQ_FREEZABLE, 0))) + return -BCH_ERR_ENOMEM_fs_other_alloc; + + int ret = bch2_fs_btree_interior_update_init(c) ?: + bch2_fs_btree_write_buffer_init(c) ?: + bch2_fs_fs_io_buffered_init(c) ?: + bch2_fs_io_write_init(c) ?: + bch2_fs_journal_init(&c->journal); + if (ret) + return ret; + + set_bit(BCH_FS_rw_init_done, &c->flags); + return 0; +} + static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) { struct bch_fs *c; @@ -877,18 +911,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) (btree_blocks(c) + 1) * 2 * sizeof(struct sort_iter_set); - if (!(c->btree_update_wq = alloc_workqueue("bcachefs", - WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) || - !(c->btree_write_complete_wq = alloc_workqueue("bcachefs_btree_write", - WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) || - !(c->copygc_wq = alloc_workqueue("bcachefs_copygc", - WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || - !(c->btree_read_complete_wq = alloc_workqueue("bcachefs_btree_read_complete", + if (!(c->btree_read_complete_wq = alloc_workqueue("bcachefs_btree_read_complete", WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 512)) || - !(c->btree_write_submit_wq = alloc_workqueue("bcachefs_btree_write_sumit", - WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) || - !(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref", - WQ_FREEZABLE, 0)) || #ifndef BCH_WRITE_REF_DEBUG percpu_ref_init(&c->writes, bch2_writes_disabled, PERCPU_REF_INIT_DEAD, GFP_KERNEL) || @@ -911,9 +935,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) ret = bch2_fs_btree_cache_init(c) ?: bch2_fs_btree_iter_init(c) ?: - bch2_fs_btree_interior_update_init(c) ?: bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?: - bch2_fs_btree_write_buffer_init(c) ?: 
bch2_fs_buckets_waiting_for_journal_init(c) ?: bch2_io_clock_init(&c->io_clock[READ]) ?: bch2_io_clock_init(&c->io_clock[WRITE]) ?: @@ -922,11 +944,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) bch2_fs_ec_init(c) ?: bch2_fs_encryption_init(c) ?: bch2_fs_fsio_init(c) ?: - bch2_fs_fs_io_buffered_init(c) ?: bch2_fs_fs_io_direct_init(c) ?: bch2_fs_io_read_init(c) ?: - bch2_fs_io_write_init(c) ?: - bch2_fs_journal_init(&c->journal) ?: bch2_fs_sb_errors_init(c) ?: bch2_fs_vfs_init(c); if (ret) -- 2.51.0 From 3a2a0d08b225047ac1d2504059c45a5acf8072b8 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 3 Apr 2025 20:56:09 -0400 Subject: [PATCH 14/16] bcachefs: move_data_phys: stats are not required Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index fe2fa665150b..a4678a205da6 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -916,8 +916,10 @@ static int bch2_move_data_phys(struct bch_fs *c, bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans)); bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - ctxt.stats->phys = true; - ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys; + if (ctxt.stats) { + ctxt.stats->phys = true; + ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys; + } int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg); bch2_moving_ctxt_exit(&ctxt); -- 2.51.0 From 0e790469bf3044022cb02025abdae775c9908ca8 Mon Sep 17 00:00:00 2001 From: Integral Date: Sun, 6 Apr 2025 22:53:28 +0800 Subject: [PATCH 15/16] bcachefs: early return for negative values when parsing BCH_OPT_UINT Currently, when passing a negative integer as an argument, the error message is "too big" due to casting to an unsigned integer: > bcachefs format --block_size=-1 bcachefs.img invalid option: block_size: too big (max 65536) When a negative value in the argument is
detected, return early before calling bch2_opt_validate(). A new error code `BCH_ERR_option_negative` is added. Signed-off-by: Integral Signed-off-by: Kent Overstreet --- fs/bcachefs/errcode.h | 1 + fs/bcachefs/opts.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index d9ebffa5b3a2..768b176f6ea8 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -211,6 +211,7 @@ x(EINVAL, inode_unpack_error) \ x(EINVAL, varint_decode_error) \ x(EINVAL, erasure_coding_found_btree_node) \ + x(EINVAL, option_negative) \ x(EOPNOTSUPP, may_not_use_incompat_feature) \ x(EROFS, erofs_trans_commit) \ x(EROFS, erofs_no_writes) \ diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index af3258814822..f40de111e527 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -360,9 +360,15 @@ int bch2_opt_parse(struct bch_fs *c, return -EINVAL; } - ret = opt->flags & OPT_HUMAN_READABLE - ? bch2_strtou64_h(val, res) - : kstrtou64(val, 10, res); + if (*val != '-') { + ret = opt->flags & OPT_HUMAN_READABLE + ? 
bch2_strtou64_h(val, res) + : kstrtou64(val, 10, res); + } else { + prt_printf(err, "%s: must be a non-negative number", opt->attr.name); + return -BCH_ERR_option_negative; + } + if (ret < 0) { if (err) prt_printf(err, "%s: must be a number", -- 2.51.0 From 84ccd47d265579dd23768e69b5204801ad6b5eca Mon Sep 17 00:00:00 2001 From: Integral Date: Sun, 6 Apr 2025 23:26:59 +0800 Subject: [PATCH 16/16] bcachefs: split error messages of invalid compression into two lines When an invalid compression type or level is passed as an argument to `--compression`, two error messages are squashed into one line: > bcachefs format --compression=lzo bcachefs-comp.img invalid option: invalid compression typecompression: parse error > bcachefs format --compression=lz4:16 bcachefs-comp.img invalid option: invalid compression levelcompression: parse error To resolve this issue, add a newline character at the end of the first error message to separate them into two lines. Signed-off-by: Integral Signed-off-by: Kent Overstreet --- fs/bcachefs/compress.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index 28ed32449913..d68c3c7896a3 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -714,7 +714,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res, ret = match_string(bch2_compression_opts, -1, type_str); if (ret < 0 && err) - prt_str(err, "invalid compression type"); + prt_str(err, "invalid compression type\n"); if (ret < 0) goto err; @@ -729,7 +729,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res, if (!ret && level > 15) ret = -EINVAL; if (ret < 0 && err) - prt_str(err, "invalid compression level"); + prt_str(err, "invalid compression level\n"); if (ret < 0) goto err; -- 2.51.0