From 84bd6afee121b9e9bcc26f88cb55e0ee5c7a8f56 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 4 May 2025 16:24:43 -0400 Subject: [PATCH 01/16] bcachefs: inline bch2_ob_ptr() This was an oversight, we want bch2_alloc_sectors_append_ptrs_inlined() fully inlined. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 14 -------------- fs/bcachefs/alloc_foreground.h | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 2d7f32f9499e..b50846da7ae4 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1388,20 +1388,6 @@ err: return ret; } -struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob) -{ - struct bch_dev *ca = ob_dev(c, ob); - - return (struct bch_extent_ptr) { - .type = 1 << BCH_EXTENT_ENTRY_ptr, - .gen = ob->gen, - .dev = ob->dev, - .offset = bucket_to_sector(ca, ob->bucket) + - ca->mi.bucket_size - - ob->sectors_free, - }; -} - void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp, struct bkey_i *k, unsigned sectors, bool cached) diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 192203410d4e..2e01c7b61ed1 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -3,6 +3,7 @@ #define _BCACHEFS_ALLOC_FOREGROUND_H #include "bcachefs.h" +#include "buckets.h" #include "alloc_types.h" #include "extents.h" #include "io_write_types.h" @@ -233,7 +234,19 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *, struct closure *, struct write_point **); -struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *, struct open_bucket *); +static inline struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob) +{ + struct bch_dev *ca = ob_dev(c, ob); + + return (struct bch_extent_ptr) { + .type = 1 << BCH_EXTENT_ENTRY_ptr, + .gen = ob->gen, + .dev = ob->dev, + .offset = bucket_to_sector(ca, ob->bucket) + + ca->mi.bucket_size - + ob->sectors_free, + }; +} /* * Append pointers to the space we just allocated to @k, and mark @sectors space -- 2.51.0 From fbe728f9569b683564421a9190be53e60111a864 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 5 May 2025 21:15:34 -0400 Subject: [PATCH 02/16] bcachefs: improve check_inode_hash_info_matches_root() error message Signed-off-by: Kent Overstreet --- fs/bcachefs/str_hash.c | 31 +++++++++++++++++++------------ fs/bcachefs/str_hash.h | 8 +++++--- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c index a90bf7b8a2b4..55a3a116b5a8 100644 --- a/fs/bcachefs/str_hash.c +++ b/fs/bcachefs/str_hash.c @@ -157,6 +157,8 @@ static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans if (bkey_is_inode(k.k)) goto found; } + + /* This would've been caught by check_key_has_inode() */ bch_err(c, "%s(): inum %llu not found", __func__, inum); ret = -BCH_ERR_fsck_repair_unimplemented; goto err; @@ -166,20 +168,25 @@ found:; if (ret) goto err; - struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode); - if (hash_info->type != hash2.type || - memcmp(&hash_info->siphash_key, &hash2.siphash_key, sizeof(hash2.siphash_key))) { + struct bch_hash_info hash_root = bch2_hash_info_init(c, &inode); + if (hash_info->type != hash_root.type || + memcmp(&hash_info->siphash_key, + &hash_root.siphash_key, + sizeof(hash_root.siphash_key))) { ret = repair_inode_hash_info(trans, &inode); if (!ret) { - bch_err(c, "inode hash info mismatch with root, but mismatch not found\n" - "%u %llx %llx\n" - "%u %llx %llx", - hash_info->type, - hash_info->siphash_key.k0, - hash_info->siphash_key.k1, - hash2.type, - hash2.siphash_key.k0, - hash2.siphash_key.k1); + struct printbuf buf = PRINTBUF; + prt_printf(&buf, "inode %llu hash info mismatch with root, but mismatch not found\n", inum); + + prt_printf(&buf, "root snapshot %u ", hash_root.inum_snapshot); + bch2_prt_str_hash_type(&buf, hash_root.type); + prt_printf(&buf, " %llx %llx\n", hash_root.siphash_key.k0, hash_root.siphash_key.k1); + + prt_printf(&buf, "vs snapshot %u ", hash_info->inum_snapshot); + bch2_prt_str_hash_type(&buf, hash_info->type); + prt_printf(&buf, " %llx %llx", hash_info->siphash_key.k0, hash_info->siphash_key.k1); + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); ret = -BCH_ERR_fsck_repair_unimplemented; } } diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 0c1a00539bd1..ae3154fb6a94 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -32,6 +32,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt) } struct bch_hash_info { + u32 inum_snapshot; u8 type; struct unicode_map *cf_encoding; /* @@ -45,11 +46,12 @@ static inline struct bch_hash_info bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) { struct bch_hash_info info = { - .type = INODE_STR_HASH(bi), + .inum_snapshot = bi->bi_snapshot, + .type = INODE_STR_HASH(bi), #ifdef CONFIG_UNICODE - .cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL, + .cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL, #endif - .siphash_key = { .k0 = bi->bi_hash_seed } + .siphash_key = { .k0 = bi->bi_hash_seed } }; if (unlikely(info.type == BCH_STR_HASH_siphash_old)) { -- 2.51.0 From 39430cfd27ed2e1243374ea28479773506e119c3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 4 May 2025 15:02:53 -0400 Subject: [PATCH 03/16] bcachefs: Improve bch2_extent_ptr_set_cached() Preferentially keep existing cached pointers instead of adding new ones. Signed-off-by: Kent Overstreet --- fs/bcachefs/extents.c | 55 ++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index c4fe4ffd41f1..d3af841e48ef 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1136,33 +1136,50 @@ void bch2_extent_ptr_set_cached(struct bch_fs *c, struct bkey_s k, struct bch_extent_ptr *ptr) { - struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); + struct bkey_ptrs ptrs; union bch_extent_entry *entry; struct extent_ptr_decoded p; + bool have_cached_ptr; + unsigned drop_dev = ptr->dev; rcu_read_lock(); - if (!want_cached_ptr(c, opts, ptr)) { - bch2_bkey_drop_ptr_noerror(k, ptr); - goto out; - } +restart_drop_ptrs: + ptrs = bch2_bkey_ptrs(k); + have_cached_ptr = false; - /* - * Stripes can't contain cached data, for - reasons. - * - * Possibly something we can fix in the future? - */ - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) - if (&entry->ptr == ptr) { - if (p.has_ec) - bch2_bkey_drop_ptr_noerror(k, ptr); - else - ptr->cached = true; - goto out; + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + /* + * Check if it's erasure coded - stripes can't contain cached + * data. Possibly something we can fix in the future? + */ + if (&entry->ptr == ptr && p.has_ec) + goto drop; + + if (p.ptr.cached) { + if (have_cached_ptr || !want_cached_ptr(c, opts, &p.ptr)) { + bch2_bkey_drop_ptr_noerror(k, &entry->ptr); + ptr = NULL; + goto restart_drop_ptrs; + } + + have_cached_ptr = true; } + } + + if (!ptr) + bkey_for_each_ptr(ptrs, ptr2) + if (ptr2->dev == drop_dev) + ptr = ptr2; - BUG(); -out: + if (have_cached_ptr || !want_cached_ptr(c, opts, ptr)) + goto drop; + + ptr->cached = true; + rcu_read_unlock(); + return; +drop: rcu_read_unlock(); + bch2_bkey_drop_ptr_noerror(k, ptr); } /* -- 2.51.0 From 502222041c810b5d5ba5d45512e0a131c7f07d0a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 5 May 2025 21:19:17 -0400 Subject: [PATCH 04/16] bcachefs: __bch2_fs_free() cleanup Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 45e2b2bc8c65..7c6ea43b4347 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -579,35 +579,36 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); bch2_free_fsck_errs(c); - bch2_fs_accounting_exit(c); - bch2_fs_async_obj_exit(c); - bch2_fs_sb_errors_exit(c); - bch2_fs_counters_exit(c); + bch2_fs_vfs_exit(c); bch2_fs_snapshots_exit(c); + bch2_fs_sb_errors_exit(c); + bch2_fs_replicas_exit(c); bch2_fs_quota_exit(c); + bch2_fs_nocow_locking_exit(c); + bch2_fs_journal_exit(&c->journal); bch2_fs_fs_io_direct_exit(c); bch2_fs_fs_io_buffered_exit(c); bch2_fs_fsio_exit(c); - bch2_fs_vfs_exit(c); - bch2_fs_ec_exit(c); - bch2_fs_encryption_exit(c); - bch2_fs_nocow_locking_exit(c); bch2_fs_io_write_exit(c); bch2_fs_io_read_exit(c); + bch2_fs_encryption_exit(c); + bch2_fs_ec_exit(c); + bch2_fs_counters_exit(c); + bch2_fs_compress_exit(c); + bch2_io_clock_exit(&c->io_clock[WRITE]); + bch2_io_clock_exit(&c->io_clock[READ]); bch2_fs_buckets_waiting_for_journal_exit(c); - bch2_fs_btree_interior_update_exit(c); + bch2_fs_btree_write_buffer_exit(c); bch2_fs_btree_key_cache_exit(&c->btree_key_cache); - bch2_fs_btree_cache_exit(c); bch2_fs_btree_iter_exit(c); - bch2_fs_replicas_exit(c); - bch2_fs_journal_exit(&c->journal); - bch2_io_clock_exit(&c->io_clock[WRITE]); - bch2_io_clock_exit(&c->io_clock[READ]); - bch2_fs_compress_exit(c); + bch2_fs_btree_interior_update_exit(c); + bch2_fs_btree_cache_exit(c); + bch2_fs_accounting_exit(c); + bch2_fs_async_obj_exit(c); bch2_journal_keys_put_initial(c); bch2_find_btree_nodes_exit(&c->found_btree_nodes); + BUG_ON(atomic_read(&c->journal_keys.ref)); - bch2_fs_btree_write_buffer_exit(c); percpu_free_rwsem(&c->mark_lock); if (c->online_reserved) { u64 v = percpu_u64_get(c->online_reserved); -- 2.51.0 From 96fc7d8adb7881f7ffffcd6ab2be3b43fc5a5978 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 5 May 2025 20:35:36 -0400 Subject: [PATCH 05/16] bcachefs: opts.rebalance_on_ac_only Add an option for setting rebalance to only run when connected to mains power. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 1 + fs/bcachefs/opts.h | 5 ++++ fs/bcachefs/rebalance.c | 48 ++++++++++++++++++++++++++++++++--- fs/bcachefs/rebalance.h | 4 ++- fs/bcachefs/rebalance_types.h | 5 ++++ fs/bcachefs/super.c | 3 ++- 6 files changed, 60 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 0beff6af7ecf..061fa2666f35 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -870,6 +870,7 @@ LE64_BITMASK(BCH_SB_WRITE_ERROR_TIMEOUT,struct bch_sb, flags[6], 4, 14); LE64_BITMASK(BCH_SB_CSUM_ERR_RETRY_NR, struct bch_sb, flags[6], 14, 20); LE64_BITMASK(BCH_SB_DEGRADED_ACTION, struct bch_sb, flags[6], 20, 22); LE64_BITMASK(BCH_SB_CASEFOLD, struct bch_sb, flags[6], 22, 23); +LE64_BITMASK(BCH_SB_REBALANCE_AC_ONLY, struct bch_sb, flags[6], 23, 24); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index b8cd0b04e62a..f4c014ad43c1 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -490,6 +490,11 @@ enum fsck_err_opts { BCH2_NO_SB_OPT, true, \ NULL, "Enable rebalance: disable for debugging, or to\n"\ "quiet the system when doing performance testing\n")\ + x(rebalance_on_ac_only, u8, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ + OPT_BOOL(), \ + BCH_SB_REBALANCE_AC_ONLY, false, \ + NULL, "Enable rebalance while on mains power only\n") \ x(no_data_io, u8, \ OPT_MOUNT, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 7bcebcac2e1a..8fefe2b174c2 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -518,6 +518,13 @@ static void rebalance_wait(struct bch_fs *c) bch2_kthread_io_clock_wait(clock, r->wait_iotime_end, MAX_SCHEDULE_TIMEOUT); } +static bool bch2_rebalance_enabled(struct bch_fs *c) +{ + return c->opts.rebalance_enabled && + !(c->opts.rebalance_on_ac_only && + c->rebalance.on_battery); +} + static int do_rebalance(struct moving_context *ctxt) { struct btree_trans *trans = ctxt->trans; @@ -537,9 +544,9 @@ static int do_rebalance(struct moving_context *ctxt) BTREE_ITER_all_snapshots); while (!bch2_move_ratelimit(ctxt)) { - if (!c->opts.rebalance_enabled) { + if (!bch2_rebalance_enabled(c)) { bch2_moving_ctxt_flush_all(ctxt); - kthread_wait_freezable(c->opts.rebalance_enabled || + kthread_wait_freezable(bch2_rebalance_enabled(c) || kthread_should_stop()); } @@ -714,9 +721,42 @@ int bch2_rebalance_start(struct bch_fs *c) return 0; } -void bch2_fs_rebalance_init(struct bch_fs *c) +#ifdef CONFIG_POWER_SUPPLY +#include + +static int bch2_rebalance_power_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct bch_fs *c = container_of(nb, struct bch_fs, rebalance.power_notifier); + + c->rebalance.on_battery = !power_supply_is_system_supplied(); + bch2_rebalance_wakeup(c); + return NOTIFY_OK; +} +#endif + +void bch2_fs_rebalance_exit(struct bch_fs *c) { - bch2_pd_controller_init(&c->rebalance.pd); +#ifdef CONFIG_POWER_SUPPLY + power_supply_unreg_notifier(&c->rebalance.power_notifier); +#endif +} + +int bch2_fs_rebalance_init(struct bch_fs *c) +{ + struct bch_fs_rebalance *r = &c->rebalance; + + bch2_pd_controller_init(&r->pd); + +#ifdef CONFIG_POWER_SUPPLY + r->power_notifier.notifier_call = bch2_rebalance_power_notifier; + int ret = power_supply_reg_notifier(&r->power_notifier); + if (ret) + return ret; + + r->on_battery = !power_supply_is_system_supplied(); +#endif + return 0; } static int check_rebalance_work_one(struct btree_trans *trans, diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index b7c8c0652ad6..5d9214fe1a22 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -52,7 +52,9 @@ void bch2_rebalance_status_to_text(struct printbuf *, struct bch_fs *); void bch2_rebalance_stop(struct bch_fs *); int bch2_rebalance_start(struct bch_fs *); -void bch2_fs_rebalance_init(struct bch_fs *); + +void bch2_fs_rebalance_exit(struct bch_fs *); +int bch2_fs_rebalance_init(struct bch_fs *); int bch2_check_rebalance_work(struct bch_fs *); diff --git a/fs/bcachefs/rebalance_types.h b/fs/bcachefs/rebalance_types.h index fe5098c17dfc..33d77286f1d5 100644 --- a/fs/bcachefs/rebalance_types.h +++ b/fs/bcachefs/rebalance_types.h @@ -30,6 +30,11 @@ struct bch_fs_rebalance { struct bbpos scan_start; struct bbpos scan_end; struct bch_move_stats scan_stats; + + bool on_battery; +#ifdef CONFIG_POWER_SUPPLY + struct notifier_block power_notifier; +#endif }; #endif /* _BCACHEFS_REBALANCE_TYPES_H */ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 7c6ea43b4347..bd0565b7a9ba 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -583,6 +583,7 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_fs_snapshots_exit(c); bch2_fs_sb_errors_exit(c); bch2_fs_replicas_exit(c); + bch2_fs_rebalance_exit(c); bch2_fs_quota_exit(c); bch2_fs_nocow_locking_exit(c); bch2_fs_journal_exit(&c->journal); @@ -867,7 +868,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, bch2_fs_move_init(c); bch2_fs_nocow_locking_init_early(c); bch2_fs_quota_init(c); - bch2_fs_rebalance_init(c); bch2_fs_sb_errors_init_early(c); bch2_fs_snapshots_init_early(c); bch2_fs_subvolumes_init_early(c); @@ -989,6 +989,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, bch2_fs_fsio_init(c) ?: bch2_fs_fs_io_direct_init(c) ?: bch2_fs_io_read_init(c) ?: + bch2_fs_rebalance_init(c) ?: bch2_fs_sb_errors_init(c) ?: bch2_fs_vfs_init(c); if (ret) -- 2.51.0 From 66e9a7f13916fb0630cc48e0c21c474607aa3967 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 6 May 2025 00:51:39 -0400 Subject: [PATCH 06/16] bcachefs: bch2_dev_remove_stripes() respects degraded flags Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 3 +- fs/bcachefs/ec.c | 77 ++++++++++++++++++++++++---------- fs/bcachefs/ec.h | 4 +- fs/bcachefs/super.c | 3 +- 4 files changed, 62 insertions(+), 25 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 002e3853f8cf..81e2ae4bb400 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -2442,8 +2442,7 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca) * We clear the LRU and need_discard btrees first so that we don't race * with bch2_do_invalidates() and bch2_do_discards() */ - ret = bch2_dev_remove_stripes(c, ca->dev_idx) ?: - bch2_btree_delete_range(c, BTREE_ID_lru, start, end, + ret = bch2_btree_delete_range(c, BTREE_ID_lru, start, end, BTREE_TRIGGER_norun, NULL) ?: bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end, BTREE_TRIGGER_norun, NULL) ?: diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index dcd4e2266d34..bf5f4f6283a4 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -2106,23 +2106,17 @@ err: /* device removal */ -static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_s_c k_a) +int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, + struct btree_iter *iter, + struct bkey_s_c k, + unsigned dev_idx, + unsigned flags) { - struct bch_alloc_v4 a_convert; - const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k_a, &a_convert); - - if (!a->stripe) + if (k.k->type != KEY_TYPE_stripe) return 0; - if (a->stripe_sectors) { - bch_err(trans->c, "trying to invalidate device in stripe when bucket has stripe data"); - return -BCH_ERR_invalidate_stripe_to_dev; - } - - struct btree_iter iter; struct bkey_i_stripe *s = - bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_stripes, POS(0, a->stripe), - BTREE_ITER_slots, stripe); + bch2_bkey_make_mut_typed(trans, iter, &k, 0, stripe); int ret = PTR_ERR_OR_ZERO(s); if (ret) return ret; @@ -2139,35 +2133,76 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_ acc.replicas.data_type = BCH_DATA_user; ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, false); if (ret) - goto err; + return ret; struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(&s->k_i)); - bkey_for_each_ptr(ptrs, ptr) - if (ptr->dev == k_a.k->p.inode) + + /* XXX: how much redundancy do we still have? check degraded flags */ + + unsigned nr_good = 0; + + rcu_read_lock(); + bkey_for_each_ptr(ptrs, ptr) { + if (ptr->dev == dev_idx) ptr->dev = BCH_SB_MEMBER_INVALID; + struct bch_dev *ca = bch2_dev_rcu(trans->c, ptr->dev); + nr_good += ca && ca->mi.state != BCH_MEMBER_STATE_failed; + } + rcu_read_unlock(); + + if (nr_good < s->v.nr_blocks && !(flags & BCH_FORCE_IF_DATA_DEGRADED)) + return -BCH_ERR_remove_would_lose_data; + + unsigned nr_data = s->v.nr_blocks - s->v.nr_redundant; + + if (nr_good < nr_data && !(flags & BCH_FORCE_IF_DATA_LOST)) + return -BCH_ERR_remove_would_lose_data; + sectors = -sectors; memset(&acc, 0, sizeof(acc)); acc.type = BCH_DISK_ACCOUNTING_replicas; bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i)); acc.replicas.data_type = BCH_DATA_user; - ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, false); + return bch2_disk_accounting_mod(trans, &acc, §ors, 1, false); +} + +static int bch2_invalidate_stripe_to_dev_from_alloc(struct btree_trans *trans, struct bkey_s_c k_a, + unsigned flags) +{ + struct bch_alloc_v4 a_convert; + const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k_a, &a_convert); + + if (!a->stripe) + return 0; + + if (a->stripe_sectors) { + bch_err(trans->c, "trying to invalidate device in stripe when bucket has stripe data"); + return -BCH_ERR_invalidate_stripe_to_dev; + } + + struct btree_iter iter; + struct bkey_s_c_stripe s = + bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_stripes, POS(0, a->stripe), + BTREE_ITER_slots, stripe); + int ret = bkey_err(s); if (ret) - goto err; -err: + return ret; + + ret = bch2_invalidate_stripe_to_dev(trans, &iter, s.s_c, k_a.k->p.inode, flags); bch2_trans_iter_exit(trans, &iter); return ret; } -int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx) +int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx, unsigned flags) { return bch2_trans_run(c, for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, U64_MAX), BTREE_ITER_intent, k, NULL, NULL, 0, ({ - bch2_invalidate_stripe_to_dev(trans, k); + bch2_invalidate_stripe_to_dev_from_alloc(trans, k, flags); }))); } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 83d37bcb548a..548048adf0d5 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -288,7 +288,9 @@ static inline void ec_stripe_new_put(struct bch_fs *c, struct ec_stripe_new *s, } } -int bch2_dev_remove_stripes(struct bch_fs *, unsigned); +int bch2_invalidate_stripe_to_dev(struct btree_trans *, struct btree_iter *, + struct bkey_s_c, unsigned, unsigned); +int bch2_dev_remove_stripes(struct bch_fs *, unsigned, unsigned); void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *); void bch2_fs_ec_stop(struct bch_fs *); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index bd0565b7a9ba..faa012107a97 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1744,7 +1744,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) __bch2_dev_read_only(c, ca); - ret = bch2_dev_data_drop(c, ca->dev_idx, flags); + ret = bch2_dev_data_drop(c, ca->dev_idx, flags) ?: + bch2_dev_remove_stripes(c, ca->dev_idx, flags); bch_err_msg(ca, ret, "bch2_dev_data_drop()"); if (ret) goto err; -- 2.51.0 From b3f80d09236e4915d7deedfaf15d7bdaef6f14d2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 3 May 2025 21:55:26 -0400 Subject: [PATCH 07/16] bcachefs: BCH_SB_MEMBER_DELETED_UUID Add a sentinal value for devices that have been removed, but don't want to reuse their index until a fsck has completed. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 4 ++++ fs/bcachefs/sb-members.c | 29 ++++++++++++++++++++++++++++- fs/bcachefs/sb-members.h | 4 +++- fs/bcachefs/sb-members_format.h | 4 ++++ 4 files changed, 39 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 92ae31737a24..dd08ec080313 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1079,6 +1079,10 @@ out: * allocator thread - issue wakeup in case they blocked on gc_lock: */ closure_wake_up(&c->freelist_wait); + + if (!ret && !test_bit(BCH_FS_errors_not_fixed, &c->flags)) + bch2_sb_members_clean_deleted(c); + bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index f6a0b3de6bca..b9568a68fbf6 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -525,6 +525,7 @@ int bch2_sb_member_alloc(struct bch_fs *c) unsigned u64s; int best = -1; u64 best_last_mount = 0; + unsigned nr_deleted = 0; if (dev_idx < BCH_SB_MEMBERS_MAX) goto have_slot; @@ -535,7 +536,10 @@ int bch2_sb_member_alloc(struct bch_fs *c) continue; struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, dev_idx); - if (bch2_member_alive(&m)) + + nr_deleted += uuid_equal(&m.uuid, &BCH_SB_MEMBER_DELETED_UUID); + + if (!bch2_is_zero(&m.uuid, sizeof(m.uuid))) continue; u64 last_mount = le64_to_cpu(m.last_mount); @@ -549,6 +553,10 @@ int bch2_sb_member_alloc(struct bch_fs *c) goto have_slot; } + if (nr_deleted) + bch_err(c, "unable to allocate new member, but have %u deleted: run fsck", + nr_deleted); + return -BCH_ERR_ENOSPC_sb_members; have_slot: nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices); @@ -564,3 +572,22 @@ have_slot: c->disk_sb.sb->nr_devices = nr_devices; return dev_idx; } + +void bch2_sb_members_clean_deleted(struct bch_fs *c) +{ + mutex_lock(&c->sb_lock); + bool write_sb = false; + + for (unsigned i = 0; i < c->sb.nr_devices; i++) { + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, i); + + if (uuid_equal(&m->uuid, &BCH_SB_MEMBER_DELETED_UUID)) { + memset(&m->uuid, 0, sizeof(m->uuid)); + write_sb = true; + } + } + + if (write_sb) + bch2_write_super(c); + mutex_unlock(&c->sb_lock); +} diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index c9cb8f7657b0..6bd9b86aee5b 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -320,7 +320,8 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2; static inline bool bch2_member_alive(struct bch_member *m) { - return !bch2_is_zero(&m->uuid, sizeof(m->uuid)); + return !bch2_is_zero(&m->uuid, sizeof(m->uuid)) && + !uuid_equal(&m->uuid, &BCH_SB_MEMBER_DELETED_UUID); } static inline bool bch2_member_exists(struct bch_sb *sb, unsigned dev) @@ -381,5 +382,6 @@ bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c); void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c); int bch2_sb_member_alloc(struct bch_fs *); +void bch2_sb_members_clean_deleted(struct bch_fs *); #endif /* _BCACHEFS_SB_MEMBERS_H */ diff --git a/fs/bcachefs/sb-members_format.h b/fs/bcachefs/sb-members_format.h index 472218a59102..fb72ad730518 100644 --- a/fs/bcachefs/sb-members_format.h +++ b/fs/bcachefs/sb-members_format.h @@ -13,6 +13,10 @@ */ #define BCH_SB_MEMBER_INVALID 255 +#define BCH_SB_MEMBER_DELETED_UUID \ + UUID_INIT(0xffffffff, 0xffff, 0xffff, \ + 0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef) + #define BCH_MIN_NR_NBUCKETS (1 << 6) #define BCH_IOPS_MEASUREMENTS() \ -- 2.51.0 From 09fa6c3039d8bb22351ad071ea4656dd4f331d18 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 3 May 2025 21:36:23 -0400 Subject: [PATCH 08/16] bcachefs: bch2_dev_data_drop_by_backpointers() Currently, device removal has to scan all metadata for pointers to the device being removed. Add a new method, with the same interface as bch2_dev_data_drop(), that scans by backpointers instead - this will drastically speed up device removal. Signed-off-by: Kent Overstreet --- fs/bcachefs/migrate.c | 117 +++++++++++++++++++++++++++++++++++++----- fs/bcachefs/migrate.h | 3 +- 2 files changed, 105 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 90dcf80bd64a..bb7a92270c09 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -4,10 +4,13 @@ */ #include "bcachefs.h" +#include "backpointers.h" #include "bkey_buf.h" #include "btree_update.h" #include "btree_update_interior.h" +#include "btree_write_buffer.h" #include "buckets.h" +#include "ec.h" #include "errcode.h" #include "extents.h" #include "io_write.h" @@ -20,7 +23,7 @@ #include "super-io.h" static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k, - unsigned dev_idx, int flags, bool metadata) + unsigned dev_idx, unsigned flags, bool metadata) { unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas; unsigned lost = metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST; @@ -37,11 +40,28 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k, return 0; } +static int drop_btree_ptrs(struct btree_trans *trans, struct btree_iter *iter, + struct btree *b, unsigned dev_idx, unsigned flags) +{ + struct bch_fs *c = trans->c; + struct bkey_buf k; + + bch2_bkey_buf_init(&k); + bch2_bkey_buf_copy(&k, c, &b->key); + + int ret = drop_dev_ptrs(c, bkey_i_to_s(k.k), dev_idx, flags, true) ?: + bch2_btree_node_update_key(trans, iter, b, k.k, 0, false); + + bch_err_fn(c, ret); + bch2_bkey_buf_exit(&k, c); + return ret; +} + static int bch2_dev_usrdata_drop_key(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, unsigned dev_idx, - int flags) + unsigned flags) { struct bch_fs *c = trans->c; struct bkey_i *n; @@ -77,9 +97,27 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans, return 0; } +static int bch2_dev_btree_drop_key(struct btree_trans *trans, + struct bkey_s_c_backpointer bp, + unsigned dev_idx, + struct bkey_buf *last_flushed, + unsigned flags) +{ + struct btree_iter iter; + struct btree *b = bch2_backpointer_get_node(trans, bp, &iter, last_flushed); + int ret = PTR_ERR_OR_ZERO(b); + if (ret) + return ret == -BCH_ERR_backpointer_to_overwritten_btree_node ? 0 : ret; + + ret = drop_btree_ptrs(trans, &iter, b, dev_idx, flags); + + bch2_trans_iter_exit(trans, &iter); + return ret; +} + static int bch2_dev_usrdata_drop(struct bch_fs *c, struct progress_indicator_state *progress, - unsigned dev_idx, int flags) + unsigned dev_idx, unsigned flags) { struct btree_trans *trans = bch2_trans_get(c); enum btree_id id; @@ -106,7 +144,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, static int bch2_dev_metadata_drop(struct bch_fs *c, struct progress_indicator_state *progress, - unsigned dev_idx, int flags) + unsigned dev_idx, unsigned flags) { struct btree_trans *trans; struct btree_iter iter; @@ -137,20 +175,12 @@ retry: if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx)) goto next; - bch2_bkey_buf_copy(&k, c, &b->key); - - ret = drop_dev_ptrs(c, bkey_i_to_s(k.k), - dev_idx, flags, true); - if (ret) - break; - - ret = bch2_btree_node_update_key(trans, &iter, b, k.k, 0, false); + ret = drop_btree_ptrs(trans, &iter, b, dev_idx, flags); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { ret = 0; continue; } - bch_err_msg(c, ret, "updating btree node key"); if (ret) break; next: @@ -176,7 +206,66 @@ err: return ret; } -int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags) +static int data_drop_bp(struct btree_trans *trans, unsigned dev_idx, + struct bkey_s_c_backpointer bp, struct bkey_buf *last_flushed, + unsigned flags) +{ + struct btree_iter iter; + struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, + last_flushed); + int ret = bkey_err(k); + if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) + return 0; + if (ret) + return ret; + + if (!k.k || !bch2_bkey_has_device_c(k, dev_idx)) + goto out; + + /* + * XXX: pass flags arg to invalidate_stripe_to_dev and handle it + * properly + */ + + if (bkey_is_btree_ptr(k.k)) + ret = bch2_dev_btree_drop_key(trans, bp, dev_idx, last_flushed, flags); + else if (k.k->type == KEY_TYPE_stripe) + ret = bch2_invalidate_stripe_to_dev(trans, &iter, k, dev_idx, flags); + else + ret = bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags); +out: + bch2_trans_iter_exit(trans, &iter); + return ret; +} + +int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsigned flags) +{ + struct btree_trans *trans = bch2_trans_get(c); + + struct bkey_buf last_flushed; + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + + int ret = bch2_btree_write_buffer_flush_sync(trans) ?: + for_each_btree_key_max_commit(trans, iter, BTREE_ID_backpointers, + POS(dev_idx, 0), + POS(dev_idx, U64_MAX), 0, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ + if (k.k->type != KEY_TYPE_backpointer) + continue; + + data_drop_bp(trans, dev_idx, bkey_s_c_to_backpointer(k), + &last_flushed, flags); + + })); + + bch2_bkey_buf_exit(&last_flushed, trans->c); + bch2_trans_put(trans); + bch_err_fn(c, ret); + return ret; +} + +int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, unsigned flags) { struct progress_indicator_state progress; bch2_progress_init(&progress, c, diff --git a/fs/bcachefs/migrate.h b/fs/bcachefs/migrate.h index 027efaa0d575..30018140711b 100644 --- a/fs/bcachefs/migrate.h +++ b/fs/bcachefs/migrate.h @@ -2,6 +2,7 @@ #ifndef _BCACHEFS_MIGRATE_H #define _BCACHEFS_MIGRATE_H -int bch2_dev_data_drop(struct bch_fs *, unsigned, int); +int bch2_dev_data_drop_by_backpointers(struct bch_fs *, unsigned, unsigned); +int bch2_dev_data_drop(struct bch_fs *, unsigned, unsigned); #endif /* _BCACHEFS_MIGRATE_H */ -- 2.51.0 From a8539ad8fa88e39c3f566b7c35029f25ab90b72e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 3 May 2025 21:26:04 -0400 Subject: [PATCH 09/16] bcachefs: bcachefs_metadata_version_fast_device_removal Fast device removal, that uses backpointers to find pointers to the device being removed instead of a full metadata scan. This requires BCH_SB_MEMBER_DELETED_UUID, which is an incompatible change - hence the version number bump. We don't fully trust backpointers, so we don't want to reuse device indexes until after a fsck has verified that there aren't any pointers to removed devices. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_format.h | 3 ++- fs/bcachefs/ec.c | 4 +++- fs/bcachefs/super.c | 27 +++++++++++++++++++++++---- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 061fa2666f35..a483d440fa39 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -696,7 +696,8 @@ struct bch_sb_field_ext { x(stripe_lru, BCH_VERSION(1, 23)) \ x(casefolding, BCH_VERSION(1, 24)) \ x(extent_flags, BCH_VERSION(1, 25)) \ - x(snapshot_deletion_v2, BCH_VERSION(1, 26)) + x(snapshot_deletion_v2, BCH_VERSION(1, 26)) \ + x(fast_device_removal, BCH_VERSION(1, 27)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index bf5f4f6283a4..c581426e3894 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -2197,13 +2197,15 @@ static int bch2_invalidate_stripe_to_dev_from_alloc(struct btree_trans *trans, s int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx, unsigned flags) { - return bch2_trans_run(c, + int ret = bch2_trans_run(c, for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, U64_MAX), BTREE_ITER_intent, k, NULL, NULL, 0, ({ bch2_invalidate_stripe_to_dev_from_alloc(trans, k, flags); }))); + bch_err_fn(c, ret); + return ret; } /* startup/shutdown */ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index faa012107a97..dc8189f9d2f1 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1726,6 +1726,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) { struct bch_member *m; unsigned dev_idx = ca->dev_idx, data; + bool fast_device_removal = !bch2_request_incompat_feature(c, + bcachefs_metadata_version_fast_device_removal); int ret; down_write(&c->state_lock); @@ -1744,12 +1746,25 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) __bch2_dev_read_only(c, ca); - ret = bch2_dev_data_drop(c, ca->dev_idx, flags) ?: - bch2_dev_remove_stripes(c, ca->dev_idx, flags); - bch_err_msg(ca, ret, "bch2_dev_data_drop()"); + ret = fast_device_removal + ? bch2_dev_data_drop_by_backpointers(c, ca->dev_idx, flags) + : (bch2_dev_data_drop(c, ca->dev_idx, flags) ?: + bch2_dev_remove_stripes(c, ca->dev_idx, flags)); if (ret) goto err; + /* Check if device still has data before blowing away alloc info */ + struct bch_dev_usage usage = bch2_dev_usage_read(ca); + for (unsigned i = 0; i < BCH_DATA_NR; i++) + if (!data_type_is_empty(i) && + !data_type_is_hidden(i) && + usage.buckets[i]) { + bch_err(ca, "Remove failed: still has data (%s, %llu buckets)", + __bch2_data_types[i], usage.buckets[i]); + ret = -EBUSY; + goto err; + } + ret = bch2_dev_remove_alloc(c, ca); bch_err_msg(ca, ret, "bch2_dev_remove_alloc()"); if (ret) @@ -1813,7 +1828,11 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) */ mutex_lock(&c->sb_lock); m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx); - memset(&m->uuid, 0, sizeof(m->uuid)); + + if (fast_device_removal) + m->uuid = BCH_SB_MEMBER_DELETED_UUID; + else + memset(&m->uuid, 0, sizeof(m->uuid)); bch2_write_super(c); -- 2.51.0 From 8c69e2b52ea81b102ace48debd114467199ca77a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 May 2025 14:26:18 -0400 Subject: [PATCH 10/16] bcachefs: Knob for manual snapshot deletion Add 'opts.snapshot_deletion_enabled', enabled by default. This may be turned off so that the new sysfs knob, 'internal/trigger_delete_dead_snapshots', may be used instead - this will allow snapshot deletion to be profiled more easily. Signed-off-by: Kent Overstreet --- fs/bcachefs/opts.h | 6 ++++++ fs/bcachefs/snapshot.c | 26 ++++++++++++++++++++++---- fs/bcachefs/snapshot.h | 1 + fs/bcachefs/snapshot_types.h | 1 + fs/bcachefs/sysfs.c | 5 +++++ 5 files changed, 35 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index f4c014ad43c1..2a02606254b3 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -495,6 +495,12 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH_SB_REBALANCE_AC_ONLY, false, \ NULL, "Enable rebalance while on mains power only\n") \ + x(auto_snapshot_deletion, u8, \ + OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ + OPT_BOOL(), \ + BCH2_NO_SB_OPT, true, \ + NULL, "Enable automatic snapshot deletion: disable for debugging, or to\n"\ + "quiet the system when doing performance testing\n")\ x(no_data_io, u8, \ OPT_MOUNT, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 9ec3275c7b0a..c3dc450cbcec 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -1776,14 +1776,18 @@ static void bch2_snapshot_delete_nodes_to_text(struct printbuf *out, struct snap prt_newline(out); } -int bch2_delete_dead_snapshots(struct bch_fs *c) +int __bch2_delete_dead_snapshots(struct bch_fs *c) { - if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) + struct snapshot_delete *d = &c->snapshot_delete; + int ret = 0; + + if (!mutex_trylock(&d->lock)) return 0; + if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) + goto out_unlock; + struct btree_trans *trans = bch2_trans_get(c); - struct snapshot_delete *d = &c->snapshot_delete; - int ret = 0; /* * For every snapshot node: If we have no live children and it's not @@ -1857,11 +1861,21 @@ err: d->running = false; mutex_unlock(&d->progress_lock); bch2_trans_put(trans); +out_unlock: + mutex_unlock(&d->lock); if (!bch2_err_matches(ret, EROFS)) bch_err_fn(c, ret); return ret; } +int bch2_delete_dead_snapshots(struct bch_fs *c) +{ + if (!c->opts.auto_snapshot_deletion) + return 0; + + return __bch2_delete_dead_snapshots(c); +} + void bch2_delete_dead_snapshots_work(struct work_struct *work) { struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete.work); @@ -1874,6 +1888,9 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work) void bch2_delete_dead_snapshots_async(struct bch_fs *c) { + if (!c->opts.auto_snapshot_deletion) + return; + if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_delete_dead_snapshots)) return; @@ -1977,6 +1994,7 @@ void bch2_fs_snapshots_exit(struct bch_fs *c) void bch2_fs_snapshots_init_early(struct bch_fs *c) { INIT_WORK(&c->snapshot_delete.work, bch2_delete_dead_snapshots_work); + mutex_init(&c->snapshot_delete.lock); mutex_init(&c->snapshot_delete.progress_lock); mutex_init(&c->snapshots_unlinked_lock); } diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 69c484b77729..63b9469eb1eb 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -273,6 +273,7 @@ static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans, return __bch2_key_has_snapshot_overwrites(trans, id, pos); } +int __bch2_delete_dead_snapshots(struct bch_fs *); int bch2_delete_dead_snapshots(struct bch_fs *); void bch2_delete_dead_snapshots_work(struct work_struct *); void bch2_delete_dead_snapshots_async(struct bch_fs *); diff --git a/fs/bcachefs/snapshot_types.h b/fs/bcachefs/snapshot_types.h index 1aa7a58442ae..0ab698f13e5c 100644 --- a/fs/bcachefs/snapshot_types.h +++ b/fs/bcachefs/snapshot_types.h @@ -42,6 +42,7 @@ struct snapshot_interior_delete { typedef DARRAY(struct snapshot_interior_delete) interior_delete_list; struct snapshot_delete { + struct mutex lock; struct work_struct work; struct mutex progress_lock; diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index adf99a805a62..4c7d609d79fd 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -149,6 +149,7 @@ write_attribute(trigger_btree_key_cache_shrink); write_attribute(trigger_btree_updates); write_attribute(trigger_freelist_wakeup); write_attribute(trigger_recalc_capacity); +write_attribute(trigger_delete_dead_snapshots); read_attribute(gc_gens_pos); read_attribute(uuid); @@ -439,6 +440,9 @@ STORE(bch2_fs) up_read(&c->state_lock); } + if (attr == &sysfs_trigger_delete_dead_snapshots) + __bch2_delete_dead_snapshots(c); + #ifdef CONFIG_BCACHEFS_TESTS if (attr == &sysfs_perf_test) { char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp; @@ -568,6 +572,7 @@ struct attribute *bch2_fs_internal_files[] = { &sysfs_trigger_btree_updates, &sysfs_trigger_freelist_wakeup, &sysfs_trigger_recalc_capacity, + &sysfs_trigger_delete_dead_snapshots, &sysfs_gc_gens_pos, -- 2.51.0 From 970dde8271b607876bfdbb66b941ffda35e74973 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 7 May 2025 16:34:35 -0400 Subject: [PATCH 11/16] bcachefs: Add missing include fix debug build in userspace Signed-off-by: Kent Overstreet --- fs/bcachefs/util.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 14cb2c7dfda4..25cf61ebd40c 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include -- 2.51.0 From 1dfa01ef24151547a2a622e90ad73b082b1bc739 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 8 May 2025 14:24:12 -0400 Subject: [PATCH 12/16] bcachefs: bch2_copygc_dev_wait_amount() Factor out the per-device calculations, for better introspection. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_foreground.c | 2 +- fs/bcachefs/movinggc.c | 56 +++++++++++++++++++++------------- fs/bcachefs/movinggc.h | 2 +- 3 files changed, 36 insertions(+), 24 deletions(-) diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index b50846da7ae4..828cf94217dd 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -465,7 +465,7 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, prt_printf(&buf, "blocking\t%u\n", cl != NULL); prt_printf(&buf, "free\t%llu\n", req->usage.buckets[BCH_DATA_free]); prt_printf(&buf, "avail\t%llu\n", dev_buckets_free(req->ca, req->usage, req->watermark)); - prt_printf(&buf, "copygc_wait\t%lu/%lli\n", + prt_printf(&buf, "copygc_wait\t%llu/%lli\n", bch2_copygc_wait_amount(c), c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now)); prt_printf(&buf, "seen\t%llu\n", req->counters.buckets_seen); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index e97e87ebe312..6e5680a3a97c 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -261,6 +261,25 @@ err: return ret; } +static u64 bch2_copygc_dev_wait_amount(struct bch_dev *ca) +{ + struct bch_dev_usage_full usage_full = bch2_dev_usage_full_read(ca); + struct bch_dev_usage usage; + + for (unsigned i = 0; i < BCH_DATA_NR; i++) + usage.buckets[i] = usage_full.d[i].buckets; + + s64 fragmented_allowed = ((__dev_buckets_available(ca, usage, BCH_WATERMARK_stripe) * + ca->mi.bucket_size) >> 1); + s64 fragmented = 0; + + for (unsigned i = 0; i < BCH_DATA_NR; i++) + if (data_type_movable(i)) + fragmented += usage_full.d[i].fragmented; + + return max(0LL, fragmented_allowed - fragmented); +} + /* * Copygc runs when the amount of fragmented data is above some arbitrary * threshold: @@ -275,28 +294,13 @@ err: * often and continually reduce the amount of fragmented space as the device * fills up. So, we increase the threshold by half the current free space. */ -unsigned long bch2_copygc_wait_amount(struct bch_fs *c) +u64 bch2_copygc_wait_amount(struct bch_fs *c) { - s64 wait = S64_MAX, fragmented_allowed, fragmented; + u64 wait = U64_MAX; rcu_read_lock(); - for_each_rw_member_rcu(c, ca) { - struct bch_dev_usage_full usage_full = bch2_dev_usage_full_read(ca); - struct bch_dev_usage usage; - - for (unsigned i = 0; i < BCH_DATA_NR; i++) - usage.buckets[i] = usage_full.d[i].buckets; - - fragmented_allowed = ((__dev_buckets_available(ca, usage, BCH_WATERMARK_stripe) * - ca->mi.bucket_size) >> 1); - fragmented = 0; - - for (unsigned i = 0; i < BCH_DATA_NR; i++) - if (data_type_movable(i)) - fragmented += usage_full.d[i].fragmented; - - wait = min(wait, max(0LL, fragmented_allowed - fragmented)); - } + for_each_rw_member_rcu(c, ca) + wait = min(wait, bch2_copygc_dev_wait_amount(ca)); rcu_read_unlock(); return wait; @@ -320,14 +324,22 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c) c->copygc_wait_at) << 9); prt_newline(out); - prt_printf(out, "Currently calculated wait:\t"); - prt_human_readable_u64(out, bch2_copygc_wait_amount(c)); - prt_newline(out); + bch2_printbuf_make_room(out, 4096); rcu_read_lock(); + out->atomic++; + + prt_printf(out, "Currently calculated wait:\n"); + for_each_rw_member_rcu(c, ca) { + prt_printf(out, " %s:\t", ca->name); + prt_human_readable_u64(out, bch2_copygc_dev_wait_amount(ca)); + prt_newline(out); + } + struct task_struct *t = rcu_dereference(c->copygc_thread); if (t) get_task_struct(t); + --out->atomic; rcu_read_unlock(); if (t) { diff --git a/fs/bcachefs/movinggc.h b/fs/bcachefs/movinggc.h index d1885cf67a45..b9683d22bab0 100644 --- a/fs/bcachefs/movinggc.h +++ b/fs/bcachefs/movinggc.h @@ -2,7 +2,7 @@ #ifndef _BCACHEFS_MOVINGGC_H #define _BCACHEFS_MOVINGGC_H -unsigned long bch2_copygc_wait_amount(struct bch_fs *); +u64 bch2_copygc_wait_amount(struct bch_fs *); void bch2_copygc_wait_to_text(struct printbuf *, struct bch_fs *); static inline void bch2_copygc_wakeup(struct bch_fs *c) -- 2.51.0 From 82067c916994dd1bfec65496144dc16e17899e36 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 8 May 2025 23:21:28 -0400 Subject: [PATCH 13/16] bcachefs: buckets_in_flight on stack copygc runs with a full stack available, there's no reason to dynamically allocate this. Signed-off-by: Kent Overstreet --- fs/bcachefs/movinggc.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 6e5680a3a97c..66f4920552c5 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -354,19 +354,13 @@ static int bch2_copygc_thread(void *arg) struct moving_context ctxt; struct bch_move_stats move_stats; struct io_clock *clock = &c->io_clock[WRITE]; - struct buckets_in_flight *buckets; + struct buckets_in_flight buckets = {}; u64 last, wait; - int ret = 0; - buckets = kzalloc(sizeof(struct buckets_in_flight), GFP_KERNEL); - if (!buckets) - return -ENOMEM; - ret = rhashtable_init(&buckets->table, &bch_move_bucket_params); + int ret = rhashtable_init(&buckets.table, &bch_move_bucket_params); bch_err_msg(c, ret, "allocating copygc buckets in flight"); - if (ret) { - kfree(buckets); + if (ret) return ret; - } set_freezable(); @@ -389,13 +383,13 @@ static int bch2_copygc_thread(void *arg) cond_resched(); if (!c->opts.copygc_enabled) { - move_buckets_wait(&ctxt, buckets, true); + move_buckets_wait(&ctxt, &buckets, true); kthread_wait_freezable(c->opts.copygc_enabled || kthread_should_stop()); } if (unlikely(freezing(current))) { - move_buckets_wait(&ctxt, buckets, true); + move_buckets_wait(&ctxt, &buckets, true); __refrigerator(false); continue; } @@ -406,7 +400,7 @@ static int bch2_copygc_thread(void *arg) if (wait > clock->max_slop) { c->copygc_wait_at = last; c->copygc_wait = last + wait; - move_buckets_wait(&ctxt, buckets, true); + move_buckets_wait(&ctxt, &buckets, true); trace_and_count(c, copygc_wait, c, wait, last + wait); bch2_kthread_io_clock_wait(clock, last + wait, MAX_SCHEDULE_TIMEOUT); @@ -416,7 +410,7 @@ static int bch2_copygc_thread(void *arg) c->copygc_wait = 0; c->copygc_running = true; - ret = bch2_copygc(&ctxt, buckets, &did_work); + ret = bch2_copygc(&ctxt, &buckets, &did_work); c->copygc_running = false; wake_up(&c->copygc_running_wq); @@ -427,16 +421,14 @@ static int bch2_copygc_thread(void *arg) if (min_member_capacity == U64_MAX) min_member_capacity = 128 * 2048; - move_buckets_wait(&ctxt, buckets, true); + move_buckets_wait(&ctxt, &buckets, true); bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6), MAX_SCHEDULE_TIMEOUT); } } - move_buckets_wait(&ctxt, buckets, true); - - rhashtable_destroy(&buckets->table); - kfree(buckets); + move_buckets_wait(&ctxt, &buckets, true); + rhashtable_destroy(&buckets.table); bch2_moving_ctxt_exit(&ctxt); bch2_move_stats_exit(&move_stats, c); -- 2.51.0 From 3ffda8c219d636012cfc2b5dae0bf19d831a53e0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 9 May 2025 15:16:14 -0400 Subject: [PATCH 14/16] bcachefs: kill dead code in move_data_phys() Signed-off-by: Kent Overstreet --- fs/bcachefs/move.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index ff56d8886c32..42076aa3438b 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -849,10 +849,6 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_start, 0); - bch_err_msg(c, ret, "looking up alloc key"); - if (ret) - goto err; - ret = bch2_btree_write_buffer_tryflush(trans); if (!bch2_err_matches(ret, EROFS)) bch_err_msg(c, ret, "flushing btree write buffer"); -- 2.51.0 From 7f9dada701aa357cecf432cf2f345fd3897f92ed Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 9 May 2025 15:27:36 -0400 Subject: [PATCH 15/16] bcachefs: delete dead items in bch_dev Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index cd35d1cf3fbb..07a16c473af3 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -631,10 +631,6 @@ struct bch_dev { unsigned nr_partial_buckets; unsigned nr_btree_reserve; - size_t inc_gen_needs_gc; - size_t inc_gen_really_needs_gc; - size_t buckets_waiting_on_journal; - struct work_struct invalidate_work; struct work_struct discard_work; struct mutex discard_buckets_in_flight_lock; -- 2.51.0 From 13ffcbae86dadbf7711f42e4940bafae88a87e1f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 9 May 2025 16:25:21 -0400 Subject: [PATCH 16/16] bcachefs: "buckets with backpointer mismatches" now allocated on demand More self healing work: we're going to be calling check_bucket_backpointer_mismatch() at runtime, outside of fsck. Then when we need to we'll kick off the full check_extents_to_backpointers recovery pass. Signed-off-by: Kent Overstreet --- fs/bcachefs/backpointers.c | 123 ++++++++++++++++++++++--------------- fs/bcachefs/backpointers.h | 4 ++ fs/bcachefs/bcachefs.h | 9 ++- fs/bcachefs/buckets.c | 22 +++++++ fs/bcachefs/super.c | 7 +++ 5 files changed, 113 insertions(+), 52 deletions(-) diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index e6178eb2c396..631d4d24d78f 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -15,6 +15,14 @@ #include +static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp) +{ + return (struct bbpos) { + .btree = bp.btree_id, + .pos = bp.pos, + }; +} + int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, struct bkey_validate_context from) { @@ -671,8 +679,22 @@ static int check_extent_to_backpointers(struct btree_trans *trans, rcu_read_lock(); struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev); - bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches); - bool empty = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_empty); + if (!ca) { + rcu_read_unlock(); + continue; + } + + u64 b = PTR_BUCKET_NR(ca, &p.ptr); + bool set[2]; + + for (unsigned i = 0; i < ARRAY_SIZE(ca->bucket_backpointer_mismatches); i++) { + unsigned long *bitmap = + READ_ONCE(ca->bucket_backpointer_mismatches[i].buckets); + set[i] = bitmap && test_bit(b, bitmap); + } + + bool check = set[0]; + bool empty = set[1]; bool stale = p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr)); rcu_read_unlock(); @@ -724,14 +746,6 @@ err: return ret; } -static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp) -{ - return (struct bbpos) { - .btree = bp.btree_id, - .pos = bp.pos, - }; -} - static u64 mem_may_pin_bytes(struct bch_fs *c) { struct sysinfo i; @@ -933,12 +947,25 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b goto err; } - if (!sectors[ALLOC_dirty] && - !sectors[ALLOC_stripe] && - !sectors[ALLOC_cached]) - __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_empty); - else - __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches); + bool empty = (sectors[ALLOC_dirty] + + sectors[ALLOC_stripe] + + sectors[ALLOC_cached]) == 0; + + struct bucket_bitmap *bitmap = &ca->bucket_backpointer_mismatches[empty]; + + mutex_lock(&bitmap->lock); + if (!bitmap->buckets) { + bitmap->buckets = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets), + sizeof(unsigned long), GFP_KERNEL); + if (!bitmap->buckets) { + mutex_unlock(&bitmap->lock); + ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap; + goto err; + } + } + + bitmap->nr += !__test_and_set_bit(alloc_k.k->p.offset, bitmap->buckets); + mutex_unlock(&bitmap->lock); } err: bch2_dev_put(ca); @@ -962,8 +989,19 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) goto next; struct bpos bucket = bp_pos_to_bucket(ca, pos); - bucket.offset = find_next_bit(ca->bucket_backpointer_mismatches, - ca->mi.nbuckets, bucket.offset); + u64 next = ca->mi.nbuckets; + + for (unsigned i = 0; i < ARRAY_SIZE(ca->bucket_backpointer_mismatches); i++) { + unsigned long *bitmap = + READ_ONCE(ca->bucket_backpointer_mismatches[i].buckets); + if (bitmap) + next = min_t(u64, next, + find_next_bit(bitmap, + ca->mi.nbuckets, + bucket.offset)); + } + + bucket.offset = next; if (bucket.offset == ca->mi.nbuckets) goto next; @@ -1072,28 +1110,6 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) { int ret = 0; - /* - * Can't allow devices to come/go/resize while we have bucket bitmaps - * allocated - */ - down_read(&c->state_lock); - - for_each_member_device(c, ca) { - BUG_ON(ca->bucket_backpointer_mismatches); - ca->bucket_backpointer_mismatches = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets), - sizeof(unsigned long), - GFP_KERNEL); - ca->bucket_backpointer_empty = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets), - sizeof(unsigned long), - GFP_KERNEL); - if (!ca->bucket_backpointer_mismatches || - !ca->bucket_backpointer_empty) { - bch2_dev_put(ca); - ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap; - goto err_free_bitmaps; - } - } - struct btree_trans *trans = bch2_trans_get(c); struct extents_to_bp_state s = { .bp_start = POS_MIN }; @@ -1110,8 +1126,8 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) u64 nr_buckets = 0, nr_mismatches = 0, nr_empty = 0; for_each_member_device(c, ca) { nr_buckets += ca->mi.nbuckets; - nr_mismatches += bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets); - nr_empty += bitmap_weight(ca->bucket_backpointer_empty, ca->mi.nbuckets); + nr_mismatches += ca->bucket_backpointer_mismatches[0].nr; + nr_empty += ca->bucket_backpointer_mismatches[1].nr; } if (!nr_mismatches && !nr_empty) @@ -1153,19 +1169,17 @@ err: bch2_trans_put(trans); bch2_bkey_buf_exit(&s.last_flushed, c); bch2_btree_cache_unpin(c); -err_free_bitmaps: - for_each_member_device(c, ca) { - kvfree(ca->bucket_backpointer_empty); - ca->bucket_backpointer_empty = NULL; - kvfree(ca->bucket_backpointer_mismatches); - ca->bucket_backpointer_mismatches = NULL; - } - up_read(&c->state_lock); + for_each_member_device(c, ca) + for (unsigned i = 0; i < ARRAY_SIZE(ca->bucket_backpointer_mismatches); i++) + bch2_bucket_bitmap_free(&ca->bucket_backpointer_mismatches[i]); + bch_err_fn(c, ret); return ret; } +/* backpointers -> extents */ + static int check_one_backpointer(struct btree_trans *trans, struct bbpos start, struct bbpos end, @@ -1281,3 +1295,12 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c) bch_err_fn(c, ret); return ret; } + +void bch2_bucket_bitmap_free(struct bucket_bitmap *b) +{ + mutex_lock(&b->lock); + kvfree(b->buckets); + b->buckets = NULL; + b->nr = 0; + mutex_unlock(&b->lock); +} diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 16575dbc5736..c72707ee9d42 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -182,8 +182,12 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_b struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer, struct btree_iter *, struct bkey_buf *); +int bch2_check_bucket_backpointer_mismatch(struct btree_trans *, struct bpos, struct bkey_buf *); + int bch2_check_btree_backpointers(struct bch_fs *); int bch2_check_extents_to_backpointers(struct bch_fs *); int bch2_check_backpointers_to_extents(struct bch_fs *); +void bch2_bucket_bitmap_free(struct bucket_bitmap *); + #endif /* _BCACHEFS_BACKPOINTERS_BACKGROUND_H */ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 07a16c473af3..66659dade3f0 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -574,6 +574,12 @@ enum bch_dev_write_ref { BCH_DEV_WRITE_REF_NR, }; +struct bucket_bitmap { + unsigned long *buckets; + u64 nr; + struct mutex lock; +}; + struct bch_dev { struct kobject kobj; #ifdef CONFIG_BCACHEFS_DEBUG @@ -618,8 +624,7 @@ struct bch_dev { u8 *oldest_gen; unsigned long *buckets_nouse; - unsigned long *bucket_backpointer_mismatches; - unsigned long *bucket_backpointer_empty; + struct bucket_bitmap bucket_backpointer_mismatches[2]; struct bch_dev_usage_full __percpu *usage; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 596edc7bba2f..8d6955ef631b 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -1324,6 +1324,28 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) sizeof(bucket_gens->b[0]) * copy); } + for (unsigned i = 0; i < ARRAY_SIZE(ca->bucket_backpointer_mismatches); i++) { + struct bucket_bitmap *bitmap = &ca->bucket_backpointer_mismatches[i]; + + mutex_lock(&bitmap->lock); + if (bitmap->buckets) { + unsigned long *n = kvcalloc(BITS_TO_LONGS(nbuckets), + sizeof(unsigned long), GFP_KERNEL); + if (!n) { + mutex_unlock(&bitmap->lock); + ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap; + goto err; + } + + memcpy(n, bitmap->buckets, + BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long)); + kvfree(bitmap->buckets); + bitmap->buckets = n; + + } + mutex_unlock(&bitmap->lock); + } + rcu_assign_pointer(ca->bucket_gens, bucket_gens); bucket_gens = old_bucket_gens; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index dc8189f9d2f1..77b834cfe126 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -11,6 +11,7 @@ #include "alloc_background.h" #include "alloc_foreground.h" #include "async_objs.h" +#include "backpointers.h" #include "bkey_sort.h" #include "btree_cache.h" #include "btree_gc.h" @@ -1341,6 +1342,9 @@ static void bch2_dev_free(struct bch_dev *ca) if (ca->kobj.state_in_sysfs) kobject_del(&ca->kobj); + for (unsigned i = 0; i < ARRAY_SIZE(ca->bucket_backpointer_mismatches); i++) + bch2_bucket_bitmap_free(&ca->bucket_backpointer_mismatches[i]); + bch2_free_super(&ca->disk_sb); bch2_dev_allocator_background_exit(ca); bch2_dev_journal_exit(ca); @@ -1471,6 +1475,9 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c, atomic_long_set(&ca->ref, 1); #endif + for (unsigned i = 0; i < ARRAY_SIZE(ca->bucket_backpointer_mismatches); i++) + mutex_init(&ca->bucket_backpointer_mismatches[i].lock); + bch2_dev_allocator_background_init(ca); if (enumerated_ref_init(&ca->io_ref[READ], BCH_DEV_READ_REF_NR, NULL) || -- 2.51.0