From: Kent Overstreet Date: Tue, 20 Oct 2020 02:36:24 +0000 (-0400) Subject: bcachefs: Fix for bad stripe pointers X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=39283c712e6df927c7c49e8b738ca110551bb399;p=users%2Fhch%2Fuuid.git bcachefs: Fix for bad stripe pointers The allocator usually doesn't increment bucket gens right away on buckets that it's about to hand out (for reasons that need to be documented), instead deferring that to whatever extent update first references that bucket. But stripe pointers reference buckets without changing bucket sector counts, meaning we could end up with a pointer in a stripe with a gen newer than the bucket it points to. Fix this by adding a transactional trigger for KEY_TYPE_stripe that just writes out the keys in the alloc btree for the buckets it points to. Also - consolidate the code that checks pointer validity. Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index b0448d2f1916..8f0c1f378b77 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -497,8 +497,6 @@ static void bch2_bucket_clock_init(struct bch_fs *c, int rw) * commands to the newly free buckets, then puts them on the various freelists. */ -#define BUCKET_GC_GEN_MAX 96U - /** * wait_buckets_available - wait on reclaimable buckets * diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 56a846fde8dd..66ce54724e93 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -13,6 +13,9 @@ struct bkey_alloc_unpacked { #undef x }; +/* How out of date a pointer gen is allowed to be: */ +#define BUCKET_GC_GEN_MAX 96U + /* returns true if not equal */ static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l, struct bkey_alloc_unpacked r) diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index b295e46de059..f02518f9d9ec 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -591,6 +591,7 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter) #define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS \ ((1U << BKEY_TYPE_EXTENTS)| \ (1U << BKEY_TYPE_INODES)| \ + (1U << BKEY_TYPE_EC)| \ (1U << BKEY_TYPE_REFLINK)) enum btree_trigger_flags { diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index a4a5e084aad3..9c33a8be2c58 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -337,8 +337,9 @@ static inline bool iter_has_trans_triggers(struct btree_iter *iter) static inline bool iter_has_nontrans_triggers(struct btree_iter *iter) { - return (BTREE_NODE_TYPE_HAS_TRIGGERS & - ~BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS) & + return (((BTREE_NODE_TYPE_HAS_TRIGGERS & + ~BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS)) | + (1U << BTREE_ID_EC)) & (1U << iter->btree_id); } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 7bc51f397c7b..80d11decb71e 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -883,124 +883,140 @@ static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p, p.crc.uncompressed_size); } -static void bucket_set_stripe(struct bch_fs *c, - const struct bch_extent_ptr *ptr, - struct bch_fs_usage *fs_usage, - u64 journal_seq, - unsigned flags, - bool enabled) -{ - bool gc = flags & BTREE_TRIGGER_GC; - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - struct bucket *g = PTR_BUCKET(ca, ptr, gc); - struct bucket_mark new, old; - - old = bucket_cmpxchg(g, new, ({ - new.stripe = enabled; - if (journal_seq) { - new.journal_seq_valid = 1; - new.journal_seq = journal_seq; - } - })); - - bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); - - /* - * XXX write repair code for these, flag stripe as possibly bad - */ - if (old.gen != ptr->gen) - bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, - "stripe with stale pointer"); -#if 0 - /* - * We'd like to check for these, but these checks don't work - * yet: - */ - if (old.stripe && enabled) - bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, - "multiple stripes using same bucket"); - - if (!old.stripe && !enabled) - bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, - "deleting stripe but bucket not marked as stripe bucket"); -#endif -} - -static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k, - struct extent_ptr_decoded p, - s64 sectors, enum bch_data_type ptr_data_type, - u8 bucket_gen, u8 *bucket_data_type, - u16 *dirty_sectors, u16 *cached_sectors) -{ - u16 *dst_sectors = !p.ptr.cached +static int check_bucket_ref(struct bch_fs *c, struct bkey_s_c k, + const struct bch_extent_ptr *ptr, + s64 sectors, enum bch_data_type ptr_data_type, + u8 bucket_gen, u8 bucket_data_type, + u16 dirty_sectors, u16 cached_sectors) +{ + size_t bucket_nr = PTR_BUCKET_NR(bch_dev_bkey_exists(c, ptr->dev), ptr); + u16 bucket_sectors = !ptr->cached ? dirty_sectors : cached_sectors; - u16 orig_sectors = *dst_sectors; char buf[200]; - if (gen_after(p.ptr.gen, bucket_gen)) { + if (gen_after(ptr->gen, bucket_gen)) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), - bucket_gen, - bch2_data_types[*bucket_data_type ?: ptr_data_type], - p.ptr.gen, + ptr->dev, bucket_nr, bucket_gen, + bch2_data_types[bucket_data_type ?: ptr_data_type], + ptr->gen, (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); return -EIO; } - if (gen_cmp(bucket_gen, p.ptr.gen) > 96U) { + if (gen_cmp(bucket_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), - bucket_gen, - bch2_data_types[*bucket_data_type ?: ptr_data_type], - p.ptr.gen, + ptr->dev, bucket_nr, bucket_gen, + bch2_data_types[bucket_data_type ?: ptr_data_type], + ptr->gen, (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); return -EIO; } - if (bucket_gen != p.ptr.gen && !p.ptr.cached) { + if (bucket_gen != ptr->gen && !ptr->cached) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u data type %s: stale dirty ptr (gen %u)\n" "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), - bucket_gen, - bch2_data_types[*bucket_data_type ?: ptr_data_type], - p.ptr.gen, + ptr->dev, bucket_nr, bucket_gen, + bch2_data_types[bucket_data_type ?: ptr_data_type], + ptr->gen, (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); return -EIO; } - if (bucket_gen != p.ptr.gen) + if (bucket_gen != ptr->gen) return 1; - if (*bucket_data_type && *bucket_data_type != ptr_data_type) { + if (bucket_data_type && ptr_data_type && + bucket_data_type != ptr_data_type) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), - bucket_gen, - bch2_data_types[*bucket_data_type], + ptr->dev, bucket_nr, bucket_gen, + bch2_data_types[bucket_data_type], bch2_data_types[ptr_data_type], (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); return -EIO; } - if (checked_add(*dst_sectors, sectors)) { + if ((unsigned) (bucket_sectors + sectors) > U16_MAX) { bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U16_MAX\n" "while marking %s", - p.ptr.dev, PTR_BUCKET_NR(bch_dev_bkey_exists(c, p.ptr.dev), &p.ptr), - bucket_gen, - bch2_data_types[*bucket_data_type ?: ptr_data_type], - orig_sectors, sectors, + ptr->dev, bucket_nr, bucket_gen, + bch2_data_types[bucket_data_type ?: ptr_data_type], + bucket_sectors, sectors, (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); return -EIO; } + return 0; +} + +static int bucket_set_stripe(struct bch_fs *c, struct bkey_s_c k, + const struct bch_extent_ptr *ptr, + struct bch_fs_usage *fs_usage, + u64 journal_seq, + unsigned flags, + bool enabled) +{ + bool gc = flags & BTREE_TRIGGER_GC; + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + struct bucket *g = PTR_BUCKET(ca, ptr, gc); + struct bucket_mark new, old; + char buf[200]; + int ret; + + old = bucket_cmpxchg(g, new, ({ + ret = check_bucket_ref(c, k, ptr, 0, 0, new.gen, new.data_type, + new.dirty_sectors, new.cached_sectors); + if (ret) + return ret; + + if (new.stripe && enabled) + bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s", + ptr->dev, PTR_BUCKET_NR(ca, ptr), new.gen, + (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); + + if (!new.stripe && !enabled) + bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + "bucket %u:%zu gen %u: deleting stripe but not marked\n%s", + ptr->dev, PTR_BUCKET_NR(ca, ptr), new.gen, + (bch2_bkey_val_to_text(&PBUF(buf), c, k), buf)); + + new.stripe = enabled; + if (journal_seq) { + new.journal_seq_valid = 1; + new.journal_seq = journal_seq; + } + })); + + bch2_dev_usage_update(c, ca, fs_usage, old, new, gc); + return 0; +} + +static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k, + const struct bch_extent_ptr *ptr, + s64 sectors, enum bch_data_type ptr_data_type, + u8 bucket_gen, u8 *bucket_data_type, + u16 *dirty_sectors, u16 *cached_sectors) +{ + u16 *dst_sectors = !ptr->cached + ? dirty_sectors + : cached_sectors; + int ret = check_bucket_ref(c, k, ptr, sectors, ptr_data_type, + bucket_gen, *bucket_data_type, + *dirty_sectors, *cached_sectors); + + if (ret) + return ret; + + *dst_sectors += sectors; *bucket_data_type = *dirty_sectors || *cached_sectors ? ptr_data_type : 0; return 0; @@ -1025,7 +1041,7 @@ static int bch2_mark_pointer(struct bch_fs *c, struct bkey_s_c k, new.v.counter = old.v.counter = v; bucket_data_type = new.data_type; - ret = __mark_pointer(c, k, p, sectors, data_type, new.gen, + ret = __mark_pointer(c, k, &p.ptr, sectors, data_type, new.gen, &bucket_data_type, &new.dirty_sectors, &new.cached_sectors); @@ -1189,6 +1205,7 @@ static int bch2_mark_stripe(struct bch_fs *c, ? bkey_s_c_to_stripe(new).v : NULL; struct stripe *m = genradix_ptr(&c->stripes[gc], idx); unsigned i; + int ret; if (!m || (old_s && !m->alive)) { bch_err_ratelimited(c, "error marking nonexistent stripe %zu", @@ -1198,9 +1215,12 @@ static int bch2_mark_stripe(struct bch_fs *c, if (!new_s) { /* Deleting: */ - for (i = 0; i < old_s->nr_blocks; i++) - bucket_set_stripe(c, old_s->ptrs + i, fs_usage, - journal_seq, flags, false); + for (i = 0; i < old_s->nr_blocks; i++) { + ret = bucket_set_stripe(c, old, old_s->ptrs + i, fs_usage, + journal_seq, flags, false); + if (ret) + return ret; + } if (!gc && m->on_heap) { spin_lock(&c->ec_stripes_heap_lock); @@ -1219,11 +1239,16 @@ static int bch2_mark_stripe(struct bch_fs *c, old_s->ptrs + i, sizeof(struct bch_extent_ptr))) { - if (old_s) - bucket_set_stripe(c, old_s->ptrs + i, fs_usage, + if (old_s) { + bucket_set_stripe(c, old, old_s->ptrs + i, fs_usage, journal_seq, flags, false); - bucket_set_stripe(c, new_s->ptrs + i, fs_usage, - journal_seq, flags, true); + if (ret) + return ret; + } + ret = bucket_set_stripe(c, new, new_s->ptrs + i, fs_usage, + journal_seq, flags, true); + if (ret) + return ret; } } @@ -1549,23 +1574,21 @@ static int trans_get_key(struct btree_trans *trans, return ret; } -static int bch2_trans_mark_pointer(struct btree_trans *trans, - struct bkey_s_c k, struct extent_ptr_decoded p, - s64 sectors, enum bch_data_type data_type) +static int bch2_trans_start_alloc_update(struct btree_trans *trans, struct btree_iter **_iter, + const struct bch_extent_ptr *ptr, + struct bkey_alloc_unpacked *u) { struct bch_fs *c = trans->c; - struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev); - struct bpos pos = POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr)); - struct btree_iter *iter; - struct bkey_s_c k_a; - struct bkey_alloc_unpacked u; - struct bkey_i_alloc *a; + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + struct bpos pos = POS(ptr->dev, PTR_BUCKET_NR(ca, ptr)); struct bucket *g; + struct btree_iter *iter; + struct bkey_s_c k; int ret; - iter = trans_get_update(trans, BTREE_ID_ALLOC, pos, &k_a); + iter = trans_get_update(trans, BTREE_ID_ALLOC, pos, &k); if (iter) { - u = bch2_alloc_unpack(k_a); + *u = bch2_alloc_unpack(k); } else { iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, pos, BTREE_ITER_CACHED| @@ -1575,16 +1598,36 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, return PTR_ERR(iter); ret = bch2_btree_iter_traverse(iter); - if (ret) - goto out; + if (ret) { + bch2_trans_iter_put(trans, iter); + return ret; + } percpu_down_read(&c->mark_lock); g = bucket(ca, pos.offset); - u = alloc_mem_to_key(g, READ_ONCE(g->mark)); + *u = alloc_mem_to_key(g, READ_ONCE(g->mark)); percpu_up_read(&c->mark_lock); } - ret = __mark_pointer(c, k, p, sectors, data_type, u.gen, &u.data_type, + *_iter = iter; + return 0; +} + +static int bch2_trans_mark_pointer(struct btree_trans *trans, + struct bkey_s_c k, struct extent_ptr_decoded p, + s64 sectors, enum bch_data_type data_type) +{ + struct bch_fs *c = trans->c; + struct btree_iter *iter; + struct bkey_alloc_unpacked u; + struct bkey_i_alloc *a; + int ret; + + ret = bch2_trans_start_alloc_update(trans, &iter, &p.ptr, &u); + if (ret) + return ret; + + ret = __mark_pointer(c, k, &p.ptr, sectors, data_type, u.gen, &u.data_type, &u.dirty_sectors, &u.cached_sectors); if (ret) goto out; @@ -1595,7 +1638,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans, goto out; bkey_alloc_init(&a->k_i); - a->k.p = pos; + a->k.p = iter->pos; bch2_alloc_pack(a, u); bch2_trans_update(trans, iter, &a->k_i, 0); out: @@ -1716,6 +1759,44 @@ static int bch2_trans_mark_extent(struct btree_trans *trans, return 0; } +static int bch2_trans_mark_stripe(struct btree_trans *trans, + struct bkey_s_c k) +{ + const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; + struct bkey_alloc_unpacked u; + struct bkey_i_alloc *a; + struct btree_iter *iter; + unsigned i; + int ret = 0; + + /* + * The allocator code doesn't necessarily update bucket gens in the + * btree when incrementing them, right before handing out new buckets - + * we just need to persist those updates here along with the new stripe: + */ + + for (i = 0; i < s->nr_blocks && !ret; i++) { + ret = bch2_trans_start_alloc_update(trans, &iter, + &s->ptrs[i], &u); + if (ret) + break; + + a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8); + ret = PTR_ERR_OR_ZERO(a); + if (ret) + goto put_iter; + + bkey_alloc_init(&a->k_i); + a->k.p = iter->pos; + bch2_alloc_pack(a, u); + bch2_trans_update(trans, iter, &a->k_i, 0); +put_iter: + bch2_trans_iter_put(trans, iter); + } + + return ret; +} + static int __bch2_trans_mark_reflink_p(struct btree_trans *trans, struct bkey_s_c_reflink_p p, u64 idx, unsigned sectors, @@ -1815,6 +1896,8 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k, case KEY_TYPE_reflink_v: return bch2_trans_mark_extent(trans, k, offset, sectors, flags, BCH_DATA_user); + case KEY_TYPE_stripe: + return bch2_trans_mark_stripe(trans, k); case KEY_TYPE_inode: d = replicas_deltas_realloc(trans, 0);