From: Kent Overstreet Date: Tue, 15 Dec 2020 00:41:03 +0000 (-0500) Subject: bcachefs: Don't read existing stripes synchronously in write path X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=81d8599e192e85fa7e01d8a6e8e4095177ff46a3;p=users%2Fwilly%2Fxarray.git bcachefs: Don't read existing stripes synchronously in write path Previously, in the stripe creation path, when reusing an existing stripe we'd read the existing stripe synchronously - ouch. Now, we allocate two stripe bufs if we're using an existing stripe, so that we can do the read asynchronously - and, we read the full stripe so that we can run recovery, if necessary. Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 3f8281b5db41..397099514418 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -871,7 +871,7 @@ struct bch_stripe { __u8 csum_type; __u8 pad; - struct bch_extent_ptr ptrs[0]; + struct bch_extent_ptr ptrs[]; } __attribute__((packed, aligned(8))); /* Reflink: */ diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 76509c5970d2..72ee53dc95d0 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -200,6 +200,36 @@ static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx) return false; } +/* Stripe bufs: */ + +static void ec_stripe_buf_free(struct ec_stripe_buf *stripe) +{ + unsigned i; + + for (i = 0; i < stripe->key.v.nr_blocks; i++) { + kvpfree(stripe->data[i], stripe->size << 9); + stripe->data[i] = NULL; + } +} + +static int ec_stripe_buf_alloc(struct ec_stripe_buf *stripe) +{ + unsigned i; + + memset(stripe->valid, 0xFF, sizeof(stripe->valid)); + + for (i = 0; i < stripe->key.v.nr_blocks; i++) { + stripe->data[i] = kvpmalloc(stripe->size << 9, GFP_KERNEL); + if (!stripe->data[i]) + goto err; + } + + return 0; +err: + ec_stripe_buf_free(stripe); + return -ENOMEM; +} + /* Checksumming: */ static void ec_generate_checksums(struct ec_stripe_buf *buf) @@ -287,14 +317,10 @@ static void ec_generate_ec(struct ec_stripe_buf *buf) raid_gen(nr_data, v->nr_redundant, bytes, buf->data); } -static unsigned __ec_nr_failed(struct ec_stripe_buf *buf, unsigned nr) -{ - return nr - bitmap_weight(buf->valid, nr); -} - static unsigned ec_nr_failed(struct ec_stripe_buf *buf) { - return __ec_nr_failed(buf, buf->key.v.nr_blocks); + return buf->key.v.nr_blocks - + bitmap_weight(buf->valid, buf->key.v.nr_blocks); } static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf) @@ -822,14 +848,13 @@ static void ec_stripe_create(struct ec_stripe_new *s) struct open_bucket *ob; struct bkey_i *k; struct stripe *m; - struct bch_stripe *v = &s->stripe.key.v; + struct bch_stripe *v = &s->new_stripe.key.v; unsigned i, nr_data = v->nr_blocks - v->nr_redundant; - struct closure cl; int ret; BUG_ON(s->h->s == s); - closure_init_stack(&cl); + closure_sync(&s->iodone); if (s->err) { if (s->err != -EROFS) @@ -837,6 +862,22 @@ static void ec_stripe_create(struct ec_stripe_new *s) goto err; } + if (s->have_existing_stripe) { + ec_validate_checksums(c, &s->existing_stripe); + + if (ec_do_recov(c, &s->existing_stripe)) { + bch_err(c, "error creating stripe: error reading existing stripe"); + goto err; + } + + for (i = 0; i < nr_data; i++) + if (stripe_blockcount_get(&s->existing_stripe.key.v, i)) + swap(s->new_stripe.data[i], + s->existing_stripe.data[i]); + + ec_stripe_buf_free(&s->existing_stripe); + } + BUG_ON(!s->allocated); if (!percpu_ref_tryget(&c->writes)) @@ -845,33 +886,31 @@ static void ec_stripe_create(struct ec_stripe_new *s) BUG_ON(bitmap_weight(s->blocks_allocated, s->blocks.nr) != s->blocks.nr); - ec_generate_ec(&s->stripe); + ec_generate_ec(&s->new_stripe); - ec_generate_checksums(&s->stripe); + ec_generate_checksums(&s->new_stripe); /* write p/q: */ for (i = nr_data; i < v->nr_blocks; i++) - ec_block_io(c, &s->stripe, REQ_OP_WRITE, i, &cl); - - closure_sync(&cl); + ec_block_io(c, &s->new_stripe, REQ_OP_WRITE, i, &s->iodone); + closure_sync(&s->iodone); - for (i = nr_data; i < v->nr_blocks; i++) - if (!test_bit(i, s->stripe.valid)) { - bch_err(c, "error creating stripe: error writing redundancy buckets"); - goto err_put_writes; - } + if (ec_nr_failed(&s->new_stripe)) { + bch_err(c, "error creating stripe: error writing redundancy buckets"); + goto err_put_writes; + } - ret = s->existing_stripe - ? bch2_btree_insert(c, BTREE_ID_EC, &s->stripe.key.k_i, + ret = s->have_existing_stripe + ? bch2_btree_insert(c, BTREE_ID_EC, &s->new_stripe.key.k_i, &s->res, NULL, BTREE_INSERT_NOFAIL) - : ec_stripe_bkey_insert(c, s, &s->stripe.key); + : ec_stripe_bkey_insert(c, s, &s->new_stripe.key); if (ret) { bch_err(c, "error creating stripe: error creating stripe key"); goto err_put_writes; } for_each_keylist_key(&s->keys, k) { - ret = ec_stripe_update_ptrs(c, &s->stripe, &k->k); + ret = ec_stripe_update_ptrs(c, &s->new_stripe, &k->k); if (ret) { bch_err(c, "error creating stripe: error %i updating pointers", ret); break; @@ -879,14 +918,14 @@ static void ec_stripe_create(struct ec_stripe_new *s) } spin_lock(&c->ec_stripes_heap_lock); - m = genradix_ptr(&c->stripes[0], s->stripe.key.k.p.offset); + m = genradix_ptr(&c->stripes[0], s->new_stripe.key.k.p.offset); #if 0 pr_info("created a %s stripe %llu", - s->existing_stripe ? "existing" : "new", + s->have_existing_stripe ? "existing" : "new", s->stripe.key.k.p.offset); #endif BUG_ON(m->on_heap); - bch2_stripes_heap_insert(c, m, s->stripe.key.k.p.offset); + bch2_stripes_heap_insert(c, m, s->new_stripe.key.k.p.offset); spin_unlock(&c->ec_stripes_heap_lock); err_put_writes: percpu_ref_put(&c->writes); @@ -902,8 +941,9 @@ err: bch2_keylist_free(&s->keys, s->inline_keys); - for (i = 0; i < s->stripe.key.v.nr_blocks; i++) - kvpfree(s->stripe.data[i], s->stripe.size << 9); + ec_stripe_buf_free(&s->existing_stripe); + ec_stripe_buf_free(&s->new_stripe); + closure_debug_destroy(&s->iodone); kfree(s); } @@ -980,7 +1020,7 @@ void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp) ca = bch_dev_bkey_exists(c, ob->ptr.dev); offset = ca->mi.bucket_size - ob->sectors_free; - return ob->ec->stripe.data[ob->ec_idx] + (offset << 9); + return ob->ec->new_stripe.data[ob->ec_idx] + (offset << 9); } void bch2_ec_add_backpointer(struct bch_fs *c, struct write_point *wp, @@ -1087,7 +1127,6 @@ static void ec_stripe_key_init(struct bch_fs *c, static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) { struct ec_stripe_new *s; - unsigned i; lockdep_assert_held(&h->lock); @@ -1096,6 +1135,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) return -ENOMEM; mutex_init(&s->lock); + closure_init(&s->iodone, NULL); atomic_set(&s->pin, 1); s->c = c; s->h = h; @@ -1105,27 +1145,14 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) bch2_keylist_init(&s->keys, s->inline_keys); - s->stripe.offset = 0; - s->stripe.size = h->blocksize; - memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid)); + s->new_stripe.offset = 0; + s->new_stripe.size = h->blocksize; - ec_stripe_key_init(c, &s->stripe.key, s->nr_data, + ec_stripe_key_init(c, &s->new_stripe.key, s->nr_data, s->nr_parity, h->blocksize); - for (i = 0; i < s->stripe.key.v.nr_blocks; i++) { - s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL); - if (!s->stripe.data[i]) - goto err; - } - h->s = s; - return 0; -err: - for (i = 0; i < s->stripe.key.v.nr_blocks; i++) - kvpfree(s->stripe.data[i], s->stripe.size << 9); - kfree(s); - return -ENOMEM; } static struct ec_stripe_head * @@ -1217,7 +1244,7 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h) devs = h->devs; for_each_set_bit(i, h->s->blocks_allocated, BCH_BKEY_PTRS_MAX) { - __clear_bit(h->s->stripe.key.v.ptrs[i].dev, devs.d); + __clear_bit(h->s->new_stripe.key.v.ptrs[i].dev, devs.d); --nr_data; } @@ -1327,51 +1354,70 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, unsigned algo, unsigned redundancy) { - struct closure cl; struct ec_stripe_head *h; struct open_bucket *ob; unsigned i, data_idx = 0; s64 idx; int ret; - closure_init_stack(&cl); - h = __bch2_ec_stripe_head_get(c, target, algo, redundancy); - if (!h) + if (!h) { + bch_err(c, "no stripe head"); return NULL; + } if (!h->s) { if (ec_new_stripe_alloc(c, h)) { bch2_ec_stripe_head_put(c, h); + bch_err(c, "failed to allocate new stripe"); return NULL; } idx = get_existing_stripe(c, target, algo, redundancy); if (idx >= 0) { - h->s->existing_stripe = true; - h->s->existing_stripe_idx = idx; - if (get_stripe_key(c, idx, &h->s->stripe)) { - /* btree error */ + h->s->have_existing_stripe = true; + ret = get_stripe_key(c, idx, &h->s->existing_stripe); + if (ret) { + bch2_fs_fatal_error(c, "error reading stripe key: %i", ret); + bch2_ec_stripe_head_put(c, h); + return NULL; + } + + if (ec_stripe_buf_alloc(&h->s->existing_stripe)) { + /* + * this is a problem: we have deleted from the + * stripes heap already + */ BUG(); } - for (i = 0; i < h->s->stripe.key.v.nr_blocks; i++) - if (stripe_blockcount_get(&h->s->stripe.key.v, i)) { + for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) { + if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) __set_bit(i, h->s->blocks_allocated); - ec_block_io(c, &h->s->stripe, READ, i, &cl); - } + + ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone); + } + + bkey_copy(&h->s->new_stripe.key.k_i, + &h->s->existing_stripe.key.k_i); + } + + if (ec_stripe_buf_alloc(&h->s->new_stripe)) { + BUG(); } } if (!h->s->allocated) { - if (!h->s->existing_stripe && + if (!h->s->have_existing_stripe && !h->s->res.sectors) { ret = bch2_disk_reservation_get(c, &h->s->res, - h->blocksize, - h->s->nr_parity, 0); + h->blocksize, + h->s->nr_parity, 0); if (ret) { - /* What should we do here? */ - bch_err(c, "unable to create new stripe: %i", ret); + /* + * This means we need to wait for copygc to + * empty out buckets from existing stripes: + */ bch2_ec_stripe_head_put(c, h); h = NULL; goto out; @@ -1391,19 +1437,18 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c, h->s->nr_data, data_idx); BUG_ON(data_idx >= h->s->nr_data); - h->s->stripe.key.v.ptrs[data_idx] = ob->ptr; + h->s->new_stripe.key.v.ptrs[data_idx] = ob->ptr; h->s->data_block_idx[i] = data_idx; data_idx++; } open_bucket_for_each(c, &h->s->parity, ob, i) - h->s->stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr; + h->s->new_stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr; //pr_info("new stripe, blocks_allocated %lx", h->s->blocks_allocated[0]); h->s->allocated = true; } out: - closure_sync(&cl); return h; } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 450bb1a113a3..1d4aad50db4d 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -88,6 +88,7 @@ struct ec_stripe_new { struct ec_stripe_head *h; struct mutex lock; struct list_head list; + struct closure iodone; /* counts in flight writes, stripe is created when pin == 0 */ atomic_t pin; @@ -98,8 +99,7 @@ struct ec_stripe_new { u8 nr_parity; bool allocated; bool pending; - bool existing_stripe; - u64 existing_stripe_idx; + bool have_existing_stripe; unsigned long blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)]; @@ -111,7 +111,8 @@ struct ec_stripe_new { struct keylist keys; u64 inline_keys[BKEY_U64s * 8]; - struct ec_stripe_buf stripe; + struct ec_stripe_buf new_stripe; + struct ec_stripe_buf existing_stripe; }; struct ec_stripe_head {