From 14e2523fc59d1673766fb2a81b1d2f82134debc0 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 19 Jan 2025 13:18:50 -0500
Subject: [PATCH 01/16] bcachefs: Rename BCH_WRITE flags for consistency with
 other x-macros enums

The uppercase/lowercase style is nice for making the namespace explicit.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/alloc_foreground.c |  6 +--
 fs/bcachefs/data_update.c      |  8 ++--
 fs/bcachefs/fs-io-buffered.c   |  2 +-
 fs/bcachefs/fs-io-direct.c     |  4 +-
 fs/bcachefs/io_read.c          |  2 +-
 fs/bcachefs/io_write.c         | 86 +++++++++++++++++-----------------
 fs/bcachefs/io_write.h         | 29 ++++++------
 fs/bcachefs/io_write_types.h   |  2 +-
 fs/bcachefs/rebalance.c        |  4 +-
 9 files changed, 71 insertions(+), 72 deletions(-)

diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 5a781fb4c794..1a539e7bedc8 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -728,7 +728,7 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
 		struct bch_dev_usage usage;
 		struct open_bucket *ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type,
-							cl, flags & BCH_WRITE_ALLOC_NOWAIT, &usage);
+							cl, flags & BCH_WRITE_alloc_nowait, &usage);
 		if (!IS_ERR(ob))
 			bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
 		bch2_dev_put(ca);
@@ -1336,7 +1336,7 @@ retry:
 	if (wp->data_type != BCH_DATA_user)
 		have_cache = true;
 
-	if (target && !(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
+	if (target && !(flags & BCH_WRITE_only_specified_devs)) {
 		ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
 					      target, erasure_code,
 					      nr_replicas, &nr_effective,
@@ -1426,7 +1426,7 @@ err:
 	if (cl && bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
 		ret = -BCH_ERR_bucket_alloc_blocked;
 
-	if (cl && !(flags & BCH_WRITE_ALLOC_NOWAIT) &&
+	if (cl && !(flags & BCH_WRITE_alloc_nowait) &&
 	    bch2_err_matches(ret, BCH_ERR_freelist_empty))
 		ret = -BCH_ERR_bucket_alloc_blocked;
 
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 1cfb86823fdf..bfd8ba162630 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -681,10 +681,10 @@ int bch2_data_update_init(struct btree_trans *trans,
 	m->op.target	= data_opts.target;
 	m->op.write_point = wp;
 	m->op.nr_replicas = 0;
-	m->op.flags	|= BCH_WRITE_PAGES_STABLE|
-		BCH_WRITE_PAGES_OWNED|
-		BCH_WRITE_DATA_ENCODED|
-		BCH_WRITE_MOVE|
+	m->op.flags	|= BCH_WRITE_pages_stable|
+		BCH_WRITE_pages_owned|
+		BCH_WRITE_data_encoded|
+		BCH_WRITE_move|
 		m->data_opts.write_flags;
 	m->op.compression_opt	= io_opts.background_compression;
 	m->op.watermark		= m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK;
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index b68472f1d1d9..dc00edfefc91 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -430,7 +430,7 @@ static void bch2_writepage_io_done(struct bch_write_op *op)
 		}
 	}
 
-	if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) {
+	if (io->op.flags & BCH_WRITE_wrote_data_inline) {
 		bio_for_each_folio_all(fi, bio) {
 			struct bch_folio *s;
 
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index 2089c36b5866..d2f2cbaba7a8 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -511,8 +511,8 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
 		dio->op.devs_need_flush = &inode->ei_devs_need_flush;
 
 		if (sync)
-			dio->op.flags |= BCH_WRITE_SYNC;
-		dio->op.flags |= BCH_WRITE_CHECK_ENOSPC;
+			dio->op.flags |= BCH_WRITE_sync;
+		dio->op.flags |= BCH_WRITE_check_enospc;
 
 		ret = bch2_quota_reservation_add(c, inode, &dio->quota_res,
						 bio_sectors(bio), true);
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 304f76fb0f43..539b48f94523 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -232,7 +232,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
 	if (!have_io_error(failed)) {
 		update_opts.target = opts.promote_target;
 		update_opts.extra_replicas = 1;
-		update_opts.write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED;
+		update_opts.write_flags = BCH_WRITE_alloc_nowait|BCH_WRITE_cached;
 	} else {
 		update_opts.target = opts.foreground_target;
 
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index a903f39caa3e..076e39474610 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -374,7 +374,7 @@ static int bch2_write_index_default(struct bch_write_op *op)
 		bch2_extent_update(trans, inum, &iter, sk.k,
				   &op->res,
				   op->new_i_size, &op->i_sectors_delta,
-				   op->flags & BCH_WRITE_CHECK_ENOSPC);
+				   op->flags & BCH_WRITE_check_enospc);
 		bch2_trans_iter_exit(trans, &iter);
 
 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -403,7 +403,7 @@ static void __bch2_write_op_error(struct printbuf *out, struct bch_write_op *op,
			      (subvol_inum) { op->subvol, op->pos.inode, }, offset << 9);
 	prt_printf(out, "write error%s: ",
-		   op->flags & BCH_WRITE_MOVE ? "(internal move)" : "");
+		   op->flags & BCH_WRITE_move ? "(internal move)" : "");
 }
 
 void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op)
@@ -418,7 +418,7 @@ static void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf
			      (subvol_inum) { op->subvol, op->pos.inode, }, offset << 9);
 	prt_printf(out, "write error%s: ",
-		   op->flags & BCH_WRITE_MOVE ? "(internal move)" : "");
+		   op->flags & BCH_WRITE_move ? "(internal move)" : "");
 }
 
 void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
@@ -493,7 +493,7 @@ static void bch2_write_done(struct closure *cl)
 	bch2_time_stats_update(&c->times[BCH_TIME_data_write], op->start_time);
 	bch2_disk_reservation_put(c, &op->res);
 
-	if (!(op->flags & BCH_WRITE_MOVE))
+	if (!(op->flags & BCH_WRITE_move))
 		bch2_write_ref_put(c, BCH_WRITE_REF_write);
 	bch2_keylist_free(&op->insert_keys, op->inline_keys);
 
@@ -539,7 +539,7 @@ static void __bch2_write_index(struct bch_write_op *op)
 	unsigned dev;
 	int ret = 0;
 
-	if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) {
+	if (unlikely(op->flags & BCH_WRITE_io_error)) {
 		ret = bch2_write_drop_io_error_ptrs(op);
 		if (ret)
 			goto err;
@@ -548,7 +548,7 @@ static void __bch2_write_index(struct bch_write_op *op)
 	if (!bch2_keylist_empty(keys)) {
 		u64 sectors_start = keylist_sectors(keys);
 
-		ret = !(op->flags & BCH_WRITE_MOVE)
+		ret = !(op->flags & BCH_WRITE_move)
			? bch2_write_index_default(op)
			: bch2_data_update_index_update(op);
@@ -580,7 +580,7 @@ out:
 err:
 	keys->top = keys->keys;
 	op->error = ret;
-	op->flags |= BCH_WRITE_SUBMITTED;
+	op->flags |= BCH_WRITE_submitted;
 	goto out;
 }
 
@@ -623,8 +623,8 @@ static CLOSURE_CALLBACK(bch2_write_index)
 	struct workqueue_struct *wq = index_update_wq(op);
 	unsigned long flags;
 
-	if ((op->flags & BCH_WRITE_SUBMITTED) &&
-	    (op->flags & BCH_WRITE_MOVE))
+	if ((op->flags & BCH_WRITE_submitted) &&
+	    (op->flags & BCH_WRITE_move))
 		bch2_bio_free_pages_pool(op->c, &op->wbio.bio);
 
 	spin_lock_irqsave(&wp->writes_lock, flags);
@@ -662,11 +662,11 @@ void bch2_write_point_do_index_updates(struct work_struct *work)
 		if (!op)
 			break;
 
-		op->flags |= BCH_WRITE_IN_WORKER;
+		op->flags |= BCH_WRITE_in_worker;
 
 		__bch2_write_index(op);
 
-		if (!(op->flags & BCH_WRITE_SUBMITTED))
+		if (!(op->flags & BCH_WRITE_submitted))
 			__bch2_write(op);
 		else
 			bch2_write_done(&op->cl);
@@ -690,7 +690,7 @@ static void bch2_write_endio(struct bio *bio)
				   "data write error: %s",
				   bch2_blk_status_to_str(bio->bi_status))) {
 		set_bit(wbio->dev, op->failed.d);
-		op->flags |= BCH_WRITE_IO_ERROR;
+		op->flags |= BCH_WRITE_io_error;
 	}
 
 	if (wbio->nocow) {
@@ -737,7 +737,7 @@ static void init_append_extent(struct bch_write_op *op,
 	bch2_extent_crc_append(&e->k_i, crc);
 
 	bch2_alloc_sectors_append_ptrs_inlined(op->c, wp, &e->k_i, crc.compressed_size,
-				       op->flags & BCH_WRITE_CACHED);
+				       op->flags & BCH_WRITE_cached);
 
 	bch2_keylist_push(&op->insert_keys);
 }
@@ -854,7 +854,7 @@ static enum prep_encoded_ret {
 	struct bch_fs *c = op->c;
 	struct bio *bio = &op->wbio.bio;
 
-	if (!(op->flags & BCH_WRITE_DATA_ENCODED))
+	if (!(op->flags & BCH_WRITE_data_encoded))
 		return PREP_ENCODED_OK;
 
 	BUG_ON(bio_sectors(bio) != op->crc.compressed_size);
@@ -962,9 +962,9 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
 	if (ec_buf ||
 	    op->compression_opt ||
 	    (op->csum_type &&
-	     !(op->flags & BCH_WRITE_PAGES_STABLE)) ||
+	     !(op->flags & BCH_WRITE_pages_stable)) ||
 	    (bch2_csum_type_is_encryption(op->csum_type) &&
-	     !(op->flags & BCH_WRITE_PAGES_OWNED))) {
+	     !(op->flags & BCH_WRITE_pages_owned))) {
 		dst = bch2_write_bio_alloc(c, wp, src,
					   &page_alloc_failed,
					   ec_buf);
@@ -984,7 +984,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
 			break;
 
 		BUG_ON(op->compression_opt &&
-		       (op->flags & BCH_WRITE_DATA_ENCODED) &&
+		       (op->flags & BCH_WRITE_data_encoded) &&
		       bch2_csum_type_is_encryption(op->crc.csum_type));
 		BUG_ON(op->compression_opt && !bounce);
 
@@ -1022,7 +1022,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
 			}
 		}
 
-		if ((op->flags & BCH_WRITE_DATA_ENCODED) &&
+		if ((op->flags & BCH_WRITE_data_encoded) &&
		    !crc_is_compressed(crc) &&
		    bch2_csum_type_is_encryption(op->crc.csum_type) ==
		    bch2_csum_type_is_encryption(op->csum_type)) {
@@ -1054,7 +1054,7 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
 			crc.compression_type = compression_type;
 			crc.nonce	= nonce;
 		} else {
-			if ((op->flags & BCH_WRITE_DATA_ENCODED) &&
+			if ((op->flags & BCH_WRITE_data_encoded) &&
			    bch2_rechecksum_bio(c, src, version, op->crc,
					NULL, &op->crc,
					src_len >> 9,
@@ -1228,9 +1228,9 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
 
 static void __bch2_nocow_write_done(struct bch_write_op *op)
 {
-	if (unlikely(op->flags & BCH_WRITE_IO_ERROR)) {
+	if (unlikely(op->flags & BCH_WRITE_io_error)) {
 		op->error = -EIO;
-	} else if (unlikely(op->flags & BCH_WRITE_CONVERT_UNWRITTEN))
+	} else if (unlikely(op->flags & BCH_WRITE_convert_unwritten))
 		bch2_nocow_write_convert_unwritten(op);
 }
 
@@ -1259,7 +1259,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
 	struct bucket_to_lock *stale_at;
 	int stale, ret;
 
-	if (op->flags & BCH_WRITE_MOVE)
+	if (op->flags & BCH_WRITE_move)
 		return;
 
 	darray_init(&buckets);
@@ -1317,7 +1317,7 @@ retry:
				   }), GFP_KERNEL|__GFP_NOFAIL);
 
 		if (ptr->unwritten)
-			op->flags |= BCH_WRITE_CONVERT_UNWRITTEN;
+			op->flags |= BCH_WRITE_convert_unwritten;
 	}
 
 	/* Unlock before taking nocow locks, doing IO: */
@@ -1325,7 +1325,7 @@ retry:
 	bch2_trans_unlock(trans);
 
 	bch2_cut_front(op->pos, op->insert_keys.top);
-	if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN)
+	if (op->flags & BCH_WRITE_convert_unwritten)
 		bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top);
 
 	darray_for_each(buckets, i) {
@@ -1350,7 +1350,7 @@ retry:
 		wbio_init(bio)->put_bio = true;
 		bio->bi_opf = op->wbio.bio.bi_opf;
 	} else {
-		op->flags |= BCH_WRITE_SUBMITTED;
+		op->flags |= BCH_WRITE_submitted;
 	}
 
 	op->pos.offset += bio_sectors(bio);
@@ -1364,7 +1364,7 @@ retry:
					  op->insert_keys.top, true);
 
 		bch2_keylist_push(&op->insert_keys);
-		if (op->flags & BCH_WRITE_SUBMITTED)
+		if (op->flags & BCH_WRITE_submitted)
 			break;
 		bch2_btree_iter_advance(&iter);
 	}
@@ -1384,15 +1384,15 @@ err:
 		bch_err_ratelimited(c, "%s", buf.buf);
 		printbuf_exit(&buf);
 		op->error = ret;
-		op->flags |= BCH_WRITE_SUBMITTED;
+		op->flags |= BCH_WRITE_submitted;
 	}
 
 	/* fallback to cow write path? */
-	if (!(op->flags & BCH_WRITE_SUBMITTED)) {
+	if (!(op->flags & BCH_WRITE_submitted)) {
 		closure_sync(&op->cl);
 		__bch2_nocow_write_done(op);
 		op->insert_keys.top = op->insert_keys.keys;
-	} else if (op->flags & BCH_WRITE_SYNC) {
+	} else if (op->flags & BCH_WRITE_sync) {
 		closure_sync(&op->cl);
 		bch2_nocow_write_done(&op->cl.work);
 	} else {
@@ -1444,7 +1444,7 @@ static void __bch2_write(struct bch_write_op *op)
 	if (unlikely(op->opts.nocow && c->opts.nocow_enabled)) {
 		bch2_nocow_write(op);
-		if (op->flags & BCH_WRITE_SUBMITTED)
+		if (op->flags & BCH_WRITE_submitted)
 			goto out_nofs_restore;
 	}
 again:
@@ -1474,7 +1474,7 @@ again:
 		ret = bch2_trans_run(c, lockrestart_do(trans,
			bch2_alloc_sectors_start_trans(trans,
				op->target,
-				op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED),
+				op->opts.erasure_code && !(op->flags & BCH_WRITE_cached),
				op->write_point,
				&op->devs_have,
				op->nr_replicas,
@@ -1497,10 +1497,10 @@ again:
 		bch2_alloc_sectors_done_inlined(c, wp);
 err:
 		if (ret <= 0) {
-			op->flags |= BCH_WRITE_SUBMITTED;
+			op->flags |= BCH_WRITE_submitted;
 
 			if (unlikely(ret < 0)) {
-				if (!(op->flags & BCH_WRITE_ALLOC_NOWAIT)) {
+				if (!(op->flags & BCH_WRITE_alloc_nowait)) {
					struct printbuf buf = PRINTBUF;
					bch2_write_op_error(&buf, op);
					prt_printf(&buf, "%s(): %s", __func__, bch2_err_str(ret));
@@ -1532,14 +1532,14 @@ err:
	 * synchronously here if we weren't able to submit all of the IO at
	 * once, as that signals backpressure to the caller.
	 */
-	if ((op->flags & BCH_WRITE_SYNC) ||
-	    (!(op->flags & BCH_WRITE_SUBMITTED) &&
-	     !(op->flags & BCH_WRITE_IN_WORKER))) {
+	if ((op->flags & BCH_WRITE_sync) ||
+	    (!(op->flags & BCH_WRITE_submitted) &&
+	     !(op->flags & BCH_WRITE_in_worker))) {
 		bch2_wait_on_allocator(c, &op->cl);
 
 		__bch2_write_index(op);
 
-		if (!(op->flags & BCH_WRITE_SUBMITTED))
+		if (!(op->flags & BCH_WRITE_submitted))
 			goto again;
 		bch2_write_done(&op->cl);
 	} else {
@@ -1560,8 +1560,8 @@ static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
 
 	memset(&op->failed, 0, sizeof(op->failed));
 
-	op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
-	op->flags |= BCH_WRITE_SUBMITTED;
+	op->flags |= BCH_WRITE_wrote_data_inline;
+	op->flags |= BCH_WRITE_submitted;
 
 	bch2_check_set_feature(op->c, BCH_FEATURE_inline_data);
 
@@ -1624,8 +1624,8 @@ CLOSURE_CALLBACK(bch2_write)
 	BUG_ON(!op->write_point.v);
 	BUG_ON(bkey_eq(op->pos, POS_MAX));
 
-	if (op->flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)
-		op->flags |= BCH_WRITE_ALLOC_NOWAIT;
+	if (op->flags & BCH_WRITE_only_specified_devs)
+		op->flags |= BCH_WRITE_alloc_nowait;
 
 	op->nr_replicas_required = min_t(unsigned, op->nr_replicas_required, op->nr_replicas);
 	op->start_time = local_clock();
@@ -1646,13 +1646,13 @@ CLOSURE_CALLBACK(bch2_write)
 		goto err;
 	}
 
-	if (!(op->flags & BCH_WRITE_MOVE) &&
+	if (!(op->flags & BCH_WRITE_move) &&
	    !bch2_write_ref_tryget(c, BCH_WRITE_REF_write)) {
 		op->error = -BCH_ERR_erofs_no_writes;
 		goto err;
 	}
 
-	if (!(op->flags & BCH_WRITE_MOVE))
+	if (!(op->flags & BCH_WRITE_move))
 		this_cpu_add(c->counters[BCH_COUNTER_io_write], bio_sectors(bio));
 	bch2_increment_clock(c, bio_sectors(bio), WRITE);
 
diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h
index b4626013abc8..02cca52be0bd 100644
--- a/fs/bcachefs/io_write.h
+++ b/fs/bcachefs/io_write.h
@@ -23,21 +23,20 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
 void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op);
 
 #define BCH_WRITE_FLAGS()		\
-	x(ALLOC_NOWAIT)			\
-	x(CACHED)			\
-	x(DATA_ENCODED)			\
-	x(PAGES_STABLE)			\
-	x(PAGES_OWNED)			\
-	x(ONLY_SPECIFIED_DEVS)		\
-	x(WROTE_DATA_INLINE)		\
-	x(FROM_INTERNAL)		\
-	x(CHECK_ENOSPC)			\
-	x(SYNC)				\
-	x(MOVE)				\
-	x(IN_WORKER)			\
-	x(SUBMITTED)			\
-	x(IO_ERROR)			\
-	x(CONVERT_UNWRITTEN)
+	x(alloc_nowait)			\
+	x(cached)			\
+	x(data_encoded)			\
+	x(pages_stable)			\
+	x(pages_owned)			\
+	x(only_specified_devs)		\
+	x(wrote_data_inline)		\
+	x(check_enospc)			\
+	x(sync)				\
+	x(move)				\
+	x(in_worker)			\
+	x(submitted)			\
+	x(io_error)			\
+	x(convert_unwritten)
 
 enum __bch_write_flags {
 #define x(f)	__BCH_WRITE_##f,
diff --git a/fs/bcachefs/io_write_types.h b/fs/bcachefs/io_write_types.h
index 6e878a6f2f0b..3ef6df9145ef 100644
--- a/fs/bcachefs/io_write_types.h
+++ b/fs/bcachefs/io_write_types.h
@@ -64,7 +64,7 @@ struct bch_write_op {
 	struct bpos		pos;
 	struct bversion		version;
 
-	/* For BCH_WRITE_DATA_ENCODED: */
+	/* For BCH_WRITE_data_encoded: */
 	struct bch_extent_crc_unpacked crc;
 
 	struct write_point_specifier write_point;
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index d0a1f5cd5c2b..58f6d97e506c 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -341,7 +341,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
 	memset(data_opts, 0, sizeof(*data_opts));
 	data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k);
 	data_opts->target	= io_opts->background_target;
-	data_opts->write_flags	|= BCH_WRITE_ONLY_SPECIFIED_DEVS;
+	data_opts->write_flags	|= BCH_WRITE_only_specified_devs;
 
 	if (!data_opts->rewrite_ptrs) {
 		/*
@@ -449,7 +449,7 @@ static bool rebalance_pred(struct bch_fs *c, void *arg,
 {
 	data_opts->rewrite_ptrs = bch2_bkey_ptrs_need_rebalance(c, io_opts, k);
 	data_opts->target	= io_opts->background_target;
-	data_opts->write_flags	|= BCH_WRITE_ONLY_SPECIFIED_DEVS;
+	data_opts->write_flags	|= BCH_WRITE_only_specified_devs;
 
 	return data_opts->rewrite_ptrs != 0;
 }
-- 
2.51.0

From 0f856b72286810a1bcf148c92f04d84c36bb4f30 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Tue, 14 Jan 2025 15:20:04 -0500
Subject: [PATCH 02/16] bcachefs: rbio_init_fragment()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/io_read.c | 18 +++++++-----------
 fs/bcachefs/io_read.h | 14 ++++++++++++++
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 539b48f94523..3b474c679fb4 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -177,6 +177,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
			struct bch_io_failures *failed)
 {
 	struct bch_fs *c = trans->c;
+	struct bch_read_bio *orig = *rbio;
 	struct promote_op *op = NULL;
 	struct bio *bio;
 	unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
@@ -206,7 +207,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
 		goto err;
 	}
 
-	rbio_init(&(*rbio)->bio, opts);
+	rbio_init_fragment(&(*rbio)->bio, orig);
 	bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0);
 
 	if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, GFP_KERNEL)) {
@@ -215,7 +216,6 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
 	}
 
 	(*rbio)->bounce		= true;
-	(*rbio)->split		= true;
 	(*rbio)->kmalloc	= true;
 
 	if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
@@ -1024,16 +1024,15 @@ get_bio:
 	} else if (bounce) {
 		unsigned sectors = pick.crc.compressed_size;
 
-		rbio = rbio_init(bio_alloc_bioset(NULL,
+		rbio = rbio_init_fragment(bio_alloc_bioset(NULL,
						  DIV_ROUND_UP(sectors, PAGE_SECTORS),
						  0,
						  GFP_NOFS,
						  &c->bio_read_split),
-				 orig->opts);
+				 orig);
 
 		bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
 		rbio->bounce	= true;
-		rbio->split	= true;
 	} else if (flags & BCH_READ_must_clone) {
 		/*
		 * Have to clone if there were any splits, due to error
@@ -1043,11 +1042,10 @@ get_bio:
		 * from the whole bio, in which case we don't want to retry and
		 * lose the error)
		 */
-		rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS,
+		rbio = rbio_init_fragment(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS,
						 &c->bio_read_split),
-				 orig->opts);
+				 orig);
 		rbio->bio.bi_iter = iter;
-		rbio->split	= true;
 	} else {
 		rbio = orig;
 		rbio->bio.bi_iter = iter;
@@ -1058,9 +1056,7 @@ get_bio:
 
 	rbio->c			= c;
 	rbio->submit_time	= local_clock();
-	if (rbio->split)
-		rbio->parent	= orig;
-	else
+	if (!rbio->split)
 		rbio->end_io	= orig->bio.bi_end_io;
 	rbio->bvec_iter		= iter;
 	rbio->offset_into_extent= offset_into_extent;
diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h
index ef5603daf122..11fdf73a38b1 100644
--- a/fs/bcachefs/io_read.h
+++ b/fs/bcachefs/io_read.h
@@ -162,6 +162,20 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
			BCH_READ_user_mapped);
 }
 
+
+static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio,
+						      struct bch_read_bio *orig)
+{
+	struct bch_read_bio *rbio = to_rbio(bio);
+
+	rbio->_state	= 0;
+	rbio->split	= true;
+	rbio->parent	= orig;
+	rbio->promote	= NULL;
+	rbio->opts	= orig->opts;
+	return rbio;
+}
+
 static inline struct bch_read_bio *rbio_init(struct bio *bio,
					     struct bch_io_opts opts)
 {
-- 
2.51.0
From dfa204b169ed29e4cdcb5a20d4cba2f579235543 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Wed, 15 Jan 2025 12:59:43 -0500
Subject: [PATCH 03/16] bcachefs: rbio_init() cleanup

Move more initialization to rbio_init(), to assist in further cleanups.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/fs-io-buffered.c | 13 ++++++-------
 fs/bcachefs/fs-io-direct.c   | 16 ++++++++++++----
 fs/bcachefs/io_read.c        | 18 ++++++++----------
 fs/bcachefs/io_read.h        | 17 ++++++++++-------
 fs/bcachefs/move.c           |  8 +++++---
 5 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index dc00edfefc91..0ec2eebdeffa 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -163,8 +163,6 @@ static void bchfs_read(struct btree_trans *trans,
			BCH_READ_may_promote;
 	int ret = 0;
 
-	rbio->c = c;
-	rbio->start_time = local_clock();
 	rbio->subvol = inum.subvol;
 
 	bch2_bkey_buf_init(&sk);
@@ -290,12 +288,13 @@ void bch2_readahead(struct readahead_control *ractl)
 		struct bch_read_bio *rbio =
			rbio_init(bio_alloc_bioset(NULL, n, REQ_OP_READ, GFP_KERNEL,
						   &c->bio_read),
-				  opts);
+				  c,
+				  opts,
+				  bch2_readpages_end_io);
 
 		readpage_iter_advance(&readpages_iter);
 
 		rbio->bio.bi_iter.bi_sector = folio_sector(folio);
-		rbio->bio.bi_end_io = bch2_readpages_end_io;
 		BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
 
 		bchfs_read(trans, rbio, inode_inum(inode),
@@ -333,10 +332,10 @@ int bch2_read_single_folio(struct folio *folio, struct address_space *mapping)
 	bch2_inode_opts_get(&opts, c, &inode->ei_inode);
 
 	rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_KERNEL, &c->bio_read),
-			 opts);
+			 c,
+			 opts,
+			 bch2_read_single_folio_end_io);
 	rbio->bio.bi_private = &done;
-	rbio->bio.bi_end_io = bch2_read_single_folio_end_io;
-	rbio->bio.bi_opf = REQ_OP_READ|REQ_SYNC;
 	rbio->bio.bi_iter.bi_sector = folio_sector(folio);
 	BUG_ON(!bio_add_folio(&rbio->bio, folio, folio_size(folio), 0));
 
diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index d2f2cbaba7a8..535bc5fcbcc0 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -73,6 +73,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
 	struct blk_plug plug;
 	loff_t offset = req->ki_pos;
 	bool sync = is_sync_kiocb(req);
+	bool split = false;
 	size_t shorten;
 	ssize_t ret;
 
@@ -99,8 +100,6 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
			       GFP_KERNEL,
			       &c->dio_read_bioset);
 
-	bio->bi_end_io = bch2_direct_IO_read_endio;
-
 	dio = container_of(bio, struct dio_read, rbio.bio);
 	closure_init(&dio->cl, NULL);
 
@@ -133,12 +132,13 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter)
 
 	goto start;
 	while (iter->count) {
+		split = true;
+
 		bio = bio_alloc_bioset(NULL,
				       bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
				       REQ_OP_READ,
				       GFP_KERNEL,
				       &c->bio_read);
-		bio->bi_end_io = bch2_direct_IO_read_split_endio;
 start:
 		bio->bi_opf = REQ_OP_READ|REQ_SYNC;
 		bio->bi_iter.bi_sector = offset >> 9;
@@ -160,7 +160,15 @@ start:
 		if (iter->count)
			closure_get(&dio->cl);
 
-		bch2_read(c, rbio_init(bio, opts), inode_inum(inode));
+		struct bch_read_bio *rbio =
+			rbio_init(bio,
+				  c,
+				  opts,
+				  split
+				  ? bch2_direct_IO_read_split_endio
+				  : bch2_direct_IO_read_endio);
+
+		bch2_read(c, rbio, inode_inum(inode));
 	}
 
 	blk_finish_plug(&plug);
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 3b474c679fb4..aa6536d8c62e 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -171,13 +171,12 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
					  struct bkey_s_c k,
					  struct bpos pos,
					  struct extent_ptr_decoded *pick,
-					  struct bch_io_opts opts,
					  unsigned sectors,
+					  struct bch_read_bio *orig,
					  struct bch_read_bio **rbio,
					  struct bch_io_failures *failed)
 {
 	struct bch_fs *c = trans->c;
-	struct bch_read_bio *orig = *rbio;
 	struct promote_op *op = NULL;
 	struct bio *bio;
 	unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
@@ -230,11 +229,11 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
 	struct data_update_opts update_opts = {};
 
 	if (!have_io_error(failed)) {
-		update_opts.target = opts.promote_target;
+		update_opts.target = orig->opts.promote_target;
 		update_opts.extra_replicas = 1;
 		update_opts.write_flags = BCH_WRITE_alloc_nowait|BCH_WRITE_cached;
 	} else {
-		update_opts.target = opts.foreground_target;
+		update_opts.target = orig->opts.foreground_target;
 
 		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 		unsigned ptr_bit = 1;
@@ -247,7 +246,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
 	ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
			writepoint_hashed((unsigned long) current),
-			opts,
+			orig->opts,
			update_opts,
			btree_id, k);
 	/*
@@ -279,8 +278,8 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
					struct bvec_iter iter,
					struct bkey_s_c k,
					struct extent_ptr_decoded *pick,
-					struct bch_io_opts opts,
					unsigned flags,
+					struct bch_read_bio *orig,
					struct bch_read_bio **rbio,
					bool *bounce,
					bool *read_full,
@@ -304,7 +303,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
 	struct promote_op *promote;
 	int ret;
 
-	ret = should_promote(c, k, pos, opts, flags, failed);
+	ret = should_promote(c, k, pos, orig->opts, flags, failed);
 	if (ret)
 		goto nopromote;
 
@@ -312,7 +311,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
 	promote = __promote_alloc(trans,
				  k.k->type == KEY_TYPE_reflink_v
				  ? BTREE_ID_reflink
				  : BTREE_ID_extents,
-				  k, pos, pick, opts, sectors, rbio, failed);
+				  k, pos, pick, sectors, orig, rbio, failed);
 	ret = PTR_ERR_OR_ZERO(promote);
 	if (ret)
 		goto nopromote;
@@ -989,7 +988,7 @@ retry_pick:
 	}
 
 	if (orig->opts.promote_target || have_io_error(failed))
-		promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
+		promote = promote_alloc(trans, iter, k, &pick, flags, orig,
					&rbio, &bounce, &read_full, failed);
 
 	if (!read_full) {
@@ -1054,7 +1053,6 @@ get_bio:
 
 	EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size);
 
-	rbio->c			= c;
 	rbio->submit_time	= local_clock();
 	if (!rbio->split)
 		rbio->end_io	= orig->bio.bi_end_io;
diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h
index 11fdf73a38b1..5be4f4b35568 100644
--- a/fs/bcachefs/io_read.h
+++ b/fs/bcachefs/io_read.h
@@ -152,8 +152,6 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
 
 	BUG_ON(rbio->_state);
 
-	rbio->c = c;
-	rbio->start_time = local_clock();
 	rbio->subvol = inum.subvol;
 
 	__bch2_read(c, rbio, rbio->bio.bi_iter, inum, &failed,
@@ -162,12 +160,12 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
			BCH_READ_user_mapped);
 }
 
-
 static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio,
						      struct bch_read_bio *orig)
 {
 	struct bch_read_bio *rbio = to_rbio(bio);
 
+	rbio->c		= orig->c;
 	rbio->_state	= 0;
 	rbio->split	= true;
 	rbio->parent	= orig;
@@ -177,13 +175,18 @@ static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio,
 }
 
 static inline struct bch_read_bio *rbio_init(struct bio *bio,
-					     struct bch_io_opts opts)
+					     struct bch_fs *c,
+					     struct bch_io_opts opts,
+					     bio_end_io_t end_io)
 {
 	struct bch_read_bio *rbio = to_rbio(bio);
 
-	rbio->_state	= 0;
-	rbio->promote	= NULL;
-	rbio->opts	= opts;
+	rbio->start_time	= local_clock();
+	rbio->c			= c;
+	rbio->_state		= 0;
+	rbio->promote		= NULL;
+	rbio->opts		= opts;
+	rbio->bio.bi_end_io	= end_io;
 	return rbio;
 }
 
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 70304d7c234a..b8ac6dd28471 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -307,8 +307,6 @@ int bch2_move_extent(struct moving_context *ctxt,
			 GFP_KERNEL))
 		goto err_free;
 
-	io->rbio.c		= c;
-	io->rbio.opts		= io_opts;
 	bio_init(&io->rbio.bio, NULL, io->bi_inline_vecs, pages, 0);
 	io->rbio.bio.bi_vcnt = pages;
 	io->rbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
@@ -316,7 +314,11 @@ int bch2_move_extent(struct moving_context *ctxt,
 
 	io->rbio.bio.bi_opf		= REQ_OP_READ;
 	io->rbio.bio.bi_iter.bi_sector	= bkey_start_offset(k.k);
-	io->rbio.bio.bi_end_io		= move_read_endio;
+
+	rbio_init(&io->rbio.bio,
+		  c,
+		  io_opts,
+		  move_read_endio);
 
 	ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
				    io_opts, data_opts, iter->btree_id, k);
-- 
2.51.0

From a70bd976303296940ebc7611660ed0dedfeb1fd7 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Wed, 15 Jan 2025 18:53:55 -0500
Subject: [PATCH 04/16] bcachefs: data_update now embeds bch_read_bio

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/data_update.h |  5 +++++
 fs/bcachefs/move.c        | 28 ++++++++++++----------------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index 7a200e6b770b..d1dd4819a8a3 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -4,6 +4,7 @@
 #define _BCACHEFS_DATA_UPDATE_H
 
 #include "bkey_buf.h"
+#include "io_read.h"
 #include "io_write_types.h"
 
 struct moving_context;
@@ -28,7 +29,11 @@ struct data_update {
 	struct data_update_opts	data_opts;
 	struct moving_context	*ctxt;
 	struct bch_move_stats	*stats;
+
+	struct bch_read_bio	rbio;
 	struct bch_write_op	op;
+	/* Must be last since it is variable size */
+	struct bio_vec		bi_inline_vecs[];
 };
 
 void bch2_data_update_to_text(struct printbuf *, struct data_update *);
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index b8ac6dd28471..6ff1459e3e2e 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -74,11 +74,7 @@ struct moving_io {
 	unsigned		read_sectors;
 	unsigned		write_sectors;
 
-	struct bch_read_bio	rbio;
-
 	struct data_update	write;
-	/* Must be last since it is variable size */
-	struct bio_vec		bi_inline_vecs[];
 };
 
 static void move_free(struct moving_io *io)
@@ -113,7 +109,7 @@ static void move_write_done(struct bch_write_op *op)
 
 static void move_write(struct moving_io *io)
 {
-	if (unlikely(io->rbio.bio.bi_status || io->rbio.hole)) {
+	if (unlikely(io->write.rbio.bio.bi_status || io->write.rbio.hole)) {
 		move_free(io);
 		return;
 	}
@@ -131,7 +127,7 @@ static void move_write(struct moving_io *io)
 	atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
 	atomic_inc(&io->write.ctxt->write_ios);
 
-	bch2_data_update_read_done(&io->write, io->rbio.pick.crc);
+	bch2_data_update_read_done(&io->write, io->write.rbio.pick.crc);
 }
 
 struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
@@ -144,7 +140,7 @@ struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctx
 
 static void move_read_endio(struct bio *bio)
 {
-	struct moving_io *io = container_of(bio, struct moving_io, rbio.bio);
+	struct moving_io *io = container_of(bio, struct moving_io, write.rbio.bio);
 	struct moving_context *ctxt = io->write.ctxt;
 
 	atomic_sub(io->read_sectors, &ctxt->read_sectors);
@@ -299,7 +295,7 @@ int bch2_move_extent(struct moving_context *ctxt,
 	io->read_sectors	= k.k->size;
 	io->write_sectors	= k.k->size;
 
-	bio_init(&io->write.op.wbio.bio, NULL, io->bi_inline_vecs, pages, 0);
+	bio_init(&io->write.op.wbio.bio, NULL, io->write.bi_inline_vecs, pages, 0);
 	io->write.op.wbio.bio.bi_ioprio =
 		IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
 
@@ -307,15 +303,15 @@ int bch2_move_extent(struct moving_context *ctxt,
			 GFP_KERNEL))
 		goto err_free;
 
-	bio_init(&io->rbio.bio, NULL, io->bi_inline_vecs, pages, 0);
-	io->rbio.bio.bi_vcnt = pages;
-	io->rbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
-	io->rbio.bio.bi_iter.bi_size = sectors << 9;
+	bio_init(&io->write.rbio.bio, NULL, io->write.bi_inline_vecs, pages, 0);
+	io->write.rbio.bio.bi_vcnt = pages;
+	io->write.rbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
+	io->write.rbio.bio.bi_iter.bi_size = sectors << 9;
 
-	io->rbio.bio.bi_opf = REQ_OP_READ;
-	io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
+	io->write.rbio.bio.bi_opf = REQ_OP_READ;
+	io->write.rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
 
-	rbio_init(&io->rbio.bio,
+	rbio_init(&io->write.rbio.bio,
		  c,
		  io_opts,
		  move_read_endio);
@@ -357,7 +353,7 @@ int bch2_move_extent(struct moving_context *ctxt,
	 * ctxt when doing wakeup
	 */
 	closure_get(&ctxt->cl);
-	bch2_read_extent(trans, &io->rbio,
+	bch2_read_extent(trans, &io->write.rbio,
			 bkey_start_pos(k.k),
			 iter->btree_id, k, 0,
			 BCH_READ_data_update|
-- 
2.51.0

From 8f97793d67a2bd75345c8e2ac1664950186bf2fb Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Wed, 15 Jan 2025 22:22:29 -0500
Subject: [PATCH 05/16] bcachefs: promote_op uses embedded bch_read_bio

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/io_read.c | 104 ++++++++++++++++++------------------------
 fs/bcachefs/io_read.h |   8 +---
 2 files changed, 47 insertions(+), 65 deletions(-)

diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index aa6536d8c62e..5fcb1947db6e 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -166,15 +166,14 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
 	bch2_data_update_read_done(&op->write, rbio->pick.crc);
 }
 
-static struct promote_op *__promote_alloc(struct btree_trans *trans,
-					  enum btree_id btree_id,
-					  struct bkey_s_c k,
-					  struct bpos pos,
-					  struct extent_ptr_decoded *pick,
-					  unsigned sectors,
-					  struct bch_read_bio *orig,
-					  struct bch_read_bio **rbio,
-					  struct bch_io_failures *failed)
+static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
+					    enum btree_id btree_id,
+					    struct bkey_s_c k,
+					    struct bpos pos,
+					    struct extent_ptr_decoded *pick,
+					    unsigned sectors,
+					    struct bch_read_bio *orig,
+					    struct bch_io_failures *failed)
 {
 	struct bch_fs *c = trans->c;
 	struct promote_op *op = NULL;
 	struct bio *bio;
 	unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
@@ -188,34 +187,25 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
 	op = kzalloc(struct_size(op, bi_inline_vecs, pages), GFP_KERNEL);
 	if (!op) {
 		ret = -BCH_ERR_nopromote_enomem;
-		goto err;
+		goto err_put;
 	}
 
 	op->start_time = local_clock();
 	op->pos = pos;
 
-	/*
-	 * We don't use the mempool here because extents that aren't
-	 * checksummed or compressed can be too big for the mempool:
-	 */
-	*rbio = kzalloc(sizeof(struct bch_read_bio) +
-			sizeof(struct bio_vec) * pages,
-			GFP_KERNEL);
-	if (!*rbio) {
-		ret = -BCH_ERR_nopromote_enomem;
-		goto err;
-	}
-
-	rbio_init_fragment(&(*rbio)->bio, orig);
-	bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0);
+	rbio_init_fragment(&op->write.rbio.bio, orig);
+	bio_init(&op->write.rbio.bio,
+		 NULL,
+		 op->write.bi_inline_vecs,
+		 pages, 0);
 
-	if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, GFP_KERNEL)) {
+	if (bch2_bio_alloc_pages(&op->write.rbio.bio, sectors << 9, GFP_KERNEL)) {
 		ret = -BCH_ERR_nopromote_enomem;
 		goto err;
 	}
 
-	(*rbio)->bounce		= true;
-	(*rbio)->kmalloc	= true;
+	op->write.rbio.bounce	= true;
+	op->write.rbio.promote	= true;
 
 	if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
					  bch_promote_params)) {
@@ -260,27 +250,23 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
 	op->write.op.end_io = promote_done;
 
-	return op;
+	return &op->write.rbio;
 err:
-	if (*rbio)
-		bio_free_pages(&(*rbio)->bio);
-	kfree(*rbio);
-	*rbio = NULL;
+	bio_free_pages(&op->write.rbio.bio);
 	/* We may have added to the rhashtable and thus need rcu freeing: */
 	kfree_rcu(op, rcu);
+err_put:
 	bch2_write_ref_put(c, BCH_WRITE_REF_promote);
 	return ERR_PTR(ret);
 }
 
 noinline
-static struct promote_op *promote_alloc(struct btree_trans *trans,
+static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
					struct bvec_iter iter,
					struct bkey_s_c k,
					struct extent_ptr_decoded *pick,
					unsigned flags,
					struct bch_read_bio *orig,
-					struct bch_read_bio **rbio,
					bool *bounce,
					bool *read_full,
					struct bch_io_failures *failed)
@@ -300,18 +286,18 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
 	struct bpos pos = promote_full
 		? bkey_start_pos(k.k)
 		: POS(k.k->p.inode, iter.bi_sector);
-	struct promote_op *promote;
 	int ret;
 
 	ret = should_promote(c, k, pos, orig->opts, flags, failed);
 	if (ret)
 		goto nopromote;
 
-	promote = __promote_alloc(trans,
-				  k.k->type == KEY_TYPE_reflink_v
-				  ? BTREE_ID_reflink
-				  : BTREE_ID_extents,
-				  k, pos, pick, sectors, orig, rbio, failed);
+	struct bch_read_bio *promote =
+		__promote_alloc(trans,
+				k.k->type == KEY_TYPE_reflink_v
+				? BTREE_ID_reflink
+				: BTREE_ID_extents,
+				k, pos, pick, sectors, orig, failed);
 	ret = PTR_ERR_OR_ZERO(promote);
 	if (ret)
 		goto nopromote;
@@ -374,20 +360,24 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
 {
 	BUG_ON(rbio->bounce && !rbio->split);
 
-	if (rbio->promote)
-		promote_free(rbio->c, rbio->promote);
-	rbio->promote = NULL;
-
-	if (rbio->bounce)
-		bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
-
 	if (rbio->split) {
 		struct bch_read_bio *parent = rbio->parent;
 
-		if (rbio->kmalloc)
-			kfree(rbio);
-		else
+		if (rbio->promote) {
+			struct promote_op *op = container_of(rbio, struct promote_op, write.rbio);
+
+			if (!rbio->bio.bi_status) {
+				promote_start(op, rbio);
+			} else {
+				bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
+				promote_free(rbio->c, op);
+			}
+		} else {
+			if (rbio->bounce)
+				bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
+
 			bio_put(&rbio->bio);
+		}
 
 		rbio = parent;
 	}
@@ -482,7 +472,8 @@ static void bch2_rbio_retry(struct work_struct *work)
 	if (rbio->retry == READ_RETRY_AVOID)
 		bch2_mark_io_failure(&failed, &rbio->pick);
 
-	rbio->bio.bi_status = 0;
+	if (!rbio->split)
+		rbio->bio.bi_status = 0;
 
 	rbio = bch2_rbio_free(rbio);
 
@@ -753,9 +744,6 @@ static void __bch2_read_endio(struct work_struct *work)
 		ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
 		if (ret)
 			goto decrypt_err;
-
-		promote_start(rbio->promote, rbio);
-		rbio->promote = NULL;
 	}
 nodecode:
 	if (likely(!(rbio->flags & BCH_READ_in_retry))) {
@@ -887,7 +875,6 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
 	struct bch_fs *c = trans->c;
 	struct extent_ptr_decoded pick;
 	struct bch_read_bio *rbio = NULL;
-	struct promote_op *promote = NULL;
 	bool bounce = false, read_full = false, narrow_crcs = false;
 	struct bpos data_pos = bkey_start_pos(k.k);
 	int pick_ret;
@@ -988,8 +975,8 @@ retry_pick:
 	}
 
 	if (orig->opts.promote_target || have_io_error(failed))
-		promote = promote_alloc(trans, iter, k, &pick, flags, orig,
-					&rbio, &bounce, &read_full, failed);
+		rbio = promote_alloc(trans, iter, k, &pick, flags, orig,
+				     &bounce, &read_full, failed);
 
 	if (!read_full) {
 		EBUG_ON(crc_is_compressed(pick.crc));
@@ -1070,7 +1057,6 @@ get_bio:
 	rbio->data_btree	= data_btree;
 	rbio->data_pos		= data_pos;
 	rbio->version		= k.k->bversion;
-	rbio->promote		= promote;
 	INIT_WORK(&rbio->work, NULL);
 
 	if (flags & BCH_READ_data_update)
diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h
index 5be4f4b35568..f54c9943e34a 100644
--- a/fs/bcachefs/io_read.h
+++ b/fs/bcachefs/io_read.h
@@ -35,9 +35,9 @@ struct bch_read_bio {
 	u16			flags;
 	union {
 	struct {
-	u16			bounce:1,
+	u16			promote:1,
+				bounce:1,
				split:1,
-				kmalloc:1,
				have_ioref:1,
				narrow_crcs:1,
				hole:1,
@@ -63,8 +63,6 @@ struct bch_read_bio {
 	struct bpos		data_pos;
 	struct bversion		version;
 
-	struct promote_op	*promote;
-
 	struct bch_io_opts	opts;
 
 	struct work_struct	work;
@@ -169,7 +167,6 @@ static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio,
 	rbio->_state	= 0;
 	rbio->split	= true;
 	rbio->parent	= orig;
-	rbio->promote	= NULL;
 	rbio->opts	= orig->opts;
 	return rbio;
 }
@@ -184,7 +181,6 @@ static inline struct bch_read_bio *rbio_init(struct bio *bio,
 	rbio->start_time	= local_clock();
 	rbio->c			= c;
 	rbio->_state		= 0;
-	rbio->promote		= NULL;
 	rbio->opts		= opts;
 	rbio->bio.bi_end_io	= end_io;
 	return rbio;
-- 
2.51.0

From 536d789781c66e3e3ae447b8116952d5ce689e6b Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Fri, 17 Jan 2025 14:26:30 -0500
Subject: [PATCH 06/16] bcachefs: bch2_update_unwritten_extent() no longer
 depends on wbio

Prep work for improving bch2_data_update_init().
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/data_update.c | 26 +++++++++++++++-----------
 fs/bcachefs/errcode.h     |  4 ++++
 fs/bcachefs/move.c        |  2 +-
 3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index bfd8ba162630..f5b7e6050f40 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -456,23 +456,23 @@ void bch2_data_update_exit(struct data_update *update)
 	bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
 }
 
-static void bch2_update_unwritten_extent(struct btree_trans *trans,
-					 struct data_update *update)
+static int bch2_update_unwritten_extent(struct btree_trans *trans,
+					struct data_update *update)
 {
 	struct bch_fs *c = update->op.c;
-	struct bio *bio = &update->op.wbio.bio;
 	struct bkey_i_extent *e;
 	struct write_point *wp;
 	struct closure cl;
 	struct btree_iter iter;
 	struct bkey_s_c k;
-	int ret;
+	int ret = 0;
 
 	closure_init_stack(&cl);
 	bch2_keylist_init(&update->op.insert_keys, update->op.inline_keys);
 
-	while (bio_sectors(bio)) {
-		unsigned sectors = bio_sectors(bio);
+	while (bpos_lt(update->op.pos, update->k.k->k.p)) {
+		unsigned sectors = update->k.k->k.p.offset -
+			update->op.pos.offset;
 
 		bch2_trans_begin(trans);
 
@@ -508,7 +508,7 @@ static int bch2_update_unwritten_extent(struct btree_trans *trans,
 		bch_err_fn_ratelimited(c, ret);
 
 		if (ret)
-			return;
+			break;
 
 		sectors = min(sectors, wp->sectors_free);
 
@@ -518,7 +518,6 @@ static int bch2_update_unwritten_extent(struct btree_trans *trans,
 		bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false);
 		bch2_alloc_sectors_done(c, wp);
 
-		bio_advance(bio, sectors << 9);
 		update->op.pos.offset += sectors;
 
 		extent_for_each_ptr(extent_i_to_s(e), ptr)
@@ -537,6 +536,8 @@ static int bch2_update_unwritten_extent(struct btree_trans *trans,
			bch2_trans_unlock(trans);
			closure_sync(&cl);
 		}
+
+	return ret;
 }
 
 void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
@@ -657,10 +658,10 @@ int bch2_data_update_init(struct btree_trans *trans,
	 * snapshots table - just skip it, we can move it later.
	 */
 	if (unlikely(k.k->p.snapshot && !bch2_snapshot_exists(c, k.k->p.snapshot)))
-		return -BCH_ERR_data_update_done;
+		return -BCH_ERR_data_update_done_no_snapshot;
 
 	if (!bkey_get_dev_refs(c, k))
-		return -BCH_ERR_data_update_done;
+		return -BCH_ERR_data_update_done_no_dev_refs;
 
 	if (c->opts.nocow_enabled &&
	    !bkey_nocow_lock(c, ctxt, k)) {
@@ -758,6 +759,8 @@ int bch2_data_update_init(struct btree_trans *trans,
 		/* if iter == NULL, it's just a promote */
 		if (iter)
			ret = bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &m->data_opts);
+		if (!ret)
+			ret = -BCH_ERR_data_update_done_no_writes_needed;
 		goto out;
 	}
 
@@ -771,7 +774,8 @@ int bch2_data_update_init(struct btree_trans *trans,
 	}
 
 	if (bkey_extent_is_unwritten(k)) {
-		bch2_update_unwritten_extent(trans, m);
+		ret = bch2_update_unwritten_extent(trans, m) ?:
+			-BCH_ERR_data_update_done_unwritten;
 		goto out;
 	}
 
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 712877036612..82f950ea1c26 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -181,6 +181,10 @@
	x(EINVAL,		not_in_recovery)				\
	x(EINVAL,		cannot_rewind_recovery)				\
	x(0,			data_update_done)				\
+	x(BCH_ERR_data_update_done, data_update_done_unwritten)		\
+	x(BCH_ERR_data_update_done, data_update_done_no_writes_needed)		\
+	x(BCH_ERR_data_update_done, data_update_done_no_snapshot)		\
+	x(BCH_ERR_data_update_done, data_update_done_no_dev_refs)		\
	x(EINVAL,		device_state_not_allowed)			\
	x(EINVAL,		member_info_missing)				\
	x(EINVAL,		mismatched_block_size)				\
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 6ff1459e3e2e..03f071827c5c 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -364,7 +364,7 @@ err_free_pages:
 err_free:
 	kfree(io);
 err:
-	if (ret == -BCH_ERR_data_update_done)
+	if (bch2_err_matches(ret, BCH_ERR_data_update_done))
 		return 0;
 
 	if (bch2_err_matches(ret, EROFS) ||
-- 
2.51.0

From 6f7111f820d5b956bab7ff744ecae953e2bf8e4f Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Thu, 16 Jan 2025 00:40:43 -0500
Subject: [PATCH 07/16] bcachefs: cleanup redundant code around data_update_op
 initialization

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/data_update.c | 35 +++++++++++++--
 fs/bcachefs/data_update.h |  6 +--
 fs/bcachefs/io_read.c     | 92 ++++++++++++------------------------
 fs/bcachefs/move.c        | 45 +++----------------
 4 files changed, 73 insertions(+), 105 deletions(-)

diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index f5b7e6050f40..5873abf0a0b2 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -20,6 +20,8 @@
 #include "subvolume.h"
 #include "trace.h"
 
+#include <linux/ioprio.h>
+
 static void bkey_put_dev_refs(struct bch_fs *c, struct bkey_s_c k)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -427,16 +429,15 @@ int bch2_data_update_index_update(struct bch_write_op *op)
 	return bch2_trans_run(op->c, __bch2_data_update_index_update(trans, op));
 }
 
-void bch2_data_update_read_done(struct data_update *m,
-				struct bch_extent_crc_unpacked crc)
+void bch2_data_update_read_done(struct data_update *m)
 {
 	m->read_done = true;
 
 	/* write bio must own pages: */
 	BUG_ON(!m->op.wbio.bio.bi_vcnt);
 
-	m->op.crc = crc;
-	m->op.wbio.bio.bi_iter.bi_size = crc.compressed_size << 9;
+	m->op.crc = m->rbio.pick.crc;
+	m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9;
 
 	this_cpu_add(m->op.c->counters[BCH_COUNTER_move_extent_write], m->k.k->k.size);
 
@@ -454,6 +455,8 @@ void bch2_data_update_exit(struct data_update *update)
 	bch2_bkey_buf_exit(&update->k, c);
 	bch2_disk_reservation_put(c, &update->op.res);
 	bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
+	kfree(update->bvecs);
+	update->bvecs = NULL;
 }
 
 static int bch2_update_unwritten_extent(struct btree_trans *trans,
@@ -779,7 +782,31 @@ int bch2_data_update_init(struct btree_trans *trans,
 		goto out;
 	}
 
+	/* write path might have to decompress data: */
+	unsigned buf_bytes = 0;
+	bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+		buf_bytes = max_t(unsigned, buf_bytes, p.crc.uncompressed_size << 9);
+
+	unsigned nr_vecs = DIV_ROUND_UP(buf_bytes, PAGE_SIZE);
+
+	m->bvecs = kmalloc_array(nr_vecs, sizeof*(m->bvecs), GFP_KERNEL);
+	if (!m->bvecs)
+		goto enomem;
+
+	bio_init(&m->rbio.bio,	NULL, m->bvecs, nr_vecs, REQ_OP_READ);
+	bio_init(&m->op.wbio.bio, NULL, m->bvecs, nr_vecs, 0);
+
+	if (bch2_bio_alloc_pages(&m->op.wbio.bio, buf_bytes, GFP_KERNEL))
+		goto enomem;
+
+	rbio_init(&m->rbio.bio, c, io_opts, NULL);
+	m->rbio.bio.bi_iter.bi_size	= buf_bytes;
+	m->rbio.bio.bi_iter.bi_sector	= bkey_start_offset(k.k);
+	m->op.wbio.bio.bi_ioprio	= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
+
 	return 0;
+enomem:
+	ret = -ENOMEM;
 out:
 	bch2_data_update_exit(m);
 	return ret ?: -BCH_ERR_data_update_done;
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index d1dd4819a8a3..f4cf5d17cc37 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -32,8 +32,7 @@ struct data_update {
 
 	struct bch_read_bio	rbio;
 	struct bch_write_op	op;
-	/* Must be last since it is variable size */
-	struct bio_vec		bi_inline_vecs[];
+	struct bio_vec		*bvecs;
 };
 
 void bch2_data_update_to_text(struct printbuf *, struct data_update *);
@@ -41,8 +40,7 @@ void bch2_data_update_inflight_to_text(struct printbuf *, struct data_update *);
 
 int bch2_data_update_index_update(struct bch_write_op *);
 
-void bch2_data_update_read_done(struct data_update *,
-				struct bch_extent_crc_unpacked);
+void bch2_data_update_read_done(struct data_update *);
 
 int bch2_extent_drop_ptrs(struct btree_trans *,
			  struct btree_iter *,
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 5fcb1947db6e..b00d43c4fdcf 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -125,45 +125,37 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
 	return 0;
 }
 
-static void promote_free(struct bch_fs *c, struct promote_op *op)
+static noinline void promote_free(struct bch_read_bio *rbio)
 {
-	int ret;
+	struct promote_op *op = container_of(rbio, struct promote_op, write.rbio);
+	struct bch_fs *c = rbio->c;
+
+	int ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
+					 bch_promote_params);
+	BUG_ON(ret);
 
 	bch2_data_update_exit(&op->write);
 
-	ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
-				     bch_promote_params);
-	BUG_ON(ret);
 	bch2_write_ref_put(c, BCH_WRITE_REF_promote);
 	kfree_rcu(op, rcu);
 }
 
 static void promote_done(struct bch_write_op *wop)
 {
-	struct promote_op *op =
-		container_of(wop, struct promote_op, write.op);
-	struct bch_fs *c = op->write.op.c;
+	struct promote_op *op = container_of(wop, struct promote_op, write.op);
+	struct bch_fs *c = op->write.rbio.c;
 
-	bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
-			       op->start_time);
-	promote_free(c, op);
+	bch2_time_stats_update(&c->times[BCH_TIME_data_promote], op->start_time);
+	promote_free(&op->write.rbio);
 }
 
-static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
+static noinline void promote_start(struct bch_read_bio *rbio)
 {
-	struct bio *bio = &op->write.op.wbio.bio;
+	struct promote_op *op = container_of(rbio, struct promote_op, write.rbio);
 
 	trace_and_count(op->write.op.c, read_promote, &rbio->bio);
 
-	/* we now own pages: */
-	BUG_ON(!rbio->bounce);
-	BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs);
-
-	memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
-	       sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
-	swap(bio->bi_vcnt, rbio->bio.bi_vcnt);
-
-	bch2_data_update_read_done(&op->write, rbio->pick.crc);
+	bch2_data_update_read_done(&op->write);
 }
 
 static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
@@ -176,15 +168,12 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
				struct bch_io_failures *failed)
 {
 	struct bch_fs *c = trans->c;
-	struct promote_op *op = NULL;
-	struct bio *bio;
-	unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
 	int ret;
 
 	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
 		return ERR_PTR(-BCH_ERR_nopromote_no_writes);
 
-	op = kzalloc(struct_size(op, bi_inline_vecs, pages), GFP_KERNEL);
+	struct promote_op *op = kzalloc(sizeof(*op), GFP_KERNEL);
 	if (!op) {
 		ret = -BCH_ERR_nopromote_enomem;
 		goto err_put;
 	}
@@ -193,29 +182,12 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
 	op->start_time = local_clock();
 	op->pos = pos;
 
-	rbio_init_fragment(&op->write.rbio.bio, orig);
-	bio_init(&op->write.rbio.bio,
-		 NULL,
-		 op->write.bi_inline_vecs,
-		 pages, 0);
-
-	if (bch2_bio_alloc_pages(&op->write.rbio.bio, sectors << 9, GFP_KERNEL)) {
-		ret = -BCH_ERR_nopromote_enomem;
-		goto err;
-	}
-
-	op->write.rbio.bounce	= true;
-	op->write.rbio.promote	= true;
-
 	if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
					  bch_promote_params)) {
 		ret = -BCH_ERR_nopromote_in_flight;
 		goto err;
 	}
 
-	bio = &op->write.op.wbio.bio;
-	bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
-
 	struct data_update_opts update_opts = {};
 
 	if (!have_io_error(failed)) {
@@ ... @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
	 * possible errors: -BCH_ERR_nocow_lock_blocked,
	 * -BCH_ERR_ENOSPC_disk_reservation:
	 */
-	if (ret) {
-		BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
-					      bch_promote_params));
-		goto err;
-	}
+	if (ret)
+		goto err_remove_hash;
 
+	rbio_init_fragment(&op->write.rbio.bio, orig);
+	op->write.rbio.bounce	= true;
+	op->write.rbio.promote	= true;
 	op->write.op.end_io = promote_done;
 
 	return &op->write.rbio;
+err_remove_hash:
+	BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
+				      bch_promote_params));
 err:
-	bio_free_pages(&op->write.rbio.bio);
+	bio_free_pages(&op->write.op.wbio.bio);
 	/* We may have added to the rhashtable and thus need rcu freeing: */
 	kfree_rcu(op, rcu);
 err_put:
 	bch2_write_ref_put(c, BCH_WRITE_REF_promote);
 	return ERR_PTR(ret);
 }
@@ -363,15 +339,11 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
 	if (rbio->split) {
 		struct bch_read_bio *parent = rbio->parent;
 
-		if (rbio->promote) {
-			struct promote_op *op = container_of(rbio, struct promote_op, write.rbio);
-
-			if (!rbio->bio.bi_status) {
-				promote_start(op, rbio);
-			} else {
-				bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
-				promote_free(rbio->c, op);
-			}
+		if (unlikely(rbio->promote)) {
+			if (!rbio->bio.bi_status)
+				promote_start(rbio);
+			else
+				promote_free(rbio);
 		} else {
			if (rbio->bounce)
				bch2_bio_free_pages_pool(rbio->c, &rbio->bio);
@@ -938,11 +910,13 @@ retry_pick:
 	}
 
 	if (flags & BCH_READ_data_update) {
+		struct data_update *u = container_of(orig, struct data_update, rbio);
+
 		/*
		 * can happen if we retry, and the extent we were going to read
		 * has been merged in the meantime:
		 */
-		if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS) {
+		if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) {
			if (ca)
				percpu_ref_put(&ca->io_ref);
			goto hole;
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 03f071827c5c..d825493cac25 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -127,7 +127,7 @@ static void move_write(struct moving_io *io)
 	atomic_add(io->write_sectors, &io->write.ctxt->write_sectors);
 	atomic_inc(&io->write.ctxt->write_ios);
 
-	bch2_data_update_read_done(&io->write, io->write.rbio.pick.crc);
+	bch2_data_update_read_done(&io->write);
 }
 
 struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *ctxt)
@@ -253,11 +253,6 @@ int bch2_move_extent(struct moving_context *ctxt,
 {
 	struct btree_trans *trans = ctxt->trans;
 	struct bch_fs *c = trans->c;
-	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-	struct moving_io *io;
-	const union bch_extent_entry *entry;
-	struct extent_ptr_decoded p;
-	unsigned sectors = k.k->size, pages;
 	int ret = -ENOMEM;
 
 	trace_move_extent2(c, k, &io_opts, &data_opts);
@@ -280,13 +275,7 @@ int bch2_move_extent(struct moving_context *ctxt,
	 */
 	bch2_trans_unlock(trans);
 
-	/* write path might have to decompress data: */
-	bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
-		sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);
-
-	pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
-	io = kzalloc(sizeof(struct moving_io) +
-		     sizeof(struct bio_vec) * pages, GFP_KERNEL);
+	struct moving_io *io = kzalloc(sizeof(struct moving_io), GFP_KERNEL);
 	if (!io)
 		goto err;
 
@@ -295,31 +284,13 @@ int bch2_move_extent(struct moving_context *ctxt,
 	io->read_sectors	= k.k->size;
 	io->write_sectors	= k.k->size;
 
-	bio_init(&io->write.op.wbio.bio, NULL, io->write.bi_inline_vecs, pages, 0);
-	io->write.op.wbio.bio.bi_ioprio =
-		IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
-
-	if (bch2_bio_alloc_pages(&io->write.op.wbio.bio, sectors << 9,
-				 GFP_KERNEL))
-		goto err_free;
-
-	bio_init(&io->write.rbio.bio, NULL, io->write.bi_inline_vecs, pages, 0);
-	io->write.rbio.bio.bi_vcnt = pages;
-	io->write.rbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
-	io->write.rbio.bio.bi_iter.bi_size = sectors << 9;
-
-	io->write.rbio.bio.bi_opf		= REQ_OP_READ;
-	io->write.rbio.bio.bi_iter.bi_sector	= bkey_start_offset(k.k);
-
-	rbio_init(&io->write.rbio.bio,
-		  c,
-		  io_opts,
-		  move_read_endio);
-
 	ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
				    io_opts, data_opts, iter->btree_id, k);
 	if (ret)
-		goto err_free_pages;
+		goto err_free;
+
+	io->write.rbio.bio.bi_end_io	= move_read_endio;
+	io->write.rbio.bio.bi_ioprio	= IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
 
 	io->write.op.end_io = move_write_done;
 
@@ -359,8 +330,6 @@ int bch2_move_extent(struct moving_context *ctxt,
			 BCH_READ_data_update|
			 BCH_READ_last_fragment);
 	return 0;
-err_free_pages:
-	bio_free_pages(&io->write.op.wbio.bio);
 err_free:
 	kfree(io);
 err:
@@ -624,7 +593,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
			if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
				continue;
 
-			if (ret2 == -ENOMEM) {
+			if (bch2_err_matches(ret2, ENOMEM)) {
				/* memory allocation failure, wait for some IO to finish */
				bch2_move_ctxt_wait_for_io(ctxt);
				continue;
-- 
2.51.0

From d0148e7169d5a62e5b03c2ecc3a91e508cb7d9ba Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Thu, 16 Jan 2025 03:43:03 -0500
Subject: [PATCH 08/16] bcachefs: Be stricter in bch2_read_retry_nodecode()

Now that data_update embeds bch_read_bio, BCH_READ_NODECODE means that
the read is embedded in a data_update - and we can check in the retry
path if the extent has changed and bail out.
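
A sketch of that bail-out, quoted from the new bch2_read_retry_nodecode()
in the diff below:

	if (!bkey_and_val_eq(k, bkey_i_to_s_c(u->k.k))) {
		/* extent we wanted to read no longer exists: */
		rbio->hole = true;
		goto err;
	}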
This likely fixes some subtle bugs with read errors and data moves.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/io_read.c | 64 ++++++++++++++++---------------------------
 1 file changed, 24 insertions(+), 40 deletions(-)

diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index b00d43c4fdcf..829a4f1b2f03 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -369,61 +369,47 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
 	bio_endio(&rbio->bio);
 }
 
-static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
+static noinline void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
				      struct bvec_iter bvec_iter,
				      struct bch_io_failures *failed,
				      unsigned flags)
 {
+	struct data_update *u = container_of(rbio, struct data_update, rbio);
 	struct btree_trans *trans = bch2_trans_get(c);
-	struct btree_iter iter;
-	struct bkey_buf sk;
-	struct bkey_s_c k;
-	int ret;
-
-	flags &= ~BCH_READ_last_fragment;
-	flags |= BCH_READ_must_clone;
-
-	bch2_bkey_buf_init(&sk);
-
-	bch2_trans_iter_init(trans, &iter, rbio->data_btree,
-			     rbio->read_pos, BTREE_ITER_slots);
 retry:
 	bch2_trans_begin(trans);
 
-	rbio->bio.bi_status = 0;
-
-	ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	int ret = lockrestart_do(trans,
+		bkey_err(k = bch2_bkey_get_iter(trans, &iter,
+					u->btree_id, bkey_start_pos(&u->k.k->k),
+					0)));
 	if (ret)
 		goto err;
 
-	bch2_bkey_buf_reassemble(&sk, c, k);
-	k = bkey_i_to_s_c(sk.k);
-
-	if (!bch2_bkey_matches_ptr(c, k,
-				   rbio->pick.ptr,
-				   rbio->data_pos.offset -
-				   rbio->pick.crc.offset)) {
+	if (!bkey_and_val_eq(k, bkey_i_to_s_c(u->k.k))) {
 		/* extent we wanted to read no longer exists: */
 		rbio->hole = true;
-		goto out;
+		goto err;
 	}
 
 	ret = __bch2_read_extent(trans, rbio, bvec_iter,
-				 rbio->read_pos,
-				 rbio->data_btree,
-				 k, 0, failed, flags);
+				 bkey_start_pos(&u->k.k->k),
+				 u->btree_id,
+				 bkey_i_to_s_c(u->k.k),
+				 0, failed, flags);
+err:
+	bch2_trans_iter_exit(trans, &iter);
+
 	if (ret == READ_RETRY)
 		goto retry;
 	if (ret)
-		goto err;
-out:
+		rbio->bio.bi_status = BLK_STS_IOERR;
+
+	BUG_ON(atomic_read(&rbio->bio.__bi_remaining) != 1);
 	bch2_rbio_done(rbio);
-	bch2_trans_iter_exit(trans, &iter);
 	bch2_trans_put(trans);
-	bch2_bkey_buf_exit(&sk, c);
-	return;
-err:
-	rbio->bio.bi_status = BLK_STS_IOERR;
-	goto out;
 }
 
 static void bch2_rbio_retry(struct work_struct *work)
@@ -451,15 +437,13 @@ static void bch2_rbio_retry(struct work_struct *work)
 	flags |= BCH_READ_in_retry;
 	flags &= ~BCH_READ_may_promote;
+	flags &= ~BCH_READ_last_fragment;
+	flags |= BCH_READ_must_clone;
 
-	if (flags & BCH_READ_data_update) {
+	if (flags & BCH_READ_data_update)
 		bch2_read_retry_nodecode(c, rbio, iter, &failed, flags);
-	} else {
-		flags &= ~BCH_READ_last_fragment;
-		flags |= BCH_READ_must_clone;
-
+	else
 		__bch2_read(c, rbio, iter, inum, &failed, flags);
-	}
 }
 
 static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
-- 
2.51.0

From 8ff92a9e4e49fc1fa01e8d23462097ccbe90e3b6 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 19 Jan 2025 13:11:24 -0500
Subject: [PATCH 09/16] bcachefs: Promotes should use BCH_WRITE_only_specified_devs

Promotes, like most other internal moves, should only go to the
specified target and not fall back to allocating from the full
filesystem.
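
For reference, the allocator only looks past the target when this flag is
clear - quoted (abridged) from bch2_alloc_sectors_start_trans() earlier
in this series:

	if (target && !(flags & BCH_WRITE_only_specified_devs)) {
		ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
					      target, erasure_code,
					      nr_replicas, &nr_effective,
					      ...

Without the flag, a promote that can't allocate on the promote target
would spill onto other devices.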
Signed-off-by: Kent Overstreet
---
 fs/bcachefs/io_read.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 829a4f1b2f03..7a66feb1c011 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -194,6 +194,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
 		update_opts.target = orig->opts.promote_target;
 		update_opts.extra_replicas = 1;
 		update_opts.write_flags = BCH_WRITE_alloc_nowait|BCH_WRITE_cached;
+		update_opts.write_flags |= BCH_WRITE_only_specified_devs;
 	} else {
 		update_opts.target = orig->opts.foreground_target;
--
2.51.0

From 29ad31c780d1e4e37244140442aaec41c3efb7d6 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sat, 18 Jan 2025 02:05:57 -0500
Subject: [PATCH 10/16] bcachefs: Self healing writes are BCH_WRITE_alloc_nowait

If a drive is failing and we're moving data off of it, we can't
necessarily depend on capacity/disk reservation calculations to avoid
deadlocking/blocking on the allocator. And, we don't want to queue up
infinite self healing moves anyway.

Signed-off-by: Kent Overstreet
---
 fs/bcachefs/io_read.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 7a66feb1c011..bdb554f6db8c 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -188,12 +188,12 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
 		goto err;
 	}
 
-	struct data_update_opts update_opts = {};
+	struct data_update_opts update_opts = { .write_flags = BCH_WRITE_alloc_nowait };
 
 	if (!have_io_error(failed)) {
 		update_opts.target = orig->opts.promote_target;
 		update_opts.extra_replicas = 1;
-		update_opts.write_flags = BCH_WRITE_alloc_nowait|BCH_WRITE_cached;
+		update_opts.write_flags |= BCH_WRITE_cached;
 		update_opts.write_flags |= BCH_WRITE_only_specified_devs;
 	} else {
 		update_opts.target = orig->opts.foreground_target;
--
2.51.0

From c37d42a0e2be210e4e9b60a5a1092e1f139b64a0 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sun, 19 Jan 2025 13:43:44 -0500
Subject: [PATCH 11/16] bcachefs: Rework init order in bch2_data_update_init()

Initialize the write op first, so that in the next patch we can check
if the allocator would block (for BCH_WRITE_alloc_nowait ops) and bail
out before taking nocow locks/dev refs.
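For illustration, the acquire/unwind shape this rework moves to, as a
rough sketch (reserve_space(), get_dev_refs(), take_nocow_locks() and
their counterparts are hypothetical stand-ins): cheap setup and checks
come first, resources are taken last, and each error label unwinds only
what was actually acquired, in reverse order:

	int reserve_space(struct data_update *);
	int get_dev_refs(struct data_update *);
	int take_nocow_locks(struct data_update *);
	void put_dev_refs(struct data_update *);
	void put_reservation(struct data_update *);

	static int init_order_sketch(struct data_update *m)
	{
		int ret = reserve_space(m);
		if (ret)
			return ret;

		ret = get_dev_refs(m);
		if (ret)
			goto out_put_reservation;

		ret = take_nocow_locks(m);
		if (ret)
			goto out_put_dev_refs;

		return 0;
	out_put_dev_refs:
		put_dev_refs(m);
	out_put_reservation:
		put_reservation(m);
		return ret;
	}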
Signed-off-by: Kent Overstreet
---
 fs/bcachefs/data_update.c | 52 ++++++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 5873abf0a0b2..3e8ad94dca59 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -35,7 +35,7 @@ static bool bkey_get_dev_refs(struct bch_fs *c, struct bkey_s_c k)
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 
 	bkey_for_each_ptr(ptrs, ptr) {
-		if (!bch2_dev_tryget(c, ptr->dev)) {
+		if (unlikely(!bch2_dev_tryget(c, ptr->dev))) {
 			bkey_for_each_ptr(ptrs, ptr2) {
 				if (ptr2 == ptr)
 					break;
@@ -449,14 +449,15 @@ void bch2_data_update_exit(struct data_update *update)
 	struct bch_fs *c = update->op.c;
 	struct bkey_s_c k = bkey_i_to_s_c(update->k.k);
 
+	bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
+	kfree(update->bvecs);
+	update->bvecs = NULL;
+
 	if (c->opts.nocow_enabled)
 		bkey_nocow_unlock(c, k);
 	bkey_put_dev_refs(c, k);
-	bch2_bkey_buf_exit(&update->k, c);
 	bch2_disk_reservation_put(c, &update->op.res);
-	bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
-	kfree(update->bvecs);
-	update->bvecs = NULL;
+	bch2_bkey_buf_exit(&update->k, c);
 }
 
 static int bch2_update_unwritten_extent(struct btree_trans *trans,
@@ -663,15 +664,6 @@ int bch2_data_update_init(struct btree_trans *trans,
 	if (unlikely(k.k->p.snapshot && !bch2_snapshot_exists(c, k.k->p.snapshot)))
 		return -BCH_ERR_data_update_done_no_snapshot;
 
-	if (!bkey_get_dev_refs(c, k))
-		return -BCH_ERR_data_update_done_no_dev_refs;
-
-	if (c->opts.nocow_enabled &&
-	    !bkey_nocow_lock(c, ctxt, k)) {
-		bkey_put_dev_refs(c, k);
-		return -BCH_ERR_nocow_lock_blocked;
-	}
-
 	bch2_bkey_buf_init(&m->k);
 	bch2_bkey_buf_reassemble(&m->k, c, k);
 	m->btree_id = btree_id;
@@ -764,7 +756,7 @@ int bch2_data_update_init(struct btree_trans *trans,
 		ret = bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &m->data_opts);
 		if (!ret)
 			ret = -BCH_ERR_data_update_done_no_writes_needed;
-		goto out;
+		goto out_bkey_buf_exit;
 	}
 
 	if (reserve_sectors) {
@@ -773,13 +765,24 @@ int bch2_data_update_init(struct btree_trans *trans,
 		ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
 				m->data_opts.extra_replicas
 				? 0
 				: BCH_DISK_RESERVATION_NOFAIL);
 		if (ret)
-			goto out;
+			goto out_bkey_buf_exit;
+	}
+
+	if (!bkey_get_dev_refs(c, k)) {
+		ret = -BCH_ERR_data_update_done_no_dev_refs;
+		goto out_put_disk_res;
+	}
+
+	if (c->opts.nocow_enabled &&
+	    !bkey_nocow_lock(c, ctxt, k)) {
+		ret = -BCH_ERR_nocow_lock_blocked;
+		goto out_put_dev_refs;
 	}
 
 	if (bkey_extent_is_unwritten(k)) {
 		ret = bch2_update_unwritten_extent(trans, m) ?:
 			-BCH_ERR_data_update_done_unwritten;
-		goto out;
+		goto out_nocow_unlock;
 	}
 
 	/* write path might have to decompress data: */
@@ -807,9 +810,18 @@ int bch2_data_update_init(struct btree_trans *trans,
 	return 0;
 enomem:
 	ret = -ENOMEM;
-out:
-	bch2_data_update_exit(m);
-	return ret ?: -BCH_ERR_data_update_done;
+	kfree(m->bvecs);
+	m->bvecs = NULL;
+out_nocow_unlock:
+	if (c->opts.nocow_enabled)
+		bkey_nocow_unlock(c, k);
+out_put_dev_refs:
+	bkey_put_dev_refs(c, k);
+out_put_disk_res:
+	bch2_disk_reservation_put(c, &m->op.res);
+out_bkey_buf_exit:
+	bch2_bkey_buf_exit(&m->k, c);
+	return ret;
 }
 
 void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
--
2.51.0

From 7e9ed60f5fe58dd4b4b6dcf63e57154c6262a2af Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sun, 19 Jan 2025 13:55:33 -0500
Subject: [PATCH 12/16] bcachefs: Bail out early on alloc_nowait data updates

If a data update doesn't want to block on allocations (promotes, self
healing on read error) - check if the allocation would fail before
kicking off the data update and calling into the write path.

Signed-off-by: Kent Overstreet
---
 fs/bcachefs/alloc_foreground.c | 19 +---------------
 fs/bcachefs/alloc_foreground.h | 17 +++++++++++++++
 fs/bcachefs/data_update.c      | 40 ++++++++++++++++++++++++++++++++++
 fs/bcachefs/errcode.h          |  1 +
 4 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 1a539e7bedc8..1759c15a7745 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -179,23 +179,6 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
 	closure_wake_up(&c->freelist_wait);
 }
 
-static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
-{
-	switch (watermark) {
-	case BCH_WATERMARK_interior_updates:
-		return 0;
-	case BCH_WATERMARK_reclaim:
-		return OPEN_BUCKETS_COUNT / 6;
-	case BCH_WATERMARK_btree:
-	case BCH_WATERMARK_btree_copygc:
-		return OPEN_BUCKETS_COUNT / 4;
-	case BCH_WATERMARK_copygc:
-		return OPEN_BUCKETS_COUNT / 3;
-	default:
-		return OPEN_BUCKETS_COUNT / 2;
-	}
-}
-
 static inline bool may_alloc_bucket(struct bch_fs *c,
 				    struct bpos bucket,
 				    struct bucket_alloc_state *s)
@@ -239,7 +222,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
 
 	spin_lock(&c->freelist_lock);
 
-	if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(watermark))) {
+	if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark))) {
 		if (cl)
 			closure_wait(&c->open_buckets_wait, cl);
 
diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h
index f25481a0d1a0..baf5dc163c8a 100644
--- a/fs/bcachefs/alloc_foreground.h
+++ b/fs/bcachefs/alloc_foreground.h
@@ -33,6 +33,23 @@ static inline struct bch_dev *ob_dev(struct bch_fs *c, struct open_bucket *ob)
 	return bch2_dev_have_ref(c, ob->dev);
 }
 
+static inline unsigned bch2_open_buckets_reserved(enum bch_watermark watermark)
+{
+	switch (watermark) {
+	case BCH_WATERMARK_interior_updates:
+		return 0;
+	case BCH_WATERMARK_reclaim:
+		return OPEN_BUCKETS_COUNT / 6;
+	case BCH_WATERMARK_btree:
+	case BCH_WATERMARK_btree_copygc:
+		return OPEN_BUCKETS_COUNT / 4;
+	case BCH_WATERMARK_copygc:
+		return OPEN_BUCKETS_COUNT / 3;
+	default:
+		return OPEN_BUCKETS_COUNT / 2;
+	}
+}
+
 struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *,
 				      enum bch_watermark, enum bch_data_type,
 				      struct closure *);
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 3e8ad94dca59..ec63dd494c80 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -639,6 +639,40 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
 		bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
 }
 
+static bool can_allocate_without_blocking(struct bch_fs *c,
+					  struct data_update *m)
+{
+	if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(m->op.watermark)))
+		return false;
+
+	unsigned target = m->op.flags & BCH_WRITE_only_specified_devs
+		? m->op.target
+		: 0;
+	struct bch_devs_mask devs = target_rw_devs(c, BCH_DATA_user, target);
+
+	darray_for_each(m->op.devs_have, i)
+		__clear_bit(*i, devs.d);
+
+	rcu_read_lock();
+	unsigned nr_replicas = 0, i;
+	for_each_set_bit(i, devs.d, BCH_SB_MEMBERS_MAX) {
+		struct bch_dev *ca = bch2_dev_rcu(c, i);
+
+		struct bch_dev_usage usage;
+		bch2_dev_usage_read_fast(ca, &usage);
+
+		if (!dev_buckets_free(ca, usage, m->op.watermark))
+			continue;
+
+		nr_replicas += ca->mi.durability;
+		if (nr_replicas >= m->op.nr_replicas)
+			break;
+	}
+	rcu_read_unlock();
+
+	return nr_replicas >= m->op.nr_replicas;
+}
+
 int bch2_data_update_init(struct btree_trans *trans,
 			  struct btree_iter *iter,
 			  struct moving_context *ctxt,
@@ -759,6 +793,12 @@ int bch2_data_update_init(struct btree_trans *trans,
 		goto out_bkey_buf_exit;
 	}
 
+	if ((m->op.flags & BCH_WRITE_alloc_nowait) &&
+	    !can_allocate_without_blocking(c, m)) {
+		ret = -BCH_ERR_data_update_done_would_block;
+		goto out_bkey_buf_exit;
+	}
+
 	if (reserve_sectors) {
 		ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
 				m->data_opts.extra_replicas
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 82f950ea1c26..1e8f65f95d60 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -181,6 +181,7 @@
 	x(EINVAL,			not_in_recovery)		\
 	x(EINVAL,			cannot_rewind_recovery)		\
 	x(0,				data_update_done)		\
+	x(BCH_ERR_data_update_done,	data_update_done_would_block)	\
 	x(BCH_ERR_data_update_done,	data_update_done_unwritten)	\
 	x(BCH_ERR_data_update_done,	data_update_done_no_writes_needed) \
 	x(BCH_ERR_data_update_done,	data_update_done_no_snapshot)	\
--
2.51.0

From 4dfb76e0ad22d959ecb477f1e982500047ce38a4 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Fri, 17 Jan 2025 19:26:10 -0500
Subject: [PATCH 13/16] bcachefs: Don't start promotes from bch2_rbio_free()

We don't want to block completion of the read - starting a promote
calls into the write path, which will block.
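The shape of the fix, as a rough sketch (everything except the
workqueue API is a hypothetical stand-in; the real version is
promote_start_work() in the hunk below): the completion path only
queues work, and the worker makes the call that may block.

	#include <linux/workqueue.h>

	struct deferred_promote {
		struct work_struct work;
		/* ... payload ... */
	};

	void start_blocking_write(struct deferred_promote *);	/* hypothetical */

	static void deferred_promote_work(struct work_struct *work)
	{
		struct deferred_promote *p =
			container_of(work, struct deferred_promote, work);

		/* process context: safe to call into the blocking write path */
		start_blocking_write(p);
	}

	/* called from read completion: must not block */
	static void read_done_sketch(struct deferred_promote *p)
	{
		INIT_WORK(&p->work, deferred_promote_work);
		queue_work(system_wq, &p->work);	/* real code uses c->write_ref_wq */
	}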
Signed-off-by: Kent Overstreet
---
 fs/bcachefs/io_read.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index bdb554f6db8c..15494aba4547 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -80,6 +80,7 @@ struct promote_op {
 	struct rhash_head hash;
 	struct bpos pos;
 
+	struct work_struct work;
 	struct data_update write;
 	struct bio_vec bi_inline_vecs[]; /* must be last */
 };
@@ -149,13 +150,21 @@ static void promote_done(struct bch_write_op *wop)
 	promote_free(&op->write.rbio);
 }
 
+static void promote_start_work(struct work_struct *work)
+{
+	struct promote_op *op = container_of(work, struct promote_op, work);
+
+	bch2_data_update_read_done(&op->write);
+}
+
 static noinline void promote_start(struct bch_read_bio *rbio)
 {
 	struct promote_op *op = container_of(rbio, struct promote_op, write.rbio);
 
 	trace_and_count(op->write.op.c, read_promote, &rbio->bio);
 
-	bch2_data_update_read_done(&op->write);
+	INIT_WORK(&op->work, promote_start_work);
+	queue_work(rbio->c->write_ref_wq, &op->work);
 }
 
 static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
--
2.51.0

From 7b1d6551060066a1fed2a1f83485b0ea37ca3001 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sun, 19 Jan 2025 20:34:57 -0500
Subject: [PATCH 14/16] bcachefs: Don't self-heal if a data update is already
 rewriting

Signed-off-by: Kent Overstreet
---
 fs/bcachefs/io_read.c | 68 ++++++++++++++++++++++++++++++-------------
 1 file changed, 48 insertions(+), 20 deletions(-)

diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 15494aba4547..bb5d1de25aa1 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -97,6 +97,26 @@ static inline bool have_io_error(struct bch_io_failures *failed)
 	return failed && failed->nr;
 }
 
+static bool ptr_being_rewritten(struct bch_read_bio *orig,
+				unsigned dev,
+				unsigned flags)
+{
+	if (!(flags & BCH_READ_data_update))
+		return false;
+
+	struct data_update *u = container_of(orig, struct data_update, rbio);
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(u->k.k));
+	unsigned i = 0;
+	bkey_for_each_ptr(ptrs, ptr) {
+		if (ptr->dev == dev &&
+		    u->data_opts.rewrite_ptrs & BIT(i))
+			return true;
+		i++;
+	}
+
+	return false;
+}
+
 static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
 				 struct bpos pos,
 				 struct bch_io_opts opts,
@@ -173,30 +193,13 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
 					    struct bpos pos,
 					    struct extent_ptr_decoded *pick,
 					    unsigned sectors,
+					    unsigned flags,
 					    struct bch_read_bio *orig,
 					    struct bch_io_failures *failed)
 {
 	struct bch_fs *c = trans->c;
 	int ret;
 
-	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
-		return ERR_PTR(-BCH_ERR_nopromote_no_writes);
-
-	struct promote_op *op = kzalloc(sizeof(*op), GFP_KERNEL);
-	if (!op) {
-		ret = -BCH_ERR_nopromote_enomem;
-		goto err_put;
-	}
-
-	op->start_time = local_clock();
-	op->pos = pos;
-
-	if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
-					  bch_promote_params)) {
-		ret = -BCH_ERR_nopromote_in_flight;
-		goto err;
-	}
-
 	struct data_update_opts update_opts = { .write_flags = BCH_WRITE_alloc_nowait };
 
 	if (!have_io_error(failed)) {
@@ -210,10 +213,32 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
 		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 		unsigned ptr_bit = 1;
 		bkey_for_each_ptr(ptrs, ptr) {
-			if (bch2_dev_io_failures(failed, ptr->dev))
+			if (bch2_dev_io_failures(failed, ptr->dev) &&
+			    !ptr_being_rewritten(orig, ptr->dev, flags))
 				update_opts.rewrite_ptrs |= ptr_bit;
 			ptr_bit <<= 1;
 		}
+
+		if (!update_opts.rewrite_ptrs)
+			return NULL;
+	}
+
+	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
+		return ERR_PTR(-BCH_ERR_nopromote_no_writes);
+
+	struct promote_op *op = kzalloc(sizeof(*op), GFP_KERNEL);
+	if (!op) {
+		ret = -BCH_ERR_nopromote_enomem;
+		goto err_put;
+	}
+
+	op->start_time = local_clock();
+	op->pos = pos;
+
+	if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
+					  bch_promote_params)) {
+		ret = -BCH_ERR_nopromote_in_flight;
+		goto err;
 	}
 
 	ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
@@ -283,7 +308,10 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
 				  k.k->type == KEY_TYPE_reflink_v
 				  ? BTREE_ID_reflink
 				  : BTREE_ID_extents,
-				  k, pos, pick, sectors, orig, failed);
+				  k, pos, pick, sectors, flags, orig, failed);
+	if (!promote)
+		return NULL;
+
 	ret = PTR_ERR_OR_ZERO(promote);
 	if (ret)
 		goto nopromote;
--
2.51.0

From dff6de9518848b5afa0bc6fec57e657701be67ec Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Mon, 30 Dec 2024 16:32:57 -0500
Subject: [PATCH 15/16] bcachefs: Internal reads can now correct errors

Rework the read path so that BCH_READ_NODECODE reads now also self-heal
after a read error and a successful retry - prerequisite for scrub.

- __bch2_read_endio() now handles a read that's both BCH_READ_NODECODE
  and a bounce.

  Normally, we don't want a BCH_READ_NODECODE read to ever allocate a
  split bch_read_bio: we want to maintain the relationship between the
  bch_read_bio and the data_update it's embedded in.

  But correcting read errors requires allocating a split/bounce rbio
  that's embedded in a promote_op. We do still have a 1-1 relationship,
  i.e. we only allocate a single split/bounce if it's a
  BCH_READ_NODECODE, so things hopefully don't get too crazy.

- __bch2_read_extent() is now allowed to allocate the promote_op for
  rewriting after a failed read, even if it's BCH_READ_NODECODE.
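For illustration, the split/bounce completion hand-off described above,
as a rough sketch with simplified, hypothetical types (compare the
rbio->parent->pick = rbio->pick hunk below): the clone copies both the
data and the extent pointer it actually read back into the parent rbio
embedded in the data_update, so retry and self-healing know which
replica was used.

	#include <string.h>

	struct rbio_sketch {
		struct rbio_sketch *parent;
		int pick;		/* stand-in for struct extent_ptr_decoded */
		char *buf;		/* destination / bounce buffer */
		unsigned len;
	};

	static void bounce_read_done(struct rbio_sketch *split)
	{
		struct rbio_sketch *parent = split->parent;

		parent->pick = split->pick;	/* remember which replica we read */
		memcpy(parent->buf, split->buf, parent->len);	/* bounce -> dest */
	}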
Signed-off-by: Kent Overstreet
---
 fs/bcachefs/io_read.c | 108 ++++++++++++++++++++++--------------------
 1 file changed, 56 insertions(+), 52 deletions(-)

diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index bb5d1de25aa1..18c8e54f455e 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -696,32 +696,40 @@ static void __bch2_read_endio(struct work_struct *work)
 	if (unlikely(rbio->narrow_crcs))
 		bch2_rbio_narrow_crcs(rbio);
 
-	if (rbio->flags & BCH_READ_data_update)
-		goto nodecode;
-
-	/* Adjust crc to point to subset of data we want: */
-	crc.offset += rbio->offset_into_extent;
-	crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
+	if (likely(!(rbio->flags & BCH_READ_data_update))) {
+		/* Adjust crc to point to subset of data we want: */
+		crc.offset += rbio->offset_into_extent;
+		crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
+
+		if (crc_is_compressed(crc)) {
+			ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+			if (ret)
+				goto decrypt_err;
+
+			if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
+			    !c->opts.no_data_io)
+				goto decompression_err;
+		} else {
+			/* don't need to decrypt the entire bio: */
+			nonce = nonce_add(nonce, crc.offset << 9);
+			bio_advance(src, crc.offset << 9);
-	if (crc_is_compressed(crc)) {
-		ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
-		if (ret)
-			goto decrypt_err;
+			BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
+			src->bi_iter.bi_size = dst_iter.bi_size;
-		if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
-		    !c->opts.no_data_io)
-			goto decompression_err;
-	} else {
-		/* don't need to decrypt the entire bio: */
-		nonce = nonce_add(nonce, crc.offset << 9);
-		bio_advance(src, crc.offset << 9);
+			ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+			if (ret)
+				goto decrypt_err;
-		BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
-		src->bi_iter.bi_size = dst_iter.bi_size;
+			if (rbio->bounce) {
+				struct bvec_iter src_iter = src->bi_iter;
-		ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
-		if (ret)
-			goto decrypt_err;
+				bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+			}
+		}
+	} else {
+		if (rbio->split)
+			rbio->parent->pick = rbio->pick;
 
 	if (rbio->bounce) {
 		struct bvec_iter src_iter = src->bi_iter;
@@ -739,7 +747,7 @@ static void __bch2_read_endio(struct work_struct *work)
 		if (ret)
 			goto decrypt_err;
 	}
-nodecode:
+
 	if (likely(!(rbio->flags & BCH_READ_in_retry))) {
 		rbio = bch2_rbio_free(rbio);
 		bch2_rbio_done(rbio);
@@ -931,13 +939,35 @@ retry_pick:
 		goto retry_pick;
 	}
 
-	if (flags & BCH_READ_data_update) {
-		struct data_update *u = container_of(orig, struct data_update, rbio);
+	if (!(flags & BCH_READ_data_update)) {
+		if (!(flags & BCH_READ_last_fragment) ||
+		    bio_flagged(&orig->bio, BIO_CHAIN))
+			flags |= BCH_READ_must_clone;
+
+		narrow_crcs = !(flags & BCH_READ_in_retry) &&
+			bch2_can_narrow_extent_crcs(k, pick.crc);
+
+		if (narrow_crcs && (flags & BCH_READ_user_mapped))
+			flags |= BCH_READ_must_bounce;
+		EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
+
+		if (crc_is_compressed(pick.crc) ||
+		    (pick.crc.csum_type != BCH_CSUM_none &&
+		     (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
+		      (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
+		       (flags & BCH_READ_user_mapped)) ||
+		      (flags & BCH_READ_must_bounce)))) {
+			read_full = true;
+			bounce = true;
+		}
+	} else {
+		read_full = true;
 		/*
 		 * can happen if we retry, and the extent we were going to read
 		 * has been merged in the meantime:
 		 */
+		struct data_update *u = container_of(orig, struct data_update, rbio);
 		if (pick.crc.compressed_size >
 		    u->op.wbio.bio.bi_iter.bi_size) {
 			if (ca)
 				percpu_ref_put(&ca->io_ref);
@@ -945,29 +975,6 @@ retry_pick:
 	}
 
 	iter.bi_size = pick.crc.compressed_size << 9;
-	goto get_bio;
-	}
-
-	if (!(flags & BCH_READ_last_fragment) ||
-	    bio_flagged(&orig->bio, BIO_CHAIN))
-		flags |= BCH_READ_must_clone;
-
-	narrow_crcs = !(flags & BCH_READ_in_retry) &&
-		bch2_can_narrow_extent_crcs(k, pick.crc);
-
-	if (narrow_crcs && (flags & BCH_READ_user_mapped))
-		flags |= BCH_READ_must_bounce;
-
-	EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
-
-	if (crc_is_compressed(pick.crc) ||
-	    (pick.crc.csum_type != BCH_CSUM_none &&
-	     (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
-	      (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
-	       (flags & BCH_READ_user_mapped)) ||
-	      (flags & BCH_READ_must_bounce)))) {
-		read_full = true;
-		bounce = true;
-	}
 
 	if (orig->opts.promote_target || have_io_error(failed))
@@ -991,7 +998,7 @@ retry_pick:
 		pick.crc.offset = 0;
 		pick.crc.live_size = bvec_iter_sectors(iter);
 	}
-get_bio:
+
 	if (rbio) {
 		/*
 		 * promote already allocated bounce rbio:
@@ -1055,9 +1062,6 @@ get_bio:
 	rbio->version = k.k->bversion;
 	INIT_WORK(&rbio->work, NULL);
 
-	if (flags & BCH_READ_data_update)
-		orig->pick = pick;
-
 	rbio->bio.bi_opf = orig->bio.bi_opf;
 	rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
 	rbio->bio.bi_end_io = bch2_read_endio;
--
2.51.0

From ca16fa6b860fe35ba97dc28bb1792b02767c01de Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sat, 28 Dec 2024 16:20:38 -0500
Subject: [PATCH 16/16] bcachefs: backpointer_get_key() doesn't pull in btree
 node

We may not need to pull in a btree node when walking backpointers -
don't do so unnecessarily when using backpointer_get_key().

It'll still fall back to backpointer_get_node() in a few situations,
including btree roots (where an iterator can't point at just the key),
and races due to the interior update path not having deleted a
backpointer to an old node yet.

Signed-off-by: Kent Overstreet
---
 fs/bcachefs/backpointers.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 1d30066e63dc..3aff2b24de4a 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -244,27 +244,31 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
 	if (unlikely(bp.v->btree_id >= btree_id_nr_alive(c)))
 		return bkey_s_c_null;
 
-	if (likely(!bp.v->level)) {
-		bch2_trans_node_iter_init(trans, iter,
-					  bp.v->btree_id,
-					  bp.v->pos,
-					  0, 0,
-					  iter_flags);
-		struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
-		if (bkey_err(k)) {
-			bch2_trans_iter_exit(trans, iter);
-			return k;
-		}
+	bch2_trans_node_iter_init(trans, iter,
+				  bp.v->btree_id,
+				  bp.v->pos,
+				  0,
+				  bp.v->level,
+				  iter_flags);
+	struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
+	if (bkey_err(k)) {
+		bch2_trans_iter_exit(trans, iter);
+		return k;
+	}
 
-		if (k.k &&
-		    extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp))
-			return k;
+	if (k.k &&
+	    extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp))
+		return k;
 
-		bch2_trans_iter_exit(trans, iter);
+	bch2_trans_iter_exit(trans, iter);
+
+	if (!bp.v->level) {
 		int ret = backpointer_target_not_found(trans, bp, k, last_flushed);
 		return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
 	} else {
 		struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed);
+		if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node))
+			return bkey_s_c_null;
 		if (IS_ERR_OR_NULL(b))
 			return ((struct bkey_s_c) { .k = ERR_CAST(b) });
--
2.51.0
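For illustration, the lookup strategy the final patch ends up with, as
a rough sketch (all types and helpers here are hypothetical stand-ins
for the btree iterator API): peek at the key level recorded in the
backpointer first, and only fall back to fetching the whole node when
that doesn't pan out:

	struct bp_sketch { unsigned btree_id, level; unsigned long pos; };
	struct key_sketch;

	struct key_sketch *peek_slot(struct bp_sketch *);	/* key-level lookup */
	int key_matches_bp(struct key_sketch *, struct bp_sketch *);
	struct key_sketch *get_node_fallback(struct bp_sketch *);

	static struct key_sketch *get_key_sketch(struct bp_sketch *bp)
	{
		struct key_sketch *k = peek_slot(bp);

		if (k && key_matches_bp(k, bp))
			return k;	/* fast path: no btree node pulled in */

		/* btree roots and stale backpointers still need the node */
		return get_node_fallback(bp);
	}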