www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
bcachefs: Self healing on read IO error
author: Kent Overstreet <kent.overstreet@linux.dev>
Fri, 28 Jun 2024 17:28:30 +0000 (13:28 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 14 Jul 2024 23:00:16 +0000 (19:00 -0400)
This repurposes the promote path, which already knows how to call
data_update() after a read: we now automatically rewrite bad data when
we get a read error and then successfully retry from a different
replica.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/extents.c
fs/bcachefs/extents.h
fs/bcachefs/io_read.c

index 057df38fccf8c88924fd132878bfdf5998f74940..07973198e35fb72aa20ba1de1a2f327a089745a6 100644 (file)
@@ -37,8 +37,8 @@ static void bch2_extent_crc_pack(union bch_extent_crc *,
                                 struct bch_extent_crc_unpacked,
                                 enum bch_extent_entry_type);
 
-static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
-                                                  unsigned dev)
+struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *f,
+                                                unsigned dev)
 {
        struct bch_dev_io_failures *i;
 
@@ -52,7 +52,7 @@ static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
 void bch2_mark_io_failure(struct bch_io_failures *failed,
                          struct extent_ptr_decoded *p)
 {
-       struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
+       struct bch_dev_io_failures *f = bch2_dev_io_failures(failed, p->ptr.dev);
 
        if (!f) {
                BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
@@ -140,7 +140,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
                if (p.ptr.cached && (!ca || dev_ptr_stale_rcu(ca, &p.ptr)))
                        continue;
 
-               f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
+               f = failed ? bch2_dev_io_failures(failed, p.ptr.dev) : NULL;
                if (f)
                        p.idx = f->nr_failed < f->nr_retries
                                ? f->idx
index 530686aa6fd9805efd8a976d4b23beab7da5d856..facdb8a86eec849c90899d5cc33e910d194001f6 100644 (file)
@@ -399,6 +399,8 @@ out:                                                                        \
 
 /* utility code common to all keys with pointers: */
 
+struct bch_dev_io_failures *bch2_dev_io_failures(struct bch_io_failures *,
+                                                unsigned);
 void bch2_mark_io_failure(struct bch_io_failures *,
                          struct extent_ptr_decoded *);
 int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c,
index ebf39ef72fb2c6243bb20496a57bf6eb786d37d4..8b484c75757c1d070def1fc65add599b2ba0691a 100644 (file)
@@ -93,21 +93,24 @@ static const struct rhashtable_params bch_promote_params = {
 static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
                                  struct bpos pos,
                                  struct bch_io_opts opts,
-                                 unsigned flags)
+                                 unsigned flags,
+                                 struct bch_io_failures *failed)
 {
-       BUG_ON(!opts.promote_target);
+       if (!failed) {
+               BUG_ON(!opts.promote_target);
 
-       if (!(flags & BCH_READ_MAY_PROMOTE))
-               return -BCH_ERR_nopromote_may_not;
+               if (!(flags & BCH_READ_MAY_PROMOTE))
+                       return -BCH_ERR_nopromote_may_not;
 
-       if (bch2_bkey_has_target(c, k, opts.promote_target))
-               return -BCH_ERR_nopromote_already_promoted;
+               if (bch2_bkey_has_target(c, k, opts.promote_target))
+                       return -BCH_ERR_nopromote_already_promoted;
 
-       if (bkey_extent_is_unwritten(k))
-               return -BCH_ERR_nopromote_unwritten;
+               if (bkey_extent_is_unwritten(k))
+                       return -BCH_ERR_nopromote_unwritten;
 
-       if (bch2_target_congested(c, opts.promote_target))
-               return -BCH_ERR_nopromote_congested;
+               if (bch2_target_congested(c, opts.promote_target))
+                       return -BCH_ERR_nopromote_congested;
+       }
 
        if (rhashtable_lookup_fast(&c->promote_table, &pos,
                                   bch_promote_params))
@@ -164,7 +167,8 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
                                          struct extent_ptr_decoded *pick,
                                          struct bch_io_opts opts,
                                          unsigned sectors,
-                                         struct bch_read_bio **rbio)
+                                         struct bch_read_bio **rbio,
+                                         struct bch_io_failures *failed)
 {
        struct bch_fs *c = trans->c;
        struct promote_op *op = NULL;
@@ -217,14 +221,28 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
        bio = &op->write.op.wbio.bio;
        bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
 
+       struct data_update_opts update_opts = {};
+
+       if (!failed) {
+               update_opts.target = opts.promote_target;
+               update_opts.extra_replicas = 1;
+               update_opts.write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED;
+       } else {
+               update_opts.target = opts.foreground_target;
+
+               struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+               unsigned i = 0;
+               bkey_for_each_ptr(ptrs, ptr) {
+                       if (bch2_dev_io_failures(failed, ptr->dev))
+                               update_opts.rewrite_ptrs |= BIT(i);
+                       i++;
+               }
+       }
+
        ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
                        writepoint_hashed((unsigned long) current),
                        opts,
-                       (struct data_update_opts) {
-                               .target         = opts.promote_target,
-                               .extra_replicas = 1,
-                               .write_flags    = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED,
-                       },
+                       update_opts,
                        btree_id, k);
        /*
         * possible errors: -BCH_ERR_nocow_lock_blocked,
@@ -258,10 +276,17 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
                                        unsigned flags,
                                        struct bch_read_bio **rbio,
                                        bool *bounce,
-                                       bool *read_full)
+                                       bool *read_full,
+                                       struct bch_io_failures *failed)
 {
        struct bch_fs *c = trans->c;
-       bool promote_full = *read_full || READ_ONCE(c->promote_whole_extents);
+       /*
+        * if failed != NULL we're not actually doing a promote, we're
+        * recovering from an io/checksum error
+        */
+       bool promote_full = (failed ||
+                            *read_full ||
+                            READ_ONCE(c->promote_whole_extents));
        /* data might have to be decompressed in the write path: */
        unsigned sectors = promote_full
                ? max(pick->crc.compressed_size, pick->crc.live_size)
@@ -272,7 +297,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
        struct promote_op *promote;
        int ret;
 
-       ret = should_promote(c, k, pos, opts, flags);
+       ret = should_promote(c, k, pos, opts, flags, failed);
        if (ret)
                goto nopromote;
 
@@ -280,7 +305,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
                                  k.k->type == KEY_TYPE_reflink_v
                                  ? BTREE_ID_reflink
                                  : BTREE_ID_extents,
-                                 k, pos, pick, opts, sectors, rbio);
+                                 k, pos, pick, opts, sectors, rbio, failed);
        ret = PTR_ERR_OR_ZERO(promote);
        if (ret)
                goto nopromote;
@@ -910,9 +935,9 @@ retry_pick:
                bounce = true;
        }
 
-       if (orig->opts.promote_target)
+       if (orig->opts.promote_target)// || failed)
                promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
-                                       &rbio, &bounce, &read_full);
+                                       &rbio, &bounce, &read_full, failed);
 
        if (!read_full) {
                EBUG_ON(crc_is_compressed(pick.crc));