www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
bcachefs: data_update now checks for extents that can't be moved
author: Kent Overstreet <kent.overstreet@linux.dev>
Fri, 28 Feb 2025 16:37:36 +0000 (11:37 -0500)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sat, 15 Mar 2025 01:02:15 +0000 (21:02 -0400)
If a device is ro or failed, we might not have anywhere to move a
replica.

Check for this early, before doing the read and attempting to write.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/data_update.c
fs/bcachefs/errcode.h

index 7e484afea5516823c8b190f3507e3003818beb8e..522574bc4197d650cba6ccc2dbd9d5ee8c125d90 100644 (file)
@@ -573,7 +573,6 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
 
        prt_str_indented(out, "extra replicas:\t");
        prt_u64(out, data_opts->extra_replicas);
-       prt_newline(out);
 }
 
 void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
@@ -707,6 +706,18 @@ int bch2_data_update_bios_init(struct data_update *m, struct bch_fs *c,
        return 0;
 }
 
+static bool can_write_extent(struct bch_fs *c,
+                            struct bch_devs_list *devs_have,
+                            unsigned target)
+{
+       struct bch_devs_mask devs = target_rw_devs(c, BCH_DATA_user, target);
+
+       darray_for_each(*devs_have, i)
+               __clear_bit(*i, devs.d);
+
+       return !bch2_is_zero(&devs, sizeof(devs));
+}
+
 int bch2_data_update_init(struct btree_trans *trans,
                          struct btree_iter *iter,
                          struct moving_context *ctxt,
@@ -788,6 +799,20 @@ int bch2_data_update_init(struct btree_trans *trans,
                ptr_bit <<= 1;
        }
 
+       if (!can_write_extent(c, &m->op.devs_have,
+                             m->op.flags & BCH_WRITE_only_specified_devs ? m->op.target : 0)) {
+               /*
+                * Check if we have rw devices not in devs_have: this can happen
+                * if we're trying to move data on a ro or failed device
+                *
+                * If we can't move it, we need to clear the rebalance_work bit,
+                * if applicable
+                *
+                * Also, copygc should skip ro/failed devices:
+                */
+               return -BCH_ERR_data_update_done_no_rw_devs;
+       }
+
        unsigned durability_required = max(0, (int) (io_opts->data_replicas - durability_have));
 
        /*
index 0d9a8198e95ea0abaa8f895000d6d5ed9347677f..ed4214e9bebaef6e40c6b5198da07bd083a6afd8 100644 (file)
        x(BCH_ERR_data_update_done,     data_update_done_no_writes_needed)      \
        x(BCH_ERR_data_update_done,     data_update_done_no_snapshot)           \
        x(BCH_ERR_data_update_done,     data_update_done_no_dev_refs)           \
+       x(BCH_ERR_data_update_done,     data_update_done_no_rw_devs)            \
        x(EINVAL,                       device_state_not_allowed)               \
        x(EINVAL,                       member_info_missing)                    \
        x(EINVAL,                       mismatched_block_size)                  \