bcachefs: Change BCH_MEMBER_STATE_failed semantics
author Kent Overstreet <kent.overstreet@linux.dev>
Fri, 7 Mar 2025 15:50:49 +0000 (10:50 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sat, 15 Mar 2025 01:02:16 +0000 (21:02 -0400)
Previously, we wouldn't try to read at all from a failed device. That
doesn't make much sense: the device may be unhealthy (perhaps taking
longer than it should to service reads), but if it's our only option we
should still try to read from it.

Now, bch2_bkey_pick_read_device() will pick failed devices only if there
are no non-failed replicas to read from.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/extents.c
fs/bcachefs/sb-members.h

index d9bdf433c118ac0095891a734bdb8a69fcc17e04..032cd0bda017da70c6b513b55febf18066fa428f 100644 (file)
@@ -79,12 +79,16 @@ void bch2_mark_io_failure(struct bch_io_failures *failed,
        }
 }
 
-static inline u64 dev_latency(struct bch_fs *c, unsigned dev)
+static inline u64 dev_latency(struct bch_dev *ca)
 {
-       struct bch_dev *ca = bch2_dev_rcu(c, dev);
        return ca ? atomic64_read(&ca->cur_latency[READ]) : S64_MAX;
 }
 
+static inline int dev_failed(struct bch_dev *ca)
+{
+       return !ca || ca->mi.state == BCH_MEMBER_STATE_failed;
+}
+
 /*
  * returns true if p1 is better than p2:
  */
@@ -93,8 +97,16 @@ static inline bool ptr_better(struct bch_fs *c,
                              const struct extent_ptr_decoded p2)
 {
        if (likely(!p1.idx && !p2.idx)) {
-               u64 l1 = dev_latency(c, p1.ptr.dev);
-               u64 l2 = dev_latency(c, p2.ptr.dev);
+               struct bch_dev *ca1 = bch2_dev_rcu(c, p1.ptr.dev);
+               struct bch_dev *ca2 = bch2_dev_rcu(c, p2.ptr.dev);
+
+               int failed_delta = dev_failed(ca1) - dev_failed(ca2);
+
+               if (failed_delta)
+                       return failed_delta < 0;
+
+               u64 l1 = dev_latency(ca1);
+               u64 l2 = dev_latency(ca2);
 
                /*
                 * Square the latencies, to bias more in favor of the faster
@@ -170,7 +182,7 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
                                ? f->idx
                                : f->idx + 1;
 
-               if (!p.idx && (!ca || !bch2_dev_is_readable(ca)))
+               if (!p.idx && (!ca || !bch2_dev_is_online(ca)))
                        p.idx++;
 
                if (!p.idx && p.has_ec && bch2_force_reconstruct_read)
@@ -1012,7 +1024,7 @@ static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts,
 
        struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
 
-       return ca && bch2_dev_is_readable(ca) && !dev_ptr_stale_rcu(ca, ptr);
+       return ca && bch2_dev_is_healthy(ca) && !dev_ptr_stale_rcu(ca, ptr);
 }
 
 void bch2_extent_ptr_set_cached(struct bch_fs *c,
index df91b02ce57542816a707d9b4c64716ed3cdf8e6..38261638a611be8b60e418e1f502d1c85fe79eb0 100644 (file)
@@ -35,7 +35,7 @@ static inline bool bch2_dev_idx_is_online(struct bch_fs *c, unsigned dev)
        return ret;
 }
 
-static inline bool bch2_dev_is_readable(struct bch_dev *ca)
+static inline bool bch2_dev_is_healthy(struct bch_dev *ca)
 {
        return bch2_dev_is_online(ca) &&
                ca->mi.state != BCH_MEMBER_STATE_failed;