www.infradead.org Git - users/willy/xarray.git/commitdiff
bcachefs: stripe support for replicas tracking
author: Kent Overstreet <kent.overstreet@gmail.com>
Tue, 30 Oct 2018 18:32:47 +0000 (14:32 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:11 +0000 (17:08 -0400)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs_format.h
fs/bcachefs/recovery.c
fs/bcachefs/replicas.c
fs/bcachefs/replicas.h
fs/bcachefs/super-io.c
fs/bcachefs/super-io.h
fs/bcachefs/sysfs.c

index eb14fcf15a96bdaa3fc27a1b991ce3da751123e2..ecb7a97ee533db3dfe056429e4254427f1498654 100644 (file)
@@ -888,10 +888,11 @@ struct bch_sb_field {
        x(journal,      0)      \
        x(members,      1)      \
        x(crypt,        2)      \
-       x(replicas,     3)      \
+       x(replicas_v0,  3)      \
        x(quota,        4)      \
        x(disk_groups,  5)      \
-       x(clean,        6)
+       x(clean,        6)      \
+       x(replicas,     7)
 
 enum bch_sb_field_type {
 #define x(f, nr)       BCH_SB_FIELD_##f = nr,
@@ -1017,16 +1018,28 @@ enum bch_data_type {
        BCH_DATA_NR             = 6,
 };
 
+struct bch_replicas_entry_v0 { /* replicas entry layout without an nr_required byte */
+       __u8                    data_type;      /* a zero here ends for_each_replicas_entry_v0() iteration */
+       __u8                    nr_devs;        /* number of device indices stored in devs[] */
+       __u8                    devs[];
+} __attribute__((packed));
+
+struct bch_sb_field_replicas_v0 { /* payload of superblock field type BCH_SB_FIELD_replicas_v0 (number 3) */
+       struct bch_sb_field     field;
+       struct bch_replicas_entry_v0 entries[]; /* variable-length entries, stepped through with replicas_entry_next() */
+} __attribute__((packed, aligned(8)));
+
 struct bch_replicas_entry {
        __u8                    data_type;
        __u8                    nr_devs;
+       __u8                    nr_required;    /* superblock validation rejects 0, and any value > 1 that is >= nr_devs */
        __u8                    devs[];
-};
+} __attribute__((packed));
 
 struct bch_sb_field_replicas {
        struct bch_sb_field     field;
        struct bch_replicas_entry entries[];
-};
+} __attribute__((packed, aligned(8))); /* BCH_SB_FIELD_replicas (number 7) payload; same attributes as the _v0 field struct */
 
 /* BCH_SB_FIELD_quota: */
 
index 696e01f4962f63de541f22f56a030169c02a95e1..1ae8133a1ef7722b2f6e5e5e2f3065c0a8854982 100644 (file)
@@ -130,7 +130,8 @@ int bch2_fs_recovery(struct bch_fs *c)
        int ret;
 
        mutex_lock(&c->sb_lock);
-       if (!bch2_sb_get_replicas(c->disk_sb.sb)) {
+       if (!rcu_dereference_protected(c->replicas,
+                       lockdep_is_held(&c->sb_lock))->nr) {
                bch_info(c, "building replicas info");
                set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
        }
index fb11b97cdeeeb9f37f19ac59bc925c66e86cd4ef..ef62756e8908f0cc897d02c1bbd69927528e6c60 100644 (file)
@@ -45,7 +45,10 @@ static void replicas_entry_to_text(struct printbuf *out,
 {
        unsigned i;
 
-       pr_buf(out, "%u: [", e->data_type);
+       pr_buf(out, "%s: %u/%u [",
+              bch2_data_types[e->data_type],
+              e->nr_required,
+              e->nr_devs);
 
        for (i = 0; i < e->nr_devs; i++)
                pr_buf(out, i ? " %u" : "%u", e->devs[i]);
@@ -75,6 +78,8 @@ static void extent_to_replicas(struct bkey_s_c k,
                const union bch_extent_entry *entry;
                struct extent_ptr_decoded p;
 
+               r->nr_required  = 1;
+
                extent_for_each_ptr_decode(e, p, entry)
                        if (!p.ptr.cached)
                                r->devs[r->nr_devs++] = p.ptr.dev;
@@ -115,6 +120,7 @@ static inline void devlist_to_replicas(struct bch_devs_list devs,
 
        e->data_type    = data_type;
        e->nr_devs      = 0;
+       e->nr_required  = 1;
 
        for (i = 0; i < devs.nr; i++)
                e->devs[e->nr_devs++] = devs.devs[i];
@@ -359,14 +365,13 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
 {
        struct bch_replicas_entry *e, *dst;
        struct bch_replicas_cpu *cpu_r;
-       unsigned nr = 0, entry_size = 0;
+       unsigned nr = 0, entry_size = 0, idx = 0;
 
-       if (sb_r)
-               for_each_replicas_entry(sb_r, e) {
-                       entry_size = max_t(unsigned, entry_size,
-                                          replicas_entry_bytes(e));
-                       nr++;
-               }
+       for_each_replicas_entry(sb_r, e) {
+               entry_size = max_t(unsigned, entry_size,
+                                  replicas_entry_bytes(e));
+               nr++;
+       }
 
        cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) +
                        nr * entry_size, GFP_NOIO);
@@ -376,29 +381,71 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
        cpu_r->nr               = nr;
        cpu_r->entry_size       = entry_size;
 
-       nr = 0;
+       for_each_replicas_entry(sb_r, e) {
+               dst = cpu_replicas_entry(cpu_r, idx++);
+               memcpy(dst, e, replicas_entry_bytes(e));
+               replicas_entry_sort(dst);
+       }
 
-       if (sb_r)
-               for_each_replicas_entry(sb_r, e) {
-                       dst = cpu_replicas_entry(cpu_r, nr++);
-                       memcpy(dst, e, replicas_entry_bytes(e));
-                       replicas_entry_sort(dst);
-               }
+       return cpu_r;
+}
+/*
+ * Build an in-memory replicas table from a replicas_v0 superblock field.
+ *
+ * Each v0 entry is copied into a struct bch_replicas_entry slot with
+ * nr_required set to 1, and the copy is passed to replicas_entry_sort().
+ * The table as a whole is not sorted here.
+ *
+ * Returns the kzalloc'd table, or NULL if the allocation fails.
+ */
+static struct bch_replicas_cpu *
+__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r)
+{
+       struct bch_replicas_entry_v0 *e;
+       struct bch_replicas_cpu *cpu_r;
+       unsigned nr = 0, entry_size = 0, idx = 0;
+
+       for_each_replicas_entry(sb_r, e) { /* first pass: count entries and track the largest replicas_entry_bytes(); NOTE(review): not the _v0 iterator, though the visible parts of both macros match */
+               entry_size = max_t(unsigned, entry_size,
+                                  replicas_entry_bytes(e));
+               nr++;
+       }
+
+       entry_size += sizeof(struct bch_replicas_entry) -
+               sizeof(struct bch_replicas_entry_v0); /* widen every slot by the size difference between the two entry structs */
+
+       cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) +
+                       nr * entry_size, GFP_NOIO);
+       if (!cpu_r)
+               return NULL;
+
+       cpu_r->nr               = nr;
+       cpu_r->entry_size       = entry_size;
+
+       for_each_replicas_entry(sb_r, e) { /* second pass: fill slot idx from each v0 entry */
+               struct bch_replicas_entry *dst =
+                       cpu_replicas_entry(cpu_r, idx++);
+
+               dst->data_type  = e->data_type;
+               dst->nr_devs    = e->nr_devs;
+               dst->nr_required = 1; /* v0 entries carry no nr_required field */
+               memcpy(dst->devs, e->devs, e->nr_devs);
+               replicas_entry_sort(dst);
+       }
 
-       bch2_cpu_replicas_sort(cpu_r);
        return cpu_r;
 }
 
 int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
 {
-       struct bch_sb_field_replicas *sb_r;
+       struct bch_sb_field_replicas *sb_v1;
+       struct bch_sb_field_replicas_v0 *sb_v0;
        struct bch_replicas_cpu *cpu_r, *old_r;
 
-       sb_r    = bch2_sb_get_replicas(c->disk_sb.sb);
-       cpu_r   = __bch2_sb_replicas_to_cpu_replicas(sb_r);
+       if ((sb_v1 = bch2_sb_get_replicas(c->disk_sb.sb)))
+               cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_v1);
+       else if ((sb_v0 = bch2_sb_get_replicas_v0(c->disk_sb.sb)))
+               cpu_r = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0);
+       else
+               cpu_r = kzalloc(sizeof(struct bch_replicas_cpu), GFP_NOIO);
+
        if (!cpu_r)
                return -ENOMEM;
 
+       bch2_cpu_replicas_sort(cpu_r);
+
        old_r = rcu_dereference_check(c->replicas, lockdep_is_held(&c->sb_lock));
        rcu_assign_pointer(c->replicas, cpu_r);
        if (old_r)
@@ -407,23 +454,72 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
        return 0;
 }
 
+static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
+                                              struct bch_replicas_cpu *r)
+{
+       struct bch_sb_field_replicas_v0 *sb_r;
+       struct bch_replicas_entry_v0 *dst;
+       struct bch_replicas_entry *src;
+       size_t bytes;
+       /*
+        * Store table r in c->disk_sb as a replicas_v0 field and delete any
+        * BCH_SB_FIELD_replicas field.  nr_required is not written out; the
+        * caller visible in this patch only takes this path when every entry
+        * has nr_required == 1.
+        *
+        * Returns 0, or -ENOSPC when the field cannot be resized.
+        */
+       bytes = sizeof(struct bch_sb_field_replicas); /* NOTE(review): sized with the non-v0 field struct; both structs are a field header plus a flexible array */
+
+       for_each_cpu_replicas_entry(r, src)
+               bytes += replicas_entry_bytes(src) - 1; /* -1: a v0 entry omits the one-byte nr_required member */
+
+       sb_r = bch2_sb_resize_replicas_v0(&c->disk_sb,
+                       DIV_ROUND_UP(bytes, sizeof(u64)));
+       if (!sb_r)
+               return -ENOSPC;
+
+       bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
+       sb_r = bch2_sb_get_replicas_v0(c->disk_sb.sb); /* looked up again after the delete, which resizes the other field to zero */
+
+       memset(&sb_r->entries, 0,
+              vstruct_end(&sb_r->field) -
+              (void *) &sb_r->entries);
+
+       dst = sb_r->entries;
+       for_each_cpu_replicas_entry(r, src) {
+               dst->data_type  = src->data_type;
+               dst->nr_devs    = src->nr_devs;
+               memcpy(dst->devs, src->devs, src->nr_devs);
+
+               dst = replicas_entry_next(dst);
+
+               BUG_ON((void *) dst > vstruct_end(&sb_r->field)); /* the write cursor must never pass the end of the resized field */
+       }
+
+       return 0;
+}
+
 static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
                                            struct bch_replicas_cpu *r)
 {
        struct bch_sb_field_replicas *sb_r;
        struct bch_replicas_entry *dst, *src;
+       bool need_v1 = false;
        size_t bytes;
 
        bytes = sizeof(struct bch_sb_field_replicas);
 
-       for_each_cpu_replicas_entry(r, src)
+       for_each_cpu_replicas_entry(r, src) {
                bytes += replicas_entry_bytes(src);
+               if (src->nr_required != 1)
+                       need_v1 = true;
+       }
+
+       if (!need_v1)
+               return bch2_cpu_replicas_to_sb_replicas_v0(c, r);
 
        sb_r = bch2_sb_resize_replicas(&c->disk_sb,
                        DIV_ROUND_UP(bytes, sizeof(u64)));
        if (!sb_r)
                return -ENOSPC;
 
+       bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
+       sb_r = bch2_sb_get_replicas(c->disk_sb.sb);
+
        memset(&sb_r->entries, 0,
               vstruct_end(&sb_r->field) -
               (void *) &sb_r->entries);
@@ -482,8 +578,10 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi
                if (!e->nr_devs)
                        goto err;
 
-               err = "invalid replicas entry: too many devices";
-               if (e->nr_devs >= BCH_REPLICAS_MAX)
+               err = "invalid replicas entry: bad nr_required";
+               if (!e->nr_required ||
+                   (e->nr_required > 1 &&
+                    e->nr_required >= e->nr_devs))
                        goto err;
 
                err = "invalid replicas entry: invalid device";
@@ -525,6 +623,45 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
        .to_text        = bch2_sb_replicas_to_text,
 };
 
+static const char *bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f)
+{
+       struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
+       struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
+       struct bch_replicas_cpu *cpu_r = NULL;
+       struct bch_replicas_entry_v0 *e;
+       const char *err;
+       unsigned i;
+       /*
+        * Validate a replicas_v0 superblock field.  Every entry must have a
+        * data type below BCH_DATA_NR, at least one device, and only devices
+        * that bch2_dev_exists() accepts.  The field is then converted to an
+        * in-memory table and handed to check_dup_replicas_entries().
+        *
+        * Returns the message for the first failed check, otherwise whatever
+        * check_dup_replicas_entries() returns (presumably NULL when nothing
+        * is wrong - confirm against that helper).
+        */
+       for_each_replicas_entry_v0(sb_r, e) {
+               err = "invalid replicas entry: invalid data type";
+               if (e->data_type >= BCH_DATA_NR)
+                       goto err;
+
+               err = "invalid replicas entry: no devices";
+               if (!e->nr_devs)
+                       goto err;
+
+               err = "invalid replicas entry: invalid device";
+               for (i = 0; i < e->nr_devs; i++)
+                       if (!bch2_dev_exists(sb, mi, e->devs[i]))
+                               goto err;
+       }
+
+       err = "cannot allocate memory";
+       cpu_r = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r);
+       if (!cpu_r)
+               goto err;
+
+       err = check_dup_replicas_entries(cpu_r);
+err:
+       kfree(cpu_r); /* reached on every path; cpu_r is still NULL if a check failed before the conversion */
+       return err;
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
+       .validate       = bch2_sb_validate_replicas_v0, /* the only hook set for the v0 field; no .to_text is provided */
+};
+
 /* Query replicas: */
 
 bool bch2_replicas_marked(struct bch_fs *c,
@@ -591,7 +728,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
        memset(&ret, 0, sizeof(ret));
 
        for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
-               ret.replicas[i].nr_online = UINT_MAX;
+               ret.replicas[i].redundancy = INT_MAX;
 
        mi = bch2_sb_get_members(c->disk_sb.sb);
        rcu_read_lock();
@@ -613,9 +750,9 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
                                nr_offline++;
                }
 
-               ret.replicas[e->data_type].nr_online =
-                       min(ret.replicas[e->data_type].nr_online,
-                           nr_online);
+               ret.replicas[e->data_type].redundancy =
+                       min(ret.replicas[e->data_type].redundancy,
+                           (int) nr_online - (int) e->nr_required);
 
                ret.replicas[e->data_type].nr_offline =
                        max(ret.replicas[e->data_type].nr_offline,
@@ -624,6 +761,10 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
 
        rcu_read_unlock();
 
+       for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
+               if (ret.replicas[i].redundancy == INT_MAX)
+                       ret.replicas[i].redundancy = 0;
+
        return ret;
 }
 
@@ -638,7 +779,7 @@ static bool have_enough_devs(struct replicas_status s,
                             bool force_if_lost)
 {
        return (!s.replicas[type].nr_offline || force_if_degraded) &&
-               (s.replicas[type].nr_online || force_if_lost);
+               (s.replicas[type].redundancy >= 0 || force_if_lost);
 }
 
 bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
@@ -654,14 +795,14 @@ bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
                                 flags & BCH_FORCE_IF_DATA_LOST));
 }
 
-unsigned bch2_replicas_online(struct bch_fs *c, bool meta)
+int bch2_replicas_online(struct bch_fs *c, bool meta) /* signed because redundancy + 1 can be zero or negative */
 {
        struct replicas_status s = bch2_replicas_status(c);
 
-       return meta
-               ? min(s.replicas[BCH_DATA_JOURNAL].nr_online,
-                     s.replicas[BCH_DATA_BTREE].nr_online)
-               : s.replicas[BCH_DATA_USER].nr_online;
+       return (meta
+               ? min(s.replicas[BCH_DATA_JOURNAL].redundancy,
+                     s.replicas[BCH_DATA_BTREE].redundancy)
+               : s.replicas[BCH_DATA_USER].redundancy) + 1; /* redundancy is nr_online - nr_required, so + 1 gives nr_online when nr_required == 1 */
 }
 
 unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
index d3d81a1a39cd14fe3ccb8995c8f2227192eea54a..a343dd9cd97ff43401a444a51499d504b4b93d32 100644 (file)
@@ -17,7 +17,7 @@ void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
 
 struct replicas_status {
        struct {
-               unsigned        nr_online;
+               int             redundancy;     /* minimum over entries of nr_online - nr_required; 0 when the data type has no entries */
                unsigned        nr_offline;
        }                       replicas[BCH_DATA_NR];
 };
@@ -27,7 +27,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *,
 struct replicas_status bch2_replicas_status(struct bch_fs *);
 bool bch2_have_enough_devs(struct replicas_status, unsigned);
 
-unsigned bch2_replicas_online(struct bch_fs *, bool);
+int bch2_replicas_online(struct bch_fs *, bool);
 unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
 
 int bch2_replicas_gc_end(struct bch_fs *, int);
@@ -46,8 +46,14 @@ int bch2_replicas_gc_start(struct bch_fs *, unsigned);
             (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
             (_i) = replicas_entry_next(_i))
 
+#define for_each_replicas_entry_v0(_r, _i)                             \
+       for (_i = (_r)->entries;                                        \
+            (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
+            (_i) = replicas_entry_next(_i)) /* walks entries of field _r; stops at the end of the field or at the first entry whose data_type is 0 */
+
 int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *);
 
 extern const struct bch_sb_field_ops bch_sb_field_ops_replicas;
+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0;
 
 #endif /* _BCACHEFS_REPLICAS_H */
index 22e28d1eeadcb777c3149c21918cf2547e7c3c20..071543033096a7b9dfe93b180a29997a94283d66 100644 (file)
@@ -60,8 +60,13 @@ static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
                void *src, *dst;
 
                src = vstruct_end(f);
-               f->u64s = cpu_to_le32(u64s);
-               dst = vstruct_end(f);
+
+               if (u64s) {
+                       f->u64s = cpu_to_le32(u64s);
+                       dst = vstruct_end(f);
+               } else {
+                       dst = f;
+               }
 
                memmove(dst, src, vstruct_end(sb->sb) - src);
 
@@ -71,7 +76,16 @@ static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
 
        sb->sb->u64s = cpu_to_le32(sb_u64s);
 
-       return f;
+       return u64s ? f : NULL;
+}
+/*
+ * Remove the superblock field of the given type, if one exists, by resizing
+ * it to zero u64s.  Does nothing when no such field is present.
+ */
+void bch2_sb_field_delete(struct bch_sb_handle *sb,
+                         enum bch_sb_field_type type)
+{
+       struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type);
+
+       if (f)
+               __bch2_sb_field_resize(sb, f, 0);
 }
 
 /* Superblock realloc/free: */
@@ -174,7 +188,8 @@ struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb,
        }
 
        f = __bch2_sb_field_resize(sb, f, u64s);
-       f->type = cpu_to_le32(type);
+       if (f)
+               f->type = cpu_to_le32(type);
        return f;
 }
 
@@ -366,6 +381,7 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
 {
        struct bch_sb_field *src_f, *dst_f;
        struct bch_sb *dst = dst_handle->sb;
+       unsigned i;
 
        dst->version            = src->version;
        dst->seq                = src->seq;
@@ -384,15 +400,17 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
        memcpy(dst->features,   src->features,  sizeof(dst->features));
        memcpy(dst->compat,     src->compat,    sizeof(dst->compat));
 
-       vstruct_for_each(src, src_f) {
-               if (src_f->type == BCH_SB_FIELD_journal)
+       for (i = 0; i < BCH_SB_FIELD_NR; i++) {
+               if (i == BCH_SB_FIELD_journal)
                        continue;
 
-               dst_f = bch2_sb_field_get(dst, le32_to_cpu(src_f->type));
+               src_f = bch2_sb_field_get(src, i);
+               dst_f = bch2_sb_field_get(dst, i);
                dst_f = __bch2_sb_field_resize(dst_handle, dst_f,
-                                              le32_to_cpu(src_f->u64s));
+                               src_f ? le32_to_cpu(src_f->u64s) : 0);
 
-               memcpy(dst_f, src_f, vstruct_bytes(src_f));
+               if (src_f)
+                       memcpy(dst_f, src_f, vstruct_bytes(src_f));
        }
 }
 
index ceef650d55dd51533693d3ed5f9bc6d2e352e9ad..aa618fe9cd22013b83adc27bb0b4df361ed5d9c5 100644 (file)
@@ -12,6 +12,7 @@
 struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type);
 struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *,
                                          enum bch_sb_field_type, unsigned);
+void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type);
 
 #define field_to_type(_f, _name)                                       \
        container_of_or_null(_f, struct bch_sb_field_##_name, field)
index 4273aad166757c0f5be5eb07555e23d045b8afa6..6a5da0f127139f40ae2799a38453024ef868c2da 100644 (file)
@@ -346,8 +346,8 @@ SHOW(bch2_fs)
 
        sysfs_print(promote_whole_extents,      c->promote_whole_extents);
 
-       sysfs_printf(meta_replicas_have, "%u",  bch2_replicas_online(c, true));
-       sysfs_printf(data_replicas_have, "%u",  bch2_replicas_online(c, false));
+       sysfs_printf(meta_replicas_have, "%i",  bch2_replicas_online(c, true));
+       sysfs_printf(data_replicas_have, "%i",  bch2_replicas_online(c, false));
 
        /* Debugging: */