www.infradead.org Git - nvme.git/commitdiff
bcachefs: btree node scan: fall back to comparing by journal seq
author Kent Overstreet <kent.overstreet@linux.dev>
Fri, 12 Jul 2024 18:16:01 +0000 (14:16 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 14 Jul 2024 23:00:16 +0000 (19:00 -0400)
Highly damaged filesystems, or filesystems that have been damaged,
repaired, and damaged again, may have sequence numbers we can't fully
trust - which in itself is something we need to debug.

Add a journal_seq fallback so that repair doesn't get stuck.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_io.c
fs/bcachefs/btree_node_scan.c
fs/bcachefs/btree_node_scan_types.h

index e092f541c4492f502536bcdc283d26f1151cbda8..db700caf8afa95b8c149818278d2850a88d57ecf 100644 (file)
@@ -1006,6 +1006,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
                BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v);
        unsigned u64s;
        unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key));
+       u64 max_journal_seq = 0;
        struct printbuf buf = PRINTBUF;
        int ret = 0, retry_read = 0, write = READ;
        u64 start_time = local_clock();
@@ -1181,6 +1182,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
                sort_iter_add(iter,
                              vstruct_idx(i, 0),
                              vstruct_last(i));
+
+               max_journal_seq = max(max_journal_seq, le64_to_cpu(i->journal_seq));
        }
 
        if (ptr_written) {
@@ -1217,6 +1220,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
        swap(sorted, b->data);
        set_btree_bset(b, b->set, &b->data->keys);
        b->nsets = 1;
+       b->data->keys.journal_seq = cpu_to_le64(max_journal_seq);
 
        BUG_ON(b->nr.live_u64s != u64s);
 
index 2cb0442f6cc9055915b62ec1bc658a597754fb1a..0011072263778b3bfab0a870e0f62b486711ca98 100644 (file)
@@ -22,7 +22,9 @@ struct find_btree_nodes_worker {
 
 static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n)
 {
-       prt_printf(out, "%s l=%u seq=%u cookie=%llx ", bch2_btree_id_str(n->btree_id), n->level, n->seq, n->cookie);
+       prt_printf(out, "%s l=%u seq=%u journal_seq=%llu cookie=%llx ",
+                  bch2_btree_id_str(n->btree_id), n->level, n->seq,
+                  n->journal_seq, n->cookie);
        bch2_bpos_to_text(out, n->min_key);
        prt_str(out, "-");
        bch2_bpos_to_text(out, n->max_key);
@@ -63,19 +65,37 @@ static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_n
        memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs);
 }
 
+static inline u64 bkey_journal_seq(struct bkey_s_c k)
+{
+       switch (k.k->type) {
+       case KEY_TYPE_inode_v3:
+               return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_journal_seq);
+       default:
+               return 0;
+       }
+}
+
 static bool found_btree_node_is_readable(struct btree_trans *trans,
                                         struct found_btree_node *f)
 {
-       struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } k;
+       struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp;
 
-       found_btree_node_to_key(&k.k, f);
+       found_btree_node_to_key(&tmp.k, f);
 
-       struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false);
+       struct btree *b = bch2_btree_node_get_noiter(trans, &tmp.k, f->btree_id, f->level, false);
        bool ret = !IS_ERR_OR_NULL(b);
        if (!ret)
                return ret;
 
        f->sectors_written = b->written;
+       f->journal_seq = le64_to_cpu(b->data->keys.journal_seq);
+
+       struct bkey_s_c k;
+       struct bkey unpacked;
+       struct btree_node_iter iter;
+       for_each_btree_node_key_unpack(b, k, &iter, &unpacked)
+               f->journal_seq = max(f->journal_seq, bkey_journal_seq(k));
+
        six_unlock_read(&b->c.lock);
 
        /*
@@ -84,7 +104,7 @@ static bool found_btree_node_is_readable(struct btree_trans *trans,
         * this node
         */
        if (b != btree_node_root(trans->c, b))
-               bch2_btree_node_evict(trans, &k.k);
+               bch2_btree_node_evict(trans, &tmp.k);
        return ret;
 }
 
@@ -105,7 +125,8 @@ static int found_btree_node_cmp_cookie(const void *_l, const void *_r)
 static int found_btree_node_cmp_time(const struct found_btree_node *l,
                                     const struct found_btree_node *r)
 {
-       return cmp_int(l->seq, r->seq);
+       return  cmp_int(l->seq, r->seq) ?:
+               cmp_int(l->journal_seq, r->journal_seq);
 }
 
 static int found_btree_node_cmp_pos(const void *_l, const void *_r)
@@ -309,15 +330,15 @@ again:
                } else if (n->level) {
                        n->overwritten = true;
                } else {
-                       struct printbuf buf = PRINTBUF;
-
-                       prt_str(&buf, "overlapping btree nodes with same seq! halting\n  ");
-                       found_btree_node_to_text(&buf, c, start);
-                       prt_str(&buf, "\n  ");
-                       found_btree_node_to_text(&buf, c, n);
-                       bch_err(c, "%s", buf.buf);
-                       printbuf_exit(&buf);
-                       return -BCH_ERR_fsck_repair_unimplemented;
+                       if (bpos_cmp(start->max_key, n->max_key) >= 0)
+                               n->overwritten = true;
+                       else {
+                               n->range_updated = true;
+                               n->min_key = bpos_successor(start->max_key);
+                               n->range_updated = true;
+                               bubble_up(n, end);
+                               goto again;
+                       }
                }
        }
 
index 5cfaeb5ac831b6396399d71858d2934e7d63c544..b6c36c45d0be1938330053235877b217bbd4ba77 100644 (file)
@@ -11,6 +11,7 @@ struct found_btree_node {
        u8                      level;
        unsigned                sectors_written;
        u32                     seq;
+       u64                     journal_seq;
        u64                     cookie;
 
        struct bpos             min_key;