]> www.infradead.org Git - linux.git/commitdiff
bcachefs: Use a heap for handling overwrites in btree node scan
authorKent Overstreet <kent.overstreet@linux.dev>
Sat, 7 Dec 2024 00:23:22 +0000 (19:23 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sat, 21 Dec 2024 06:36:22 +0000 (01:36 -0500)
Fix an O(n^2) issue when we find many overlapping (overwritten) btree
nodes - especially when one node overwrites many smaller nodes.

This was discovered to be an issue with the bcachefs
merge_torture_flakey test - if we had a large btree that was then
emptied, the number of difficult overwrites can be unbounded.

Cc: Kuan-Wei Chiu <visitorckw@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_node_scan.c
fs/bcachefs/btree_node_scan_types.h

index eeafb5e7354edca0ce428683db5e221b22c6a8f2..a7f06deee13c34c56b0fd05d655e05e8fa01762b 100644 (file)
@@ -12,6 +12,7 @@
 #include "recovery_passes.h"
 
 #include <linux/kthread.h>
+#include <linux/min_heap.h>
 #include <linux/sort.h>
 
 struct find_btree_nodes_worker {
@@ -31,8 +32,6 @@ static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, con
 
        if (n->range_updated)
                prt_str(out, " range updated");
-       if (n->overwritten)
-               prt_str(out, " overwritten");
 
        for (unsigned i = 0; i < n->nr_ptrs; i++) {
                prt_char(out, ' ');
@@ -140,6 +139,24 @@ static int found_btree_node_cmp_pos(const void *_l, const void *_r)
               -found_btree_node_cmp_time(l, r);
 }
 
+static inline bool found_btree_node_cmp_pos_less(const void *l, const void *r, void *arg)
+{
+       return found_btree_node_cmp_pos(l, r) < 0;
+}
+
+static inline void found_btree_node_swap(void *_l, void *_r, void *arg)
+{
+       struct found_btree_node *l = _l;
+       struct found_btree_node *r = _r;
+
+       swap(*l, *r);
+}
+
+static const struct min_heap_callbacks found_btree_node_heap_cbs = {
+       .less = found_btree_node_cmp_pos_less,
+       .swp = found_btree_node_swap,
+};
+
 static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
                                struct bio *bio, struct btree_node *bn, u64 offset)
 {
@@ -295,55 +312,48 @@ err:
        return f->ret ?: ret;
 }
 
-static void bubble_up(struct found_btree_node *n, struct found_btree_node *end)
+static bool nodes_overlap(const struct found_btree_node *l,
+                         const struct found_btree_node *r)
 {
-       while (n + 1 < end &&
-              found_btree_node_cmp_pos(n, n + 1) > 0) {
-               swap(n[0], n[1]);
-               n++;
-       }
+       return (l->btree_id     == r->btree_id &&
+               l->level        == r->level &&
+               bpos_gt(l->max_key, r->min_key));
 }
 
 static int handle_overwrites(struct bch_fs *c,
-                            struct found_btree_node *start,
-                            struct found_btree_node *end)
+                            struct found_btree_node *l,
+                            found_btree_nodes *nodes_heap)
 {
-       struct found_btree_node *n;
-again:
-       for (n = start + 1;
-            n < end &&
-            n->btree_id        == start->btree_id &&
-            n->level           == start->level &&
-            bpos_lt(n->min_key, start->max_key);
-            n++)  {
-               int cmp = found_btree_node_cmp_time(start, n);
+       struct found_btree_node *r;
+
+       while ((r = min_heap_peek(nodes_heap)) &&
+              nodes_overlap(l, r)) {
+               int cmp = found_btree_node_cmp_time(l, r);
 
                if (cmp > 0) {
-                       if (bpos_cmp(start->max_key, n->max_key) >= 0)
-                               n->overwritten = true;
+                       if (bpos_cmp(l->max_key, r->max_key) >= 0)
+                               min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL);
                        else {
-                               n->range_updated = true;
-                               n->min_key = bpos_successor(start->max_key);
-                               n->range_updated = true;
-                               bubble_up(n, end);
-                               goto again;
+                               r->range_updated = true;
+                               r->min_key = bpos_successor(l->max_key);
+                               r->range_updated = true;
+                               min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL);
                        }
                } else if (cmp < 0) {
-                       BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0);
+                       BUG_ON(bpos_eq(l->min_key, r->min_key));
 
-                       start->max_key = bpos_predecessor(n->min_key);
-                       start->range_updated = true;
-               } else if (n->level) {
-                       n->overwritten = true;
+                       l->max_key = bpos_predecessor(r->min_key);
+                       l->range_updated = true;
+               } else if (r->level) {
+                       min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL);
                } else {
-                       if (bpos_cmp(start->max_key, n->max_key) >= 0)
-                               n->overwritten = true;
+                       if (bpos_cmp(l->max_key, r->max_key) >= 0)
+                               min_heap_pop(nodes_heap, &found_btree_node_heap_cbs, NULL);
                        else {
-                               n->range_updated = true;
-                               n->min_key = bpos_successor(start->max_key);
-                               n->range_updated = true;
-                               bubble_up(n, end);
-                               goto again;
+                               r->range_updated = true;
+                               r->min_key = bpos_successor(l->max_key);
+                               r->range_updated = true;
+                               min_heap_sift_down(nodes_heap, 0, &found_btree_node_heap_cbs, NULL);
                        }
                }
        }
@@ -355,6 +365,7 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
 {
        struct find_btree_nodes *f = &c->found_btree_nodes;
        struct printbuf buf = PRINTBUF;
+       found_btree_nodes nodes_heap = {};
        size_t dst;
        int ret = 0;
 
@@ -409,29 +420,57 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c)
                bch2_print_string_as_lines(KERN_INFO, buf.buf);
        }
 
-       dst = 0;
-       darray_for_each(f->nodes, i) {
-               if (i->overwritten)
-                       continue;
+       swap(nodes_heap, f->nodes);
+
+       {
+               /* darray must have same layout as a heap */
+               min_heap_char real_heap;
+               BUILD_BUG_ON(sizeof(nodes_heap.nr)      != sizeof(real_heap.nr));
+               BUILD_BUG_ON(sizeof(nodes_heap.size)    != sizeof(real_heap.size));
+               BUILD_BUG_ON(offsetof(found_btree_nodes, nr)    != offsetof(min_heap_char, nr));
+               BUILD_BUG_ON(offsetof(found_btree_nodes, size)  != offsetof(min_heap_char, size));
+       }
 
-               ret = handle_overwrites(c, i, &darray_top(f->nodes));
+       min_heapify_all(&nodes_heap, &found_btree_node_heap_cbs, NULL);
+
+       if (nodes_heap.nr) {
+               ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap));
                if (ret)
                        goto err;
 
-               BUG_ON(i->overwritten);
-               f->nodes.data[dst++] = *i;
+               min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL);
        }
-       f->nodes.nr = dst;
 
-       if (c->opts.verbose) {
+       while (true) {
+               ret = handle_overwrites(c, &darray_last(f->nodes), &nodes_heap);
+               if (ret)
+                       goto err;
+
+               if (!nodes_heap.nr)
+                       break;
+
+               ret = darray_push(&f->nodes, *min_heap_peek(&nodes_heap));
+               if (ret)
+                       goto err;
+
+               min_heap_pop(&nodes_heap, &found_btree_node_heap_cbs, NULL);
+       }
+
+       for (struct found_btree_node *n = f->nodes.data; n < &darray_last(f->nodes); n++)
+               BUG_ON(nodes_overlap(n, n + 1));
+
+       if (0 && c->opts.verbose) {
                printbuf_reset(&buf);
                prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__);
                found_btree_nodes_to_text(&buf, c, f->nodes);
                bch2_print_string_as_lines(KERN_INFO, buf.buf);
+       } else {
+               bch_info(c, "btree node scan found %zu nodes after overwrites", f->nodes.nr);
        }
 
        eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
 err:
+       darray_exit(&nodes_heap);
        printbuf_exit(&buf);
        return ret;
 }
index b6c36c45d0be1938330053235877b217bbd4ba77..2811b6857c970d0ffc2593d21fa6750020a1bbf5 100644 (file)
@@ -6,7 +6,6 @@
 
 struct found_btree_node {
        bool                    range_updated:1;
-       bool                    overwritten:1;
        u8                      btree_id;
        u8                      level;
        unsigned                sectors_written;