www.infradead.org Git - users/willy/pagecache.git/commitdiff
bcachefs: Split out journal pins by btree level
author Kent Overstreet <kent.overstreet@linux.dev>
Mon, 10 Feb 2025 16:34:59 +0000 (11:34 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Tue, 11 Feb 2025 15:10:32 +0000 (10:10 -0500)
This lets us flush the journal to go read-only more effectively.

Flushing the journal and going read-only requires halting mutually
recursive processes, which strictly speaking are not guaranteed to
terminate.

Flushing btree node journal pins will kick off a btree node write, and
btree node writes on completion must do another btree update to the
parent node to update the 'sectors_written' field for that node's key.

If the parent node is full and requires a split or compaction, that's
going to generate a whole bunch of additional btree updates - alloc
info, LRU btree, and more - which then have to be flushed, and the cycle
repeats.

This process will terminate much more effectively if we tweak journal
reclaim to flush btree updates leaf to root: i.e., don't flush updates
for a given btree node (kicking off a write, and consuming space within
that node up to the next block boundary) if there might still be
unflushed updates in child nodes.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/journal_reclaim.c
fs/bcachefs/journal_types.h

index 6a9cefb635d63bf04ff06294b59b7192ee3d7b60..d373cd181a7f532d6c6d0fd868e4fdbaa1a90ec8 100644 (file)
@@ -384,12 +384,16 @@ void bch2_journal_pin_drop(struct journal *j,
        spin_unlock(&j->lock);
 }
 
-static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
+static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin,
+                                             journal_pin_flush_fn fn)
 {
        if (fn == bch2_btree_node_flush0 ||
-           fn == bch2_btree_node_flush1)
-               return JOURNAL_PIN_TYPE_btree;
-       else if (fn == bch2_btree_key_cache_journal_flush)
+           fn == bch2_btree_node_flush1) {
+               unsigned idx = fn == bch2_btree_node_flush1;
+               struct btree *b = container_of(pin, struct btree, writes[idx].journal);
+
+               return JOURNAL_PIN_TYPE_btree0 - b->c.level;
+       } else if (fn == bch2_btree_key_cache_journal_flush)
                return JOURNAL_PIN_TYPE_key_cache;
        else
                return JOURNAL_PIN_TYPE_other;
@@ -441,7 +445,7 @@ void bch2_journal_pin_copy(struct journal *j,
 
        bool reclaim = __journal_pin_drop(j, dst);
 
-       bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(flush_fn));
+       bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(dst, flush_fn));
 
        if (reclaim)
                bch2_journal_reclaim_fast(j);
@@ -465,7 +469,7 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
 
        bool reclaim = __journal_pin_drop(j, pin);
 
-       bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(flush_fn));
+       bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn));
 
        if (reclaim)
                bch2_journal_reclaim_fast(j);
@@ -587,7 +591,7 @@ static size_t journal_flush_pins(struct journal *j,
                spin_lock(&j->lock);
                /* Pin might have been dropped or rearmed: */
                if (likely(!err && !j->flush_in_progress_dropped))
-                       list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(flush_fn)]);
+                       list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]);
                j->flush_in_progress = NULL;
                j->flush_in_progress_dropped = false;
                spin_unlock(&j->lock);
@@ -869,18 +873,13 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
 
        mutex_lock(&j->reclaim_lock);
 
-       if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
-                              BIT(JOURNAL_PIN_TYPE_key_cache)|
-                              BIT(JOURNAL_PIN_TYPE_other))) {
-               *did_work = true;
-               goto unlock;
-       }
-
-       if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
-                              BIT(JOURNAL_PIN_TYPE_btree))) {
-               *did_work = true;
-               goto unlock;
-       }
+       for (int type = JOURNAL_PIN_TYPE_NR - 1;
+            type >= 0;
+            --type)
+               if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) {
+                       *did_work = true;
+                       goto unlock;
+               }
 
        if (seq_to_flush > journal_cur_seq(j))
                bch2_journal_entry_close(j);
index a198a81d7478469964e3d480f2b4ff603e667ac1..1ef3a28ed6ab388130af75d4fb825ceeee5e6233 100644 (file)
@@ -53,7 +53,10 @@ struct journal_buf {
  */
 
 enum journal_pin_type {
-       JOURNAL_PIN_TYPE_btree,
+       JOURNAL_PIN_TYPE_btree3,
+       JOURNAL_PIN_TYPE_btree2,
+       JOURNAL_PIN_TYPE_btree1,
+       JOURNAL_PIN_TYPE_btree0,
        JOURNAL_PIN_TYPE_key_cache,
        JOURNAL_PIN_TYPE_other,
        JOURNAL_PIN_TYPE_NR,