www.infradead.org Git - users/dwmw2/linux.git/commitdiff
bcachefs: Fix discard path journal flushing
author Kent Overstreet <kent.overstreet@linux.dev>
Mon, 27 Jan 2025 06:21:44 +0000 (01:21 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Fri, 7 Feb 2025 03:35:11 +0000 (22:35 -0500)
The discard path is supposed to issue journal flushes when there are too
many empty buckets that need a journal commit before they can be
written to again, but at some point this code seems to have been lost.

Bring it back with a new optimization to make sure we don't issue too
many journal flushes: the journal now tracks the sequence number of the
most recent flush in progress, which the discard path uses when deciding
which buckets need a journal flush.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_foreground.c
fs/bcachefs/alloc_types.h
fs/bcachefs/buckets_waiting_for_journal.c
fs/bcachefs/buckets_waiting_for_journal.h
fs/bcachefs/journal.c
fs/bcachefs/journal_types.h
fs/bcachefs/trace.h

index fc2ef33b67b38d36ab6e297eeeddea10cd666165..3ea809990ef1a5bd2f9c2d00d56e6deecd08988d 100644 (file)
@@ -1803,7 +1803,6 @@ struct discard_buckets_state {
        u64             open;
        u64             need_journal_commit;
        u64             discarded;
-       u64             need_journal_commit_this_dev;
 };
 
 static int bch2_discard_one_bucket(struct btree_trans *trans,
@@ -1827,11 +1826,11 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
                goto out;
        }
 
-       if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
-                       c->journal.flushed_seq_ondisk,
-                       pos.inode, pos.offset)) {
-               s->need_journal_commit++;
-               s->need_journal_commit_this_dev++;
+       u64 seq_ready = bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal,
+                                                     pos.inode, pos.offset);
+       if (seq_ready > c->journal.flushed_seq_ondisk) {
+               if (seq_ready > c->journal.flushing_seq)
+                       s->need_journal_commit++;
                goto out;
        }
 
@@ -1865,23 +1864,24 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
                discard_locked = true;
        }
 
-       if (!bkey_eq(*discard_pos_done, iter.pos) &&
-           ca->mi.discard && !c->opts.nochanges) {
-               /*
-                * This works without any other locks because this is the only
-                * thread that removes items from the need_discard tree
-                */
-               bch2_trans_unlock_long(trans);
-               blkdev_issue_discard(ca->disk_sb.bdev,
-                                    k.k->p.offset * ca->mi.bucket_size,
-                                    ca->mi.bucket_size,
-                                    GFP_KERNEL);
-               *discard_pos_done = iter.pos;
+       if (!bkey_eq(*discard_pos_done, iter.pos)) {
                s->discarded++;
+               *discard_pos_done = iter.pos;
 
-               ret = bch2_trans_relock_notrace(trans);
-               if (ret)
-                       goto out;
+               if (ca->mi.discard && !c->opts.nochanges) {
+                       /*
+                        * This works without any other locks because this is the only
+                        * thread that removes items from the need_discard tree
+                        */
+                       bch2_trans_unlock_long(trans);
+                       blkdev_issue_discard(ca->disk_sb.bdev,
+                                            k.k->p.offset * ca->mi.bucket_size,
+                                            ca->mi.bucket_size,
+                                            GFP_KERNEL);
+                       ret = bch2_trans_relock_notrace(trans);
+                       if (ret)
+                               goto out;
+               }
        }
 
        SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
@@ -1929,6 +1929,9 @@ static void bch2_do_discards_work(struct work_struct *work)
                                   POS(ca->dev_idx, U64_MAX), 0, k,
                        bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s, false)));
 
+       if (s.need_journal_commit > dev_buckets_available(ca, BCH_WATERMARK_normal))
+               bch2_journal_flush_async(&c->journal, NULL);
+
        trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
                              bch2_err_str(ret));
 
@@ -2024,7 +2027,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
                        break;
        }
 
-       trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
+       trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
 
        bch2_trans_put(trans);
        percpu_ref_put(&ca->io_ref);
index 6df41c331a52e3cc8345ff771d4a57ca25ed50c4..5a781fb4c794b96238d25b8e87a780a2af265eb3 100644 (file)
@@ -205,8 +205,12 @@ static inline bool may_alloc_bucket(struct bch_fs *c,
                return false;
        }
 
-       if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
-                       c->journal.flushed_seq_ondisk, bucket.inode, bucket.offset)) {
+       u64 journal_seq_ready =
+               bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal,
+                                             bucket.inode, bucket.offset);
+       if (journal_seq_ready > c->journal.flushed_seq_ondisk) {
+               if (journal_seq_ready > c->journal.flushing_seq)
+                       s->need_journal_commit++;
                s->skipped_need_journal_commit++;
                return false;
        }
@@ -570,7 +574,7 @@ alloc:
                ? bch2_bucket_alloc_freelist(trans, ca, watermark, &s, cl)
                : bch2_bucket_alloc_early(trans, ca, watermark, &s, cl);
 
-       if (s.skipped_need_journal_commit * 2 > avail)
+       if (s.need_journal_commit * 2 > avail)
                bch2_journal_flush_async(&c->journal, NULL);
 
        if (!ob && s.btree_bitmap != BTREE_BITMAP_ANY) {
index 9bbb28e90b934fa27618537e85c901e0bf8efbda..4aa8ee026cb847fa402c12ef12e5cf98195252f4 100644 (file)
@@ -18,6 +18,7 @@ struct bucket_alloc_state {
        u64     buckets_seen;
        u64     skipped_open;
        u64     skipped_need_journal_commit;
+       u64     need_journal_commit;
        u64     skipped_nocow;
        u64     skipped_nouse;
        u64     skipped_mi_btree_bitmap;
index f9fb150eda706cb670a38ef9e167a896ad31e203..c8a488e6b7b86547cf1120fef13a93b0972dc1ed 100644 (file)
@@ -22,23 +22,21 @@ static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_
        memset(t->d, 0, sizeof(t->d[0]) << t->bits);
 }
 
-bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
-                                     u64 flushed_seq,
-                                     unsigned dev, u64 bucket)
+u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *b,
+                                 unsigned dev, u64 bucket)
 {
        struct buckets_waiting_for_journal_table *t;
        u64 dev_bucket = (u64) dev << 56 | bucket;
-       bool ret = false;
-       unsigned i;
+       u64 ret = 0;
 
        mutex_lock(&b->lock);
        t = b->t;
 
-       for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) {
+       for (unsigned i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) {
                struct bucket_hashed *h = bucket_hash(t, i, dev_bucket);
 
                if (h->dev_bucket == dev_bucket) {
-                       ret = h->journal_seq > flushed_seq;
+                       ret = h->journal_seq;
                        break;
                }
        }
index d2ae19cbe18c484e50ac5f04af80ae91bd79a4af..365619ca44c87e2590650c66360bb531db63f75d 100644 (file)
@@ -4,8 +4,8 @@
 
 #include "buckets_waiting_for_journal_types.h"
 
-bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *,
-                                     u64, unsigned, u64);
+u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *,
+                                 unsigned, u64);
 int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *,
                                         u64, unsigned, u64, u64);
 
index 0a943a27ef44985f6c5bbb35b37aadadf1f17d1c..24c294d4634e0c36d4ffa472c89a5e24778e29c6 100644 (file)
@@ -796,6 +796,7 @@ recheck_need_open:
        }
 
        buf->must_flush = true;
+       j->flushing_seq = max(j->flushing_seq, seq);
 
        if (parent && !closure_wait(&buf->wait, parent))
                BUG();
index 3ba433a48eb8aed01f8c45907b3458cd120851ef..a198a81d7478469964e3d480f2b4ff603e667ac1 100644 (file)
@@ -237,6 +237,7 @@ struct journal {
        /* seq, last_seq from the most recent journal entry successfully written */
        u64                     seq_ondisk;
        u64                     flushed_seq_ondisk;
+       u64                     flushing_seq;
        u64                     last_seq_ondisk;
        u64                     err_seq;
        u64                     last_empty_seq;
index 56a5a7fbc0fd1497cbe6178e532108da60fe3337..c1b51009edf6b17e85d6c42b4ef8150c0788099a 100644 (file)
@@ -727,7 +727,7 @@ DEFINE_EVENT(fs_str, bucket_alloc_fail,
        TP_ARGS(c, str)
 );
 
-TRACE_EVENT(discard_buckets,
+DECLARE_EVENT_CLASS(discard_buckets_class,
        TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
                 u64 need_journal_commit, u64 discarded, const char *err),
        TP_ARGS(c, seen, open, need_journal_commit, discarded, err),
@@ -759,6 +759,18 @@ TRACE_EVENT(discard_buckets,
                  __entry->err)
 );
 
+DEFINE_EVENT(discard_buckets_class, discard_buckets,
+       TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
+                u64 need_journal_commit, u64 discarded, const char *err),
+       TP_ARGS(c, seen, open, need_journal_commit, discarded, err)
+);
+
+DEFINE_EVENT(discard_buckets_class, discard_buckets_fast,
+       TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
+                u64 need_journal_commit, u64 discarded, const char *err),
+       TP_ARGS(c, seen, open, need_journal_commit, discarded, err)
+);
+
 TRACE_EVENT(bucket_invalidate,
        TP_PROTO(struct bch_fs *c, unsigned dev, u64 bucket, u32 sectors),
        TP_ARGS(c, dev, bucket, sectors),