www.infradead.org Git - users/dwmw2/linux.git/commitdiff
bcachefs: "Journal stuck" timeout now takes into account device latency
author: Kent Overstreet <kent.overstreet@linux.dev>
Tue, 21 Jan 2025 22:42:25 +0000 (17:42 -0500)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Tue, 21 Jan 2025 23:32:05 +0000 (18:32 -0500)
If a block device (e.g. your typical consumer SSD) is taking multiple
seconds for IOs (typically flushes), we don't want to emit the "journal
stuck" message prematurely.

Also, make sure to drop the btree_trans srcu lock if we're blocking for
more than a second.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_trans_commit.c
fs/bcachefs/journal.c
fs/bcachefs/journal.h

index 6b79b672e0b1b1cb731c19c1e4d3d967d1471d7a..2760dd9569ed91666fd2f0c615ee55807720366c 100644 (file)
@@ -348,7 +348,7 @@ static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
                                                      unsigned flags)
 {
        return bch2_journal_res_get(&trans->c->journal, &trans->journal_res,
-                                   trans->journal_u64s, flags);
+                                   trans->journal_u64s, flags, trans);
 }
 
 #define JSET_ENTRY_LOG_U64s            4
index 2cd20114b74b98611119b3aa84f835c297ffc7f5..46d53d3ba018996ea68dbaa99830a46fbf662dcc 100644 (file)
@@ -601,6 +601,16 @@ out:
                : -BCH_ERR_journal_res_get_blocked;
 }
 
+static unsigned max_dev_latency(struct bch_fs *c)
+{
+       u64 nsecs = 0;
+
+       for_each_rw_member(c, ca)
+               nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration);
+
+       return nsecs_to_jiffies(nsecs);
+}
+
 /*
  * Essentially the entry function to the journaling code. When bcachefs is doing
  * a btree insert, it calls this function to get the current journal write.
@@ -612,17 +622,31 @@ out:
  * btree node write locks.
  */
 int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
-                                 unsigned flags)
+                                 unsigned flags,
+                                 struct btree_trans *trans)
 {
        int ret;
 
        if (closure_wait_event_timeout(&j->async_wait,
                   (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
                   (flags & JOURNAL_RES_GET_NONBLOCK),
-                  HZ * 10))
+                  HZ))
                return ret;
 
+       if (trans)
+               bch2_trans_unlock_long(trans);
+
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       int remaining_wait = max(max_dev_latency(c) * 2, HZ * 10);
+
+       remaining_wait = max(0, remaining_wait - HZ);
+
+       if (closure_wait_event_timeout(&j->async_wait,
+                  (ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
+                  (flags & JOURNAL_RES_GET_NONBLOCK),
+                  remaining_wait))
+               return ret;
+
        struct printbuf buf = PRINTBUF;
        bch2_journal_debug_to_text(&buf, j);
        bch_err(c, "Journal stuck? Waited for 10 seconds...\n%s",
@@ -727,7 +751,7 @@ recheck_need_open:
                 * livelock:
                 */
                sched_annotate_sleep();
-               ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
+               ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL);
                if (ret)
                        return ret;
 
@@ -848,7 +872,7 @@ out:
 static int __bch2_journal_meta(struct journal *j)
 {
        struct journal_res res = {};
-       int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
+       int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL);
        if (ret)
                return ret;
 
index cb0df0663946bb46a1b34b277342b0ef04efef55..a01dae1a57e317c30451f9fa6b1ddbc8ee4b5d45 100644 (file)
@@ -312,7 +312,7 @@ static inline void bch2_journal_res_put(struct journal *j,
 }
 
 int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
-                                 unsigned);
+                                 unsigned, struct btree_trans *);
 
 /* First bits for BCH_WATERMARK: */
 enum journal_res_flags {
@@ -368,7 +368,8 @@ static inline int journal_res_get_fast(struct journal *j,
 }
 
 static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res,
-                                      unsigned u64s, unsigned flags)
+                                      unsigned u64s, unsigned flags,
+                                      struct btree_trans *trans)
 {
        int ret;
 
@@ -380,7 +381,7 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re
        if (journal_res_get_fast(j, res, flags))
                goto out;
 
-       ret = bch2_journal_res_get_slowpath(j, res, flags);
+       ret = bch2_journal_res_get_slowpath(j, res, flags, trans);
        if (ret)
                return ret;
 out: