www.infradead.org Git - users/hch/misc.git/commitdiff
bcachefs: Make allocator stuck timeout configurable, ratelimit messages
author Kent Overstreet <kent.overstreet@linux.dev>
Wed, 7 Aug 2024 17:58:57 +0000 (13:58 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Thu, 8 Aug 2024 01:04:55 +0000 (21:04 -0400)
Limit these messages to once every 2 minutes to avoid spamming logs;
with multiple devices the output can be quite significant.

Also, up the default timeout to 30 seconds from 10 seconds.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_foreground.c
fs/bcachefs/alloc_foreground.h
fs/bcachefs/bcachefs.h
fs/bcachefs/bcachefs_format.h
fs/bcachefs/io_misc.c
fs/bcachefs/io_write.c
fs/bcachefs/opts.h
fs/bcachefs/super-io.c

index 8683fe4fae5b84be5040af332ecbfcd1ad3da971..02de5ad2be2c93fc954ae23812d9e4ae2e3f423e 100644 (file)
@@ -1758,11 +1758,12 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
        prt_printf(out, "buckets to invalidate\t%llu\r\n",      should_invalidate_buckets(ca, stats));
 }
 
-void bch2_print_allocator_stuck(struct bch_fs *c)
+static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
 {
        struct printbuf buf = PRINTBUF;
 
-       prt_printf(&buf, "Allocator stuck? Waited for 10 seconds\n");
+       prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n",
+                  c->opts.allocator_stuck_timeout);
 
        prt_printf(&buf, "Allocator debug:\n");
        printbuf_indent_add(&buf, 2);
@@ -1792,3 +1793,24 @@ void bch2_print_allocator_stuck(struct bch_fs *c)
        bch2_print_string_as_lines(KERN_ERR, buf.buf);
        printbuf_exit(&buf);
 }
+
+static inline unsigned allocator_wait_timeout(struct bch_fs *c)
+{
+       if (c->allocator_last_stuck &&
+           time_after(c->allocator_last_stuck + HZ * 60 * 2, jiffies))
+               return 0;
+
+       return c->opts.allocator_stuck_timeout * HZ;
+}
+
+void __bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl)
+{
+       unsigned t = allocator_wait_timeout(c);
+
+       if (t && closure_sync_timeout(cl, t)) {
+               c->allocator_last_stuck = jiffies;
+               bch2_print_allocator_stuck(c);
+       }
+
+       closure_sync(cl);
+}
index c78a64ec05537410a710b1e0fb32eac08a7bc4fb..386d231ceca3f667f8871d32e540ca47c1fafc44 100644 (file)
@@ -231,6 +231,11 @@ void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
 void bch2_fs_alloc_debug_to_text(struct printbuf *, struct bch_fs *);
 void bch2_dev_alloc_debug_to_text(struct printbuf *, struct bch_dev *);
 
-void bch2_print_allocator_stuck(struct bch_fs *);
+void __bch2_wait_on_allocator(struct bch_fs *, struct closure *);
+static inline void bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl)
+{
+       if (cl->closure_get_happened)
+               __bch2_wait_on_allocator(c, cl);
+}
 
 #endif /* _BCACHEFS_ALLOC_FOREGROUND_H */
index 91361a167dcd6329ed10ac7c791311d7412f21db..eedf2d6045e73fc54571d20b6efb786779bcb80d 100644 (file)
@@ -893,6 +893,8 @@ struct bch_fs {
        struct bch_fs_usage_base __percpu *usage;
        u64 __percpu            *online_reserved;
 
+       unsigned long           allocator_last_stuck;
+
        struct io_clock         io_clock[2];
 
        /* JOURNAL SEQ BLACKLIST */
index 74a60b1a4ddfa70f7353b7c99584b0a216d4b7c3..ad893684db524c37766275f4c85615d327df11b2 100644 (file)
@@ -836,6 +836,8 @@ LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI,
 
 LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE,
                                        struct bch_sb, flags[5],  0, 16);
+LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT,
+                                       struct bch_sb, flags[5], 16, 32);
 
 static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
 {
index 2cf6297756f8e1eab9b87fb3fbe806a0604510a5..177ed331c00b1d4a05d5eb523e430732b251dd8d 100644 (file)
@@ -126,11 +126,7 @@ err_noprint:
 
        if (closure_nr_remaining(&cl) != 1) {
                bch2_trans_unlock_long(trans);
-
-               if (closure_sync_timeout(&cl, HZ * 10)) {
-                       bch2_print_allocator_stuck(c);
-                       closure_sync(&cl);
-               }
+               bch2_wait_on_allocator(c, &cl);
        }
 
        return ret;
index d31c8d006d979138821a0223dbf6aa9bf0645bd1..1d4761d150023af0c1a9dca31b765c00683e901a 100644 (file)
@@ -1503,10 +1503,7 @@ err:
        if ((op->flags & BCH_WRITE_SYNC) ||
            (!(op->flags & BCH_WRITE_SUBMITTED) &&
             !(op->flags & BCH_WRITE_IN_WORKER))) {
-               if (closure_sync_timeout(&op->cl, HZ * 10)) {
-                       bch2_print_allocator_stuck(c);
-                       closure_sync(&op->cl);
-               }
+               bch2_wait_on_allocator(c, &op->cl);
 
                __bch2_write_index(op);
 
index 60b93018501f9814c3504ab43bf24d7d82e5c42f..cda1725702eaf7aafa74258cdfbb952583058b23 100644 (file)
@@ -391,6 +391,11 @@ enum fsck_err_opts {
          OPT_BOOL(),                                                   \
          BCH_SB_JOURNAL_TRANSACTION_NAMES, true,                       \
          NULL,         "Log transaction function names in journal")    \
+       x(allocator_stuck_timeout,      u16,                            \
+         OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME,                      \
+         OPT_UINT(0, U16_MAX),                                         \
+         BCH_SB_ALLOCATOR_STUCK_TIMEOUT, 30,                           \
+         NULL,         "Default timeout in seconds for stuck allocator messages")\
        x(noexcl,                       u8,                             \
          OPT_FS|OPT_MOUNT,                                             \
          OPT_BOOL(),                                                   \
index 8bc8198327904454da03e34a9da8931d7f92d5f0..c8c2ccbdfbb5891faa1de7e2c5ca2a50724a93ec 100644 (file)
@@ -414,6 +414,10 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb,
 
                if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb))
                        SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version));
+
+               if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2 &&
+                   !BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb))
+                       SET_BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb, 30);
        }
 
        for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {