xfs: AIL needs asynchronous CIL forcing

author Dave Chinner <dchinner@redhat.com>

Wed, 11 Aug 2021 01:00:44 +0000 (18:00 -0700)

committer Darrick J. Wong <djwong@kernel.org>

Mon, 16 Aug 2021 19:09:30 +0000 (12:09 -0700)
author Dave Chinner <dchinner@redhat.com>
Wed, 11 Aug 2021 01:00:44 +0000 (18:00 -0700)
committer Darrick J. Wong <djwong@kernel.org>
Mon, 16 Aug 2021 19:09:30 +0000 (12:09 -0700)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c

index 098f5c8ceb29c4eca7ed901e4094dae9a150ef8a..c738e8c79efa7e4336dd3982fa7b093a115e1cdb 100644 (file)
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -52,11 +52,6 @@ xlog_state_get_iclog_space(
         int                     *continued_write,
         int                     *logoffsetp);
  STATIC void
-xlog_state_switch_iclogs(
-       struct xlog             *log,
-       struct xlog_in_core     *iclog,
-       int                     eventual_size);
-STATIC void
  xlog_grant_push_ail(
         struct xlog             *log,
         int                     need_bytes);
@@ -3174,7 +3169,7 @@ xfs_log_ticket_ungrant(
   * This routine will mark the current iclog in the ring as WANT_SYNC and move
   * the current iclog pointer to the next iclog in the ring.
   */
-STATIC void
+void
  xlog_state_switch_iclogs(
         struct xlog             *log,
         struct xlog_in_core     *iclog,
@@ -3346,6 +3341,20 @@ out_error:
         return -EIO;
  }
  
+/*
+ * Force the log to a specific LSN.
+ *
+ * If an iclog with that lsn can be found:
+ *     If it is in the DIRTY state, just return.
+ *     If it is in the ACTIVE state, move the in-core log into the WANT_SYNC
+ *             state and go to sleep or return.
+ *     If it is in any other state, go to sleep or return.
+ *
+ * Synchronous forces are implemented with a wait queue.  All callers trying
+ * to force a given lsn to disk must wait on the queue attached to the
+ * specific in-core log.  When the given in-core log finally completes its write
+ * to disk, that thread will wake up all threads waiting on the queue.
+ */
  static int
  xlog_force_lsn(
         struct xlog             *log,
@@ -3431,18 +3440,13 @@ out_error:
  }
  
  /*
- * Force the in-core log to disk for a specific LSN.
+ * Force the log to a specific checkpoint sequence.
   *
- * Find in-core log with lsn.
- *     If it is in the DIRTY state, just return.
- *     If it is in the ACTIVE state, move the in-core log into the WANT_SYNC
- *             state and go to sleep or return.
- *     If it is in any other state, go to sleep or return.
- *
- * Synchronous forces are implemented with a wait queue.  All callers trying
- * to force a given lsn to disk must wait on the queue attached to the
- * specific in-core log.  When given in-core log finally completes its write
- * to disk, that thread will wake up all threads waiting on the queue.
+ * First force the CIL so that all the required changes have been flushed to the
+ * iclogs. If the CIL force completed it will return a commit LSN that indicates
+ * the iclog that needs to be flushed to stable storage. If the caller needs
+ * a synchronous log force, we will wait on the iclog with the LSN returned by
+ * xlog_cil_force_seq() to be completed.
   */
  int
  xfs_log_force_seq(
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h

index 8431a2f4bd13a8e2828378a8060d5a4a391d7beb..dc1b77b92fc1756d7941f1757429e6e85a314dfc 100644 (file)
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -104,6 +104,7 @@ struct xlog_ticket;
  struct xfs_log_item;
  struct xfs_item_ops;
  struct xfs_trans;
+struct xlog;
  
  int      xfs_log_force(struct xfs_mount *mp, uint flags);
  int      xfs_log_force_seq(struct xfs_mount *mp, xfs_csn_t seq, uint flags,
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c

index f6c4e4e8f11208d9b7361eee50d981b4f6c1f528..59d3bd45543bdbbe34dd596f332881ac9b5153d4 100644 (file)
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -840,6 +840,7 @@ xlog_cil_push_work(
         xfs_csn_t               push_seq;
         struct bio              bio;
         DECLARE_COMPLETION_ONSTACK(bdev_flush);
+       bool                    push_commit_stable;
  
         new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_NOFS);
         new_ctx->ticket = xlog_cil_ticket_alloc(log);
@@ -850,6 +851,8 @@ xlog_cil_push_work(
         spin_lock(&cil->xc_push_lock);
         push_seq = cil->xc_push_seq;
         ASSERT(push_seq <= ctx->sequence);
+       push_commit_stable = cil->xc_push_commit_stable;
+       cil->xc_push_commit_stable = false;
  
         /*
          * As we are about to switch to a new, empty CIL context, we no longer
@@ -1066,8 +1069,16 @@ xlog_cil_push_work(
          * The commit iclog must be written to stable storage to guarantee
          * journal IO vs metadata writeback IO is correctly ordered on stable
          * storage.
+        *
+        * If the push caller needs the commit to be immediately stable and the
+        * commit_iclog is not yet marked as XLOG_STATE_WANT_SYNC to indicate it
+        * will be written when released, switch its state to WANT_SYNC right
+        * now.
          */
         ctx->commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA;
+       if (push_commit_stable &&
+           ctx->commit_iclog->ic_state == XLOG_STATE_ACTIVE)
+               xlog_state_switch_iclogs(log, ctx->commit_iclog, 0);
         xlog_state_release_iclog(log, ctx->commit_iclog, preflush_tail_lsn);
  
         /* Not safe to reference ctx now! */
@@ -1161,13 +1172,26 @@ xlog_cil_push_background(
  /*
   * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence
   * number that is passed. When it returns, the work will be queued for
- * @push_seq, but it won't be completed. The caller is expected to do any
- * waiting for push_seq to complete if it is required.
+ * @push_seq, but it won't be completed.
+ *
+ * If the caller is performing a synchronous force, we will flush the workqueue
+ * to get previously queued work moving to minimise the wait time they will
+ * undergo waiting for all outstanding pushes to complete. The caller is
+ * expected to do the required waiting for push_seq to complete.
+ *
+ * If the caller is performing an async push, we need to ensure that the
+ * checkpoint is fully flushed out of the iclogs when we finish the push. If we
+ * don't do this, then the commit record may remain sitting in memory in an
+ * ACTIVE iclog. This then requires another full log force to push to disk,
+ * which defeats the purpose of having an async, non-blocking CIL force
+ * mechanism. Hence in this case we need to pass a flag to the push work to
+ * indicate it needs to flush the commit record itself.
   */
  static void
  xlog_cil_push_now(
         struct xlog     *log,
-       xfs_lsn_t       push_seq)
+       xfs_lsn_t       push_seq,
+       bool            async)
  {
         struct xfs_cil  *cil = log->l_cilp;
  
@@ -1177,7 +1201,8 @@ xlog_cil_push_now(
         ASSERT(push_seq && push_seq <= cil->xc_current_sequence);
  
         /* start on any pending background push to minimise wait time on it */
-       flush_work(&cil->xc_push_work);
+       if (!async)
+               flush_work(&cil->xc_push_work);
  
         /*
          * If the CIL is empty or we've already pushed the sequence then
@@ -1190,6 +1215,7 @@ xlog_cil_push_now(
         }
  
         cil->xc_push_seq = push_seq;
+       cil->xc_push_commit_stable = async;
         queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
         spin_unlock(&cil->xc_push_lock);
  }
@@ -1274,12 +1300,27 @@ xlog_cil_commit(
         xlog_cil_push_background(log);
  }
  
+/*
+ * Flush the CIL to stable storage but don't wait for it to complete. This
+ * requires the CIL push to ensure the commit record for the push hits the disk,
+ * but otherwise is no different to a push done from a log force.
+ */
+void
+xlog_cil_flush(
+       struct xlog     *log)
+{
+       xfs_csn_t       seq = log->l_cilp->xc_current_sequence;
+
+       trace_xfs_log_force(log->l_mp, seq, _RET_IP_);
+       xlog_cil_push_now(log, seq, true);
+}
+
  /*
   * Conditionally push the CIL based on the sequence passed in.
   *
- * We only need to push if we haven't already pushed the sequence
- * number given. Hence the only time we will trigger a push here is
- * if the push sequence is the same as the current context.
+ * We only need to push if we haven't already pushed the sequence number given.
+ * Hence the only time we will trigger a push here is if the push sequence is
+ * the same as the current context.
   *
   * We return the current commit lsn to allow the callers to determine if a
   * iclog flush is necessary following this call.
@@ -1295,13 +1336,17 @@ xlog_cil_force_seq(
  
         ASSERT(sequence <= cil->xc_current_sequence);
  
+       if (!sequence)
+               sequence = cil->xc_current_sequence;
+       trace_xfs_log_force(log->l_mp, sequence, _RET_IP_);
+
         /*
          * check to see if we need to force out the current context.
          * xlog_cil_push() handles racing pushes for the same sequence,
          * so no need to deal with it here.
          */
  restart:
-       xlog_cil_push_now(log, sequence);
+       xlog_cil_push_now(log, sequence, false);
  
         /*
          * See if we can find a previous sequence still committing.
@@ -1325,6 +1370,7 @@ restart:
                          * It is still being pushed! Wait for the push to
                          * complete, then start again from the beginning.
                          */
+                       XFS_STATS_INC(log->l_mp, xs_log_force_sleep);
                         xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
                         goto restart;
                 }
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h

index 1ed29980390427498882640e7ac6918a87150195..014e0dc0ba9748c8ce2400170608c1a0ae5c165f 100644 (file)
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -277,6 +277,7 @@ struct xfs_cil {
  
         spinlock_t              xc_push_lock ____cacheline_aligned_in_smp;
         xfs_csn_t               xc_push_seq;
+       bool                    xc_push_commit_stable;
         struct list_head        xc_committing;
         wait_queue_head_t       xc_commit_wait;
         wait_queue_head_t       xc_start_wait;
@@ -520,6 +521,8 @@ int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx,
  void   xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
  void   xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);
  
+void xlog_state_switch_iclogs(struct xlog *log, struct xlog_in_core *iclog,
+               int eventual_size);
  int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog,
                 xfs_lsn_t log_tail_lsn);
  
@@ -594,6 +597,7 @@ void        xlog_cil_set_ctx_write_state(struct xfs_cil_ctx *ctx,
  /*
   * CIL force routines
   */
+void xlog_cil_flush(struct xlog *log);
  xfs_lsn_t xlog_cil_force_seq(struct xlog *log, xfs_csn_t sequence);
  
  static inline void
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c

index f1bc88f4367cebe9751f94c7d35b436a68d8df6c..18dc5eca6c04501a11b20877eb6c6b44164958af 100644 (file)
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -10,6 +10,7 @@
  #include "xfs_log_format.h"
  #include "xfs_trans_resv.h"
  #include "xfs_sysfs.h"
+#include "xfs_log.h"
  #include "xfs_log_priv.h"
  #include "xfs_mount.h"
  
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c

index 7e01e00550ac820e0325329266892baec49d78cb..4c86afad1617b8cb08645bfeca227c969d55af56 100644 (file)
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -20,6 +20,7 @@
  #include "xfs_bmap.h"
  #include "xfs_attr.h"
  #include "xfs_trans.h"
+#include "xfs_log.h"
  #include "xfs_log_priv.h"
  #include "xfs_buf_item.h"
  #include "xfs_quota.h"
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c

index b52394b0e1f44a4b79ef0b6383fd2165bb2433cf..70868f8b5911791deea3f9cd655461c7f8067c54 100644 (file)
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -9,7 +9,6 @@
  #include "xfs_shared.h"
  #include "xfs_format.h"
  #include "xfs_log_format.h"
-#include "xfs_log_priv.h"
  #include "xfs_trans_resv.h"
  #include "xfs_mount.h"
  #include "xfs_extent_busy.h"
@@ -17,6 +16,7 @@
  #include "xfs_trans.h"
  #include "xfs_trans_priv.h"
  #include "xfs_log.h"
+#include "xfs_log_priv.h"
  #include "xfs_trace.h"
  #include "xfs_error.h"
  #include "xfs_defer.h"
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c

index dbb69b4bf3ed5fbe4bacb65a1b9ca78e79ae9ffa..69aac416e2cee427fabb79e2f541fb2d0b53c044 100644 (file)
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -17,6 +17,7 @@
  #include "xfs_errortag.h"
  #include "xfs_error.h"
  #include "xfs_log.h"
+#include "xfs_log_priv.h"
  
  #ifdef DEBUG
  /*
@@ -429,8 +430,12 @@ xfsaild_push(
  
         /*
          * If we encountered pinned items or did not finish writing out all
-        * buffers the last time we ran, force the log first and wait for it
-        * before pushing again.
+        * buffers the last time we ran, force a background CIL push to get the
+        * items unpinned in the near future. We do not wait on the CIL push as
+        * that could stall us for seconds if there is enough background IO
+        * load. Stalling for that long when the tail of the log is pinned and
+        * needs flushing will hard stop the transaction subsystem when log
+        * space runs out.
          */
         if (ailp->ail_log_flush && ailp->ail_last_pushed_lsn == 0 &&
             (!list_empty_careful(&ailp->ail_buf_list) ||
@@ -438,7 +443,7 @@ xfsaild_push(
                 ailp->ail_log_flush = 0;
  
                 XFS_STATS_INC(mp, xs_push_ail_flush);
-               xfs_log_force(mp, XFS_LOG_SYNC);
+               xlog_cil_flush(mp->m_log);
         }
  
         spin_lock(&ailp->ail_lock);
author	Dave Chinner <dchinner@redhat.com>
	Wed, 11 Aug 2021 01:00:44 +0000 (18:00 -0700)
committer	Darrick J. Wong <djwong@kernel.org>
	Mon, 16 Aug 2021 19:09:30 +0000 (12:09 -0700)
fs/xfs/xfs_log.c		patch \| blob \| history
fs/xfs/xfs_log.h		patch \| blob \| history
fs/xfs/xfs_log_cil.c		patch \| blob \| history
fs/xfs/xfs_log_priv.h		patch \| blob \| history
fs/xfs/xfs_sysfs.c		patch \| blob \| history
fs/xfs/xfs_trace.c		patch \| blob \| history
fs/xfs/xfs_trans.c		patch \| blob \| history
fs/xfs/xfs_trans_ail.c		patch \| blob \| history