xfs: allow queued realtime intents to drain before scrubbing

author Darrick J. Wong <djwong@kernel.org>

Thu, 21 Nov 2024 00:20:33 +0000 (16:20 -0800)

committer Darrick J. Wong <djwong@kernel.org>

Mon, 23 Dec 2024 21:06:06 +0000 (13:06 -0800)
author Darrick J. Wong <djwong@kernel.org>
Thu, 21 Nov 2024 00:20:33 +0000 (16:20 -0800)
committer Darrick J. Wong <djwong@kernel.org>
Mon, 23 Dec 2024 21:06:06 +0000 (13:06 -0800)
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c

index 0d7ad692822d48cdbee051bfa802ace74e719de9..dd99366643f832d798ba76fa55122dfd70177800 100644 (file)
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -324,10 +324,15 @@ xchk_bmap_rt_iextent_xref(
                         irec->br_startoff, &error))
                 return;
  
-       xchk_rtgroup_lock(&info->sc->sr, XCHK_RTGLOCK_ALL);
+       error = xchk_rtgroup_lock(info->sc, &info->sc->sr, XCHK_RTGLOCK_ALL);
+       if (!xchk_fblock_process_error(info->sc, info->whichfork,
+                       irec->br_startoff, &error))
+               goto out_free;
+
         xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
                         irec->br_blockcount);
  
+out_free:
         xchk_rtgroup_free(info->sc, &info->sc->sr);
  }
  
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c

index 5cbd94b56582a47cf33668f78532f7b44d106466..613fb54e723edee5bf311a13f823a5ee174cc901 100644 (file)
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -719,13 +719,79 @@ xchk_rtgroup_init(
         return 0;
  }
  
-void
+/* Lock all the rt group metadata inode ILOCKs and wait for intents. */
+int
  xchk_rtgroup_lock(
+       struct xfs_scrub        *sc,
         struct xchk_rt          *sr,
         unsigned int            rtglock_flags)
  {
-       xfs_rtgroup_lock(sr->rtg, rtglock_flags);
+       int                     error = 0;
+
+       ASSERT(sr->rtg != NULL);
+
+       /*
+        * If we're /only/ locking the rtbitmap in shared mode, then we're
+        * obviously not trying to compare records in two metadata inodes.
+        * There's no need to drain intents here because the caller (most
+        * likely the rgsuper scanner) doesn't need that level of consistency.
+        */
+       if (rtglock_flags == XFS_RTGLOCK_BITMAP_SHARED) {
+               xfs_rtgroup_lock(sr->rtg, rtglock_flags);
+               sr->rtlock_flags = rtglock_flags;
+               return 0;
+       }
+
+       do {
+               if (xchk_should_terminate(sc, &error))
+                       return error;
+
+               xfs_rtgroup_lock(sr->rtg, rtglock_flags);
+
+               /*
+                * If we've grabbed a non-metadata file for scrubbing, we
+                * assume that holding its ILOCK will suffice to coordinate
+                * with any rt intent chains involving this inode.
+                */
+               if (sc->ip && !xfs_is_internal_inode(sc->ip))
+                       break;
+
+               /*
+                * Decide if the rt group is quiet enough for all metadata to
+                * be consistent with each other.  Regular file IO doesn't get
+                * to lock all the rt inodes at the same time, which means that
+                * there could be other threads in the middle of processing a
+                * chain of deferred ops.
+                *
+                * We just locked all the metadata inodes for this rt group;
+                * now take a look to see if there are any intents in progress.
+                * If there are, drop the rt group inode locks and wait for the
+                * intents to drain.  Since we hold the rt group inode locks
+                * for the duration of the scrub, this is the only time we have
+                * to sample the intents counter; any threads increasing it
+                * after this point can't possibly be in the middle of a chain
+                * of rt metadata updates.
+                *
+                * Obviously, this should be slanted against scrub and in favor
+                * of runtime threads.
+                */
+               if (!xfs_group_intent_busy(rtg_group(sr->rtg)))
+                       break;
+
+               xfs_rtgroup_unlock(sr->rtg, rtglock_flags);
+
+               if (!(sc->flags & XCHK_FSGATES_DRAIN))
+                       return -ECHRNG;
+               error = xfs_group_intent_drain(rtg_group(sr->rtg));
+               if (error) {
+                       if (error == -ERESTARTSYS)
+                               error = -EINTR;
+                       return error;
+               }
+       } while (1);
+
         sr->rtlock_flags = rtglock_flags;
+       return 0;
  }
  
  /*
@@ -1379,7 +1445,7 @@ xchk_fsgates_enable(
         trace_xchk_fsgates_enable(sc, scrub_fsgates);
  
         if (scrub_fsgates & XCHK_FSGATES_DRAIN)
-               xfs_drain_wait_enable();
+               xfs_defer_drain_wait_enable();
  
         if (scrub_fsgates & XCHK_FSGATES_QUOTA)
                 xfs_dqtrx_hook_enable();
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h

index 9ff3cafd867962755b97742a766be53109c7aaef..e734572a8dd6ec417143d69f90f25d56ab5ed9f8 100644 (file)
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -141,12 +141,13 @@ xchk_rtgroup_init_existing(
         return error == -ENOENT ? -EFSCORRUPTED : error;
  }
  
-void xchk_rtgroup_lock(struct xchk_rt *sr, unsigned int rtglock_flags);
+int xchk_rtgroup_lock(struct xfs_scrub *sc, struct xchk_rt *sr,
+               unsigned int rtglock_flags);
  void xchk_rtgroup_free(struct xfs_scrub *sc, struct xchk_rt *sr);
  #else
  # define xchk_rtgroup_init(sc, rgno, sr)               (-EFSCORRUPTED)
  # define xchk_rtgroup_init_existing(sc, rgno, sr)      (-EFSCORRUPTED)
-# define xchk_rtgroup_lock(sc, lockflags)              do { } while (0)
+# define xchk_rtgroup_lock(sc, sr, lockflags)          (-EFSCORRUPTED)
  # define xchk_rtgroup_free(sc, sr)                     do { } while (0)
  #endif /* CONFIG_XFS_RT */
  
diff --git a/fs/xfs/scrub/rgsuper.c b/fs/xfs/scrub/rgsuper.c

index 463b3573bb761b35a2a89100eb5185226080025a..e062c7d12565cd47638050bcb2f6736183c8d568 100644 (file)
--- a/fs/xfs/scrub/rgsuper.c
+++ b/fs/xfs/scrub/rgsuper.c
@@ -61,7 +61,9 @@ xchk_rgsuperblock(
         if (!xchk_xref_process_error(sc, 0, 0, &error))
                 return error;
  
-       xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP_SHARED);
+       error = xchk_rtgroup_lock(sc, &sc->sr, XFS_RTGLOCK_BITMAP_SHARED);
+       if (error)
+               return error;
  
         /*
          * Since we already validated the rt superblock at mount time, we don't
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c

index fb4970c877abd3bba7de1eee03e0e64544596b7b..819026ea2d741f7f9a79a29664fa79d89fbec175 100644 (file)
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -30,6 +30,9 @@ xchk_setup_rtbitmap(
         struct xchk_rtbitmap    *rtb;
         int                     error;
  
+       if (xchk_need_intent_drain(sc))
+               xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
+
         rtb = kzalloc(sizeof(struct xchk_rtbitmap), XCHK_GFP_FLAGS);
         if (!rtb)
                 return -ENOMEM;
@@ -57,12 +60,15 @@ xchk_setup_rtbitmap(
         if (error)
                 return error;
  
+       error = xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
+       if (error)
+               return error;
+
         /*
          * Now that we've locked the rtbitmap, we can't race with growfsrt
          * trying to expand the bitmap or change the size of the rt volume.
          * Hence it is safe to compute and check the geometry values.
          */
-       xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP);
         if (mp->m_sb.sb_rblocks) {
                 rtb->rextents = xfs_blen_to_rtbxlen(mp, mp->m_sb.sb_rblocks);
                 rtb->rextslog = xfs_compute_rextslog(rtb->rextents);
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c

index f1af5431b38856f844e126ac6824e8befc593d13..4ac679c1bd29cd81a84f0d27f8fe3d267a6576e4 100644 (file)
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -89,6 +89,10 @@ xchk_setup_rtsummary(
         if (error)
                 return error;
  
+       error = xchk_rtgroup_lock(sc, &sc->sr, XFS_RTGLOCK_BITMAP);
+       if (error)
+               return error;
+
         /*
          * Now that we've locked the rtbitmap and rtsummary, we can't race with
          * growfsrt trying to expand the summary or change the size of the rt
@@ -99,7 +103,6 @@ xchk_setup_rtsummary(
          * exclusively here.  If we ever start caring about running concurrent
          * fsmap with scrub this could be changed.
          */
-       xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP);
         if (mp->m_sb.sb_rblocks) {
                 rts->rextents = xfs_blen_to_rtbxlen(mp, mp->m_sb.sb_rblocks);
                 rts->rbmblocks = xfs_rtbitmap_blockcount(mp);
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c

index 950f5a58dcd967bc94023d736ecf0a6e83bc00b3..652d347cee9929053019b573b684b59d5a01b8cc 100644 (file)
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -164,7 +164,7 @@ xchk_fsgates_disable(
         trace_xchk_fsgates_disable(sc, sc->flags & XCHK_FSGATES_ALL);
  
         if (sc->flags & XCHK_FSGATES_DRAIN)
-               xfs_drain_wait_disable();
+               xfs_defer_drain_wait_disable();
  
         if (sc->flags & XCHK_FSGATES_QUOTA)
                 xfs_dqtrx_hook_disable();
diff --git a/fs/xfs/xfs_drain.c b/fs/xfs/xfs_drain.c

index 5ede81fadbd8ca6756ae8c163d590ad1657e6c60..fa5f31931efdb573104c7a3f9aa707f49c60c765 100644 (file)
--- a/fs/xfs/xfs_drain.c
+++ b/fs/xfs/xfs_drain.c
@@ -13,28 +13,28 @@
  #include "xfs_trace.h"
  
  /*
- * Use a static key here to reduce the overhead of xfs_drain_rele.  If the
- * compiler supports jump labels, the static branch will be replaced by a nop
- * sled when there are no xfs_drain_wait callers.  Online fsck is currently
- * the only caller, so this is a reasonable tradeoff.
+ * Use a static key here to reduce the overhead of xfs_defer_drain_rele.  If
+ * the compiler supports jump labels, the static branch will be replaced by a
+ * nop sled when there are no xfs_defer_drain_wait callers.  Online fsck is
+ * currently the only caller, so this is a reasonable tradeoff.
   *
   * Note: Patching the kernel code requires taking the cpu hotplug lock.  Other
   * parts of the kernel allocate memory with that lock held, which means that
   * XFS callers cannot hold any locks that might be used by memory reclaim or
   * writeback when calling the static_branch_{inc,dec} functions.
   */
-static DEFINE_STATIC_KEY_FALSE(xfs_drain_waiter_gate);
+static DEFINE_STATIC_KEY_FALSE(xfs_defer_drain_waiter_gate);
  
  void
-xfs_drain_wait_disable(void)
+xfs_defer_drain_wait_disable(void)
  {
-       static_branch_dec(&xfs_drain_waiter_gate);
+       static_branch_dec(&xfs_defer_drain_waiter_gate);
  }
  
  void
-xfs_drain_wait_enable(void)
+xfs_defer_drain_wait_enable(void)
  {
-       static_branch_inc(&xfs_drain_waiter_gate);
+       static_branch_inc(&xfs_defer_drain_waiter_gate);
  }
  
  void
@@ -71,7 +71,7 @@ static inline bool has_waiters(struct wait_queue_head *wq_head)
  static inline void xfs_defer_drain_rele(struct xfs_defer_drain *dr)
  {
         if (atomic_dec_and_test(&dr->dr_count) &&
-           static_branch_unlikely(&xfs_drain_waiter_gate) &&
+           static_branch_unlikely(&xfs_defer_drain_waiter_gate) &&
             has_waiters(&dr->dr_waiters))
                 wake_up(&dr->dr_waiters);
  }
diff --git a/fs/xfs/xfs_drain.h b/fs/xfs/xfs_drain.h

index efcf88df9a5e70e5a493341898032777d0729cea..4d446dbf65e519231b5a2b2fa063741e8e4225df 100644 (file)
--- a/fs/xfs/xfs_drain.h
+++ b/fs/xfs/xfs_drain.h
@@ -26,8 +26,8 @@ struct xfs_defer_drain {
  void xfs_defer_drain_init(struct xfs_defer_drain *dr);
  void xfs_defer_drain_free(struct xfs_defer_drain *dr);
  
-void xfs_drain_wait_disable(void);
-void xfs_drain_wait_enable(void);
+void xfs_defer_drain_wait_disable(void);
+void xfs_defer_drain_wait_enable(void);
  
  /*
   * Deferred Work Intent Drains
@@ -61,6 +61,9 @@ void xfs_drain_wait_enable(void);
   * All functions that create work items must increment the intent counter as
   * soon as the item is added to the transaction and cannot drop the counter
   * until the item is finished or cancelled.
+ *
+ * The same principles apply to realtime groups because the rt metadata inode
+ * ILOCKs are not held across transaction rolls.
   */
  struct xfs_group *xfs_group_intent_get(struct xfs_mount *mp,
                 xfs_fsblock_t fsbno, enum xfs_group_type type);
author	Darrick J. Wong <djwong@kernel.org>
	Thu, 21 Nov 2024 00:20:33 +0000 (16:20 -0800)
committer	Darrick J. Wong <djwong@kernel.org>
	Mon, 23 Dec 2024 21:06:06 +0000 (13:06 -0800)
fs/xfs/scrub/bmap.c		patch \| blob \| history
fs/xfs/scrub/common.c		patch \| blob \| history
fs/xfs/scrub/common.h		patch \| blob \| history
fs/xfs/scrub/rgsuper.c		patch \| blob \| history
fs/xfs/scrub/rtbitmap.c		patch \| blob \| history
fs/xfs/scrub/rtsummary.c		patch \| blob \| history
fs/xfs/scrub/scrub.c		patch \| blob \| history
fs/xfs/xfs_drain.c		patch \| blob \| history
fs/xfs/xfs_drain.h		patch \| blob \| history