www.infradead.org Git - users/hch/xfsprogs.git/commitdiff
xfs: allow queued realtime intents to drain before scrubbing
author Darrick J. Wong <djwong@kernel.org>
Tue, 7 Mar 2023 03:56:00 +0000 (19:56 -0800)
committer Darrick J. Wong <djwong@kernel.org>
Wed, 22 Nov 2023 23:03:38 +0000 (15:03 -0800)
When a writer thread executes a chain of log intent items for the
realtime volume, the ILOCKs taken during each step are for each rt
metadata file, not the entire rt volume itself.  Although scrub takes
all rt metadata ILOCKs, this isn't sufficient to guard against scrub
checking the rt volume while that writer thread is in the middle of
finishing a chain because there's no higher level locking primitive
guarding the realtime volume.

When there's a collision, cross-referencing between data structures
(e.g. rtrmapbt and rtrefcountbt) yields false corruption events; if
repair is running, this results in incorrect repairs, which is
catastrophic.

Fix this by adding to the rtgroup structure the same drain that we use to
protect scrub against concurrent AG updates, but this time for the
realtime volume.

[Contains a few cleanups from hch]

Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
include/xfs_mount.h
libxfs/defer_item.c
libxfs/xfs_rtgroup.c
libxfs/xfs_rtgroup.h

index bb379b45a2e8926b0e3ac364a021e6e4d4301346..d52a2ec1efd3b425ff8aa1bd1309918884c824cd 100644 (file)
@@ -334,6 +334,18 @@ struct xfs_defer_drain { /* empty */ };
 static inline void xfs_perag_intent_hold(struct xfs_perag *pag) {}
 static inline void xfs_perag_intent_rele(struct xfs_perag *pag) {}
 
+struct xfs_rtgroup;
+
+#define xfs_rtgroup_intent_get(mp, rgno) \
+       xfs_rtgroup_get((mp), xfs_rtb_to_rgno((mp), (rgno)))
+#define xfs_rtgroup_intent_put(rtg)            xfs_rtgroup_put(rtg)
+
+static inline void xfs_rtgroup_intent_hold(struct xfs_rtgroup *rtg) { }
+static inline void xfs_rtgroup_intent_rele(struct xfs_rtgroup *rtg) { }
+
+#define xfs_drain_free(dr)             ((void)0)
+#define xfs_drain_init(dr)             ((void)0)
+
 static inline void libxfs_buftarg_drain(struct xfs_buftarg *btp)
 {
        cache_purge(btp->bcache);
index 25ed6a97f88fb893c1de9358460195dc91493770..9e521b497ed961ef1a2ab4d3a285aefba2d230f9 100644 (file)
@@ -97,14 +97,12 @@ xfs_extent_free_defer_add(
        else
                optype = XFS_DEFER_OPS_TYPE_FREE;
 
-       if (xfs_efi_is_realtime(xefi)) {
-               xfs_rgnumber_t          rgno;
-
-               rgno = xfs_rtb_to_rgno(mp, xefi->xefi_startblock);
-               xefi->xefi_rtg = xfs_rtgroup_get(mp, rgno);
-       } else {
-               xefi->xefi_pag = xfs_perag_intent_get(mp, xefi->xefi_startblock);
-       }
+       if (xfs_efi_is_realtime(xefi))
+               xefi->xefi_rtg = xfs_rtgroup_intent_get(mp,
+                                               xefi->xefi_startblock);
+       else
+               xefi->xefi_pag = xfs_perag_intent_get(mp,
+                                               xefi->xefi_startblock);
        *dfpp = xfs_defer_add(tp, optype, &xefi->xefi_list);
 }
 
@@ -204,7 +202,7 @@ xfs_rtextent_free_cancel_item(
 {
        struct xfs_extent_free_item     *xefi = xefi_entry(item);
 
-       xfs_rtgroup_put(xefi->xefi_rtg);
+       xfs_rtgroup_intent_put(xefi->xefi_rtg);
        kmem_cache_free(xfs_extfree_item_cache, xefi);
 }
 
@@ -337,13 +335,12 @@ xfs_rmap_defer_add(
         * section updates.
         */
        if (ri->ri_realtime) {
-               xfs_rgnumber_t  rgno;
-
-               rgno = xfs_rtb_to_rgno(mp, ri->ri_bmap.br_startblock);
-               ri->ri_rtg = xfs_rtgroup_get(mp, rgno);
+               ri->ri_rtg = xfs_rtgroup_intent_get(mp,
+                                               ri->ri_bmap.br_startblock);
                optype = XFS_DEFER_OPS_TYPE_RMAP_RT;
        } else {
-               ri->ri_pag = xfs_perag_intent_get(mp, ri->ri_bmap.br_startblock);
+               ri->ri_pag = xfs_perag_intent_get(mp,
+                                               ri->ri_bmap.br_startblock);
                optype = XFS_DEFER_OPS_TYPE_RMAP;
        }
        xfs_defer_add(tp, optype, &ri->ri_list);
@@ -444,7 +441,7 @@ xfs_rtrmap_update_cancel_item(
 {
        struct xfs_rmap_intent          *ri = ri_entry(item);
 
-       xfs_rtgroup_put(ri->ri_rtg);
+       xfs_rtgroup_intent_put(ri->ri_rtg);
        kmem_cache_free(xfs_rmap_intent_cache, ri);
 }
 
@@ -653,10 +650,8 @@ xfs_bmap_update_get_group(
 {
        if (xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork)) {
                if (xfs_has_rtgroups(mp)) {
-                       xfs_rgnumber_t  rgno;
-
-                       rgno = xfs_rtb_to_rgno(mp, bi->bi_bmap.br_startblock);
-                       bi->bi_rtg = xfs_rtgroup_get(mp, rgno);
+                       bi->bi_rtg = xfs_rtgroup_intent_get(mp,
+                                               bi->bi_bmap.br_startblock);
                } else {
                        bi->bi_rtg = NULL;
                }
@@ -692,8 +687,9 @@ xfs_bmap_update_put_group(
        struct xfs_bmap_intent  *bi)
 {
        if (xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork)) {
-               if (xfs_has_rtgroups(bi->bi_owner->i_mount))
-                       xfs_rtgroup_put(bi->bi_rtg);
+               if (xfs_has_rtgroups(bi->bi_owner->i_mount)) {
+                       xfs_rtgroup_intent_put(bi->bi_rtg);
+               }
                return;
        }
 
index 5f25ca0ffe68cc183fe8140d872c20adafc62a67..533f1bca4c697129e0a0e7cd7c2c1f74ee78a74d 100644 (file)
@@ -159,6 +159,7 @@ xfs_initialize_rtgroups(
                /* Place kernel structure only init below this point. */
                spin_lock_init(&rtg->rtg_state_lock);
                init_waitqueue_head(&rtg->rtg_active_wq);
+               xfs_defer_drain_init(&rtg->rtg_intents_drain);
 #endif /* __KERNEL__ */
 
                /* Active ref owned by mount indicates rtgroup is online. */
@@ -213,6 +214,7 @@ xfs_free_rtgroups(
                spin_unlock(&mp->m_rtgroup_lock);
                ASSERT(rtg);
                XFS_IS_CORRUPT(mp, atomic_read(&rtg->rtg_ref) != 0);
+               xfs_defer_drain_free(&rtg->rtg_intents_drain);
 
                /* drop the mount's active reference */
                xfs_rtgroup_rele(rtg);
index 81c1a0c249bb199815b615603930a1d367515445..5579c06fe0e96bd7d8dfa4bb1932c21a53a6bce4 100644 (file)
@@ -39,6 +39,15 @@ struct xfs_rtgroup {
 #ifdef __KERNEL__
        /* -- kernel only structures below this line -- */
        spinlock_t              rtg_state_lock;
+
+       /*
+        * We use xfs_defer_drain to track the number of deferred log intent
+        * items that have been queued (but not yet processed) so that waiters
+        * (e.g. scrub) will not lock resources when other threads are in the
+        * middle of processing a chain of intent items only to find momentary
+        * inconsistencies.
+        */
+       struct xfs_defer_drain  rtg_intents_drain;
 #endif /* __KERNEL__ */
 };