www.infradead.org Git - users/hch/xfsprogs.git/commitdiff
xfs: allow queued realtime intents to drain before scrubbing
author: Darrick J. Wong <djwong@kernel.org>
Wed, 3 Jul 2024 21:22:19 +0000 (14:22 -0700)
committer: Darrick J. Wong <djwong@kernel.org>
Tue, 9 Jul 2024 22:37:19 +0000 (15:37 -0700)
When a writer thread executes a chain of log intent items for the
realtime volume, the ILOCKs taken during each step are for each rt
metadata file, not the entire rt volume itself.  Although scrub takes
all rt metadata ILOCKs, this isn't sufficient to guard against scrub
checking the rt volume while that writer thread is in the middle of
finishing a chain because there's no higher level locking primitive
guarding the realtime volume.

When there's a collision, cross-referencing between data structures
(e.g. rtrmapbt and rtrefcountbt) yields false corruption events; if
repair is running, this results in incorrect repairs, which is
catastrophic.

Fix this by adding to the mount structure the same drain that we use to
protect scrub against concurrent AG updates, but this time for the
realtime volume.

[Contains a few cleanups from hch]

Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
include/xfs_mount.h
libxfs/defer_item.c
libxfs/xfs_rtgroup.c
libxfs/xfs_rtgroup.h

index 5515c512cbc19d04961c9a7c56f2fe2e191c38aa..4e39963fb165e40d53f5a32473a7f1f330915415 100644 (file)
@@ -331,6 +331,18 @@ struct xfs_defer_drain { /* empty */ };
 static inline void xfs_perag_intent_hold(struct xfs_perag *pag) {}
 static inline void xfs_perag_intent_rele(struct xfs_perag *pag) {}
 
+struct xfs_rtgroup;
+
+#define xfs_rtgroup_intent_get(mp, rgno) \
+       xfs_rtgroup_get((mp), xfs_rtb_to_rgno((mp), (rgno)))
+#define xfs_rtgroup_intent_put(rtg)            xfs_rtgroup_put(rtg)
+
+static inline void xfs_rtgroup_intent_hold(struct xfs_rtgroup *rtg) { }
+static inline void xfs_rtgroup_intent_rele(struct xfs_rtgroup *rtg) { }
+
+#define xfs_drain_free(dr)             ((void)0)
+#define xfs_drain_init(dr)             ((void)0)
+
 static inline void libxfs_buftarg_drain(struct xfs_buftarg *btp)
 {
        cache_purge(btp->bcache);
index d6a0f7276a5bf1754fb43e7cd10003e43bab5dfa..f48c345cddbe3587b7561822c9db1fefd303af16 100644 (file)
@@ -87,11 +87,8 @@ xfs_extent_free_defer_add(
        struct xfs_mount                *mp = tp->t_mountp;
 
        if (xfs_efi_is_realtime(xefi)) {
-               xfs_rgnumber_t          rgno;
-
-               rgno = xfs_rtb_to_rgno(mp, xefi->xefi_startblock);
-               xefi->xefi_rtg = xfs_rtgroup_get(mp, rgno);
-
+               xefi->xefi_rtg = xfs_rtgroup_intent_get(mp,
+                                               xefi->xefi_startblock);
                *dfpp = xfs_defer_add(tp, &xefi->xefi_list,
                                &xfs_rtextent_free_defer_type);
                return;
@@ -203,7 +200,7 @@ xfs_rtextent_free_cancel_item(
 {
        struct xfs_extent_free_item     *xefi = xefi_entry(item);
 
-       xfs_rtgroup_put(xefi->xefi_rtg);
+       xfs_rtgroup_intent_put(xefi->xefi_rtg);
        kmem_cache_free(xfs_extfree_item_cache, xefi);
 }
 
@@ -337,13 +334,12 @@ xfs_rmap_defer_add(
         * section updates.
         */
        if (ri->ri_realtime) {
-               xfs_rgnumber_t  rgno;
-
-               rgno = xfs_rtb_to_rgno(mp, ri->ri_bmap.br_startblock);
-               ri->ri_rtg = xfs_rtgroup_get(mp, rgno);
+               ri->ri_rtg = xfs_rtgroup_intent_get(mp,
+                                               ri->ri_bmap.br_startblock);
                xfs_defer_add(tp, &ri->ri_list, &xfs_rtrmap_update_defer_type);
        } else {
-               ri->ri_pag = xfs_perag_intent_get(mp, ri->ri_bmap.br_startblock);
+               ri->ri_pag = xfs_perag_intent_get(mp,
+                                               ri->ri_bmap.br_startblock);
                xfs_defer_add(tp, &ri->ri_list, &xfs_rmap_update_defer_type);
        }
 }
@@ -444,7 +440,7 @@ xfs_rtrmap_update_cancel_item(
 {
        struct xfs_rmap_intent          *ri = ri_entry(item);
 
-       xfs_rtgroup_put(ri->ri_rtg);
+       xfs_rtgroup_intent_put(ri->ri_rtg);
        kmem_cache_free(xfs_rmap_intent_cache, ri);
 }
 
@@ -667,10 +663,8 @@ xfs_bmap_update_get_group(
 {
        if (xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork)) {
                if (xfs_has_rtgroups(mp)) {
-                       xfs_rgnumber_t  rgno;
-
-                       rgno = xfs_rtb_to_rgno(mp, bi->bi_bmap.br_startblock);
-                       bi->bi_rtg = xfs_rtgroup_get(mp, rgno);
+                       bi->bi_rtg = xfs_rtgroup_intent_get(mp,
+                                               bi->bi_bmap.br_startblock);
                } else {
                        bi->bi_rtg = NULL;
                }
@@ -706,8 +700,9 @@ xfs_bmap_update_put_group(
        struct xfs_bmap_intent  *bi)
 {
        if (xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork)) {
-               if (xfs_has_rtgroups(bi->bi_owner->i_mount))
-                       xfs_rtgroup_put(bi->bi_rtg);
+               if (xfs_has_rtgroups(bi->bi_owner->i_mount)) {
+                       xfs_rtgroup_intent_put(bi->bi_rtg);
+               }
                return;
        }
 
index d94ca72b2e8ff5afa0fb043059d42cdc57843287..a7a227c2a20b1314f12c4bfaa29ef277eae813bb 100644 (file)
@@ -163,6 +163,7 @@ xfs_initialize_rtgroups(
                init_waitqueue_head(&rtg->rtg_active_wq);
                memset(&rtg->lock_class, 0, sizeof(rtg->lock_class));
                lockdep_register_key(&rtg->lock_class);
+               xfs_defer_drain_init(&rtg->rtg_intents_drain);
 #endif /* __KERNEL__ */
 
                /* Active ref owned by mount indicates rtgroup is online. */
@@ -204,6 +205,7 @@ xfs_free_unused_rtgroup_range(
                        break;
 #ifdef __KERNEL__
                lockdep_unregister_key(&rtg->lock_class);
+               xfs_defer_drain_free(&rtg->rtg_intents_drain);
 #endif
                kfree(rtg);
        }
@@ -240,6 +242,7 @@ xfs_free_rtgroups(
                XFS_IS_CORRUPT(mp, atomic_read(&rtg->rtg_ref) != 0);
 #ifdef __KERNEL__
                lockdep_unregister_key(&rtg->lock_class);
+               xfs_defer_drain_free(&rtg->rtg_intents_drain);
 #endif
 
                /* drop the mount's active reference */
index ecafe96af9376b3d592be50facf5dd50270f9668..4f8c7fff7829040d4811765470784d098c927376 100644 (file)
@@ -41,6 +41,15 @@ struct xfs_rtgroup {
        spinlock_t              rtg_state_lock;
 
        struct lock_class_key   lock_class;
+
+       /*
+        * We use xfs_drain to track the number of deferred log intent items
+        * that have been queued (but not yet processed) so that waiters (e.g.
+        * scrub) will not lock resources when other threads are in the middle
+        * of processing a chain of intent items only to find momentary
+        * inconsistencies.
+        */
+       struct xfs_defer_drain  rtg_intents_drain;
 #endif /* __KERNEL__ */
 };