xfs: create a shadow rmap btree during realtime rmap repair
author     Darrick J. Wong <djwong@kernel.org>
           Mon, 23 Sep 2024 20:42:18 +0000 (13:42 -0700)
committer  Christoph Hellwig <hch@lst.de>
           Wed, 9 Oct 2024 13:55:47 +0000 (15:55 +0200)
Create an in-memory btree of rmap records instead of an array.  This
enables us to do live record collection instead of freezing the fs.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
fs/xfs/libxfs/xfs_btree_mem.c
fs/xfs/libxfs/xfs_rmap.c
fs/xfs/libxfs/xfs_rtrmap_btree.c
fs/xfs/libxfs/xfs_rtrmap_btree.h
fs/xfs/libxfs/xfs_shared.h
fs/xfs/scrub/rtrmap_repair.c
fs/xfs/xfs_stats.c
fs/xfs/xfs_stats.h
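
In short: rather than packing each observed rmap into an xfarray entry, the repair code now
inserts it straight into a shadow (in-memory, xfile-backed) rtrmap btree, which can also be
kept up to date while the inode scan is live.  A condensed sketch of the new staging sequence,
assembled from the rtrmap_repair.c hunks below (the wrapper function and the trimmed error
handling are illustrative only and not part of the patch; this is not a standalone compile
unit outside the kernel tree):

/* Illustrative only: condensed from the hunks below. */
static int
xrep_rtrmap_stage_sketch(
	struct xfs_scrub	*sc,
	struct xrep_rtrmap	*rr,
	struct xfs_rmap_irec	*rmap)
{
	struct xfs_btree_cur	*mcur;
	int			error;

	/* Setup: back the shadow rtrmap btree with the scrub xfile buftarg. */
	error = xfs_rtrmapbt_mem_init(sc->mp, &rr->rtrmap_btree, sc->xmbtp,
			rtg_rgno(sc->sr.rtg));
	if (error)
		return error;

	/* Stash one observed record through an in-memory btree cursor. */
	mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, sc->tp, &rr->rtrmap_btree);
	error = xfs_rmap_map_raw(mcur, rmap);
	xfs_btree_del_cursor(mcur, error);

	/* Commit (or cancel) the shadow btree's dirty in-memory buffers. */
	if (error)
		xfbtree_trans_cancel(&rr->rtrmap_btree, sc->tp);
	else
		error = xfbtree_trans_commit(&rr->rtrmap_btree, sc->tp);

	/* Teardown once the new on-disk rtrmapbt has been bulk loaded. */
	xfbtree_destroy(&rr->rtrmap_btree);
	return error;
}

The full hunks follow.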

diff --git a/fs/xfs/libxfs/xfs_btree_mem.c b/fs/xfs/libxfs/xfs_btree_mem.c
index df3d613675a15aac1413e7a0a1e37b7a5b2eff94..f2f7b4305413e9c45686f8658ae5834a57e5e6fe 100644
--- a/fs/xfs/libxfs/xfs_btree_mem.c
+++ b/fs/xfs/libxfs/xfs_btree_mem.c
@@ -18,6 +18,7 @@
 #include "xfs_ag.h"
 #include "xfs_buf_item.h"
 #include "xfs_trace.h"
+#include "xfs_rtgroup.h"
 
 /* Set the root of an in-memory btree. */
 void
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 32068c1182e317130ed0320f7df03304108363e6..82637a12a0239d04a5d013a01b39b9bd289345c0 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -329,7 +329,8 @@ xfs_rmap_check_btrec(
        struct xfs_btree_cur            *cur,
        const struct xfs_rmap_irec      *irec)
 {
-       if (xfs_btree_is_rtrmap(cur->bc_ops))
+       if (xfs_btree_is_rtrmap(cur->bc_ops) ||
+           xfs_btree_is_mem_rtrmap(cur->bc_ops))
                return xfs_rtrmap_check_irec(to_rtg(cur->bc_group), irec);
        return xfs_rmap_check_irec(to_perag(cur->bc_group), irec);
 }
diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.c b/fs/xfs/libxfs/xfs_rtrmap_btree.c
index 9c3911ed16a8cef783bf5ffca6ff9aa24462e601..ca1fd1d9eab8cb9987d88ffc34795be78ed4b908 100644
--- a/fs/xfs/libxfs/xfs_rtrmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rtrmap_btree.c
@@ -28,6 +28,8 @@
 #include "xfs_rtgroup.h"
 #include "xfs_bmap.h"
 #include "xfs_health.h"
+#include "xfs_buf_mem.h"
+#include "xfs_btree_mem.h"
 
 static struct kmem_cache       *xfs_rtrmapbt_cur_cache;
 
@@ -544,6 +546,121 @@ xfs_rtrmapbt_init_cursor(
        return cur;
 }
 
+#ifdef CONFIG_XFS_BTREE_IN_MEM
+/*
+ * Validate an in-memory realtime rmap btree block.  Callers are allowed to
+ * generate an in-memory btree even if the ondisk feature is not enabled.
+ */
+static xfs_failaddr_t
+xfs_rtrmapbt_mem_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_mount;
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       xfs_failaddr_t          fa;
+       unsigned int            level;
+       unsigned int            maxrecs;
+
+       if (!xfs_verify_magic(bp, block->bb_magic))
+               return __this_address;
+
+       fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
+       if (fa)
+               return fa;
+
+       level = be16_to_cpu(block->bb_level);
+       if (xfs_has_rmapbt(mp)) {
+               if (level >= mp->m_rtrmap_maxlevels)
+                       return __this_address;
+       } else {
+               if (level >= xfs_rtrmapbt_maxlevels_ondisk())
+                       return __this_address;
+       }
+
+       maxrecs = xfs_rtrmapbt_maxrecs(mp, XFBNO_BLOCKSIZE, level == 0);
+       return xfs_btree_memblock_verify(bp, maxrecs);
+}
+
+static void
+xfs_rtrmapbt_mem_rw_verify(
+       struct xfs_buf  *bp)
+{
+       xfs_failaddr_t  fa = xfs_rtrmapbt_mem_verify(bp);
+
+       if (fa)
+               xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+}
+
+/* skip crc checks on in-memory btrees to save time */
+static const struct xfs_buf_ops xfs_rtrmapbt_mem_buf_ops = {
+       .name                   = "xfs_rtrmapbt_mem",
+       .magic                  = { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) },
+       .verify_read            = xfs_rtrmapbt_mem_rw_verify,
+       .verify_write           = xfs_rtrmapbt_mem_rw_verify,
+       .verify_struct          = xfs_rtrmapbt_mem_verify,
+};
+
+const struct xfs_btree_ops xfs_rtrmapbt_mem_ops = {
+       .type                   = XFS_BTREE_TYPE_MEM,
+       .geom_flags             = XFS_BTGEO_OVERLAPPING,
+
+       .rec_len                = sizeof(struct xfs_rmap_rec),
+       /* Overlapping btree; 2 keys per pointer. */
+       .key_len                = 2 * sizeof(struct xfs_rmap_key),
+       .ptr_len                = XFS_BTREE_LONG_PTR_LEN,
+
+       .lru_refs               = XFS_RMAP_BTREE_REF,
+       .statoff                = XFS_STATS_CALC_INDEX(xs_rtrmap_mem_2),
+
+       .dup_cursor             = xfbtree_dup_cursor,
+       .set_root               = xfbtree_set_root,
+       .alloc_block            = xfbtree_alloc_block,
+       .free_block             = xfbtree_free_block,
+       .get_minrecs            = xfbtree_get_minrecs,
+       .get_maxrecs            = xfbtree_get_maxrecs,
+       .init_key_from_rec      = xfs_rtrmapbt_init_key_from_rec,
+       .init_high_key_from_rec = xfs_rtrmapbt_init_high_key_from_rec,
+       .init_rec_from_cur      = xfs_rtrmapbt_init_rec_from_cur,
+       .init_ptr_from_cur      = xfbtree_init_ptr_from_cur,
+       .key_diff               = xfs_rtrmapbt_key_diff,
+       .buf_ops                = &xfs_rtrmapbt_mem_buf_ops,
+       .diff_two_keys          = xfs_rtrmapbt_diff_two_keys,
+       .keys_inorder           = xfs_rtrmapbt_keys_inorder,
+       .recs_inorder           = xfs_rtrmapbt_recs_inorder,
+       .keys_contiguous        = xfs_rtrmapbt_keys_contiguous,
+};
+
+/* Create a cursor for an in-memory btree. */
+struct xfs_btree_cur *
+xfs_rtrmapbt_mem_cursor(
+       struct xfs_rtgroup      *rtg,
+       struct xfs_trans        *tp,
+       struct xfbtree          *xfbt)
+{
+       struct xfs_mount        *mp = rtg_mount(rtg);
+       struct xfs_btree_cur    *cur;
+
+       cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_mem_ops,
+                       mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache);
+       cur->bc_mem.xfbtree = xfbt;
+       cur->bc_nlevels = xfbt->nlevels;
+       cur->bc_group = xfs_group_hold(&rtg->rtg_group);
+       return cur;
+}
+
+/* Create an in-memory realtime rmap btree. */
+int
+xfs_rtrmapbt_mem_init(
+       struct xfs_mount        *mp,
+       struct xfbtree          *xfbt,
+       struct xfs_buftarg      *btp,
+       xfs_rgnumber_t          rgno)
+{
+       xfbt->owner = rgno;
+       return xfbtree_init(mp, xfbt, btp, &xfs_rtrmapbt_mem_ops);
+}
+#endif /* CONFIG_XFS_BTREE_IN_MEM */
+
 /*
  * Install a new rt reverse mapping btree root.  Caller is responsible for
  * invalidating and freeing the old btree blocks.
diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.h b/fs/xfs/libxfs/xfs_rtrmap_btree.h
index 6e3dab8c44f7c25949b8f7950fc4fa7a6caa0b5e..6a2d432b55ad78ecf238f2295189a805af3cd525 100644
--- a/fs/xfs/libxfs/xfs_rtrmap_btree.h
+++ b/fs/xfs/libxfs/xfs_rtrmap_btree.h
@@ -11,6 +11,7 @@ struct xfs_btree_cur;
 struct xfs_mount;
 struct xbtree_ifakeroot;
 struct xfs_rtgroup;
+struct xfbtree;
 
 /* rmaps only exist on crc enabled filesystems */
 #define XFS_RTRMAP_BLOCK_LEN   XFS_BTREE_LBLOCK_CRC_LEN
@@ -201,4 +202,9 @@ int xfs_rtrmapbt_init_rtsb(struct xfs_mount *mp, struct xfs_rtgroup *rtg,
 unsigned long long xfs_rtrmapbt_calc_size(struct xfs_mount *mp,
                unsigned long long len);
 
+struct xfs_btree_cur *xfs_rtrmapbt_mem_cursor(struct xfs_rtgroup *rtg,
+               struct xfs_trans *tp, struct xfbtree *xfbtree);
+int xfs_rtrmapbt_mem_init(struct xfs_mount *mp, struct xfbtree *xfbtree,
+               struct xfs_buftarg *btp, xfs_rgnumber_t rgno);
+
 #endif /* __XFS_RTRMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index da23dac22c3f08238c99f037b08cbb5ab0a99d52..960716c387cc2baf5bd6d665ed19cee5470fb362 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -57,6 +57,7 @@ extern const struct xfs_btree_ops xfs_refcountbt_ops;
 extern const struct xfs_btree_ops xfs_rmapbt_ops;
 extern const struct xfs_btree_ops xfs_rmapbt_mem_ops;
 extern const struct xfs_btree_ops xfs_rtrmapbt_ops;
+extern const struct xfs_btree_ops xfs_rtrmapbt_mem_ops;
 
 static inline bool xfs_btree_is_bno(const struct xfs_btree_ops *ops)
 {
@@ -98,8 +99,14 @@ static inline bool xfs_btree_is_mem_rmap(const struct xfs_btree_ops *ops)
 {
        return ops == &xfs_rmapbt_mem_ops;
 }
+
+static inline bool xfs_btree_is_mem_rtrmap(const struct xfs_btree_ops *ops)
+{
+       return ops == &xfs_rtrmapbt_mem_ops;
+}
 #else
 # define xfs_btree_is_mem_rmap(...)    (false)
+# define xfs_btree_is_mem_rtrmap(...)  (false)
 #endif
 
 static inline bool xfs_btree_is_rtrmap(const struct xfs_btree_ops *ops)
diff --git a/fs/xfs/scrub/rtrmap_repair.c b/fs/xfs/scrub/rtrmap_repair.c
index 2434d9c30bd2646b0b2b18d4ca35c9ad2392873e..ab9b5ae49d7927c300c6b72266082ab8419e793c 100644
--- a/fs/xfs/scrub/rtrmap_repair.c
+++ b/fs/xfs/scrub/rtrmap_repair.c
@@ -12,6 +12,8 @@
 #include "xfs_defer.h"
 #include "xfs_btree.h"
 #include "xfs_btree_staging.h"
+#include "xfs_buf_mem.h"
+#include "xfs_btree_mem.h"
 #include "xfs_bit.h"
 #include "xfs_log_format.h"
 #include "xfs_trans.h"
  * We use the 'xrep_rtrmap' prefix for all the rmap functions.
  */
 
-/*
- * Packed rmap record.  The UNWRITTEN flags are hidden in the upper bits of
- * offset, just like the on-disk record.
- */
-struct xrep_rtrmap_extent {
-       xfs_rgblock_t   startblock;
-       xfs_extlen_t    blockcount;
-       uint64_t        owner;
-       uint64_t        offset;
-} __packed;
-
 /* Context for collecting rmaps */
 struct xrep_rtrmap {
        /* new rtrmapbt information */
        struct xrep_newbt       new_btree;
 
        /* rmap records generated from primary metadata */
-       struct xfarray          *rtrmap_records;
+       struct xfbtree          rtrmap_btree;
 
        struct xfs_scrub        *sc;
 
@@ -91,8 +82,11 @@ struct xrep_rtrmap {
        /* inode scan cursor */
        struct xchk_iscan       iscan;
 
-       /* get_records()'s position in the free space record array. */
-       xfarray_idx_t           array_cur;
+       /* in-memory btree cursor for the ->get_blocks walk */
+       struct xfs_btree_cur    *mcur;
+
+       /* Number of records we're staging in the new btree. */
+       uint64_t                nr_records;
 };
 
 /* Set us up to repair rt reverse mapping btrees. */
@@ -101,6 +95,14 @@ xrep_setup_rtrmapbt(
        struct xfs_scrub        *sc)
 {
        struct xrep_rtrmap      *rr;
+       char                    *descr;
+       int                     error;
+
+       descr = xchk_xfile_rtgroup_descr(sc, "reverse mapping records");
+       error = xrep_setup_xfbtree(sc, descr);
+       kfree(descr);
+       if (error)
+               return error;
 
        rr = kzalloc(sizeof(struct xrep_rtrmap), XCHK_GFP_FLAGS);
        if (!rr)
@@ -135,11 +137,6 @@ xrep_rtrmap_stash(
        uint64_t                offset,
        unsigned int            flags)
 {
-       struct xrep_rtrmap_extent       rre = {
-               .startblock     = startblock,
-               .blockcount     = blockcount,
-               .owner          = owner,
-       };
        struct xfs_rmap_irec    rmap = {
                .rm_startblock  = startblock,
                .rm_blockcount  = blockcount,
@@ -148,6 +145,7 @@ xrep_rtrmap_stash(
                .rm_flags       = flags,
        };
        struct xfs_scrub        *sc = rr->sc;
+       struct xfs_btree_cur    *mcur;
        int                     error = 0;
 
        if (xchk_should_terminate(sc, &error))
@@ -155,8 +153,18 @@ xrep_rtrmap_stash(
 
        trace_xrep_rtrmap_found(sc->mp, &rmap);
 
-       rre.offset = xfs_rmap_irec_offset_pack(&rmap);
-       return xfarray_append(rr->rtrmap_records, &rre);
+       /* Add entry to in-memory btree. */
+       mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, sc->tp, &rr->rtrmap_btree);
+       error = xfs_rmap_map_raw(mcur, &rmap);
+       xfs_btree_del_cursor(mcur, error);
+       if (error)
+               goto out_cancel;
+
+       return xfbtree_trans_commit(&rr->rtrmap_btree, sc->tp);
+
+out_cancel:
+       xfbtree_trans_cancel(&rr->rtrmap_btree, sc->tp);
+       return error;
 }
 
 /* Finding all file and bmbt extents. */
@@ -395,6 +403,24 @@ xrep_rtrmap_scan_ag(
        return error;
 }
 
+/* Count and check all collected records. */
+STATIC int
+xrep_rtrmap_check_record(
+       struct xfs_btree_cur            *cur,
+       const struct xfs_rmap_irec      *rec,
+       void                            *priv)
+{
+       struct xrep_rtrmap              *rr = priv;
+       int                             error;
+
+       error = xrep_rtrmap_check_mapping(rr->sc, rec);
+       if (error)
+               return error;
+
+       rr->nr_records++;
+       return 0;
+}
+
 /* Generate all the reverse-mappings for the realtime device. */
 STATIC int
 xrep_rtrmap_find_rmaps(
@@ -403,6 +429,7 @@ xrep_rtrmap_find_rmaps(
        struct xfs_scrub        *sc = rr->sc;
        struct xfs_perag        *pag = NULL;
        struct xfs_inode        *ip;
+       struct xfs_btree_cur    *mcur;
        int                     error;
 
        /* Generate rmaps for the realtime superblock */
@@ -468,7 +495,19 @@ xrep_rtrmap_find_rmaps(
                }
        }
 
-       return 0;
+       /*
+        * Now that we have everything locked again, we need to count the
+        * number of rmap records stashed in the btree.  This should reflect
+        * all actively-owned rt files in the filesystem.  At the same time,
+        * check all our records before we start building a new btree, which
+        * requires the rtbitmap lock.
+        */
+       mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, NULL, &rr->rtrmap_btree);
+       rr->nr_records = 0;
+       error = xfs_rmap_query_all(mcur, xrep_rtrmap_check_record, rr);
+       xfs_btree_del_cursor(mcur, error);
+
+       return error;
 }
 
 /* Building the new rtrmap btree. */
@@ -482,29 +521,25 @@ xrep_rtrmap_get_records(
        unsigned int                    nr_wanted,
        void                            *priv)
 {
-       struct xrep_rtrmap_extent       rec;
-       struct xfs_rmap_irec            *irec = &cur->bc_rec.r;
        struct xrep_rtrmap              *rr = priv;
        union xfs_btree_rec             *block_rec;
        unsigned int                    loaded;
        int                             error;
 
        for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
-               error = xfarray_load_next(rr->rtrmap_records, &rr->array_cur,
-                               &rec);
+               int                     stat = 0;
+
+               error = xfs_btree_increment(rr->mcur, 0, &stat);
                if (error)
                        return error;
-
-               irec->rm_startblock = rec.startblock;
-               irec->rm_blockcount = rec.blockcount;
-               irec->rm_owner = rec.owner;
-
-               if (xfs_rmap_irec_offset_unpack(rec.offset, irec) != NULL)
+               if (!stat)
                        return -EFSCORRUPTED;
 
-               error = xrep_rtrmap_check_mapping(rr->sc, irec);
+               error = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &stat);
                if (error)
                        return error;
+               if (!stat)
+                       return -EFSCORRUPTED;
 
                block_rec = xfs_btree_rec_addr(cur, idx, block);
                cur->bc_ops->init_rec_from_cur(cur, block_rec);
@@ -549,7 +584,6 @@ xrep_rtrmap_build_new_tree(
        struct xfs_scrub        *sc = rr->sc;
        struct xfs_rtgroup      *rtg = sc->sr.rtg;
        struct xfs_btree_cur    *rmap_cur;
-       uint64_t                nr_records;
        int                     error;
 
        /*
@@ -569,11 +603,9 @@ xrep_rtrmap_build_new_tree(
        rmap_cur = xfs_rtrmapbt_init_cursor(NULL, rtg);
        xfs_btree_stage_ifakeroot(rmap_cur, &rr->new_btree.ifake);
 
-       nr_records = xfarray_length(rr->rtrmap_records);
-
        /* Compute how many blocks we'll need for the rmaps collected. */
        error = xfs_btree_bload_compute_geometry(rmap_cur,
-                       &rr->new_btree.bload, nr_records);
+                       &rr->new_btree.bload, rr->nr_records);
        if (error)
                goto err_cur;
 
@@ -600,12 +632,20 @@ xrep_rtrmap_build_new_tree(
        if (error)
                goto err_cur;
 
+       /*
+        * Create a cursor to the in-memory btree so that we can bulk load the
+        * new btree.
+        */
+       rr->mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, NULL, &rr->rtrmap_btree);
+       error = xfs_btree_goto_left_edge(rr->mcur);
+       if (error)
+               goto err_mcur;
+
        /* Add all observed rmap records. */
        rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_RMAP;
-       rr->array_cur = XFARRAY_CURSOR_INIT;
        error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr);
        if (error)
-               goto err_cur;
+               goto err_mcur;
 
        /*
         * Install the new rtrmap btree in the inode.  After this point the old
@@ -615,6 +655,14 @@ xrep_rtrmap_build_new_tree(
        xfs_rtrmapbt_commit_staged_btree(rmap_cur, sc->tp);
        xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks);
        xfs_btree_del_cursor(rmap_cur, 0);
+       xfs_btree_del_cursor(rr->mcur, 0);
+       rr->mcur = NULL;
+
+       /*
+        * Now that we've written the new btree to disk, we don't need to keep
+        * updating the in-memory btree.  Abort the scan to stop live updates.
+        */
+       xchk_iscan_abort(&rr->iscan);
 
        /* Dispose of any unused blocks and the accounting information. */
        error = xrep_newbt_commit(&rr->new_btree);
@@ -623,6 +671,8 @@ xrep_rtrmap_build_new_tree(
 
        return xrep_roll_trans(sc);
 
+err_mcur:
+       xfs_btree_del_cursor(rr->mcur, error);
 err_cur:
        xfs_btree_del_cursor(rmap_cur, error);
        xrep_newbt_cancel(&rr->new_btree);
@@ -659,16 +709,13 @@ xrep_rtrmap_setup_scan(
        struct xrep_rtrmap      *rr)
 {
        struct xfs_scrub        *sc = rr->sc;
-       char                    *descr;
        int                     error;
 
        xfsb_bitmap_init(&rr->old_rtrmapbt_blocks);
 
        /* Set up some storage */
-       descr = xchk_xfile_rtgroup_descr(sc, "reverse mapping records");
-       error = xfarray_create(descr, 0, sizeof(struct xrep_rtrmap_extent),
-                       &rr->rtrmap_records);
-       kfree(descr);
+       error = xfs_rtrmapbt_mem_init(sc->mp, &rr->rtrmap_btree, sc->xmbtp,
+                       rtg_rgno(sc->sr.rtg));
        if (error)
                goto out_bitmap;
 
@@ -687,7 +734,7 @@ xrep_rtrmap_teardown(
        struct xrep_rtrmap      *rr)
 {
        xchk_iscan_teardown(&rr->iscan);
-       xfarray_destroy(rr->rtrmap_records);
+       xfbtree_destroy(&rr->rtrmap_btree);
        xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
 }
 
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index f8a650b4bc1d72130ec793ada3095bf3571d8d16..6355d8e2a6d2cced487635fc75bb9291d5065d1a 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -53,7 +53,8 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
                { "refcntbt",           xfsstats_offset(xs_rmap_mem_2)  },
                { "rmapbt_mem",         xfsstats_offset(xs_rcbag_2)     },
                { "rcbagbt",            xfsstats_offset(xs_rtrmap_2)    },
-               { "rtrmapbt",           xfsstats_offset(xs_qm_dqreclaims)},
+               { "rtrmapbt",           xfsstats_offset(xs_rtrmap_mem_2)},
+               { "rtrmapbt_mem",       xfsstats_offset(xs_qm_dqreclaims)},
                /* we print both series of quota information together */
                { "qm",                 xfsstats_offset(xs_xstrat_bytes)},
        };
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 05dc69c6d949061110df340a432963fab3d7eed7..9c47de5dff2dd63a4c6f6e216b5bd6c9c04bac8d 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -128,6 +128,7 @@ struct __xfsstats {
        uint32_t                xs_rmap_mem_2[__XBTS_MAX];
        uint32_t                xs_rcbag_2[__XBTS_MAX];
        uint32_t                xs_rtrmap_2[__XBTS_MAX];
+       uint32_t                xs_rtrmap_mem_2[__XBTS_MAX];
        uint32_t                xs_qm_dqreclaims;
        uint32_t                xs_qm_dqreclaim_misses;
        uint32_t                xs_qm_dquot_dups;
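
The xfs_stats changes above give the shadow btree its own counters: xs_rtrmap_mem_2 feeds a
new "rtrmapbt_mem" row emitted by xfs_stats_format().  A minimal userspace sketch for pulling
that row, assuming the usual /proc/fs/xfs/stat layout of a row name followed by
whitespace-separated counters (this program is not part of the patch):

/* Hypothetical helper: print the rtrmapbt_mem counters from /proc/fs/xfs/stat. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char	line[1024];
	FILE	*fp = fopen("/proc/fs/xfs/stat", "r");

	if (!fp)
		return 1;
	while (fgets(line, sizeof(line), fp)) {
		/* Each row starts with its name, e.g. "rtrmapbt_mem 12 4 ...". */
		if (!strncmp(line, "rtrmapbt_mem ", 13))
			fputs(line, stdout);
	}
	fclose(fp);
	return 0;
}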