]> www.infradead.org Git - users/hch/xfsprogs.git/commitdiff
xfs_repair: use realtime refcount btree data to check block types
author Darrick J. Wong <djwong@kernel.org>
Thu, 15 Aug 2024 18:58:30 +0000 (11:58 -0700)
committer Darrick J. Wong <djwong@kernel.org>
Fri, 16 Aug 2024 21:57:44 +0000 (14:57 -0700)
Use the realtime refcount btree to pre-populate the block type information
so that when repair iterates the primary metadata, we can confirm the
block type.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
repair/dinode.c
repair/rt.h
repair/scan.c
repair/scan.h

index 1a82c9e25a15ff51d70a465cb3f19daca69a0b80..a02efcb94dd6e7e53edd1aa926d18531455eaa1c 100644 (file)
@@ -308,6 +308,8 @@ _("bad state in rt extent map %" PRIu64 "\n"),
                        break;
                case XR_E_INUSE:
                case XR_E_MULT:
+                       if (xfs_has_rtreflink(mp))
+                               break;
                        set_rtbmap(ext, XR_E_MULT);
                        break;
                case XR_E_FREE1:
@@ -382,6 +384,8 @@ _("data fork in rt inode %" PRIu64 " found rt metadata extent %" PRIu64 " in rt
                        return 1;
                case XR_E_INUSE:
                case XR_E_MULT:
+                       if (xfs_has_rtreflink(mp))
+                               break;
                        do_warn(
 _("data fork in rt inode %" PRIu64 " claims used rt extent %" PRIu64 "\n"),
                                ino, b);
@@ -1067,6 +1071,150 @@ _("bad rtrmap btree ptr 0x%" PRIx64 " in ino %" PRIu64 "\n"),
        return suspect ? 1 : 0;
 }
 
+/*
+ * Check the realtime refcount btree whose root sits in the data fork of
+ * inode agno/ino.
+ *
+ * The inode must carry XFS_DIFLAG2_METADATA and must be accepted by
+ * is_rtrefcount_inode().  The root's level and computed size are validated
+ * first.  A level-0 root has its records checked in place with
+ * process_rtrefc_reclist(); otherwise the root keys are checked for
+ * ordering and every root pointer is verified and passed to scan_lbtree()
+ * with scan_rtrefcbt() as the per-block callback.
+ *
+ * *tot and *nex are zeroed before scanning; after a multi-level scan *tot
+ * is replaced with the block count accumulated in priv.nr_blocks.  When
+ * check_dups is set nothing is scanned and *tot is taken from di_nblocks.
+ *
+ * return 1 if inode should be cleared, 0 otherwise
+ */
+static int
+process_rtrefc(
+       struct xfs_mount                *mp,
+       xfs_agnumber_t                  agno,
+       xfs_agino_t                     ino,
+       struct xfs_dinode               *dip,
+       int                             type,
+       int                             *dirty,
+       xfs_rfsblock_t                  *tot,
+       uint64_t                        *nex,
+       blkmap_t                        **blkmapp,
+       int                             check_dups)
+{
+       struct refc_priv                priv = { .nr_blocks = 0 };
+       struct xfs_rtrefcount_root      *dib;
+       xfs_rtrefcount_ptr_t            *pp;
+       struct xfs_refcount_key         *kp;
+       struct xfs_refcount_rec         *rp;
+       char                            *forkname = get_forkname(XFS_DATA_FORK);
+       xfs_rgblock_t                   oldkey, key;
+       xfs_ino_t                       lino;
+       xfs_fsblock_t                   bno;
+       size_t                          droot_sz;
+       int                             i;
+       int                             level;
+       int                             numrecs;
+       int                             dmxr;
+       int                             suspect = 0;
+       int                             error;
+
+       /* We rebuild the rtrefcountbt, so no need to process blocks again. */
+       if (check_dups) {
+               *tot = be64_to_cpu(dip->di_nblocks);
+               return 0;
+       }
+
+       lino = XFS_AGINO_TO_INO(mp, agno, ino);
+
+       /*
+        * This refcount btree inode must be a metadata inode reachable via
+        * /rtgroups/$rgno.refcount in the metadata directory tree.
+        *
+        * di_flags2 is big-endian on disk, so the CPU-order flag constant is
+        * converted with cpu_to_be64() before masking.
+        */
+       if (!(dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADATA))) {
+               do_warn(
+_("rtrefcount inode %" PRIu64 " not flagged as metadata\n"),
+                       lino);
+               return 1;
+       }
+
+       if (!is_rtrefcount_inode(lino)) {
+               do_warn(
+_("could not associate refcount inode %" PRIu64 " with any rtgroup\n"),
+                       lino);
+               return 1;
+       }
+
+       /* The rtgroup number is read back from the two projid halves. */
+       priv.rgno = (xfs_rgnumber_t)be16_to_cpu(dip->di_projid_hi) << 16 |
+                                   be16_to_cpu(dip->di_projid_lo);
+
+       dib = (struct xfs_rtrefcount_root *)XFS_DFORK_PTR(dip, XFS_DATA_FORK);
+       *tot = 0;
+       *nex = 0;
+
+       level = be16_to_cpu(dib->bb_level);
+       numrecs = be16_to_cpu(dib->bb_numrecs);
+
+       if (level > mp->m_rtrefc_maxlevels) {
+               do_warn(
+_("bad level %d in inode %" PRIu64 " rtrefcount btree root block\n"),
+                       level, lino);
+               return 1;
+       }
+
+       /*
+        * use rtroot/dfork_dsize since the root block is in the data fork
+        */
+       droot_sz = xfs_rtrefcount_droot_space_calc(level, numrecs);
+       if (droot_sz > XFS_DFORK_SIZE(dip, mp, XFS_DATA_FORK)) {
+               do_warn(
+_("computed size of rtrefcountbt root (%zu bytes) is greater than space in "
+         "inode %" PRIu64 " %s fork\n"),
+                               droot_sz, lino, forkname);
+               return 1;
+       }
+
+       /* A level-0 root holds the records itself; there is nothing to walk. */
+       if (level == 0) {
+               rp = xfs_rtrefcount_droot_rec_addr(dib, 1);
+               error = process_rtrefc_reclist(mp, rp, numrecs,
+                               &priv, "rtrefcountbt root");
+               if (error) {
+                       refcount_avoid_check();
+                       return 1;
+               }
+               return 0;
+       }
+
+       dmxr = libxfs_rtrefcountbt_droot_maxrecs(
+                       XFS_DFORK_SIZE(dip, mp, XFS_DATA_FORK), false);
+       pp = xfs_rtrefcount_droot_ptr_addr(dib, 1, dmxr);
+
+       /* check for in-order keys */
+       for (i = 0; i < numrecs; i++)  {
+               kp = xfs_rtrefcount_droot_key_addr(dib, i + 1);
+
+               key = be32_to_cpu(kp->rc_startblock);
+               if (i == 0) {
+                       oldkey = key;
+                       continue;
+               }
+               if (key < oldkey) {
+                       /* the warning reports the key's index, not its value */
+                       do_warn(
+_("out of order key %u in rtrefcount root ino %" PRIu64 "\n"),
+                               i, lino);
+                       suspect++;
+                       continue;
+               }
+               oldkey = key;
+       }
+
+       /* probe keys */
+       for (i = 0; i < numrecs; i++)  {
+               bno = get_unaligned_be64(&pp[i]);
+
+               if (!libxfs_verify_fsbno(mp, bno))  {
+                       do_warn(
+_("bad rtrefcount btree ptr 0x%" PRIx64 " in ino %" PRIu64 "\n"),
+                               bno, lino);
+                       return 1;
+               }
+
+               if (scan_lbtree(bno, level, scan_rtrefcbt,
+                               type, XFS_DATA_FORK, lino, tot, nex, blkmapp,
+                               NULL, 0, 1, check_dups, XFS_RTREFC_CRC_MAGIC,
+                               &priv, &xfs_rtrefcountbt_buf_ops))
+                       return 1;
+       }
+
+       /* Report the number of btree blocks the scan walked. */
+       *tot = priv.nr_blocks;
+       return suspect ? 1 : 0;
+}
+
 /*
  * return 1 if inode should be cleared, 0 otherwise
  */
@@ -1864,6 +2012,7 @@ check_dinode_mode_format(
                case XFS_DINODE_FMT_RMAP:
                case XFS_DINODE_FMT_EXTENTS:
                case XFS_DINODE_FMT_BTREE:
+               case XFS_DINODE_FMT_REFCOUNT:
                        return 0;
                }
                return -1;
@@ -2308,6 +2457,10 @@ retry:
                err = process_rtrmap(mp, agno, ino, dino, type, dirty,
                                totblocks, nextents, dblkmap, check_dups);
                break;
+       case XFS_DINODE_FMT_REFCOUNT:
+               err = process_rtrefc(mp, agno, ino, dino, type, dirty,
+                       totblocks, nextents, dblkmap, check_dups);
+               break;
        case XFS_DINODE_FMT_DEV:
                err = 0;
                break;
@@ -2368,6 +2521,7 @@ _("would have tried to rebuild inode %"PRIu64" data fork\n"),
                        break;
                case XFS_DINODE_FMT_DEV:
                case XFS_DINODE_FMT_RMAP:
+               case XFS_DINODE_FMT_REFCOUNT:
                        err = 0;
                        break;
                default:
index 13558706e4ec150b9c192267265c66086294742b..e4f3d5d9af3188d400d9f3c263f1dd5db6c70a77 100644 (file)
@@ -33,6 +33,10 @@ static inline bool is_rtrmap_inode(xfs_ino_t ino)
 {
        return is_rtgroup_inode(ino, XFS_RTGI_RMAP);
 }
+/*
+ * Ask is_rtgroup_inode() whether ino is known as an XFS_RTGI_REFCOUNT inode;
+ * presumably true when ino is the refcount btree inode of some rtgroup.
+ */
+static inline bool is_rtrefcount_inode(xfs_ino_t ino)
+{
+       return is_rtgroup_inode(ino, XFS_RTGI_REFCOUNT);
+}
 
 void mark_rtgroup_inodes_bad(struct xfs_mount *mp, enum xfs_rtg_inodes type);
 bool rtgroup_inodes_were_bad(enum xfs_rtg_inodes type);
index e0ecd922ef4eb43980006e80ac9e6db9dbd1e9b3..ecb43e6452b26d2d2a90522d7014a4d613d9ad22 100644 (file)
@@ -1745,12 +1745,6 @@ out:
        return 0;
 }
 
-struct refc_priv {
-       struct xfs_refcount_irec        last_rec;
-       xfs_agblock_t                   nr_blocks;
-};
-
-
 static void
 scan_refcbt(
        struct xfs_btree_block  *block,
@@ -1990,6 +1984,303 @@ out:
        return;
 }
 
+
+/*
+ * Check numrecs on-disk realtime refcount records starting at rp.
+ *
+ * Each record is decoded and assigned to the shared or CoW domain according
+ * to XFS_REFC_COWFLAG in its start block.  It is then checked for a valid
+ * start block, length and reference count, for ordering against the
+ * records before it, and for being mergeable with refc_priv->last_rec.
+ * Records with a refcount of 1 are treated as leftover CoW staging extents
+ * and their blocks are looked up in the rt block map.  name identifies the
+ * containing block in warnings.
+ *
+ * Returns the number of problems found; zero means every record passed.
+ */
+int
+process_rtrefc_reclist(
+       struct xfs_mount        *mp,
+       struct xfs_refcount_rec *rp,
+       int                     numrecs,
+       struct refc_priv        *refc_priv,
+       const char              *name)
+{
+       xfs_rgnumber_t          rgno = refc_priv->rgno;
+       xfs_rtblock_t           lastblock = 0;
+       xfs_rtblock_t           rtbno, next_rtbno;
+       int                     state;
+       int                     suspect = 0;
+       int                     i;
+
+       for (i = 0; i < numrecs; i++) {
+               enum xfs_refc_domain    domain;
+               xfs_rgblock_t           b, rgbno, end;
+               xfs_extlen_t            len;
+               xfs_nlink_t             nr;
+
+               /* b keeps the raw start block; rgbno has the CoW flag masked */
+               b = rgbno = be32_to_cpu(rp[i].rc_startblock);
+               len = be32_to_cpu(rp[i].rc_blockcount);
+               nr = be32_to_cpu(rp[i].rc_refcount);
+
+               if (b & XFS_REFC_COWFLAG) {
+                       domain = XFS_REFC_DOMAIN_COW;
+                       rgbno &= ~XFS_REFC_COWFLAG;
+               } else {
+                       domain = XFS_REFC_DOMAIN_SHARED;
+               }
+
+               /* CoW domain and refcount 1 must go together */
+               if (domain == XFS_REFC_DOMAIN_COW && nr != 1) {
+                       do_warn(
+_("leftover rt CoW extent has incorrect refcount in record %u of %s\n"),
+                                       i, name);
+                       suspect++;
+               }
+               if (nr == 1) {
+                       if (domain != XFS_REFC_DOMAIN_COW) {
+                               do_warn(
+_("leftover rt CoW extent has invalid startblock in record %u of %s\n"),
+                                       i, name);
+                               suspect++;
+                       }
+               }
+               end = rgbno + len;
+
+               rtbno = xfs_rgbno_to_rtb(mp, rgno, rgbno);
+               if (!libxfs_verify_rtbno(mp, rtbno)) {
+                       do_warn(
+_("invalid start block %llu in record %u of %s\n"),
+                                       (unsigned long long)b, i, name);
+                       suspect++;
+                       continue;
+               }
+
+               /* end <= rgbno catches a length that wraps the block number */
+               next_rtbno = xfs_rgbno_to_rtb(mp, rgno, end);
+               if (len == 0 || end <= rgbno ||
+                   !libxfs_verify_rtbno(mp, next_rtbno - 1)) {
+                       do_warn(
+_("invalid length %llu in record %u of %s\n"),
+                                       (unsigned long long)len, i, name);
+                       suspect++;
+                       continue;
+               }
+
+               /*
+                * A refcount of 1 marks a leftover CoW staging extent, so it
+                * has to be handled before the generic range check below;
+                * otherwise that check would reject it and this branch could
+                * never run.
+                */
+               if (nr == 1) {
+                       xfs_rgblock_t           cb;
+                       xfs_extlen_t            blen;
+
+                       /*
+                        * NOTE(review): the loop advances by len and frees
+                        * len blocks, so it runs once per record and blen is
+                        * otherwise unused -- confirm whether stepping by
+                        * blen was intended.
+                        */
+                       for (cb = rgbno; cb < end; cb += len) {
+                               state = get_bmap_ext(rgno, cb, end, &blen,
+                                               true);
+                               blen = min(blen, len);
+
+                               switch (state) {
+                               case XR_E_UNKNOWN:
+                               case XR_E_COW:
+                                       do_warn(
+_("leftover CoW rtextent (%llu)\n"),
+                                               (unsigned long long)rgbno);
+                                       set_bmap_ext(rgno, cb, len, XR_E_FREE,
+                                                       true);
+                                       break;
+                               default:
+                                       do_warn(
+_("rtextent (%llu) claimed, state is %d\n"),
+                                               (unsigned long long)rgbno, state);
+                                       break;
+                               }
+                               suspect++;
+                       }
+               } else if (nr < 2 || nr > XFS_REFC_REFCOUNT_MAX) {
+                       do_warn(
+_("invalid rt reference count %u in record %u of %s\n"),
+                                       nr, i, name);
+                       suspect++;
+                       continue;
+               }
+
+               /* lastblock only advances when the record is in order */
+               if (b && b <= lastblock) {
+                       do_warn(_(
+"out-of-order %s btree record %d (%llu %llu) in %s\n"),
+                                       name, i, (unsigned long long)b,
+                                       (unsigned long long)len, name);
+                       suspect++;
+               } else {
+                       lastblock = end - 1;
+               }
+
+               /* Is this record mergeable with the last one? */
+               if (refc_priv->last_rec.rc_domain == domain &&
+                   refc_priv->last_rec.rc_startblock +
+                   refc_priv->last_rec.rc_blockcount == rgbno &&
+                   refc_priv->last_rec.rc_refcount == nr) {
+                       do_warn(
+_("record %d of %s tree should be merged with previous record\n"),
+                                       i, name);
+                       suspect++;
+                       refc_priv->last_rec.rc_blockcount += len;
+               } else {
+                       refc_priv->last_rec.rc_domain = domain;
+                       refc_priv->last_rec.rc_startblock = rgbno;
+                       refc_priv->last_rec.rc_blockcount = len;
+                       refc_priv->last_rec.rc_refcount = nr;
+               }
+
+               /* XXX: probably want to mark the reflinked areas? */
+       }
+
+       return suspect;
+}
+
+/*
+ * scan_lbtree() callback for one block of a realtime refcount btree.
+ *
+ * The block's magic and level are checked, the block is counted in
+ * refc_priv->nr_blocks, and its state in the block map is switched to
+ * XR_E_METADATA unless something else already claimed it.  Leaf blocks
+ * have their records checked by process_rtrefc_reclist(); interior blocks
+ * have each child pointer verified and then scanned through scan_lbtree().
+ *
+ * type, whichfork, ino, tot, nex, blkmapp, bm_cursor and check_dups are
+ * only passed through to scan_lbtree(); dirty is not used here.  mp is not
+ * a parameter -- presumably the file-scope mount pointer of scan.c.
+ *
+ * Returns 1, after calling refcount_avoid_check(), when suspect is nonzero
+ * on the way out; returns 0 otherwise.  A bad child pointer returns 0
+ * immediately without reaching that check.
+ */
+int
+scan_rtrefcbt(
+       struct xfs_btree_block          *block,
+       int                             level,
+       int                             type,
+       int                             whichfork,
+       xfs_fsblock_t                   fsbno,
+       xfs_ino_t                       ino,
+       xfs_rfsblock_t                  *tot,
+       uint64_t                        *nex,
+       struct blkmap                   **blkmapp,
+       bmap_cursor_t                   *bm_cursor,
+       int                             suspect,
+       int                             isroot,
+       int                             check_dups,
+       int                             *dirty,
+       uint64_t                        magic,
+       void                            *priv)
+{
+       const char                      *name = "rtrefcount";
+       char                            rootname[256];
+       int                             i;
+       xfs_rtrefcount_ptr_t            *pp;
+       struct xfs_refcount_rec *rp;
+       struct refc_priv                *refc_priv = priv;
+       int                             hdr_errors = 0;
+       int                             numrecs;
+       int                             state;
+       xfs_agnumber_t                  agno;
+       xfs_agblock_t                   agbno;
+       int                             error;
+
+       /* the btree block itself is addressed as an AG number and AG block */
+       agno = XFS_FSB_TO_AGNO(mp, fsbno);
+       agbno = XFS_FSB_TO_AGBNO(mp, fsbno);
+
+       if (magic != XFS_RTREFC_CRC_MAGIC) {
+               name = "(unknown)";
+               hdr_errors++;
+               suspect++;
+               goto out;
+       }
+
+       if (be32_to_cpu(block->bb_magic) != magic) {
+               do_warn(_("bad magic # %#x in %s btree block %d/%d\n"),
+                               be32_to_cpu(block->bb_magic), name, agno,
+                               agbno);
+               hdr_errors++;
+               if (suspect)
+                       goto out;
+       }
+
+       if (be16_to_cpu(block->bb_level) != level) {
+               do_warn(_("expected level %d got %d in %s btree block %d/%d\n"),
+                               level, be16_to_cpu(block->bb_level), name,
+                               agno, agbno);
+               hdr_errors++;
+               if (suspect)
+                       goto out;
+       }
+
+       refc_priv->nr_blocks++;
+
+       /*
+        * Check for btree blocks multiply claimed.  We're going to regenerate
+        * the btree anyway, so mark the blocks as metadata so they get freed.
+        */
+       state = get_bmap(agno, agbno);
+       if (!(state == XR_E_UNKNOWN || state == XR_E_INUSE1))  {
+               do_warn(
+_("%s btree block claimed (state %d), agno %d, agbno %d, suspect %d\n"),
+                               name, state, agno, agbno, suspect);
+               goto out;
+       }
+       set_bmap(agno, agbno, XR_E_METADATA);
+
+       numrecs = be16_to_cpu(block->bb_numrecs);
+       if (level == 0) {
+               /* clamp the record count to the leaf limits before use */
+               if (numrecs > mp->m_rtrefc_mxr[0])  {
+                       numrecs = mp->m_rtrefc_mxr[0];
+                       hdr_errors++;
+               }
+               if (isroot == 0 && numrecs < mp->m_rtrefc_mnr[0])  {
+                       numrecs = mp->m_rtrefc_mnr[0];
+                       hdr_errors++;
+               }
+
+               /* hdr_errors also counts the magic/level mismatches above */
+               if (hdr_errors) {
+                       do_warn(
+       _("bad btree nrecs (%u, min=%u, max=%u) in %s btree block %u/%u\n"),
+                                       be16_to_cpu(block->bb_numrecs),
+                                       mp->m_rtrefc_mnr[0],
+                                       mp->m_rtrefc_mxr[0], name, agno, agbno);
+                       suspect++;
+               }
+
+               rp = xfs_rtrefcount_rec_addr(block, 1);
+               snprintf(rootname, 256, "%s btree block %u/%u", name, agno,
+                               agbno);
+               error = process_rtrefc_reclist(mp, rp, numrecs, refc_priv,
+                               rootname);
+               if (error)
+                       suspect++;
+               goto out;
+       }
+
+       /*
+        * interior record
+        */
+       pp = xfs_rtrefcount_ptr_addr(block, 1, mp->m_rtrefc_mxr[1]);
+
+       if (numrecs > mp->m_rtrefc_mxr[1])  {
+               numrecs = mp->m_rtrefc_mxr[1];
+               hdr_errors++;
+       }
+       if (isroot == 0 && numrecs < mp->m_rtrefc_mnr[1])  {
+               numrecs = mp->m_rtrefc_mnr[1];
+               hdr_errors++;
+       }
+
+       /*
+        * don't pass bogus tree flag down further if this block
+        * looked ok.  bail out if two levels in a row look bad.
+        */
+       if (hdr_errors)  {
+               do_warn(
+       _("bad btree nrecs (%u, min=%u, max=%u) in %s btree block %u/%u\n"),
+                               be16_to_cpu(block->bb_numrecs),
+                               mp->m_rtrefc_mnr[1], mp->m_rtrefc_mxr[1], name,
+                               agno, agbno);
+               if (suspect)
+                       goto out;
+               suspect++;
+       } else if (suspect) {
+               suspect = 0;
+       }
+
+       for (i = 0; i < numrecs; i++)  {
+               xfs_fsblock_t           pbno = be64_to_cpu(pp[i]);
+
+               if (!libxfs_verify_fsbno(mp, pbno)) {
+                       /* report the failing child pointer, not this block */
+                       do_warn(
+       _("bad btree pointer (%llu) in %sbt block %u/%u\n"),
+                                       (unsigned long long)pbno, name, agno,
+                                       agbno);
+                       suspect++;
+                       return 0;
+               }
+
+               /* the child's verdict is not propagated to this level */
+               scan_lbtree(pbno, level, scan_rtrefcbt, type, whichfork, ino,
+                               tot, nex, blkmapp, bm_cursor, suspect, 0,
+                               check_dups, magic, refc_priv,
+                               &xfs_rtrefcountbt_buf_ops);
+       }
+out:
+       if (suspect) {
+               refcount_avoid_check();
+               return 1;
+       }
+
+       return 0;
+}
+
 /*
  * The following helpers are to help process and validate individual on-disk
  * inode btree records. We have two possible inode btrees with slightly
index a624c882734c7700846314c7686d623aa5621d85..1643a2397aeaf55874457417684d91f6e601e459 100644 (file)
@@ -100,4 +100,37 @@ int scan_rtrmapbt(
        uint64_t                magic,
        void                    *priv);
 
+/* State shared by the refcount btree scanners through their priv pointer. */
+struct refc_priv {
+       /* previous record; process_rtrefc_reclist() checks mergeability against it */
+       struct xfs_refcount_irec        last_rec;
+       /* btree blocks counted so far; scan_rtrefcbt() bumps it once per block */
+       xfs_agblock_t                   nr_blocks;
+       /* rtgroup number; process_rtrefc_reclist() maps record blocks with it */
+       xfs_rgnumber_t                  rgno;
+};
+
+/*
+ * Check numrecs realtime refcount records at rp, using refc_priv for the
+ * rtgroup number and the previous record; name labels the warnings.
+ * Returns nonzero if any record looked wrong.
+ */
+int
+process_rtrefc_reclist(
+       struct xfs_mount        *mp,
+       struct xfs_refcount_rec *rp,
+       int                     numrecs,
+       struct refc_priv        *refc_priv,
+       const char              *name);
+
+/*
+ * Per-block callback handed to scan_lbtree() when walking a realtime
+ * refcount btree; priv points at a struct refc_priv.  Returns 1 if the
+ * block was found suspect, 0 otherwise.
+ */
+int
+scan_rtrefcbt(
+       struct xfs_btree_block  *block,
+       int                     level,
+       int                     type,
+       int                     whichfork,
+       xfs_fsblock_t           bno,
+       xfs_ino_t               ino,
+       xfs_rfsblock_t          *tot,
+       uint64_t                *nex,
+       struct blkmap           **blkmapp,
+       bmap_cursor_t           *bm_cursor,
+       int                     suspect,
+       int                     isroot,
+       int                     check_dups,
+       int                     *dirty,
+       uint64_t                magic,
+       void                    *priv);
+
 #endif /* _XR_SCAN_H */