From 702c90f451622384d6c65897b619f647704b06a9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 20 Nov 2024 16:20:27 -0800 Subject: [PATCH] xfs: support file data forks containing metadata btrees Create a new fork format type for metadata btrees. This fork type requires that the inode is in the metadata directory tree, and only applies to the data fork. The actual type of the metadata btree itself is determined by the di_metatype field. Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_format.h | 6 +++-- fs/xfs/libxfs/xfs_inode_buf.c | 23 ++++++++++++++++--- fs/xfs/libxfs/xfs_inode_fork.c | 19 ++++++++++++++++ fs/xfs/scrub/bmap.c | 1 + fs/xfs/scrub/bmap_repair.c | 1 + fs/xfs/scrub/inode.c | 4 ++++ fs/xfs/scrub/inode_repair.c | 36 +++++++++++++++++++++++++++++ fs/xfs/scrub/rmap_repair.c | 31 ++++++++++++++++--------- fs/xfs/xfs_inode.c | 19 +++++++++++++++- fs/xfs/xfs_inode_item.c | 2 ++ fs/xfs/xfs_inode_item_recover.c | 40 ++++++++++++++++++++++++++++----- fs/xfs/xfs_trace.h | 1 + 12 files changed, 162 insertions(+), 21 deletions(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 469fc7afa591b..41ea4283c43cb 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -997,7 +997,8 @@ enum xfs_dinode_fmt { XFS_DINODE_FMT_LOCAL, /* bulk data */ XFS_DINODE_FMT_EXTENTS, /* struct xfs_bmbt_rec */ XFS_DINODE_FMT_BTREE, /* struct xfs_bmdr_block */ - XFS_DINODE_FMT_UUID /* added long ago, but never used */ + XFS_DINODE_FMT_UUID, /* added long ago, but never used */ + XFS_DINODE_FMT_META_BTREE, /* metadata btree */ }; #define XFS_INODE_FORMAT_STR \ @@ -1005,7 +1006,8 @@ enum xfs_dinode_fmt { { XFS_DINODE_FMT_LOCAL, "local" }, \ { XFS_DINODE_FMT_EXTENTS, "extent" }, \ { XFS_DINODE_FMT_BTREE, "btree" }, \ - { XFS_DINODE_FMT_UUID, "uuid" } + { XFS_DINODE_FMT_UUID, "uuid" }, \ + { XFS_DINODE_FMT_META_BTREE, "meta_btree" } /* * Max values for extnum and aextnum. diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 424861fbf1bd4..1648d72d6ed95 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -441,6 +441,16 @@ xfs_dinode_verify_fork( if (di_nextents > max_extents) return __this_address; break; + case XFS_DINODE_FMT_META_BTREE: + if (!xfs_has_metadir(mp)) + return __this_address; + if (!(dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADATA))) + return __this_address; + switch (be16_to_cpu(dip->di_metatype)) { + default: + return __this_address; + } + break; default: return __this_address; } @@ -460,6 +470,10 @@ xfs_dinode_verify_forkoff( if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3)) return __this_address; break; + case XFS_DINODE_FMT_META_BTREE: + if (!xfs_has_metadir(mp) || !xfs_has_parent(mp)) + return __this_address; + fallthrough; case XFS_DINODE_FMT_LOCAL: /* fall through ... */ case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ case XFS_DINODE_FMT_BTREE: @@ -637,9 +651,6 @@ xfs_dinode_verify( if (mode && nextents + naextents > nblocks) return __this_address; - if (nextents + naextents == 0 && nblocks != 0) - return __this_address; - if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents) return __this_address; @@ -743,6 +754,12 @@ xfs_dinode_verify( return fa; } + /* metadata inodes containing btrees always have zero extent count */ + if (XFS_DFORK_FORMAT(dip, XFS_DATA_FORK) != XFS_DINODE_FMT_META_BTREE) { + if (nextents + naextents == 0 && nblocks != 0) + return __this_address; + } + return NULL; } diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 60853bac289a3..1a782339396dc 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -267,6 +267,12 @@ xfs_iformat_data_fork( return xfs_iformat_extents(ip, dip, XFS_DATA_FORK); case XFS_DINODE_FMT_BTREE: return xfs_iformat_btree(ip, dip, XFS_DATA_FORK); + case XFS_DINODE_FMT_META_BTREE: + switch (ip->i_metatype) { + default: + break; + } + fallthrough; default: xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, sizeof(*dip), __this_address); @@ -601,6 +607,19 @@ xfs_iflush_fork( } break; + case XFS_DINODE_FMT_META_BTREE: + ASSERT(whichfork == XFS_DATA_FORK); + + if (!(iip->ili_fields & brootflag[whichfork])) + break; + + switch (ip->i_metatype) { + default: + ASSERT(0); + break; + } + break; + default: ASSERT(0); break; diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 7e00312225ed1..0d7ad692822d4 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -983,6 +983,7 @@ xchk_bmap( case XFS_DINODE_FMT_UUID: case XFS_DINODE_FMT_DEV: case XFS_DINODE_FMT_LOCAL: + case XFS_DINODE_FMT_META_BTREE: /* No mappings to check. */ if (whichfork == XFS_COW_FORK) xchk_fblock_set_corrupt(sc, whichfork, 0); diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c index 7c4955482641f..141d36f1da9a7 100644 --- a/fs/xfs/scrub/bmap_repair.c +++ b/fs/xfs/scrub/bmap_repair.c @@ -731,6 +731,7 @@ xrep_bmap_check_inputs( case XFS_DINODE_FMT_DEV: case XFS_DINODE_FMT_LOCAL: case XFS_DINODE_FMT_UUID: + case XFS_DINODE_FMT_META_BTREE: return -ECANCELED; case XFS_DINODE_FMT_EXTENTS: case XFS_DINODE_FMT_BTREE: diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 25ee66e7649d4..2e911f38deaeb 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -502,6 +502,10 @@ xchk_dinode( if (!S_ISREG(mode) && !S_ISDIR(mode)) xchk_ino_set_corrupt(sc, ino); break; + case XFS_DINODE_FMT_META_BTREE: + if (!S_ISREG(mode)) + xchk_ino_set_corrupt(sc, ino); + break; case XFS_DINODE_FMT_UUID: default: xchk_ino_set_corrupt(sc, ino); diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index 5a58ddd27bd2f..7faa27472b912 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -888,6 +888,25 @@ xrep_dinode_bad_bmbt_fork( return false; } +/* Check a metadata-btree fork. */ +STATIC bool +xrep_dinode_bad_metabt_fork( + struct xfs_scrub *sc, + struct xfs_dinode *dip, + unsigned int dfork_size, + int whichfork) +{ + if (whichfork != XFS_DATA_FORK) + return true; + + switch (be16_to_cpu(dip->di_metatype)) { + default: + return true; + } + + return false; +} + /* * Check the data fork for things that will fail the ifork verifiers or the * ifork formatters. @@ -968,6 +987,11 @@ xrep_dinode_check_dfork( XFS_DATA_FORK)) return true; break; + case XFS_DINODE_FMT_META_BTREE: + if (xrep_dinode_bad_metabt_fork(sc, dip, dfork_size, + XFS_DATA_FORK)) + return true; + break; default: return true; } @@ -1088,6 +1112,11 @@ xrep_dinode_check_afork( XFS_ATTR_FORK)) return true; break; + case XFS_DINODE_FMT_META_BTREE: + if (xrep_dinode_bad_metabt_fork(sc, dip, afork_size, + XFS_ATTR_FORK)) + return true; + break; default: return true; } @@ -1241,6 +1270,13 @@ xrep_dinode_ensure_forkoff( bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); dfork_min = xfs_bmap_broot_space(sc->mp, bmdr); break; + case XFS_DINODE_FMT_META_BTREE: + switch (be16_to_cpu(dip->di_metatype)) { + default: + dfork_min = 0; + break; + } + break; default: dfork_min = 0; break; diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c index a0a227d183d28..2a0b9e3d0fbae 100644 --- a/fs/xfs/scrub/rmap_repair.c +++ b/fs/xfs/scrub/rmap_repair.c @@ -499,6 +499,14 @@ xrep_rmap_scan_iext( return xrep_rmap_stash_accumulated(rf); } +static int +xrep_rmap_scan_meta_btree( + struct xrep_rmap_ifork *rf, + struct xfs_inode *ip) +{ + return -EFSCORRUPTED; /* XXX placeholder */ +} + /* Find all the extents from a given AG in an inode fork. */ STATIC int xrep_rmap_scan_ifork( @@ -512,14 +520,14 @@ xrep_rmap_scan_ifork( .whichfork = whichfork, }; struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); + bool mappings_done; int error = 0; if (!ifp) return 0; - if (ifp->if_format == XFS_DINODE_FMT_BTREE) { - bool mappings_done; - + switch (ifp->if_format) { + case XFS_DINODE_FMT_BTREE: /* * Scan the bmap btree for data device mappings. This includes * the btree blocks themselves, even if this is a realtime @@ -528,15 +536,18 @@ xrep_rmap_scan_ifork( error = xrep_rmap_scan_bmbt(&rf, ip, &mappings_done); if (error || mappings_done) return error; - } else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) { - return 0; + fallthrough; + case XFS_DINODE_FMT_EXTENTS: + /* Scan incore extent cache if this isn't a realtime file. */ + if (xfs_ifork_is_realtime(ip, whichfork)) + return 0; + + return xrep_rmap_scan_iext(&rf, ifp); + case XFS_DINODE_FMT_META_BTREE: + return xrep_rmap_scan_meta_btree(&rf, ip); } - /* Scan incore extent cache if this isn't a realtime file. */ - if (xfs_ifork_is_realtime(ip, whichfork)) - return 0; - - return xrep_rmap_scan_iext(&rf, ifp); + return 0; } /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c8ad2606f928b..c95fe1b1de4e6 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2382,7 +2382,16 @@ xfs_iflush( __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip); goto flush_out; } - if (S_ISREG(VFS_I(ip)->i_mode)) { + if (ip->i_df.if_format == XFS_DINODE_FMT_META_BTREE) { + if (!S_ISREG(VFS_I(ip)->i_mode) || + !(ip->i_diflags2 & XFS_DIFLAG2_METADATA)) { + xfs_alert_tag(mp, XFS_PTAG_IFLUSH, + "%s: Bad %s meta btree inode %Lu, ptr "PTR_FMT, + __func__, xfs_metafile_type_str(ip->i_metatype), + ip->i_ino, ip); + goto flush_out; + } + } else if (S_ISREG(VFS_I(ip)->i_mode)) { if (XFS_TEST_ERROR( ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS && ip->i_df.if_format != XFS_DINODE_FMT_BTREE, @@ -2422,6 +2431,14 @@ xfs_iflush( goto flush_out; } + if (xfs_inode_has_attr_fork(ip) && + ip->i_af.if_format == XFS_DINODE_FMT_META_BTREE) { + xfs_alert_tag(mp, XFS_PTAG_IFLUSH, + "%s: meta btree in inode %Lu attr fork, ptr "PTR_FMT, + __func__, ip->i_ino, ip); + goto flush_out; + } + /* * Inode item log recovery for v2 inodes are dependent on the flushiter * count for correct sequencing. We bump the flush iteration count so diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 912f0b1bc3cb7..a174f64b8bb25 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -242,6 +242,7 @@ xfs_inode_item_data_fork_size( } break; case XFS_DINODE_FMT_BTREE: + case XFS_DINODE_FMT_META_BTREE: if ((iip->ili_fields & XFS_ILOG_DBROOT) && ip->i_df.if_broot_bytes > 0) { *nbytes += ip->i_df.if_broot_bytes; @@ -362,6 +363,7 @@ xfs_inode_item_format_data_fork( } break; case XFS_DINODE_FMT_BTREE: + case XFS_DINODE_FMT_META_BTREE: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV); diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index e70d2611456bc..6e9b3bfc718c0 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -266,6 +266,35 @@ xlog_dinode_verify_extent_counts( return 0; } +static inline int +xlog_recover_inode_dbroot( + struct xfs_mount *mp, + void *src, + unsigned int len, + struct xfs_dinode *dip) +{ + void *dfork = XFS_DFORK_DPTR(dip); + unsigned int dsize = XFS_DFORK_DSIZE(dip, mp); + + switch (dip->di_format) { + case XFS_DINODE_FMT_BTREE: + xfs_bmbt_to_bmdr(mp, src, len, dfork, dsize); + break; + case XFS_DINODE_FMT_META_BTREE: + switch (be16_to_cpu(dip->di_metatype)) { + default: + ASSERT(0); + return -EFSCORRUPTED; + } + break; + default: + ASSERT(0); + return -EFSCORRUPTED; + } + + return 0; +} + STATIC int xlog_recover_inode_commit_pass2( struct xlog *log, @@ -393,8 +422,9 @@ xlog_recover_inode_commit_pass2( if (unlikely(S_ISREG(ldip->di_mode))) { - if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) && - (ldip->di_format != XFS_DINODE_FMT_BTREE)) { + if (ldip->di_format != XFS_DINODE_FMT_EXTENTS && + ldip->di_format != XFS_DINODE_FMT_BTREE && + ldip->di_format != XFS_DINODE_FMT_META_BTREE) { XFS_CORRUPTION_ERROR( "Bad log dinode data fork format for regular file", XFS_ERRLEVEL_LOW, mp, ldip, sizeof(*ldip)); @@ -475,9 +505,9 @@ xlog_recover_inode_commit_pass2( break; case XFS_ILOG_DBROOT: - xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, - (struct xfs_bmdr_block *)XFS_DFORK_DPTR(dip), - XFS_DFORK_DSIZE(dip, mp)); + error = xlog_recover_inode_dbroot(mp, src, len, dip); + if (error) + goto out_release; break; default: diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 8b7bb1f5ae3c6..a098935163b7c 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2299,6 +2299,7 @@ TRACE_DEFINE_ENUM(XFS_DINODE_FMT_LOCAL); TRACE_DEFINE_ENUM(XFS_DINODE_FMT_EXTENTS); TRACE_DEFINE_ENUM(XFS_DINODE_FMT_BTREE); TRACE_DEFINE_ENUM(XFS_DINODE_FMT_UUID); +TRACE_DEFINE_ENUM(XFS_DINODE_FMT_META_BTREE); DECLARE_EVENT_CLASS(xfs_swap_extent_class, TP_PROTO(struct xfs_inode *ip, int which), -- 2.50.1