www.infradead.org Git - users/hch/xfs.git/commitdiff
xfs: define the zoned on-disk format
author Christoph Hellwig <hch@lst.de>
Sat, 30 Nov 2024 03:48:33 +0000 (04:48 +0100)
committer Christoph Hellwig <hch@lst.de>
Mon, 3 Feb 2025 04:49:09 +0000 (05:49 +0100)
Zoned file systems reuse the basic RT group enabled XFS file system
structure to support a mode where each RT group is always written from
start to end and then reset for reuse (after moving out any remaining
data).  There are a few minor but important changes, which are indicated
by a new incompat flag:

1) there are no bitmap and summary inodes, thus the
   /rtgroups/{rgno}.{bitmap,summary} metadir files do not exist and the
   sb_rbmblocks superblock field must be cleared to zero.

2) there is a new superblock field that specifies the start of an
   internal RT section.  This allows supporting SMR HDDs that have random
   writable space at the beginning which is used for the XFS data device
   (which really is the metadata device for this configuration), directly
   followed by an RT device on the same block device.  While something
   similar could be achieved using dm-linear, just having a single device
   directly consumed by XFS makes handling the file systems a lot easier.

3) there is another superblock field that tracks the amount of reserved
   space (or overprovisioning) that is never used for user capacity, but
   allows GC to run more smoothly.

4) an overlay of the cowextsize field for the rtrmap inode so that we
   can persistently track the total amount of rtblocks currently used in
   an RT group.  There is no data structure other than the rmap that
   tracks used space in an RT group, and this counter is used to decide
   when an RT group has been entirely emptied, and to select one that
   is relatively empty if garbage collection needs to be performed.
   While this counter could be tracked entirely in memory and rebuilt
   from the rmap at mount time, that would lead to very long mount times
   with the large number of RT groups implied by the number of hardware
   zones, especially on SMR hard drives with 256MB zone sizes.

Signed-off-by: Christoph Hellwig <hch@lst.de>
23 files changed:
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_inode_buf.c
fs/xfs/libxfs/xfs_inode_util.c
fs/xfs/libxfs/xfs_log_format.h
fs/xfs/libxfs/xfs_ondisk.h
fs/xfs/libxfs/xfs_rtbitmap.c
fs/xfs/libxfs/xfs_rtgroup.c
fs/xfs/libxfs/xfs_sb.c
fs/xfs/scrub/agheader.c
fs/xfs/scrub/inode.c
fs/xfs/scrub/inode_repair.c
fs/xfs/scrub/scrub.c
fs/xfs/xfs_fsmap.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_inode_item_recover.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_message.c
fs/xfs/xfs_message.h
fs/xfs/xfs_mount.h
fs/xfs/xfs_rtalloc.c
fs/xfs/xfs_super.c

index b1007fb661ba73822ea39b801937eb9823f1cf40..e2cf3af120a30dec3ef34f4d548825abe861e6c1 100644 (file)
@@ -178,9 +178,10 @@ typedef struct xfs_sb {
 
        xfs_rgnumber_t  sb_rgcount;     /* number of realtime groups */
        xfs_rtxlen_t    sb_rgextents;   /* size of a realtime group in rtx */
-
        uint8_t         sb_rgblklog;    /* rt group number shift */
        uint8_t         sb_pad[7];      /* zeroes */
+       xfs_fsblock_t   sb_rtstart;     /* start of internal RT section (FSB) */
+       xfs_filblks_t   sb_rtreserved;  /* reserved (zoned) RT blocks */
 
        /* must be padded to 64 bit alignment */
 } xfs_sb_t;
@@ -270,9 +271,10 @@ struct xfs_dsb {
        __be64          sb_metadirino;  /* metadata directory tree root */
        __be32          sb_rgcount;     /* # of realtime groups */
        __be32          sb_rgextents;   /* size of rtgroup in rtx */
-
        __u8            sb_rgblklog;    /* rt group number shift */
        __u8            sb_pad[7];      /* zeroes */
+       __be64          sb_rtstart;     /* start of internal RT section (FSB) */
+       __be64          sb_rtreserved;  /* reserved (zoned) RT blocks */
 
        /*
         * The size of this structure must be padded to 64 bit alignment.
@@ -395,6 +397,8 @@ xfs_sb_has_ro_compat_feature(
 #define XFS_SB_FEAT_INCOMPAT_EXCHRANGE (1 << 6)  /* exchangerange supported */
 #define XFS_SB_FEAT_INCOMPAT_PARENT    (1 << 7)  /* parent pointers */
 #define XFS_SB_FEAT_INCOMPAT_METADIR   (1 << 8)  /* metadata dir tree */
+#define XFS_SB_FEAT_INCOMPAT_ZONED     (1 << 9)  /* zoned RT allocator */
+
 #define XFS_SB_FEAT_INCOMPAT_ALL \
                (XFS_SB_FEAT_INCOMPAT_FTYPE | \
                 XFS_SB_FEAT_INCOMPAT_SPINODES | \
@@ -952,7 +956,12 @@ struct xfs_dinode {
        __be64          di_changecount; /* number of attribute changes */
        __be64          di_lsn;         /* flush sequence */
        __be64          di_flags2;      /* more random flags */
-       __be32          di_cowextsize;  /* basic cow extent size for file */
+       union {
+               /* basic cow extent size for (regular) file */
+               __be32          di_cowextsize;
+               /* used blocks in RTG for (zoned) rtrmap inode */
+               __be32          di_used_blocks;
+       };
        __u8            di_pad2[12];    /* more padding for future expansion */
 
        /* fields only written to during inode creation */
index f24fa628fecf1e1b01cb21c91170b40d85437290..992e6d33770940644c04f25e47554906e7456eda 100644 (file)
@@ -252,7 +252,10 @@ xfs_inode_from_disk(
                                           be64_to_cpu(from->di_changecount));
                ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime);
                ip->i_diflags2 = be64_to_cpu(from->di_flags2);
+               /* also covers the di_used_blocks union arm: */
                ip->i_cowextsize = be32_to_cpu(from->di_cowextsize);
+               BUILD_BUG_ON(sizeof(from->di_cowextsize) !=
+                            sizeof(from->di_used_blocks));
        }
 
        error = xfs_iformat_data_fork(ip, from);
@@ -349,6 +352,7 @@ xfs_inode_to_disk(
                to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
                to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime);
                to->di_flags2 = cpu_to_be64(ip->i_diflags2);
+               /* also covers the di_used_blocks union arm: */
                to->di_cowextsize = cpu_to_be32(ip->i_cowextsize);
                to->di_ino = cpu_to_be64(ip->i_ino);
                to->di_lsn = cpu_to_be64(lsn);
@@ -752,11 +756,18 @@ xfs_dinode_verify(
            !xfs_has_rtreflink(mp))
                return __this_address;
 
-       /* COW extent size hint validation */
-       fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
-                       mode, flags, flags2);
-       if (fa)
-               return fa;
+       if (xfs_has_zoned(mp) &&
+           dip->di_metatype == cpu_to_be16(XFS_METAFILE_RTRMAP)) {
+               if (be32_to_cpu(dip->di_used_blocks) > mp->m_sb.sb_rgextents)
+                       return __this_address;
+       } else {
+               /* COW extent size hint validation */
+               fa = xfs_inode_validate_cowextsize(mp,
+                               be32_to_cpu(dip->di_cowextsize),
+                               mode, flags, flags2);
+               if (fa)
+                       return fa;
+       }
 
        /* bigtime iflag can only happen on bigtime filesystems */
        if (xfs_dinode_has_bigtime(dip) &&
index deb0b7c00a1ffa90f1a4bbd8ded01d08898bbe14..48fe49a5f050f3463dc385677655e2f38a6344fe 100644 (file)
@@ -322,6 +322,7 @@ xfs_inode_init(
 
        if (xfs_has_v3inodes(mp)) {
                inode_set_iversion(inode, 1);
+               /* also covers the di_used_blocks union arm: */
                ip->i_cowextsize = 0;
                times |= XFS_ICHGTIME_CREATE;
        }
index a472ac2e45d0d86fab3b98fd0d3d31675878689a..0d637c276db053dce1dedfb5aa8d07ddb484ed5c 100644 (file)
@@ -475,7 +475,12 @@ struct xfs_log_dinode {
        xfs_lsn_t       di_lsn;
 
        uint64_t        di_flags2;      /* more random flags */
-       uint32_t        di_cowextsize;  /* basic cow extent size for file */
+       union {
+               /* basic cow extent size for (regular) file */
+               uint32_t                di_cowextsize;
+               /* used blocks in RTG for (zoned) rtrmap inode */
+               uint32_t                di_used_blocks;
+       };
        uint8_t         di_pad2[12];    /* more padding for future expansion */
 
        /* fields only written to during inode creation */
index a85ecddaa48eed37ecbe796fb055ff7c6f5332f8..5ed44fdf7491056a4011596151285cd00be29c91 100644 (file)
@@ -233,8 +233,8 @@ xfs_check_ondisk_structs(void)
                        16299260424LL);
 
        /* superblock field checks we got from xfs/122 */
-       XFS_CHECK_STRUCT_SIZE(struct xfs_dsb,           288);
-       XFS_CHECK_STRUCT_SIZE(struct xfs_sb,            288);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_dsb,           304);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_sb,            304);
        XFS_CHECK_SB_OFFSET(sb_magicnum,                0);
        XFS_CHECK_SB_OFFSET(sb_blocksize,               4);
        XFS_CHECK_SB_OFFSET(sb_dblocks,                 8);
@@ -295,6 +295,8 @@ xfs_check_ondisk_structs(void)
        XFS_CHECK_SB_OFFSET(sb_rgextents,               276);
        XFS_CHECK_SB_OFFSET(sb_rgblklog,                280);
        XFS_CHECK_SB_OFFSET(sb_pad,                     281);
+       XFS_CHECK_SB_OFFSET(sb_rtstart,                 288);
+       XFS_CHECK_SB_OFFSET(sb_rtreserved,              296);
 }
 
 #endif /* __XFS_ONDISK_H */
index 770adf60dd739226f8486bba3301e7aed92509e7..5057536e586ca44071a74eb546a64ef687245852 100644 (file)
@@ -1123,6 +1123,7 @@ xfs_rtfree_blocks(
        xfs_extlen_t            mod;
        int                     error;
 
+       ASSERT(!xfs_has_zoned(mp));
        ASSERT(rtlen <= XFS_MAX_BMBT_EXTLEN);
 
        mod = xfs_blen_to_rtxoff(mp, rtlen);
@@ -1174,6 +1175,9 @@ xfs_rtalloc_query_range(
 
        end = min(end, rtg->rtg_extents - 1);
 
+       if (xfs_has_zoned(mp))
+               return -EINVAL;
+
        /* Iterate the bitmap, looking for discrepancies. */
        while (start <= end) {
                struct xfs_rtalloc_rec  rec;
@@ -1268,6 +1272,8 @@ xfs_rtbitmap_blockcount_len(
        struct xfs_mount        *mp,
        xfs_rtbxlen_t           rtextents)
 {
+       if (xfs_has_zoned(mp))
+               return 0;
        return howmany_64(rtextents, xfs_rtbitmap_rtx_per_rbmblock(mp));
 }
 
@@ -1308,6 +1314,11 @@ xfs_rtsummary_blockcount(
        xfs_rtbxlen_t           rextents = xfs_rtbitmap_bitcount(mp);
        unsigned long long      rsumwords;
 
+       if (xfs_has_zoned(mp)) {
+               *rsumlevels = 0;
+               return 0;
+       }
+
        *rsumlevels = xfs_compute_rextslog(rextents) + 1;
        rsumwords = xfs_rtbitmap_blockcount_len(mp, rextents) * (*rsumlevels);
        return howmany_64(rsumwords, mp->m_blockwsize);
index 97aad896714912d1681dcea52592ba11cc0c1ab6..9186c58e83d50c0ab80f1e850d2114e0715075e5 100644 (file)
@@ -194,15 +194,17 @@ xfs_rtgroup_lock(
        ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) ||
               !(rtglock_flags & XFS_RTGLOCK_BITMAP));
 
-       if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
-               /*
-                * Lock both realtime free space metadata inodes for a freespace
-                * update.
-                */
-               xfs_ilock(rtg_bitmap(rtg), XFS_ILOCK_EXCL);
-               xfs_ilock(rtg_summary(rtg), XFS_ILOCK_EXCL);
-       } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
-               xfs_ilock(rtg_bitmap(rtg), XFS_ILOCK_SHARED);
+       if (!xfs_has_zoned(rtg_mount(rtg))) {
+               if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
+                       /*
+                        * Lock both realtime free space metadata inodes for a
+                        * freespace update.
+                        */
+                       xfs_ilock(rtg_bitmap(rtg), XFS_ILOCK_EXCL);
+                       xfs_ilock(rtg_summary(rtg), XFS_ILOCK_EXCL);
+               } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
+                       xfs_ilock(rtg_bitmap(rtg), XFS_ILOCK_SHARED);
+               }
        }
 
        if ((rtglock_flags & XFS_RTGLOCK_RMAP) && rtg_rmap(rtg))
@@ -228,11 +230,13 @@ xfs_rtgroup_unlock(
        if ((rtglock_flags & XFS_RTGLOCK_RMAP) && rtg_rmap(rtg))
                xfs_iunlock(rtg_rmap(rtg), XFS_ILOCK_EXCL);
 
-       if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
-               xfs_iunlock(rtg_summary(rtg), XFS_ILOCK_EXCL);
-               xfs_iunlock(rtg_bitmap(rtg), XFS_ILOCK_EXCL);
-       } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
-               xfs_iunlock(rtg_bitmap(rtg), XFS_ILOCK_SHARED);
+       if (!xfs_has_zoned(rtg_mount(rtg))) {
+               if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
+                       xfs_iunlock(rtg_summary(rtg), XFS_ILOCK_EXCL);
+                       xfs_iunlock(rtg_bitmap(rtg), XFS_ILOCK_EXCL);
+               } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
+                       xfs_iunlock(rtg_bitmap(rtg), XFS_ILOCK_SHARED);
+               }
        }
 }
 
@@ -249,7 +253,8 @@ xfs_rtgroup_trans_join(
        ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
        ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED));
 
-       if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
+       if (!xfs_has_zoned(rtg_mount(rtg)) &&
+           (rtglock_flags & XFS_RTGLOCK_BITMAP)) {
                xfs_trans_ijoin(tp, rtg_bitmap(rtg), XFS_ILOCK_EXCL);
                xfs_trans_ijoin(tp, rtg_summary(rtg), XFS_ILOCK_EXCL);
        }
@@ -354,6 +359,7 @@ static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = {
                .sick           = XFS_SICK_RG_BITMAP,
                .fmt_mask       = (1U << XFS_DINODE_FMT_EXTENTS) |
                                  (1U << XFS_DINODE_FMT_BTREE),
+               .enabled        = xfs_has_nonzoned,
                .create         = xfs_rtbitmap_create,
        },
        [XFS_RTGI_SUMMARY] = {
@@ -362,6 +368,7 @@ static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = {
                .sick           = XFS_SICK_RG_SUMMARY,
                .fmt_mask       = (1U << XFS_DINODE_FMT_EXTENTS) |
                                  (1U << XFS_DINODE_FMT_BTREE),
+               .enabled        = xfs_has_nonzoned,
                .create         = xfs_rtsummary_create,
        },
        [XFS_RTGI_RMAP] = {
index 80e383f641551e5917b019518a34f0967d5fc948..8f57b14995c87913f8368f39cd65c3ab412907b7 100644 (file)
@@ -30,6 +30,7 @@
 #include "xfs_rtgroup.h"
 #include "xfs_rtrmap_btree.h"
 #include "xfs_rtrefcount_btree.h"
+#include "xfs_rtbitmap.h"
 
 /*
  * Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -185,6 +186,8 @@ xfs_sb_version_to_features(
                features |= XFS_FEAT_PARENT;
        if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)
                features |= XFS_FEAT_METADIR;
+       if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED)
+               features |= XFS_FEAT_ZONED;
 
        return features;
 }
@@ -266,6 +269,9 @@ static uint64_t
 xfs_expected_rbmblocks(
        struct xfs_sb           *sbp)
 {
+       if (xfs_sb_is_v5(sbp) &&
+           (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED))
+               return 0;
        return howmany_64(xfs_extents_per_rbm(sbp),
                          NBBY * xfs_rtbmblock_size(sbp));
 }
@@ -275,9 +281,15 @@ bool
 xfs_validate_rt_geometry(
        struct xfs_sb           *sbp)
 {
-       if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE ||
-           sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)
-               return false;
+       if (xfs_sb_is_v5(sbp) &&
+           (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED)) {
+               if (sbp->sb_rextsize != 1)
+                       return false;
+       } else {
+               if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE ||
+                   sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)
+                       return false;
+       }
 
        if (sbp->sb_rblocks == 0) {
                if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 ||
@@ -435,6 +447,34 @@ xfs_validate_sb_rtgroups(
        return 0;
 }
 
+static int
+xfs_validate_sb_zoned(
+       struct xfs_mount        *mp,
+       struct xfs_sb           *sbp)
+{
+       if (sbp->sb_frextents != 0) {
+               xfs_warn(mp,
+"sb_frextents must be zero for zoned file systems.");
+               return -EINVAL;
+       }
+
+       if (sbp->sb_rtstart && sbp->sb_rtstart < sbp->sb_dblocks) {
+               xfs_warn(mp,
+"sb_rtstart (%lld) overlaps sb_dblocks (%lld).",
+                       sbp->sb_rtstart, sbp->sb_dblocks);
+               return -EINVAL;
+       }
+
+       if (sbp->sb_rtreserved && sbp->sb_rtreserved >= sbp->sb_rblocks) {
+               xfs_warn(mp,
+"sb_rtreserved (%lld) larger than sb_rblocks (%lld).",
+                       sbp->sb_rtreserved, sbp->sb_rblocks);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 /* Check the validity of the SB. */
 STATIC int
 xfs_validate_sb_common(
@@ -523,6 +563,11 @@ xfs_validate_sb_common(
                        if (error)
                                return error;
                }
+               if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED) {
+                       error = xfs_validate_sb_zoned(mp, sbp);
+                       if (error)
+                               return error;
+               }
        } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
                                XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
                        xfs_notice(mp,
@@ -835,6 +880,14 @@ __xfs_sb_from_disk(
                to->sb_rgcount = 1;
                to->sb_rgextents = 0;
        }
+
+       if (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED) {
+               to->sb_rtstart = be64_to_cpu(from->sb_rtstart);
+               to->sb_rtreserved = be64_to_cpu(from->sb_rtreserved);
+       } else {
+               to->sb_rtstart = 0;
+               to->sb_rtreserved = 0;
+       }
 }
 
 void
@@ -1001,6 +1054,11 @@ xfs_sb_to_disk(
                to->sb_rbmino = cpu_to_be64(0);
                to->sb_rsumino = cpu_to_be64(0);
        }
+
+       if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED) {
+               to->sb_rtstart = cpu_to_be64(from->sb_rtstart);
+               to->sb_rtreserved = cpu_to_be64(from->sb_rtreserved);
+       }
 }
 
 /*
index 9f8c312dfd3c8264327a0032491add617e8d9475..303374df44bd2ca62178c79bd7c1f51cf917dc74 100644 (file)
@@ -69,6 +69,8 @@ STATIC size_t
 xchk_superblock_ondisk_size(
        struct xfs_mount        *mp)
 {
+       if (xfs_has_zoned(mp))
+               return offsetofend(struct xfs_dsb, sb_rtreserved);
        if (xfs_has_metadir(mp))
                return offsetofend(struct xfs_dsb, sb_pad);
        if (xfs_has_metauuid(mp))
index db6edd5a5fe5d8f47fc93c28e4efc75443a0f5c1..bb3f475b63532e819697fb32185318e3379bde94 100644 (file)
@@ -273,6 +273,13 @@ xchk_inode_cowextsize(
        xfs_failaddr_t          fa;
        uint32_t                value = be32_to_cpu(dip->di_cowextsize);
 
+       /*
+        * The used block counter for rtrmap is checked and repaired elsewhere.
+        */
+       if (xfs_has_zoned(sc->mp) &&
+           dip->di_metatype == cpu_to_be16(XFS_METAFILE_RTRMAP))
+               return;
+
        fa = xfs_inode_validate_cowextsize(sc->mp, value, mode, flags, flags2);
        if (fa)
                xchk_ino_set_corrupt(sc, ino);
index 2f641b6d663eb2b8d231a80265879c08df6be7a5..c8d17dd4fc3246bdf806f4d3c3507f9ffbf8a346 100644 (file)
@@ -710,7 +710,9 @@ xrep_dinode_extsize_hints(
                                              XFS_DIFLAG_EXTSZINHERIT);
        }
 
-       if (dip->di_version < 3)
+       if (dip->di_version < 3 ||
+           (xfs_has_zoned(sc->mp) &&
+            dip->di_metatype == cpu_to_be16(XFS_METAFILE_RTRMAP)))
                return;
 
        fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
index 7567dd5cad14f4734fbfbcfc0e0278b46fc29525..1a05c27ba47197f2e39df3c3879e8b378beb2f5d 100644 (file)
@@ -387,12 +387,14 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
        },
        [XFS_SCRUB_TYPE_RTBITMAP] = {   /* realtime bitmap */
                .type   = ST_RTGROUP,
+               .has    = xfs_has_nonzoned,
                .setup  = xchk_setup_rtbitmap,
                .scrub  = xchk_rtbitmap,
                .repair = xrep_rtbitmap,
        },
        [XFS_SCRUB_TYPE_RTSUM] = {      /* realtime summary */
                .type   = ST_RTGROUP,
+               .has    = xfs_has_nonzoned,
                .setup  = xchk_setup_rtsummary,
                .scrub  = xchk_rtsummary,
                .repair = xrep_rtsummary,
index 1dbd2d75f7ae3e3b59aeec1bd79a6b91ae87d048..917d4d0e51b3ac23b1f955c6dd36bdda269f3ffd 100644 (file)
@@ -1138,7 +1138,11 @@ xfs_getfsmap(
                handlers[1].fn = xfs_getfsmap_logdev;
        }
 #ifdef CONFIG_XFS_RT
-       if (mp->m_rtdev_targp) {
+       /*
+        * For zoned file systems there is no rtbitmap, so only support fsmap
+        * if the caller is privileged enough to use the full rmap version.
+        */
+       if (mp->m_rtdev_targp && (use_rmap || !xfs_has_zoned(mp))) {
                handlers[2].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
                handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
                if (use_rmap)
index c95fe1b1de4e6f3dc2651cd68804e1c14ff49447..4624d40c664c3eaab5e362793d380d91bc01dd6c 100644 (file)
@@ -3071,5 +3071,6 @@ bool
 xfs_is_always_cow_inode(
        const struct xfs_inode  *ip)
 {
-       return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount);
+       return xfs_is_zoned_inode(ip) ||
+               (ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount));
 }
index 1648dc5a806882278d7eec1cc7451cc9818332fa..4bb7a99e0dc4c0cd89bda2dd9fad120583292557 100644 (file)
@@ -59,8 +59,13 @@ typedef struct xfs_inode {
        xfs_rfsblock_t          i_nblocks;      /* # of direct & btree blocks */
        prid_t                  i_projid;       /* owner's project id */
        xfs_extlen_t            i_extsize;      /* basic/minimum extent size */
-       /* cowextsize is only used for v3 inodes, flushiter for v1/2 */
+       /*
+        * i_used_blocks is used for zoned rtrmap inodes,
+        * i_cowextsize is used for other v3 inodes,
+        * i_flushiter for v1/2 inodes
+        */
        union {
+               uint32_t        i_used_blocks;  /* used blocks in RTG */
                xfs_extlen_t    i_cowextsize;   /* basic cow extent size */
                uint16_t        i_flushiter;    /* incremented on flush */
        };
@@ -299,6 +304,11 @@ static inline bool xfs_is_internal_inode(const struct xfs_inode *ip)
               xfs_is_quota_inode(&mp->m_sb, ip->i_ino);
 }
 
+static inline bool xfs_is_zoned_inode(const struct xfs_inode *ip)
+{
+       return xfs_has_zoned(ip->i_mount) && XFS_IS_REALTIME_INODE(ip);
+}
+
 bool xfs_is_always_cow_inode(const struct xfs_inode *ip);
 
 static inline bool xfs_is_cow_inode(const struct xfs_inode *ip)
index 35803fcf0bebddc86489c570df219c8b900c0e11..40fc1bf900af90ac6425c036955c41b1b8285204 100644 (file)
@@ -596,6 +596,7 @@ xfs_inode_to_log_dinode(
                to->di_changecount = inode_peek_iversion(inode);
                to->di_crtime = xfs_inode_to_log_dinode_ts(ip, ip->i_crtime);
                to->di_flags2 = ip->i_diflags2;
+               /* also covers the di_used_blocks union arm: */
                to->di_cowextsize = ip->i_cowextsize;
                to->di_ino = ip->i_ino;
                to->di_lsn = lsn;
index f3bfb814378c066b26f16e3803de13515b750fd3..7205fd14f6b38c3055b0f2ef5780b8180c01374e 100644 (file)
@@ -203,6 +203,7 @@ xfs_log_dinode_to_disk(
                to->di_crtime = xfs_log_dinode_to_disk_ts(from,
                                                          from->di_crtime);
                to->di_flags2 = cpu_to_be64(from->di_flags2);
+               /* also covers the di_used_blocks union arm: */
                to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
                to->di_ino = cpu_to_be64(from->di_ino);
                to->di_lsn = cpu_to_be64(lsn);
index f3f4b5c328c3dfd95919beebb5f3622f6418df14..aa1db0dc1d98beaf6aa979865816455710183005 100644 (file)
@@ -1216,6 +1216,7 @@ retry:
 
        fdblocks = indlen;
        if (XFS_IS_REALTIME_INODE(ip)) {
+               ASSERT(!xfs_is_zoned_inode(ip));
                error = xfs_dec_frextents(mp, xfs_blen_to_rtbxlen(mp, alen));
                if (error)
                        goto out_unreserve_quota;
index 6ed485ff27568c59fbe313c321a0c8fccf22703c..15d410d16bb27c62e338e86d6c87d4bc2229ee86 100644 (file)
@@ -173,6 +173,10 @@ xfs_warn_experimental(
                        .opstate        = XFS_OPSTATE_WARNED_METADIR,
                        .name           = "metadata directory tree",
                },
+               [XFS_EXPERIMENTAL_ZONED] = {
+                       .opstate        = XFS_OPSTATE_WARNED_ZONED,
+                       .name           = "zoned RT device",
+               },
        };
        ASSERT(feat >= 0 && feat < XFS_EXPERIMENTAL_MAX);
        BUILD_BUG_ON(ARRAY_SIZE(features) != XFS_EXPERIMENTAL_MAX);
index 7fb36ced9df7474ec749c3104fe30bab342bad74..a92a4d09c8e9fa0b129400bcbfda3e10afcfff0b 100644 (file)
@@ -99,6 +99,7 @@ enum xfs_experimental_feat {
        XFS_EXPERIMENTAL_EXCHRANGE,
        XFS_EXPERIMENTAL_PPTR,
        XFS_EXPERIMENTAL_METADIR,
+       XFS_EXPERIMENTAL_ZONED,
 
        XFS_EXPERIMENTAL_MAX,
 };
index 9bfb5e08715d9349696edc69922087f063528632..5a1184ff1995622a6e95296d979303da54d01b29 100644 (file)
@@ -342,6 +342,7 @@ typedef struct xfs_mount {
 #define XFS_FEAT_NREXT64       (1ULL << 26)    /* large extent counters */
 #define XFS_FEAT_EXCHANGE_RANGE        (1ULL << 27)    /* exchange range */
 #define XFS_FEAT_METADIR       (1ULL << 28)    /* metadata directory tree */
+#define XFS_FEAT_ZONED         (1ULL << 29)    /* zoned RT device */
 
 /* Mount features */
 #define XFS_FEAT_NOATTR2       (1ULL << 48)    /* disable attr2 creation */
@@ -398,6 +399,7 @@ __XFS_HAS_FEAT(needsrepair, NEEDSREPAIR)
 __XFS_HAS_FEAT(large_extent_counts, NREXT64)
 __XFS_HAS_FEAT(exchange_range, EXCHANGE_RANGE)
 __XFS_HAS_FEAT(metadir, METADIR)
+__XFS_HAS_FEAT(zoned, ZONED)
 
 static inline bool xfs_has_rtgroups(const struct xfs_mount *mp)
 {
@@ -408,7 +410,9 @@ static inline bool xfs_has_rtgroups(const struct xfs_mount *mp)
 static inline bool xfs_has_rtsb(const struct xfs_mount *mp)
 {
        /* all rtgroups filesystems with an rt section have an rtsb */
-       return xfs_has_rtgroups(mp) && xfs_has_realtime(mp);
+       return xfs_has_rtgroups(mp) &&
+               xfs_has_realtime(mp) &&
+               !xfs_has_zoned(mp);
 }
 
 static inline bool xfs_has_rtrmapbt(const struct xfs_mount *mp)
@@ -423,6 +427,11 @@ static inline bool xfs_has_rtreflink(const struct xfs_mount *mp)
               xfs_has_reflink(mp);
 }
 
+static inline bool xfs_has_nonzoned(const struct xfs_mount *mp)
+{
+       return !xfs_has_zoned(mp);
+}
+
 /*
  * Some features are always on for v5 file systems, allow the compiler to
  * eliminiate dead code when building without v4 support.
@@ -526,6 +535,8 @@ __XFS_HAS_FEAT(nouuid, NOUUID)
 #define XFS_OPSTATE_WARNED_METADIR     17
 /* Filesystem should use qflags to determine quotaon status */
 #define XFS_OPSTATE_RESUMING_QUOTAON   18
+/* Kernel has logged a warning about zoned RT device being used on this fs. */
+#define XFS_OPSTATE_WARNED_ZONED       19
 
 #define __XFS_IS_OPSTATE(name, NAME) \
 static inline bool xfs_is_ ## name (struct xfs_mount *mp) \
index f5a0dbc46a14349c30369c7ec9ab5412157191f6..c3a8efc7f09ba120d09e4480900af067c433e31b 100644 (file)
@@ -2056,6 +2056,8 @@ xfs_bmap_rtalloc(
                ap->datatype & XFS_ALLOC_INITIAL_USER_DATA;
        int                     error;
 
+       ASSERT(!xfs_has_zoned(ap->tp->t_mountp));
+
 retry:
        error = xfs_rtallocate_align(ap, &ralen, &raminlen, &prod, &noalign);
        if (error)
index 4414c8542144f23359ee4efa09057e2087716214..8a4b94dab6dd4da33164507b9161d7fff5538cdd 100644 (file)
@@ -1787,8 +1787,17 @@ xfs_fs_fill_super(
                mp->m_features &= ~XFS_FEAT_DISCARD;
        }
 
-       if (xfs_has_metadir(mp))
+       if (xfs_has_zoned(mp)) {
+               if (!xfs_has_metadir(mp)) {
+                       xfs_alert(mp,
+               "metadir feature required for zoned realtime devices.");
+                       error = -EINVAL;
+                       goto out_filestream_unmount;
+               }
+               xfs_warn_experimental(mp, XFS_EXPERIMENTAL_ZONED);
+       } else if (xfs_has_metadir(mp)) {
                xfs_warn_experimental(mp, XFS_EXPERIMENTAL_METADIR);
+       }
 
        if (xfs_has_reflink(mp)) {
                if (xfs_has_realtime(mp) &&