]> www.infradead.org Git - users/hch/xfsprogs.git/commitdiff
patch xfsprogs-rmap-btree-single-owner-2 rmap-btree
author Dave Chinner <david@fromorbit.com>
Wed, 3 Jun 2015 02:13:34 +0000 (12:13 +1000)
committer Dave Chinner <david@fromorbit.com>
Wed, 3 Jun 2015 02:13:34 +0000 (12:13 +1000)
31 files changed:
db/agf.c
db/btblock.c
db/btblock.h
db/field.c
db/field.h
db/type.c
db/type.h
include/Makefile
include/libxfs.h
include/xfs_mount.h
include/xfs_trace.h
libxfs/Makefile
libxfs/xfs_alloc.c
libxfs/xfs_alloc.h
libxfs/xfs_bmap.c
libxfs/xfs_bmap_btree.c
libxfs/xfs_btree.h
libxfs/xfs_format.h
libxfs/xfs_ialloc.c
libxfs/xfs_ialloc_btree.c
libxfs/xfs_rmap.c [new file with mode: 0644]
libxfs/xfs_rmap_btree.c [new file with mode: 0644]
libxfs/xfs_rmap_btree.h [new file with mode: 0644]
libxfs/xfs_sb.c
libxfs/xfs_shared.h
libxfs/xfs_types.h
mkfs/xfs_mkfs.c
repair/dinode.c
repair/incore.h
repair/scan.c
repair/xfs_repair.c

index d9a07cafb8f2442b512b5f033afbbdda4ebd514f..f0121abf4a7bc1213b3e1af32a0ab00f29133f33 100644 (file)
--- a/db/agf.c
+++ b/db/agf.c
@@ -55,6 +55,9 @@ const field_t agf_flds[] = {
        { "cntroot", FLDT_AGBLOCK,
          OI(OFF(roots) + XFS_BTNUM_CNT * SZ(roots[XFS_BTNUM_CNT])), C1, 0,
          TYP_CNTBT },
+       { "rmaproot", FLDT_AGBLOCK,
+         OI(OFF(roots) + XFS_BTNUM_RMAP * SZ(roots[XFS_BTNUM_RMAP])), C1, 0,
+         TYP_RMAPBT },
        { "levels", FLDT_UINT32D, OI(OFF(levels)), CI(XFS_BTNUM_AGF),
          FLD_ARRAY|FLD_SKIPALL, TYP_NONE },
        { "bnolevel", FLDT_UINT32D,
@@ -63,6 +66,9 @@ const field_t agf_flds[] = {
        { "cntlevel", FLDT_UINT32D,
          OI(OFF(levels) + XFS_BTNUM_CNT * SZ(levels[XFS_BTNUM_CNT])), C1, 0,
          TYP_NONE },
+       { "rmaplevel", FLDT_UINT32D,
+         OI(OFF(levels) + XFS_BTNUM_RMAP * SZ(levels[XFS_BTNUM_RMAP])), C1, 0,
+         TYP_NONE },
        { "flfirst", FLDT_UINT32D, OI(OFF(flfirst)), C1, 0, TYP_NONE },
        { "fllast", FLDT_UINT32D, OI(OFF(fllast)), C1, 0, TYP_NONE },
        { "flcount", FLDT_UINT32D, OI(OFF(flcount)), C1, 0, TYP_NONE },
index cdb8b1df2b6a7b33fc722f5e23dd29abb75eb080..707bac8a50055c8eabffc60a68f2f6690ea2cca1 100644 (file)
@@ -96,6 +96,12 @@ struct xfs_db_btree {
                sizeof(xfs_inobt_rec_t),
                sizeof(__be32),
        },
+       {       XFS_RMAP_CRC_MAGIC,
+               XFS_BTREE_SBLOCK_CRC_LEN,
+               sizeof(struct xfs_rmap_key),
+               sizeof(struct xfs_rmap_rec),
+               sizeof(__be32),
+       },
        {       0,
        },
 };
@@ -571,3 +577,47 @@ const field_t      cntbt_rec_flds[] = {
        { NULL }
 };
 #undef ROFF
+
+/*
+ * RMAP btree blocks
+ *
+ * rmapbt_crc_hfld is the header field table for an rmap btree block: a
+ * single unnamed field of type FLDT_RMAPBT_CRC at offset 0, followed by the
+ * NULL terminator entry.  db/type.c registers this table for TYP_RMAPBT in
+ * its CRC type table.
+ */
+const field_t  rmapbt_crc_hfld[] = {
+       { "", FLDT_RMAPBT_CRC, OI(0), C1, 0, TYP_NONE },
+       { NULL }
+};
+
+/*
+ * Field table for a CRC-enabled rmap btree block.
+ *
+ * The fixed entries describe members of struct xfs_btree_block (using the
+ * u.s variant for the sibling pointers, block number, lsn, uuid, owner and
+ * crc); OFF() yields each member's offset via bitize(offsetof(...)).  The
+ * trailing recs/keys/ptrs entries are arrays whose offset and count are
+ * supplied by the btblock_*_offset and btblock_*_count callbacks.
+ *
+ * Every entry that names a btree block (leftsib, rightsib, bno, ptrs) is
+ * typed TYP_RMAPBT so the referenced block is interpreted as an rmap btree
+ * block and not as a block of the cntbt table this was modelled on.
+ */
+#define        OFF(f)  bitize(offsetof(struct xfs_btree_block, bb_ ## f))
+const field_t  rmapbt_crc_flds[] = {
+       { "magic", FLDT_UINT32X, OI(OFF(magic)), C1, 0, TYP_NONE },
+       { "level", FLDT_UINT16D, OI(OFF(level)), C1, 0, TYP_NONE },
+       { "numrecs", FLDT_UINT16D, OI(OFF(numrecs)), C1, 0, TYP_NONE },
+       { "leftsib", FLDT_AGBLOCK, OI(OFF(u.s.bb_leftsib)), C1, 0, TYP_RMAPBT },
+       { "rightsib", FLDT_AGBLOCK, OI(OFF(u.s.bb_rightsib)), C1, 0, TYP_RMAPBT },
+       { "bno", FLDT_DFSBNO, OI(OFF(u.s.bb_blkno)), C1, 0, TYP_RMAPBT },
+       { "lsn", FLDT_UINT64X, OI(OFF(u.s.bb_lsn)), C1, 0, TYP_NONE },
+       { "uuid", FLDT_UUID, OI(OFF(u.s.bb_uuid)), C1, 0, TYP_NONE },
+       { "owner", FLDT_AGNUMBER, OI(OFF(u.s.bb_owner)), C1, 0, TYP_NONE },
+       { "crc", FLDT_CRC, OI(OFF(u.s.bb_crc)), C1, 0, TYP_NONE },
+       { "recs", FLDT_RMAPBTREC, btblock_rec_offset, btblock_rec_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { "keys", FLDT_RMAPBTKEY, btblock_key_offset, btblock_key_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_NONE },
+       { "ptrs", FLDT_RMAPBTPTR, btblock_ptr_offset, btblock_key_count,
+         FLD_ARRAY|FLD_ABASE1|FLD_COUNT|FLD_OFFSET, TYP_RMAPBT },
+       { NULL }
+};
+#undef OFF
+
+/*
+ * Field table for struct xfs_rmap_key: the key has a single member,
+ * rm_startblock, shown as "startblock".  KOFF() computes the member offset
+ * with bitize(offsetof(...)) and is only defined for this table.
+ */
+#define        KOFF(f) bitize(offsetof(struct xfs_rmap_key, rm_ ## f))
+const field_t  rmapbt_key_flds[] = {
+       { "startblock", FLDT_AGBLOCK, OI(KOFF(startblock)), C1, 0, TYP_DATA },
+       { NULL }
+};
+#undef KOFF
+
+/*
+ * Field table for struct xfs_rmap_rec: "startblock", "blockcount" and
+ * "owner" map to rm_startblock, rm_blockcount and rm_owner respectively.
+ * ROFF() computes each member offset with bitize(offsetof(...)) and is only
+ * defined for this table.
+ */
+#define        ROFF(f) bitize(offsetof(struct xfs_rmap_rec, rm_ ## f))
+const field_t  rmapbt_rec_flds[] = {
+       { "startblock", FLDT_AGBLOCK, OI(ROFF(startblock)), C1, 0, TYP_DATA },
+       { "blockcount", FLDT_EXTLEN, OI(ROFF(blockcount)), C1, 0, TYP_NONE },
+       { "owner", FLDT_UINT64X, OI(ROFF(owner)), C1, 0, TYP_NONE },
+       { NULL }
+};
+#undef ROFF
index daee060e32f5b3dcd9108fc907e3b51e020b6731..d8662a1ea5f1c74c0a2aab668935de96c361b103 100644 (file)
@@ -51,4 +51,9 @@ extern const struct field     cntbt_crc_hfld[];
 extern const struct field      cntbt_key_flds[];
 extern const struct field      cntbt_rec_flds[];
 
+extern const struct field      rmapbt_crc_flds[];
+extern const struct field      rmapbt_crc_hfld[];
+extern const struct field      rmapbt_key_flds[];
+extern const struct field      rmapbt_rec_flds[];
+
 extern int     btblock_size(void *obj, int startoff, int idx);
index 816065e7471d8cf5d513fa629ddb663665a14d2e..d185b234c2fac3fee162fcdf11e693acb4df4e2f 100644 (file)
@@ -164,6 +164,15 @@ const ftattr_t     ftattrtab[] = {
        { FLDT_CNTBTREC, "cntbtrec", fp_sarray, (char *)cntbt_rec_flds,
          SI(bitsz(xfs_alloc_rec_t)), 0, NULL, cntbt_rec_flds },
 
+       { FLDT_RMAPBT_CRC, "rmapbt", NULL, (char *)rmapbt_crc_flds, btblock_size,
+         FTARG_SIZE, NULL, rmapbt_crc_flds },
+       { FLDT_RMAPBTKEY, "rmapbtkey", fp_sarray, (char *)rmapbt_key_flds,
+         SI(bitsz(struct xfs_rmap_key)), 0, NULL, rmapbt_key_flds },
+       { FLDT_RMAPBTPTR, "rmapbtptr", fp_num, "%u",
+         SI(bitsz(xfs_rmap_ptr_t)), 0, fa_agblock, NULL },
+       { FLDT_RMAPBTREC, "rmapbtrec", fp_sarray, (char *)rmapbt_rec_flds,
+         SI(bitsz(struct xfs_rmap_rec)), 0, NULL, rmapbt_rec_flds },
+
 /* CRC field */
        { FLDT_CRC, "crc", fp_crc, "%#x (%s)", SI(bitsz(__uint32_t)),
          0, NULL, NULL },
index 6343c9ae5a529d4bba76ba90244654d150556e0a..f3fba668d64eb7fa42933316082d464dcae3d73a 100644 (file)
@@ -80,6 +80,10 @@ typedef enum fldt    {
        FLDT_CNTBTKEY,
        FLDT_CNTBTPTR,
        FLDT_CNTBTREC,
+       FLDT_RMAPBT_CRC,
+       FLDT_RMAPBTKEY,
+       FLDT_RMAPBTPTR,
+       FLDT_RMAPBTREC,
 
        /* CRC field type */
        FLDT_CRC,
index b29f2a47a5907ebf42246129b64e9b338a264ec1..de978507c922408cecffabf74a8a74c9b7c5e0a0 100644 (file)
--- a/db/type.c
+++ b/db/type.c
@@ -58,6 +58,7 @@ static const typ_t    __typtab[] = {
        { TYP_BMAPBTD, "bmapbtd", handle_struct, bmapbtd_hfld, NULL },
        { TYP_BNOBT, "bnobt", handle_struct, bnobt_hfld, NULL },
        { TYP_CNTBT, "cntbt", handle_struct, cntbt_hfld, NULL },
+       { TYP_RMAPBT, NULL },
        { TYP_DATA, "data", handle_block, NULL, NULL },
        { TYP_DIR2, "dir2", handle_struct, dir2_hfld, NULL },
        { TYP_DQBLK, "dqblk", handle_struct, dqblk_hfld, NULL },
@@ -87,6 +88,8 @@ static const typ_t    __typtab_crc[] = {
                &xfs_allocbt_buf_ops },
        { TYP_CNTBT, "cntbt", handle_struct, cntbt_crc_hfld,
                &xfs_allocbt_buf_ops },
+       { TYP_RMAPBT, "rmapbt", handle_struct, rmapbt_crc_hfld, 
+               &xfs_rmapbt_buf_ops },
        { TYP_DATA, "data", handle_block, NULL, NULL },
        { TYP_DIR2, "dir3", handle_struct, dir3_hfld,
                &xfs_dir3_db_buf_ops },
index 3bb26f1744160ee2a38d3b28538666600c099aee..9d02d6dadd4dd1807d0b73f269e2b7f33e6df9af 100644 (file)
--- a/db/type.h
+++ b/db/type.h
@@ -24,7 +24,7 @@ struct field;
 typedef enum typnm
 {
        TYP_AGF, TYP_AGFL, TYP_AGI, TYP_ATTR, TYP_BMAPBTA,
-       TYP_BMAPBTD, TYP_BNOBT, TYP_CNTBT, TYP_DATA,
+       TYP_BMAPBTD, TYP_BNOBT, TYP_CNTBT, TYP_RMAPBT, TYP_DATA,
        TYP_DIR2, TYP_DQBLK, TYP_INOBT, TYP_INODATA, TYP_INODE,
        TYP_LOG, TYP_RTBITMAP, TYP_RTSUMMARY, TYP_SB, TYP_SYMLINK,
        TYP_TEXT, TYP_NONE
index 70e43a05be08d12c092fe7ba683bd865ffda39dd..b3526ec29892bf2cbcf3fc6ee999b250dc1f2d7e 100644 (file)
@@ -30,7 +30,7 @@ QAHFILES = libxfs.h libxlog.h \
        xfs_trace.h \
        xfs_trans.h
 
-HFILES = handle.h jdm.h xqm.h xfs.h
+HFILES = handle.h jdm.h xqm.h xfs.h platform_defs.h
 HFILES += $(PKG_PLATFORM).h
 PHFILES = darwin.h freebsd.h irix.h linux.h gnukfreebsd.h
 DKHFILES = volume.h fstyp.h dvh.h
index 6a59cc02412143978ace8a816c9f029a8384643a..c6bd37ddbcadd053d7375c675f7c266b607064f1 100644 (file)
@@ -66,6 +66,7 @@ extern uint32_t crc32c_le(uint32_t crc, unsigned char const *p, size_t len);
 #include <xfs/xfs_bmap_btree.h>
 #include <xfs/xfs_alloc_btree.h>
 #include <xfs/xfs_ialloc_btree.h>
+#include <xfs/xfs_rmap_btree.h>
 #include <xfs/xfs_attr_sf.h>
 #include <xfs/xfs_inode_fork.h>
 #include <xfs/xfs_inode_buf.h>
index 70bdea080270a2a73834fbb5fd74f041363ce457..b614edd378bc084f2e7017b103057eec5315b068 100644 (file)
@@ -64,6 +64,8 @@ typedef struct xfs_mount {
        uint                    m_bmap_dmnr[2]; /* XFS_BMAP_BLOCK_DMINRECS */
        uint                    m_inobt_mxr[2]; /* XFS_INOBT_BLOCK_MAXRECS */
        uint                    m_inobt_mnr[2]; /* XFS_INOBT_BLOCK_MINRECS */
+       uint                    m_rmap_mxr[2];  /* max rmap btree records */
+       uint                    m_rmap_mnr[2];  /* min rmap btree records */
        uint                    m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
        uint                    m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
        uint                    m_in_maxlevels; /* XFS_IN_MAXLEVELS */
index ab046a914664b51452d058560004d23f985de43a..31a994be22ea6ae35107e23b624ab290f674c7c1 100644 (file)
 #define trace_xfs_perag_get_tag(a,b,c,d) ((c) = (c))
 #define trace_xfs_perag_put(a,b,c,d)   ((c) = (c))
 
+#define trace_xfs_rmap_alloc_extent(a,b,c,d,e)         ((void) 0)
+#define trace_xfs_rmap_alloc_extent_done(a,b,c,d,e)    ((void) 0)
+#define trace_xfs_rmap_alloc_extent_error(a,b,c,d,e)   ((void) 0)
+#define trace_xfs_rmap_free_extent(a,b,c,d,e)          ((void) 0)
+#define trace_xfs_rmap_free_extent_done(a,b,c,d,e)     ((void) 0)
+#define trace_xfs_rmap_free_extent_error(a,b,c,d,e)    ((void) 0)
+
 #endif /* __TRACE_H__ */
index 981cb0bbd9172bb557718b59e1677574d6561fed..7dffa6b78e40f91f55045f61ae980fc8456175aa 100644 (file)
@@ -42,6 +42,7 @@ QAHFILES = xfs_alloc.h \
        xfs_inode_fork.h \
        xfs_log_format.h \
        xfs_quota_defs.h \
+       xfs_rmap_btree.h \
        xfs_sb.h \
        xfs_shared.h \
        xfs_trans_resv.h \
@@ -75,6 +76,8 @@ CFILES = cache.c \
        xfs_ialloc_btree.c \
        xfs_log_rlimit.c \
        xfs_rtbitmap.c \
+       xfs_rmap.c \
+       xfs_rmap_btree.c \
        xfs_sb.c \
        xfs_symlink_remote.c \
        xfs_trans_resv.c
index 23e3c5387503b40abf848e4ef885ff5f58d83719..d0003c5638d1d582383ad8139329cf5d80aaed11 100644 (file)
@@ -26,6 +26,7 @@
 #include "xfs_mount.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
+#include "xfs_rmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_alloc.h"
 #include "xfs_cksum.h"
@@ -615,6 +616,12 @@ xfs_alloc_ag_vextent(
        ASSERT(!args->wasfromfl || !args->isfl);
        ASSERT(args->agbno % args->alignment == 0);
 
+       /* insert new block into the reverse map btree */
+       error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
+                              args->agbno, args->len, args->owner);
+       if (error)
+               return error;
+
        if (!args->wasfromfl) {
                error = xfs_alloc_update_counters(args->tp, args->pag,
                                                  args->agbp,
@@ -1962,6 +1969,7 @@ xfs_alloc_fix_freelist(
        memset(&targs, 0, sizeof(targs));
        targs.tp = tp;
        targs.mp = mp;
+       targs.owner = XFS_RMAP_OWN_AG;
        targs.agbp = agbp;
        targs.agno = args->agno;
        targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
@@ -2586,6 +2594,8 @@ error0:
  * Free an extent.
  * Just break up the extent address and hand off to xfs_free_ag_extent
  * after fixing up the freelist.
+ *
+ * XXX: need owner of extent being freed
  */
 int                            /* error */
 xfs_free_extent(
@@ -2627,6 +2637,12 @@ xfs_free_extent(
                goto error0;
        }
 
+       /* XXX: need owner */
+       error = xfs_rmap_free(tp, args.agbp, args.agno, args.agbno, len, 0);
+       if (error)
+               goto error0;
+
+       /* XXX: initially no multiple references, so just free it */
        error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
        if (!error)
                xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
@@ -2634,3 +2650,14 @@ error0:
        xfs_perag_put(args.pag);
        return error;
 }
+
+/*
+ * Backing function for XFS_PREALLOC_BLOCKS().
+ *
+ * Picks the highest fixed-location AG block that applies to this
+ * filesystem - XFS_RMAP_BLOCK when the rmapbt feature is set, otherwise
+ * XFS_FIBT_BLOCK when the finobt feature is set, otherwise XFS_IBT_BLOCK -
+ * and returns that block number plus one.
+ */
+xfs_extlen_t
+xfs_prealloc_blocks(
+       struct xfs_mount        *mp)
+{
+       xfs_extlen_t            last_fixed;
+
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+               last_fixed = XFS_RMAP_BLOCK(mp);
+       else if (xfs_sb_version_hasfinobt(&mp->m_sb))
+               last_fixed = XFS_FIBT_BLOCK(mp);
+       else
+               last_fixed = XFS_IBT_BLOCK(mp);
+
+       return last_fixed + 1;
+}
index db5da4a9c67b63c5b0257d2d5f5364cd1c86bb80..4e52b1f8facfda4625c7727d052732203e3ed287 100644 (file)
@@ -72,6 +72,8 @@ typedef unsigned int xfs_alloctype_t;
  * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap
  * btree requires 1 fsb, so we set the number of set-aside blocks
  * to 4 + 4*agcount.
+ *
+ * XXX: this changes for rmapbt filesystems.
  */
 #define XFS_ALLOC_SET_ASIDE(mp)  (4 + ((mp)->m_sb.sb_agcount * 4))
 
@@ -86,10 +88,13 @@ typedef unsigned int xfs_alloctype_t;
  *
  * The AG headers are sector sized, so the amount of space they take up is
  * dependent on filesystem geometry. The others are all single blocks.
+ *
+ * XXX: this changes for rmapbt filesystems.
  */
 #define XFS_ALLOC_AG_MAX_USABLE(mp)    \
        ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7)
 
+xfs_extlen_t   xfs_prealloc_blocks(struct xfs_mount *mp);
 
 /*
  * Argument structure for xfs_alloc routines.
@@ -120,6 +125,7 @@ typedef struct xfs_alloc_arg {
        char            isfl;           /* set if is freelist blocks - !acctg */
        char            userdata;       /* set if this is user data */
        xfs_fsblock_t   firstblock;     /* io first block allocated */
+       uint64_t        owner;          /* owner of blocks being allocated */
 } xfs_alloc_arg_t;
 
 /*
index e6d1e6c0d7becb4495f5e5a125ea28c7499acd49..56e68726cace724619af81df588a23285dac0380 100644 (file)
@@ -769,6 +769,7 @@ xfs_bmap_extents_to_btree(
        memset(&args, 0, sizeof(args));
        args.tp = tp;
        args.mp = mp;
+       args.owner = ip->i_ino;
        args.firstblock = *firstblock;
        if (*firstblock == NULLFSBLOCK) {
                args.type = XFS_ALLOCTYPE_START_BNO;
@@ -915,6 +916,7 @@ xfs_bmap_local_to_extents(
        memset(&args, 0, sizeof(args));
        args.tp = tp;
        args.mp = ip->i_mount;
+       args.owner = ip->i_ino;
        args.firstblock = *firstblock;
        /*
         * Allocate a block.  We know we need only one, since the
@@ -3683,6 +3685,7 @@ xfs_bmap_btalloc(
        memset(&args, 0, sizeof(args));
        args.tp = ap->tp;
        args.mp = mp;
+       args.owner = ap->ip->i_ino;
        args.fsbno = ap->blkno;
 
        /* Trim the allocation back to the maximum an AG can fit. */
index 2fd04e0cd19a43d62a965865070357318a0ca619..5df400019f17e988b1d7cd9d106ef662a3c8c4cb 100644 (file)
@@ -442,6 +442,7 @@ xfs_bmbt_alloc_block(
        args.mp = cur->bc_mp;
        args.fsbno = cur->bc_private.b.firstblock;
        args.firstblock = args.fsbno;
+       args.owner = cur->bc_private.b.ip->i_ino;
 
        if (args.fsbno == NULLFSBLOCK) {
                args.fsbno = be64_to_cpu(start->l);
index 8f18bab73ea535d8de8cd20a78b14d68b35f079f..48ab2b105d2df0401820300ae425e161e676944b 100644 (file)
@@ -38,17 +38,19 @@ union xfs_btree_ptr {
 };
 
 union xfs_btree_key {
-       xfs_bmbt_key_t          bmbt;
-       xfs_bmdr_key_t          bmbr;   /* bmbt root block */
-       xfs_alloc_key_t         alloc;
-       xfs_inobt_key_t         inobt;
+       struct xfs_bmbt_key             bmbt;
+       xfs_bmdr_key_t                  bmbr;   /* bmbt root block */
+       xfs_alloc_key_t                 alloc;
+       struct xfs_inobt_key            inobt;
+       struct xfs_rmap_key             rmap;
 };
 
 union xfs_btree_rec {
-       xfs_bmbt_rec_t          bmbt;
-       xfs_bmdr_rec_t          bmbr;   /* bmbt root block */
-       xfs_alloc_rec_t         alloc;
-       xfs_inobt_rec_t         inobt;
+       struct xfs_bmbt_rec             bmbt;
+       xfs_bmdr_rec_t                  bmbr;   /* bmbt root block */
+       struct xfs_alloc_rec            alloc;
+       struct xfs_inobt_rec            inobt;
+       struct xfs_rmap_rec             rmap;
 };
 
 /*
@@ -63,6 +65,7 @@ union xfs_btree_rec {
 #define        XFS_BTNUM_BMAP  ((xfs_btnum_t)XFS_BTNUM_BMAPi)
 #define        XFS_BTNUM_INO   ((xfs_btnum_t)XFS_BTNUM_INOi)
 #define        XFS_BTNUM_FINO  ((xfs_btnum_t)XFS_BTNUM_FINOi)
+#define        XFS_BTNUM_RMAP  ((xfs_btnum_t)XFS_BTNUM_RMAPi)
 
 /*
  * For logging record fields.
@@ -94,6 +97,7 @@ do {    \
        case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break;  \
        case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break;    \
        case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break;  \
+       case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(rmap, stat); break;  \
        case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break;       \
        }       \
 } while (0)
@@ -108,6 +112,7 @@ do {    \
        case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
        case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
        case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \
+       case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_ADD(rmap, stat, val); break; \
        case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break;       \
        }       \
 } while (0)
@@ -199,6 +204,7 @@ typedef struct xfs_btree_cur
                xfs_alloc_rec_incore_t  a;
                xfs_bmbt_irec_t         b;
                xfs_inobt_rec_incore_t  i;
+               struct xfs_rmap_irec    r;
        }               bc_rec;         /* current insert/search record value */
        struct xfs_buf  *bc_bufs[XFS_BTREE_MAXLEVELS];  /* buf ptr per level */
        int             bc_ptrs[XFS_BTREE_MAXLEVELS];   /* key/record # */
index 4d313d3d1a9ce5dbac2a5656211fd86d4cd76d90..0fe326f7f0ecc29502434c60fa4520711ec1fb00 100644 (file)
@@ -445,8 +445,10 @@ xfs_sb_has_compat_feature(
 }
 
 #define XFS_SB_FEAT_RO_COMPAT_FINOBT   (1 << 0)                /* free inode btree */
+#define XFS_SB_FEAT_RO_COMPAT_RMAPBT   (1 << 1)                /* reverse map btree */
 #define XFS_SB_FEAT_RO_COMPAT_ALL \
-               (XFS_SB_FEAT_RO_COMPAT_FINOBT)
+               (XFS_SB_FEAT_RO_COMPAT_FINOBT | \
+                XFS_SB_FEAT_RO_COMPAT_RMAPBT)
 #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN  ~XFS_SB_FEAT_RO_COMPAT_ALL
 static inline bool
 xfs_sb_has_ro_compat_feature(
@@ -506,6 +508,12 @@ static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
                (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
 }
 
+/*
+ * Report whether the reverse mapping btree is enabled: the superblock must
+ * be version 5 and have XFS_SB_FEAT_RO_COMPAT_RMAPBT set in
+ * sb_features_ro_compat.
+ */
+static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb *sbp)
+{
+       bool    is_v5 = XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
+
+       return is_v5 &&
+               (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT) != 0;
+}
+
 /*
  * end of superblock version macros
  */
@@ -566,10 +574,10 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
 #define        XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION)
 
 /*
- * Btree number 0 is bno, 1 is cnt This value gives the size of the
+ * Btree number 0 is bno, 1 is cnt, 2 is rmap. This value gives the size of the
  * arrays below.
  */
-#define        XFS_BTNUM_AGF   ((int)XFS_BTNUM_CNTi + 1)
+#define        XFS_BTNUM_AGF   ((int)XFS_BTNUM_RMAPi + 1)
 
 /*
  * The second word of agf_levels in the first a.g. overlaps the EFS
@@ -586,12 +594,10 @@ typedef struct xfs_agf {
        __be32          agf_seqno;      /* sequence # starting from 0 */
        __be32          agf_length;     /* size in blocks of a.g. */
        /*
-        * Freespace information
+        * Freespace and rmap information
         */
        __be32          agf_roots[XFS_BTNUM_AGF];       /* root blocks */
-       __be32          agf_spare0;     /* spare field */
        __be32          agf_levels[XFS_BTNUM_AGF];      /* btree levels */
-       __be32          agf_spare1;     /* spare field */
 
        __be32          agf_flfirst;    /* first freelist block's index */
        __be32          agf_fllast;     /* last freelist block's index */
@@ -1254,16 +1260,74 @@ typedef __be32 xfs_inobt_ptr_t;
 #define        XFS_FIBT_BLOCK(mp)              ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
 
 /*
- * The first data block of an AG depends on whether the filesystem was formatted
- * with the finobt feature. If so, account for the finobt reserved root btree
- * block.
+ * Reverse mapping btree format definitions
+ *
+ * There is a btree for the reverse map per allocation group
+ */
+#define        XFS_RMAP_CRC_MAGIC      0x524d4233      /* 'RMB3' */
+
+/*
+ * Special owner types.
+ *
+ * Seeing as we only support up to 8EB, we have the upper bit of the owner field
+ * to tell us we have a special owner value. We use these for static metadata
+ * allocated at mkfs/growfs time, as well as for freespace management metadata.
+ */
+#define XFS_RMAP_OWN_NULL      (-1ULL) /* No owner, for growfs */
+#define XFS_RMAP_OWN_UNKNOWN   (-2ULL) /* Unknown owner, for EFI recovery */
+#define XFS_RMAP_OWN_FS                (-3ULL) /* static fs metadata */
+#define XFS_RMAP_OWN_LOG       (-4ULL) /* static fs metadata */
+#define XFS_RMAP_OWN_AG                (-5ULL) /* AG freespace btree blocks */
+#define XFS_RMAP_OWN_INOBT     (-6ULL) /* Inode btree blocks */
+#define XFS_RMAP_OWN_INODES    (-7ULL) /* Inode chunk */
+#define XFS_RMAP_OWN_MIN       (-8ULL) /* guard */
+
+/*
+ * Data record structure
+ *
+ * All members are big-endian (__be32/__be64).  This is the "rmap" member of
+ * union xfs_btree_rec; xfs_rmap_update() and xfs_rmap_get_rec() convert
+ * between it and the CPU-endian struct xfs_rmap_irec.
+ */
+struct xfs_rmap_rec {
+       __be32          rm_startblock;  /* extent start block */
+       __be32          rm_blockcount;  /* extent length */
+       __be64          rm_owner;       /* extent owner */
+};
+
+/*
+ * CPU-endian form of struct xfs_rmap_rec with the same three members.  The
+ * btree cursor carries one of these as bc_rec.r for the current
+ * insert/search record value.
+ */
+struct xfs_rmap_irec {
+       xfs_agblock_t   rm_startblock;  /* extent start block */
+       xfs_extlen_t    rm_blockcount;  /* extent length */
+       __uint64_t      rm_owner;       /* extent owner */
+};
+
+/*
+ * Key structure
+ *
+ * We don't use the length for lookups, so the key carries only the
+ * big-endian extent start block.  This is the "rmap" member of union
+ * xfs_btree_key.
+ */
+struct xfs_rmap_key {
+       __be32          rm_startblock;  /* extent start block */
+};
+
+/* btree pointer type */
+typedef __be32 xfs_rmap_ptr_t;
+
+/*
+ * block numbers in the AG.
  */
-#define XFS_PREALLOC_BLOCKS(mp) \
+#define        XFS_IBT_BLOCK(mp)               ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
+#define        XFS_FIBT_BLOCK(mp)              ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
+#define        XFS_RMAP_BLOCK(mp) \
        (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
         XFS_FIBT_BLOCK(mp) + 1 : \
         XFS_IBT_BLOCK(mp) + 1)
 
-
+/*
+ * The first data block of an AG depends on whether the filesystem was formatted
+ * with the optional btree features. These need to be accounted for
+ * appropriately.
+ *
+ * XXX: this should be calculated once at mount time and stored in the struct
+ * xfs_mount rather than calculated every time it is used.
+ */
+#define XFS_PREALLOC_BLOCKS(mp)        xfs_prealloc_blocks(mp)
 
 /*
  * BMAP Btree format definitions
index 2b4e4e0778662c098b4e08387a04b3fe6b98f6e2..08716f7e9ce4af9578f981c20b20379c0061d630 100644 (file)
@@ -364,6 +364,7 @@ xfs_ialloc_ag_alloc(
        memset(&args, 0, sizeof(args));
        args.tp = tp;
        args.mp = tp->t_mountp;
+       args.owner = XFS_RMAP_OWN_INODES;
 
        /*
         * Locking will ensure that we don't have two callers in here
index 9ac143a5efe3fd39766481feb900dc2d549eb6cb..a40b9e8c32e4d2f5348e113c70f65b55441caeaf 100644 (file)
@@ -95,6 +95,7 @@ xfs_inobt_alloc_block(
        memset(&args, 0, sizeof(args));
        args.tp = cur->bc_tp;
        args.mp = cur->bc_mp;
+       args.owner = XFS_RMAP_OWN_INOBT;
        args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
        args.minlen = 1;
        args.maxlen = 1;
diff --git a/libxfs/xfs_rmap.c b/libxfs/xfs_rmap.c
new file mode 100644 (file)
index 0000000..b2a3330
--- /dev/null
@@ -0,0 +1,413 @@
+
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "libxfs_priv.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_btree.h"
+#include "xfs_trans.h"
+#include "xfs_alloc.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_trace.h"
+
+
+/*
+ * Load [bno, len, owner] into the cursor's search record and perform an
+ * XFS_LOOKUP_LE lookup in the btree given by cur.
+ *
+ * stat is passed straight through to xfs_btree_lookup(), whose return value
+ * is returned unchanged.
+ */
+STATIC int
+xfs_rmap_lookup_le(
+       struct xfs_btree_cur    *cur,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       uint64_t                owner,
+       int                     *stat)
+{
+       struct xfs_rmap_irec    *key = &cur->bc_rec.r;
+
+       key->rm_owner = owner;
+       key->rm_blockcount = len;
+       key->rm_startblock = bno;
+
+       return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Overwrite the record the cursor points at with the values in irec.
+ *
+ * The start block, block count and owner are converted to big-endian into a
+ * union xfs_btree_rec and handed to xfs_btree_update(); its return value is
+ * returned as-is (0 on success, otherwise an error, expected to be
+ * EFSCORRUPTED).
+ */
+STATIC int
+xfs_rmap_update(
+       struct xfs_btree_cur    *cur,
+       struct xfs_rmap_irec    *irec)
+{
+       union xfs_btree_rec     rec;
+       struct xfs_rmap_rec     *out = &rec.rmap;
+
+       out->rm_owner = cpu_to_be64(irec->rm_owner);
+       out->rm_blockcount = cpu_to_be32(irec->rm_blockcount);
+       out->rm_startblock = cpu_to_be32(irec->rm_startblock);
+
+       return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Fetch the record the cursor points at and decode it into irec.
+ *
+ * Returns the error from xfs_btree_get_rec() if it fails.  If that call
+ * succeeds but leaves *stat at zero, 0 is returned and irec is left
+ * untouched.  Otherwise the big-endian record is converted into irec and 0
+ * is returned.
+ */
+STATIC int
+xfs_rmap_get_rec(
+       struct xfs_btree_cur    *cur,
+       struct xfs_rmap_irec    *irec,
+       int                     *stat)
+{
+       union xfs_btree_rec     *raw;
+       int                     err;
+
+       err = xfs_btree_get_rec(cur, &raw, stat);
+       if (err)
+               return err;
+       if (*stat == 0)
+               return 0;
+
+       irec->rm_owner = be64_to_cpu(raw->rmap.rm_owner);
+       irec->rm_blockcount = be32_to_cpu(raw->rmap.rm_blockcount);
+       irec->rm_startblock = be32_to_cpu(raw->rmap.rm_startblock);
+       return 0;
+}
+
+/*
+ * Find the extent in the rmap btree and remove it.
+ *
+ * The record we find should always span a range greater than or equal to the
+ * extent being freed. This makes the code simple as, in theory, we do not
+ * have to handle ranges that are split across multiple records as extents that
+ * result in bmap btree extent merges should also result in rmap btree extent
+ * merges.  The owner field ensures we don't merge extents from different
+ * structures into the same record, hence this property should always hold true
+ * if we ensure that the rmap btree supports at least the same size maximum
+ * extent as the bmap btree (2^21 blocks at present).
+ *
+ * Complexity: when growing the filesystem, we "free" an extent when growing the
+ * last AG. This extent is new space and so it is not tracked as used space in
+ * the btree. The growfs code will pass in an owner of XFS_RMAP_OWN_NULL to
+ * indicate that it expects that there is no owner of this extent. We verify
+ * that - the extent lookup results in a record that does not overlap.
+ *
+ * Complexity #2: EFIs do not record the owner of the extent, so when recovering
+ * EFIs from the log we pass in XFS_RMAP_OWN_UNKNOWN to tell the rmap btree to
+ * ignore the owner (i.e. wildcard match) so we don't trigger corruption checks
+ * during log recovery.
+ */
+int
+xfs_rmap_free(
+       struct xfs_trans        *tp,
+       struct xfs_buf          *agbp,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       uint64_t                owner)
+{
+       struct xfs_btree_cur    *cur;
+       struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_rmap_irec    ltrec;
+       int                     error;
+       int                     i;
+
+       /*
+        * if rmap btree is not supported, then just return success without
+        * doing anything.
+        */
+       if (!xfs_sb_version_hasrmapbt(&tp->t_mountp->m_sb))
+               return 0;
+
+       trace_xfs_rmap_free_extent(mp, agno, bno, len, owner);
+       cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+
+       /*
+        * We always have a left record because there's a static record
+        * for the AG headers at rm_startblock == 0.
+        */
+       error = xfs_rmap_lookup_le(cur, bno, len, owner, &i);
+       if (error)
+               goto out_error;
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+       error = xfs_rmap_get_rec(cur, &ltrec, &i);
+       if (error)
+               goto out_error;
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+       /* special growfs case - bno is beyond last record */
+       if (owner == XFS_RMAP_OWN_NULL) {
+               XFS_WANT_CORRUPTED_GOTO(mp, bno > ltrec.rm_startblock +
+                                               ltrec.rm_blockcount, out_error);
+               goto out_done;
+       }
+
+       /* make sure the extent we found covers the entire freeing range. */
+       XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno, out_error);
+       XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_blockcount >= len, out_error);
+
+/*
+       if (owner != ltrec.rm_owner ||
+           bno > ltrec.rm_startblock + ltrec.rm_blockcount)
+ */
+       //printk("rmfree  ag %d bno 0x%x/0x%x/0x%llx, ltrec 0x%x/0x%x/0x%llx\n",
+       //              agno, bno, len, owner, ltrec.rm_startblock,
+       //              ltrec.rm_blockcount, ltrec.rm_owner);
+       XFS_WANT_CORRUPTED_GOTO(mp, bno <= ltrec.rm_startblock + ltrec.rm_blockcount,
+                               out_error);
+       XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
+                               (owner < XFS_RMAP_OWN_NULL &&
+                                owner >= XFS_RMAP_OWN_MIN), out_error);
+
+       /* exact match is easy */
+       if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
+       //printk("remove exact\n");
+               /* remove extent from rmap tree */
+               error = xfs_btree_delete(cur, &i);
+               if (error)
+                       goto out_error;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+       } else if (ltrec.rm_startblock == bno) {
+       //printk("remove left\n");
+               /*
+                * overlap left hand side of extent
+                *
+                *       ltbno                ltlen
+                * Orig:    |oooooooooooooooooooo|
+                * Freeing: |fffffffff|
+                * Result:            |rrrrrrrrrr|
+                *         bno       len
+                */
+               ltrec.rm_startblock += len;
+               ltrec.rm_blockcount -= len;
+               error = xfs_rmap_update(cur, &ltrec);
+               if (error)
+                       goto out_error;
+       } else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) {
+       //printk("remove right\n");
+               /*
+                * overlap right hand side of extent
+                *
+                *       ltbno                ltlen
+                * Orig:    |oooooooooooooooooooo|
+                * Freeing:            |fffffffff|
+                * Result:  |rrrrrrrrrr|
+                *                    bno       len
+                */
+               ltrec.rm_blockcount -= len;
+               error = xfs_rmap_update(cur, &ltrec);
+               if (error)
+                       goto out_error;
+       } else {
+               /*
+                * overlap middle of extent
+                *
+                *       ltbno                ltlen
+                * Orig:    |oooooooooooooooooooo|
+                * Freeing:       |fffffffff|
+                * Result:  |rrrrr|         |rrrr|
+                *               bno       len
+                */
+               xfs_extlen_t    orig_len = ltrec.rm_blockcount;
+       //printk("remove middle\n");
+
+               ltrec.rm_blockcount = bno - ltrec.rm_startblock;;
+               error = xfs_rmap_update(cur, &ltrec);
+               if (error)
+                       goto out_error;
+
+               error = xfs_btree_increment(cur, 0, &i);
+               if (error)
+                       goto out_error;
+
+               cur->bc_rec.r.rm_startblock = bno + len;
+               cur->bc_rec.r.rm_blockcount = orig_len - len -
+                                                    ltrec.rm_blockcount;
+               cur->bc_rec.r.rm_owner = ltrec.rm_owner;
+               error = xfs_btree_insert(cur, &i);
+               if (error)
+                       goto out_error;
+       }
+
+out_done:
+       trace_xfs_rmap_free_extent_done(mp, agno, bno, len, owner);
+       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+       return 0;
+
+out_error:
+       trace_xfs_rmap_free_extent_error(mp, agno, bno, len, owner);
+       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+       return error;
+}
+
+/*
+ * Record a newly allocated extent in the reverse mapping btree.
+ *
+ * When we allocate a new block, the first thing we do is add a reference to the
+ * extent in the rmap btree. This is how we track the owner of the extent and
+ * the number of references to it.
+ *
+ * Initially, we do not have shared extents, and so the extent can only have a
+ * single reference count and owner. This makes the initial implementation easy,
+ * but does not allow us to use the rmap tree for tracking reflink shared files.
+ * Hence the initial implementation is simply a lookup to find the place to
+ * insert (and checking we don't find a duplicate/overlap) and then inserting
+ * the appropriate record. If the new extent is contiguous with a neighbouring
+ * record that has the same owner, that record is extended instead of adding a
+ * new one; if it bridges two such records they are merged into one.
+ *
+ * Returns 0 on success, or when the filesystem has no rmap btree (in which case
+ * nothing is done). Otherwise returns the error from the failing btree
+ * operation or corruption check.
+ */
+int
+xfs_rmap_alloc(
+       struct xfs_trans        *tp,
+       struct xfs_buf          *agbp,
+       xfs_agnumber_t          agno,
+       xfs_agblock_t           bno,
+       xfs_extlen_t            len,
+       uint64_t                owner)
+{
+       struct xfs_btree_cur    *cur;
+       struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_rmap_irec    ltrec;
+       struct xfs_rmap_irec    gtrec;
+       bool                    left_contig;
+       bool                    right_contig;
+       int                     have_gt;
+       int                     error;
+       int                     i;
+
+       /*
+        * if rmap btree is not supported, then just return success without
+        * doing anything.
+        */
+       if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+               return 0;
+
+       trace_xfs_rmap_alloc_extent(mp, agno, bno, len, owner);
+       cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+
+       /*
+        * Check to see if we find an existing record for this extent rather
+        * than just the location for insert. The lookup is required to find a
+        * record; failing to do so is treated as corruption.
+        */
+       error = xfs_rmap_lookup_le(cur, bno, len, owner, &i);
+       if (error)
+               goto out_error;
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+       error = xfs_rmap_get_rec(cur, &ltrec, &i);
+       if (error)
+               goto out_error;
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+
+       /* the left record must end at or before the start of the new extent */
+       XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock + ltrec.rm_blockcount <= bno,
+                               out_error);
+
+       /* step to the following record, if any, and check it does not overlap */
+       error = xfs_btree_increment(cur, 0, &have_gt);
+       if (error)
+               goto out_error;
+       if (have_gt) {
+               error = xfs_rmap_get_rec(cur, &gtrec, &i);
+               if (error)
+                       goto out_error;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+               XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= gtrec.rm_startblock,
+                                       out_error);
+       }
+
+       /*
+        * gtrec is only filled in when a right hand record was read, so it
+        * must not be consulted unless have_gt is set.
+        */
+       left_contig = ltrec.rm_owner == owner &&
+                     ltrec.rm_startblock + ltrec.rm_blockcount == bno;
+       right_contig = have_gt && gtrec.rm_owner == owner &&
+                      bno + len == gtrec.rm_startblock;
+
+       /* cursor currently points one record past ltrec */
+       if (left_contig) {
+               /*
+                * left edge contiguous
+                *
+                *       ltbno     ltlen
+                * orig:   |ooooooooo|
+                * adding:           |aaaaaaaaa|
+                * result: |rrrrrrrrrrrrrrrrrrr|
+                *                  bno       len
+                */
+               ltrec.rm_blockcount += len;
+               if (right_contig) {
+                       /*
+                        * right edge also contiguous
+                        *
+                        *       ltbno     ltlen    gtbno     gtlen
+                        * orig:   |ooooooooo|         |ooooooooo|
+                        * adding:           |aaaaaaaaa|
+                        * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr|
+                        */
+                       ltrec.rm_blockcount += gtrec.rm_blockcount;
+                       error = xfs_btree_delete(cur, &i);
+                       if (error)
+                               goto out_error;
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+               }
+
+               /* point the cursor back at the left record and update it */
+               error = xfs_btree_decrement(cur, 0, &i);
+               if (error)
+                       goto out_error;
+               error = xfs_rmap_update(cur, &ltrec);
+               if (error)
+                       goto out_error;
+       } else if (right_contig) {
+               /*
+                * right edge contiguous
+                *
+                *                 gtbno     gtlen
+                * Orig:             |ooooooooo|
+                * adding: |aaaaaaaaa|
+                * Result: |rrrrrrrrrrrrrrrrrrr|
+                *        bno       len
+                */
+               gtrec.rm_startblock = bno;
+               gtrec.rm_blockcount += len;
+               error = xfs_rmap_update(cur, &gtrec);
+               if (error)
+                       goto out_error;
+       } else {
+               /* no contiguous edge with identical owner */
+               cur->bc_rec.r.rm_startblock = bno;
+               cur->bc_rec.r.rm_blockcount = len;
+               cur->bc_rec.r.rm_owner = owner;
+               error = xfs_btree_insert(cur, &i);
+               if (error)
+                       goto out_error;
+               /* the insert must report that a record was actually added */
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+       }
+
+       trace_xfs_rmap_alloc_extent_done(mp, agno, bno, len, owner);
+       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+       return 0;
+
+out_error:
+       trace_xfs_rmap_alloc_extent_error(mp, agno, bno, len, owner);
+       xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+       return error;
+}
diff --git a/libxfs/xfs_rmap_btree.c b/libxfs/xfs_rmap_btree.c
new file mode 100644 (file)
index 0000000..ed1792d
--- /dev/null
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "libxfs_priv.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+
+
+/*
+ * Reverse map btree.
+ *
+ * This is a per-ag tree used to track the owner of a given extent. Owner
+ * records are inserted when an extent is allocated, and removed when an extent
+ * is freed. For existing filesystems, there can only be one owner of an extent,
+ * usually an inode or some other metadata structure like an AG btree.
+ *
+ * Initial thoughts are that the
+ * value of the owner field needs external flags to define what it means, and
+ * hence we need a flags field in the record. This means the record is going to
+ * be larger than 16 bytes (agbno,len,owner = 16 bytes), so maybe this isn't the
+ * best idea. Initially just implement the owner field - we can probably steal
+ * bits from the extent length field for type descriptors given that MAXEXTLEN
+ * is only 21 bits if we want to store the type as well. Keep in mind that if we
+ * want to do this there are still restrictions on the length of extents we
+ * track in the rmap btree (see comments on xfs_rmap_free()).
+ *
+ * The rmap btree is part of the free space management, so blocks for the tree
+ * are sourced from the agfl. Hence we need transaction reservation support for
+ * this tree so that the freelist is always large enough. This also impacts on
+ * the minimum space we need to leave free in the AG.
+ *
+ * The tree is ordered by block number - there's no need to order/search by
+ * extent size for online updating/management of the tree, and the reverse
+ * lookups are going to be "who owns this block", so by-block ordering is
+ * perfect for this.
+ *
+ * XXX: open question is how to handle blocks that are owned by the freespace
+ * tree blocks. Right now they will be classified when they are moved to the
+ * freelist or removed from the freelist. i.e. the extent allocation/freeing
+ * will mark the extents allocated as owned by the AG.
+ */
+/*
+ * Create another cursor for the same rmap btree by initialising a fresh
+ * cursor from the mount, transaction, AGF buffer and AG number held in @cur.
+ */
+STATIC struct xfs_btree_cur *
+xfs_rmapbt_dup_cursor(
+       struct xfs_btree_cur    *cur)
+{
+       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
+       xfs_agnumber_t          agno = cur->bc_private.a.agno;
+
+       return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp, agbp, agno);
+}
+
+/*
+ * Point the AGF at a new rmap btree root block and adjust the recorded tree
+ * height by @inc, both in the on-disk AGF and in the in-core perag, then log
+ * the changed AGF fields.
+ */
+STATIC void
+xfs_rmapbt_set_root(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr,
+       int                     inc)
+{
+       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
+       struct xfs_perag        *pag;
+       int                     which = cur->bc_btnum;
+
+       ASSERT(ptr->s != 0);
+
+       /* on-disk AGF: new root pointer and level count */
+       agf->agf_roots[which] = ptr->s;
+       be32_add_cpu(&agf->agf_levels[which], inc);
+
+       /* keep the in-core per-AG copy of the level count in step */
+       pag = xfs_perag_get(cur->bc_mp, be32_to_cpu(agf->agf_seqno));
+       pag->pagf_levels[which] += inc;
+       xfs_perag_put(pag);
+
+       xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+}
+
+/*
+ * Allocate a block for the rmap btree from the AG free list.
+ *
+ * On success *stat is 1 and @new holds the big-endian block number. If the
+ * free list returned NULLAGBLOCK, *stat is 0 and @new is left untouched. An
+ * error from the free list is returned directly. @start is not used.
+ */
+STATIC int
+xfs_rmapbt_alloc_block(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *start,
+       union xfs_btree_ptr     *new,
+       int                     *stat)
+{
+       xfs_agblock_t           agbno;
+       int                     error;
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+       /* Allocate the new block from the freelist. If we can't, give up.  */
+       error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+                                      &agbno, 1);
+       if (error) {
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+               return error;
+       }
+
+       if (agbno != NULLAGBLOCK) {
+               xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno,
+                                     agbno, 1, false);
+               xfs_trans_agbtree_delta(cur->bc_tp, 1);
+               new->s = cpu_to_be32(agbno);
+               *stat = 1;
+       } else {
+               /* free list had nothing to hand out */
+               *stat = 0;
+       }
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       return 0;
+}
+
+/*
+ * Release an rmap btree block: put it back on the AG free list, mark the
+ * extent busy (skipping discard), account for the btree block delta in the
+ * transaction and invalidate the buffer. Returns the error from the free list
+ * insertion, or 0.
+ */
+STATIC int
+xfs_rmapbt_free_block(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp)
+{
+       struct xfs_trans        *tp = cur->bc_tp;
+       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
+       xfs_agblock_t           agbno;
+       int                     error;
+
+       agbno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
+
+       error = xfs_alloc_put_freelist(tp, agbp, NULL, agbno, 1);
+       if (error)
+               return error;
+
+       xfs_extent_busy_insert(tp, be32_to_cpu(agf->agf_seqno), agbno, 1,
+                             XFS_EXTENT_BUSY_SKIP_DISCARD);
+       xfs_trans_agbtree_delta(tp, -1);
+       xfs_trans_binval(tp, bp);
+
+       return 0;
+}
+
+/*
+ * Minimum number of records in an rmap btree block at @level. Slot 0 of the
+ * per-mount table is used for level 0, slot 1 for every other level.
+ */
+STATIC int
+xfs_rmapbt_get_minrecs(
+       struct xfs_btree_cur    *cur,
+       int                     level)
+{
+       struct xfs_mount        *mp = cur->bc_mp;
+
+       if (level == 0)
+               return mp->m_rmap_mnr[0];
+       return mp->m_rmap_mnr[1];
+}
+
+/*
+ * Maximum number of records in an rmap btree block at @level. Slot 0 of the
+ * per-mount table is used for level 0, slot 1 for every other level.
+ */
+STATIC int
+xfs_rmapbt_get_maxrecs(
+       struct xfs_btree_cur    *cur,
+       int                     level)
+{
+       struct xfs_mount        *mp = cur->bc_mp;
+
+       if (level == 0)
+               return mp->m_rmap_mxr[0];
+       return mp->m_rmap_mxr[1];
+}
+
+/*
+ * Build a btree key from a record: the key is the record's start block,
+ * copied without byte swapping.
+ */
+STATIC void
+xfs_rmapbt_init_key_from_rec(
+       union xfs_btree_key     *key,
+       union xfs_btree_rec     *rec)
+{
+       struct xfs_rmap_key     *kp = &key->rmap;
+
+       kp->rm_startblock = rec->rmap.rm_startblock;
+}
+
+/*
+ * Seed a record from a key: only the start block is copied, the other record
+ * fields are left as they were.
+ */
+STATIC void
+xfs_rmapbt_init_rec_from_key(
+       union xfs_btree_key     *key,
+       union xfs_btree_rec     *rec)
+{
+       struct xfs_rmap_key     *kp = &key->rmap;
+
+       rec->rmap.rm_startblock = kp->rm_startblock;
+}
+
+/*
+ * Fill a big-endian record from the CPU-endian record held in the cursor.
+ */
+STATIC void
+xfs_rmapbt_init_rec_from_cur(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_rec     *rec)
+{
+       struct xfs_rmap_irec    *irec = &cur->bc_rec.r;
+
+       rec->rmap.rm_owner = cpu_to_be64(irec->rm_owner);
+       rec->rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount);
+       rec->rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
+}
+
+/*
+ * Set @ptr to the root of the tree this cursor walks, as recorded in the AGF
+ * buffer attached to the cursor.
+ */
+STATIC void
+xfs_rmapbt_init_ptr_from_cur(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr)
+{
+       struct xfs_buf          *agbp = cur->bc_private.a.agbp;
+       struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
+
+       /* the cursor and the AGF must agree on the AG, and a root must exist */
+       ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
+       ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
+
+       ptr->s = agf->agf_roots[cur->bc_btnum];
+}
+
+/*
+ * Signed difference between the start block in the on-disk @key and the start
+ * block of the record held in the cursor (key minus cursor).
+ */
+STATIC __int64_t
+xfs_rmapbt_key_diff(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_key     *key)
+{
+       __int64_t               key_bno = be32_to_cpu(key->rmap.rm_startblock);
+       __int64_t               cur_bno = cur->bc_rec.r.rm_startblock;
+
+       return key_bno - cur_bno;
+}
+
+/*
+ * Structural checks on an rmap btree block: magic number, feature bit, UUID,
+ * block number, owning AG, level, record count and sibling pointers. Returns
+ * true only if every check passes.
+ */
+static bool
+xfs_rmapbt_verify(
+       struct xfs_buf          *bp)
+{
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       struct xfs_perag        *pag = bp->b_pag;
+       unsigned int            level;
+       unsigned int            level_limit;
+       xfs_agblock_t           sib;
+
+       /*
+        * magic number and level verification
+        *
+        * During growfs operations, we can't verify the exact level or owner as
+        * the perag is not fully initialised and hence not attached to the
+        * buffer.  In this case, check against the maximum tree depth.
+        *
+        * Similarly, during log recovery we will have a perag structure
+        * attached, but the agf information will not yet have been initialised
+        * from the on disk AGF. Again, we can only check against maximum limits
+        * in this case.
+        */
+       if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC))
+               return false;
+
+       /* rmap blocks are only valid when the feature is enabled */
+       if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+               return false;
+
+       /* block identity: filesystem UUID, disk address and owning AG */
+       if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+               return false;
+       if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+               return false;
+       if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+               return false;
+
+       /* use the exact tree height when known, else the generic maximum */
+       if (pag && pag->pagf_init)
+               level_limit = pag->pagf_levels[XFS_BTNUM_RMAPi];
+       else
+               level_limit = mp->m_ag_maxlevels;
+
+       level = be16_to_cpu(block->bb_level);
+       if (level >= level_limit)
+               return false;
+
+       /* numrecs verification */
+       if (be16_to_cpu(block->bb_numrecs) > mp->m_rmap_mxr[level != 0])
+               return false;
+
+       /*
+        * sibling pointer verification: zero is never valid, and anything at
+        * or beyond the AG size must be the null block marker.
+        */
+       sib = be32_to_cpu(block->bb_u.s.bb_leftsib);
+       if (sib == 0 ||
+           (sib >= mp->m_sb.sb_agblocks && sib != NULLAGBLOCK))
+               return false;
+
+       sib = be32_to_cpu(block->bb_u.s.bb_rightsib);
+       if (sib == 0 ||
+           (sib >= mp->m_sb.sb_agblocks && sib != NULLAGBLOCK))
+               return false;
+
+       return true;
+}
+
+/*
+ * Read-side verifier: flag a CRC failure as -EFSBADCRC, otherwise flag a
+ * structural failure as -EFSCORRUPTED, and report whenever the buffer ends up
+ * carrying an error.
+ */
+static void
+xfs_rmapbt_read_verify(
+       struct xfs_buf  *bp)
+{
+       bool            crc_ok = xfs_btree_sblock_verify_crc(bp);
+
+       /* the structure is only checked once the checksum is known good */
+       if (!crc_ok)
+               xfs_buf_ioerror(bp, -EFSBADCRC);
+       else if (!xfs_rmapbt_verify(bp))
+               xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+       if (!bp->b_error)
+               return;
+
+       trace_xfs_btree_corrupt(bp, _RET_IP_);
+       xfs_verifier_error(bp);
+}
+
+/*
+ * Write-side verifier: a block that passes the structural checks gets its CRC
+ * recalculated; one that fails is flagged -EFSCORRUPTED and reported, and its
+ * CRC is left alone.
+ */
+static void
+xfs_rmapbt_write_verify(
+       struct xfs_buf  *bp)
+{
+       if (xfs_rmapbt_verify(bp)) {
+               xfs_btree_sblock_calc_crc(bp);
+               return;
+       }
+
+       trace_xfs_btree_corrupt(bp, _RET_IP_);
+       xfs_buf_ioerror(bp, -EFSCORRUPTED);
+       xfs_verifier_error(bp);
+}
+
+/*
+ * Buffer verifier hooks for rmap btree blocks: xfs_rmapbt_read_verify() is
+ * installed as the read hook and xfs_rmapbt_write_verify() as the write hook.
+ */
+const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
+       .verify_read = xfs_rmapbt_read_verify,
+       .verify_write = xfs_rmapbt_write_verify,
+};
+
+
+#if defined(DEBUG) || defined(XFS_WARN)
+/*
+ * Keys are in order when the first start block is strictly below the second.
+ */
+STATIC int
+xfs_rmapbt_keys_inorder(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_key     *k1,
+       union xfs_btree_key     *k2)
+{
+       __uint32_t              first = be32_to_cpu(k1->rmap.rm_startblock);
+       __uint32_t              second = be32_to_cpu(k2->rmap.rm_startblock);
+
+       return first < second;
+}
+
+/*
+ * Records are in order when the first extent ends at or before the start of
+ * the second, i.e. they do not overlap.
+ */
+STATIC int
+xfs_rmapbt_recs_inorder(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_rec     *r1,
+       union xfs_btree_rec     *r2)
+{
+       __uint32_t              first_end;
+
+       first_end = be32_to_cpu(r1->rmap.rm_startblock) +
+                   be32_to_cpu(r1->rmap.rm_blockcount);
+
+       return first_end <= be32_to_cpu(r2->rmap.rm_startblock);
+}
+#endif /* DEBUG */
+
+/*
+ * Operations table for the rmap btree. xfs_rmapbt_init_cursor() attaches it
+ * to every cursor it creates. The two ordering checks are only compiled in
+ * for DEBUG or XFS_WARN builds, matching the definitions of those functions.
+ */
+static const struct xfs_btree_ops xfs_rmapbt_ops = {
+       .rec_len                = sizeof(struct xfs_rmap_rec),
+       .key_len                = sizeof(struct xfs_rmap_key),
+
+       .dup_cursor             = xfs_rmapbt_dup_cursor,
+       .set_root               = xfs_rmapbt_set_root,
+       .alloc_block            = xfs_rmapbt_alloc_block,
+       .free_block             = xfs_rmapbt_free_block,
+       .get_minrecs            = xfs_rmapbt_get_minrecs,
+       .get_maxrecs            = xfs_rmapbt_get_maxrecs,
+       .init_key_from_rec      = xfs_rmapbt_init_key_from_rec,
+       .init_rec_from_key      = xfs_rmapbt_init_rec_from_key,
+       .init_rec_from_cur      = xfs_rmapbt_init_rec_from_cur,
+       .init_ptr_from_cur      = xfs_rmapbt_init_ptr_from_cur,
+       .key_diff               = xfs_rmapbt_key_diff,
+       .buf_ops                = &xfs_rmapbt_buf_ops,
+#if defined(DEBUG) || defined(XFS_WARN)
+       .keys_inorder           = xfs_rmapbt_keys_inorder,
+       .recs_inorder           = xfs_rmapbt_recs_inorder,
+#endif
+};
+
+/*
+ * Allocate and initialise a cursor for the reverse mapping btree of one AG.
+ *
+ * The cursor is zero-allocated from the btree cursor zone, marked as using
+ * CRC-format blocks, and takes its level count from the AGF held in @agbp.
+ */
+struct xfs_btree_cur *
+xfs_rmapbt_init_cursor(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       struct xfs_buf          *agbp,
+       xfs_agnumber_t          agno)
+{
+       struct xfs_btree_cur    *cur;
+       struct xfs_agf          *agf;
+
+       cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+
+       /* generic btree cursor state */
+       cur->bc_mp = mp;
+       cur->bc_tp = tp;
+       cur->bc_ops = &xfs_rmapbt_ops;
+       cur->bc_btnum = XFS_BTNUM_RMAP;
+       cur->bc_flags = XFS_BTREE_CRC_BLOCKS;
+       cur->bc_blocklog = mp->m_sb.sb_blocklog;
+
+       /* current tree height as recorded in the AGF */
+       agf = XFS_BUF_TO_AGF(agbp);
+       cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
+
+       /* per-AG private state */
+       cur->bc_private.a.agbp = agbp;
+       cur->bc_private.a.agno = agno;
+
+       return cur;
+}
+
+/*
+ * Calculate number of records in an rmap btree block.
+ *
+ * The block header is subtracted from @blocklen and the remainder divided by
+ * the size of one entry: a record for a leaf block, or a key plus a pointer
+ * for a node block. @mp is not used.
+ */
+int
+xfs_rmapbt_maxrecs(
+       struct xfs_mount        *mp,
+       int                     blocklen,
+       int                     leaf)
+{
+       int                     space = blocklen - XFS_RMAP_BLOCK_LEN;
+       size_t                  entsize;
+
+       if (leaf)
+               entsize = sizeof(struct xfs_rmap_rec);
+       else
+               entsize = sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t);
+
+       return space / entsize;
+}
diff --git a/libxfs/xfs_rmap_btree.h b/libxfs/xfs_rmap_btree.h
new file mode 100644 (file)
index 0000000..9ad65e5
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_RMAP_BTREE_H__
+#define        __XFS_RMAP_BTREE_H__
+
+/*
+ * Reverse mapping btree: block layout macros and function prototypes
+ */
+
+struct xfs_buf;
+struct xfs_btree_cur;
+struct xfs_mount;
+
+/*
+ * rmaps only exist on crc enabled filesystems, so the block header is always
+ * the CRC-format short-form btree header.
+ */
+#define XFS_RMAP_BLOCK_LEN     XFS_BTREE_SBLOCK_CRC_LEN
+
+/*
+ * Record, key, and pointer address macros for btree blocks.
+ *
+ * Entries start immediately after the block header and @index is 1-based.
+ * In a block holding keys and pointers, the pointer array starts after room
+ * for @maxrecs keys.
+ *
+ * (note that some of these may appear unused, but they are used in userspace)
+ */
+#define XFS_RMAP_REC_ADDR(block, index) \
+       ((struct xfs_rmap_rec *) \
+               ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+                (((index) - 1) * sizeof(struct xfs_rmap_rec))))
+
+#define XFS_RMAP_KEY_ADDR(block, index) \
+       ((struct xfs_rmap_key *) \
+               ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+                ((index) - 1) * sizeof(struct xfs_rmap_key)))
+
+#define XFS_RMAP_PTR_ADDR(block, index, maxrecs) \
+       ((xfs_rmap_ptr_t *) \
+               ((char *)(block) + XFS_RMAP_BLOCK_LEN + \
+                (maxrecs) * sizeof(struct xfs_rmap_key) + \
+                ((index) - 1) * sizeof(xfs_rmap_ptr_t)))
+
+struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp,
+                               struct xfs_trans *tp, struct xfs_buf *bp,
+                               xfs_agnumber_t agno);
+int xfs_rmapbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf);
+
+int xfs_rmap_alloc(struct xfs_trans *tp, struct xfs_buf *agbp,
+                  xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
+                  uint64_t owner);
+int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp,
+                 xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
+                 uint64_t owner);
+
+#endif /* __XFS_RMAP_BTREE_H__ */
index 6844cd8c3ef03e1819381e4e641982ae784eeea1..e5c7a8534df4b38088d815d6aca4f3ae7a5595d1 100644 (file)
@@ -668,6 +668,11 @@ xfs_sb_mount_common(
        mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
        mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
 
+       mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 1);
+       mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 0);
+       mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2;
+       mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2;
+
        mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
        mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
                                        sbp->sb_inopblock);
index 8dda4b321343ba6bf28ebc98516225b1e677dc6d..e8e88f3e5daf28e5544377e8c5949829e2d41b99 100644 (file)
@@ -38,6 +38,7 @@ extern const struct xfs_buf_ops xfs_agi_buf_ops;
 extern const struct xfs_buf_ops xfs_agf_buf_ops;
 extern const struct xfs_buf_ops xfs_agfl_buf_ops;
 extern const struct xfs_buf_ops xfs_allocbt_buf_ops;
+extern const struct xfs_buf_ops xfs_rmapbt_buf_ops;
 extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
 extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
 extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
index b79dc66b2ecd4afb89f924cb046ac5b37f1ff8d8..3d503647f26b6924ecbe9b702d076bf29220830a 100644 (file)
@@ -108,8 +108,8 @@ typedef enum {
 } xfs_lookup_t;
 
 typedef enum {
-       XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi,
-       XFS_BTNUM_FINOi, XFS_BTNUM_MAX
+       XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi,
+       XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_MAX
 } xfs_btnum_t;
 
 struct xfs_name {
index 17706664a8fe2406d30028dfc3a7c6dcfaa8f7a7..c42581b9cc79d582424d76a3c377618761b3bdc6 100644 (file)
@@ -185,6 +185,8 @@ char        *mopts[] = {
        "crc",
 #define M_FINOBT       1
        "finobt",
+#define M_RMAPBT       2
+       "rmapbt",
        NULL
 };
 
@@ -1004,6 +1006,7 @@ main(
        int                     lazy_sb_counters;
        int                     crcs_enabled;
        int                     finobt;
+       bool                    rmapbt;
 
        progname = basename(argv[0]);
        setlocale(LC_ALL, "");
@@ -1038,6 +1041,7 @@ main(
        lazy_sb_counters = 1;
        crcs_enabled = 0;
        finobt = 0;
+       rmapbt = false;
        memset(&fsx, 0, sizeof(fsx));
 
        memset(&xi, 0, sizeof(xi));
@@ -1539,6 +1543,14 @@ _("cannot specify both crc and ftype\n"));
                                                illegal(value, "m finobt");
                                        finobt = c;
                                        break;
+                               case M_RMAPBT:
+                                       /* -m rmapbt=0|1: a value is mandatory */
+                                       if (!value || *value == '\0')
+                                               reqval('m', mopts, M_RMAPBT);
+                                       c = atoi(value);
+                                       if (c < 0 || c > 1)
+                                               illegal(value, "m rmapbt");
+                                       rmapbt = c;
+                                       break;
                                default:
                                        unknown('m', value);
                                }
@@ -1889,6 +1901,11 @@ _("32 bit Project IDs always enabled on CRC enabled filesytems\n"));
 _("warning: finobt not supported without CRC support, disabled.\n"));
                finobt = 0;
        }
+       if (rmapbt && !crcs_enabled) {
+               fprintf(stderr,
+_("warning: rmapbt not supported without CRC support, disabled.\n"));
+               rmapbt = 0;
+       }
 
        if (nsflag || nlflag) {
                if (dirblocksize < blocksize ||
@@ -2483,7 +2500,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
        mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
 
        /*
-        * sb_versionnum and finobt flags must be set before we use
+        * sb_versionnum, finobt and rmapbt flags must be set before we use
         * XFS_PREALLOC_BLOCKS().
         */
        sbp->sb_features2 = XFS_SB_VERSION2_MKFS(crcs_enabled, lazy_sb_counters,
@@ -2505,6 +2522,8 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
 
        if (finobt)
                sbp->sb_features_ro_compat = XFS_SB_FEAT_RO_COMPAT_FINOBT;
+       if (rmapbt)
+               sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_RMAPBT;
 
        if (loginternal) {
                /*
@@ -2568,7 +2587,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
                printf(_(
                   "meta-data=%-22s isize=%-6d agcount=%lld, agsize=%lld blks\n"
                   "         =%-22s sectsz=%-5u attr=%u, projid32bit=%u\n"
-                  "         =%-22s crc=%-8u finobt=%u\n"
+                  "         =%-22s crc=%-8u finobt=%u, rmapbt=%u\n"
                   "data     =%-22s bsize=%-6u blocks=%llu, imaxpct=%u\n"
                   "         =%-22s sunit=%-6u swidth=%u blks\n"
                   "naming   =version %-14u bsize=%-6u ascii-ci=%d ftype=%d\n"
@@ -2577,7 +2596,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
                   "realtime =%-22s extsz=%-6d blocks=%lld, rtextents=%lld\n"),
                        dfile, isize, (long long)agcount, (long long)agsize,
                        "", sectorsize, attrversion, !projid16bit,
-                       "", crcs_enabled, finobt,
+                       "", crcs_enabled, finobt, rmapbt,
                        "", blocksize, (long long)dblocks, imaxpct,
                        "", dsunit, dswidth,
                        dirversion, dirblocksize, nci, dirftype,
@@ -2748,6 +2767,12 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
                agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
                agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
                agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
+               if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+                       agf->agf_roots[XFS_BTNUM_RMAPi] =
+                                               cpu_to_be32(XFS_RMAP_BLOCK(mp));
+                       agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
+               }
+
                agf->agf_flfirst = 0;
                agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
                agf->agf_flcount = 0;
@@ -2935,22 +2960,83 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"),
                /*
                 * Free INO btree root block
                 */
-               if (!finobt)
-                       continue;
+               if (finobt) {
+                       buf = libxfs_getbuf(mp->m_ddev_targp,
+                                       XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
+                                       bsize);
+                       buf->b_ops = &xfs_inobt_buf_ops;
+                       block = XFS_BUF_TO_BLOCK(buf);
+                       memset(block, 0, blocksize);
+                       if (xfs_sb_version_hascrc(&mp->m_sb))
+                               xfs_btree_init_block(mp, buf, XFS_FIBT_CRC_MAGIC, 0, 0,
+                                                       agno, XFS_BTREE_CRC_BLOCKS);
+                       else
+                               xfs_btree_init_block(mp, buf, XFS_FIBT_MAGIC, 0, 0,
+                                                       agno, 0);
+                       libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE);
+               }
 
-               buf = libxfs_getbuf(mp->m_ddev_targp,
-                               XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
+               /* RMAP btree root block */
+               if (rmapbt) {
+                       struct xfs_rmap_rec     *rrec;
+
+                       buf = libxfs_getbuf(mp->m_ddev_targp,
+                               XFS_AGB_TO_DADDR(mp, agno, XFS_RMAP_BLOCK(mp)),
                                bsize);
-               buf->b_ops = &xfs_inobt_buf_ops;
-               block = XFS_BUF_TO_BLOCK(buf);
-               memset(block, 0, blocksize);
-               if (xfs_sb_version_hascrc(&mp->m_sb))
-                       xfs_btree_init_block(mp, buf, XFS_FIBT_CRC_MAGIC, 0, 0,
+                       buf->b_ops = &xfs_rmapbt_buf_ops;
+                       block = XFS_BUF_TO_BLOCK(buf);
+                       memset(block, 0, blocksize);
+
+                       xfs_btree_init_block(mp, buf, XFS_RMAP_CRC_MAGIC, 0, 0,
                                                agno, XFS_BTREE_CRC_BLOCKS);
-               else
-                       xfs_btree_init_block(mp, buf, XFS_FIBT_MAGIC, 0, 0,
-                                               agno, 0);
-               libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE);
+
+                       /*
+                        * mark the AG header regions as static metadata
+                        * The BNO btree block is the first block after the
+                        * headers, so its location defines the size of region
+                        * the static metadata consumes.
+                        */
+                       rrec = XFS_RMAP_REC_ADDR(block, 1);
+                       rrec->rm_startblock = 0;
+                       rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
+                       rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
+                       be16_add_cpu(&block->bb_numrecs, 1);
+
+                       /* account freespace btree root blocks */
+                       rrec = XFS_RMAP_REC_ADDR(block, 2);
+                       rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
+                       rrec->rm_blockcount = cpu_to_be32(2);
+                       rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
+                       be16_add_cpu(&block->bb_numrecs, 1);
+
+                       /* account inode btree root blocks */
+                       rrec = XFS_RMAP_REC_ADDR(block, 3);
+                       rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
+                       rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
+                                                       XFS_IBT_BLOCK(mp));
+                       rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
+                       be16_add_cpu(&block->bb_numrecs, 1);
+
+                       /* account for rmap btree root */ 
+                       rrec = XFS_RMAP_REC_ADDR(block, 4);
+                       rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
+                       rrec->rm_blockcount = cpu_to_be32(1);
+                       rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
+                       be16_add_cpu(&block->bb_numrecs, 1);
+
+                       /* account for the log space */
+                       if (loginternal && agno == logagno) {
+                               rrec = XFS_RMAP_REC_ADDR(block, 5);
+                               rrec->rm_startblock = cpu_to_be32(
+                                               XFS_FSB_TO_AGBNO(mp, logstart));
+                               rrec->rm_blockcount = cpu_to_be32(logblocks);
+                               rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG);
+                               be16_add_cpu(&block->bb_numrecs, 1);
+                       }
+
+                       libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE);
+               }
+
        }
 
        /*
index 179203ee1a3062a90842688b03b1d32eb4a30e0e..fc8bc128a0d233a315571b40ed57b82fa52a3d13 100644 (file)
@@ -744,6 +744,7 @@ _("%s fork in ino %" PRIu64 " claims dup extent, "
 _("%s fork in ino %" PRIu64 " claims free block %" PRIu64 "\n"),
                                        forkname, ino, (__uint64_t) b);
                                /* fall through ... */
+                       case XR_E_INUSE1:       /* seen by rmap */
                        case XR_E_UNKNOWN:
                                set_bmap_ext(agno, agbno, blen, XR_E_INUSE);
                                break;
@@ -751,6 +752,11 @@ _("%s fork in ino %" PRIu64 " claims free block %" PRIu64 "\n"),
                        case XR_E_BAD_STATE:
                                do_error(_("bad state in block map %" PRIu64 "\n"), b);
 
+                       case XR_E_FS_MAP1:
+                       case XR_E_INO1:
+                       case XR_E_INUSE_FS1:
+                               do_warn(_("rmap claims metadata use!\n"));
+                               /* fall through */
                        case XR_E_FS_MAP:
                        case XR_E_INO:
                        case XR_E_INUSE_FS:
index ba819b4eca5490388be9a2b85a236d0e1fb4f023..b5c00879cd1c6c47564b35c781940bb9d0fb3dff 100644 (file)
@@ -102,17 +102,11 @@ typedef struct rt_extent_tree_node  {
 #define XR_E_MULT      5       /* extent is multiply referenced */
 #define XR_E_INO       6       /* extent used by inodes (inode blocks) */
 #define XR_E_FS_MAP    7       /* extent used by fs space/inode maps */
-#define XR_E_BAD_STATE 8
-
-/* extent states, in 64 bit word chunks */
-#define        XR_E_UNKNOWN_LL         0x0000000000000000LL
-#define        XR_E_FREE1_LL           0x1111111111111111LL
-#define        XR_E_FREE_LL            0x2222222222222222LL
-#define        XR_E_INUSE_LL           0x3333333333333333LL
-#define        XR_E_INUSE_FS_LL        0x4444444444444444LL
-#define        XR_E_MULT_LL            0x5555555555555555LL
-#define        XR_E_INO_LL             0x6666666666666666LL
-#define        XR_E_FS_MAP_LL          0x7777777777777777LL
+#define XR_E_INUSE1    8       /* used block (marked by rmap btree) */
+#define XR_E_INUSE_FS1 9       /* used by fs ag header or log (rmap btree) */
+#define XR_E_INO1      10      /* used by inodes (marked by rmap btree) */
+#define XR_E_FS_MAP1   11      /* used by fs space/inode maps (rmap btree) */
+#define XR_E_BAD_STATE 12
 
 /* separate state bit, OR'ed into high (4th) bit of ex_state field */
 
index e7e05d18806011e4633ef6302f5e4e5295238974..3c00660da2f23af3540431567fa59ad31d036058 100644 (file)
@@ -44,6 +44,7 @@ struct aghdr_cnts {
        __uint32_t      agicount;
        __uint32_t      agifreecount;
        __uint64_t      fdblocks;
+       __uint64_t      usedblocks;
        __uint64_t      icount;
        __uint64_t      ifreecount;
        __uint32_t      fibtfreecount;
@@ -292,6 +293,13 @@ _("bad back (left) sibling pointer (saw %llu should be NULL (0))\n"
                pthread_mutex_lock(&ag_locks[agno].lock);
                state = get_bmap(agno, agbno);
                switch (state) {
+               case XR_E_INUSE1:
+                       /*
+                        * block was claimed as in use data by the rmap
+                        * btree, but has not been found in the data extent
+                        * map for the inode. That means this bmbt block hasn't
+                        * yet been claimed as in use, which means -it's ours-
+                        */
                case XR_E_UNKNOWN:
                case XR_E_FREE1:
                case XR_E_FREE:
@@ -737,6 +745,251 @@ _("%s freespace btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
        }
 }
 
+/*
+ * Scan one rmap (reverse mapping) btree block against the block state map.
+ *
+ * Leaf records over blocks still in state XR_E_UNKNOWN tag the whole run
+ * with the owner's "seen by rmap" state (XR_E_*1); blocks that already
+ * have a state are only checked for a compatible owner.  Interior blocks
+ * recurse through scan_sbtree().  priv is this AG's struct aghdr_cnts.
+ */
+static void
+scan_rmapbt(
+       struct xfs_btree_block  *block,
+       int                     level,
+       xfs_agblock_t           bno,
+       xfs_agnumber_t          agno,
+       int                     suspect,
+       int                     isroot,
+       __uint32_t              magic,
+       void                    *priv)
+{
+       struct aghdr_cnts       *agcnts = priv;
+       const char              *name = "rmap";
+       int                     i;
+       xfs_rmap_ptr_t          *pp;
+       struct xfs_rmap_rec     *rp;
+       int                     hdr_errors = 0;
+       int                     numrecs;
+       int                     state;
+       xfs_agblock_t           lastblock = 0;
+
+       if (magic != XFS_RMAP_CRC_MAGIC) {
+               name = "(unknown)";
+               assert(0);
+       }
+
+       if (be32_to_cpu(block->bb_magic) != magic) {
+               do_warn(_("bad magic # %#x in bt%s block %d/%d\n"),
+                       be32_to_cpu(block->bb_magic), name, agno, bno);
+               hdr_errors++;
+               if (suspect)
+                       return;
+       }
+
+       /*
+        * All RMAP btree blocks except the roots are freed for a fully empty
+        * filesystem, thus they count towards the free data block counter.
+        */
+       if (!isroot) {
+               agcnts->agfbtreeblks++;
+               agcnts->fdblocks++;
+       }
+
+       if (be16_to_cpu(block->bb_level) != level) {
+               do_warn(_("expected level %d got %d in bt%s block %d/%d\n"),
+                       level, be16_to_cpu(block->bb_level), name, agno, bno);
+               hdr_errors++;
+               if (suspect)
+                       return;
+       }
+
+       /* check for btree blocks multiply claimed */
+       state = get_bmap(agno, bno);
+       if (!(state == XR_E_UNKNOWN || state == XR_E_FS_MAP1))  {
+               set_bmap(agno, bno, XR_E_MULT);
+               do_warn(
+_("%s rmap btree block claimed (state %d), agno %d, bno %d, suspect %d\n"),
+                               name, state, agno, bno, suspect);
+               return;
+       }
+       set_bmap(agno, bno, XR_E_FS_MAP);
+
+       numrecs = be16_to_cpu(block->bb_numrecs);
+       if (level == 0) {
+               if (numrecs > mp->m_rmap_mxr[0])  {
+                       numrecs = mp->m_rmap_mxr[0];
+                       hdr_errors++;
+               }
+               if (isroot == 0 && numrecs < mp->m_rmap_mnr[0])  {
+                       numrecs = mp->m_rmap_mnr[0];
+                       hdr_errors++;
+               }
+
+               if (hdr_errors) {
+                       do_warn(
+       _("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
+                               be16_to_cpu(block->bb_numrecs),
+                               mp->m_rmap_mnr[0], mp->m_rmap_mxr[0],
+                               name, agno, bno);
+                       suspect++;
+               }
+
+               rp = XFS_RMAP_REC_ADDR(block, 1);
+               for (i = 0; i < numrecs; i++) {
+                       xfs_agblock_t           b, end;
+                       xfs_extlen_t            len, blen;
+                       int64_t                 owner;
+
+                       b = be32_to_cpu(rp[i].rm_startblock);
+                       len = be32_to_cpu(rp[i].rm_blockcount);
+                       owner = be64_to_cpu(rp[i].rm_owner);
+                       end = b + len;
+
+                       if (!verify_agbno(mp, agno, b)) {
+                               do_warn(
+       _("invalid start block %u in record %u of %s btree block %u/%u\n"),
+                                       b, i, name, agno, bno);
+                               continue;
+                       }
+                       /* end <= b: zero length, or b + len wrapped 32 bits */
+                       if (end <= b || !verify_agbno(mp, agno, end - 1)) {
+                               do_warn(
+       _("invalid length %u in record %u of %s btree block %u/%u\n"),
+                                       len, i, name, agno, bno);
+                               continue;
+                       }
+                       /* XXX: range check owner */
+
+                       if (b && b <= lastblock) {
+                               do_warn(_(
+       "out-of-order rmap btree record %d (%u %u) block %u/%u\n"),
+                                       i, b, len, agno, bno);
+                       } else {
+                               lastblock = b;
+                       }
+                       for ( ; b < end; b += blen)  {
+                               state = get_bmap_ext(agno, b, end, &blen);
+                               switch (state) {
+                               case XR_E_UNKNOWN:
+                                       switch (owner) {
+                                       case XFS_RMAP_OWN_FS:
+                                       case XFS_RMAP_OWN_LOG:
+                                               state = XR_E_INUSE_FS1;
+                                               break;
+                                       case XFS_RMAP_OWN_AG:
+                                       case XFS_RMAP_OWN_INOBT:
+                                               state = XR_E_FS_MAP1;
+                                               break;
+                                       case XFS_RMAP_OWN_INODES:
+                                               state = XR_E_INO1;
+                                               break;
+                                       case XFS_RMAP_OWN_NULL:
+                                               /* still unknown */
+                                               break;
+                                       default:
+                                               /* file data */
+                                               state = XR_E_INUSE1;
+                                               break;
+                                       }
+                                       /* mark the whole run, not just b */
+                                       if (state != XR_E_UNKNOWN)
+                                               set_bmap_ext(agno, b, blen, state);
+                                       break;
+                               case XR_E_INUSE_FS:
+                                       if (owner == XFS_RMAP_OWN_FS ||
+                                           owner == XFS_RMAP_OWN_LOG)
+                                               break;
+                                       do_warn(
+_("Static meta block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+                                               agno, b, b + blen - 1,
+                                               name, state, owner);
+                                       break;
+                               case XR_E_FS_MAP:
+                                       if (owner == XFS_RMAP_OWN_AG ||
+                                           owner == XFS_RMAP_OWN_INOBT)
+                                               break;
+                                       do_warn(
+_("AG meta block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+                                               agno, b, b + blen - 1,
+                                               name, state, owner);
+                                       break;
+                               case XR_E_INO:
+                                       if (owner == XFS_RMAP_OWN_INODES)
+                                               break;
+                                       do_warn(
+_("inode block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+                                               agno, b, b + blen - 1,
+                                               name, state, owner);
+                                       break;
+                               case XR_E_INUSE:
+                                       if (owner >= 0 &&
+                                           owner < mp->m_sb.sb_dblocks)
+                                               break;
+                                       do_warn(
+_("in use block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"),
+                                               agno, b, b + blen - 1,
+                                               name, state, owner);
+                                       break;
+                               case XR_E_FREE1:
+                               case XR_E_FREE:
+                                       /* possibly AGFL; checked later */
+                                       break;
+                               default:
+                                       do_warn(
+_("unknown block (%d,%d-%d) mismatch on %s tree, state - %d,%" PRIx64 "\n"),
+                                               agno, b, b + blen - 1,
+                                               name, state, owner);
+                                       break;
+                               }
+                       }
+               }
+               return;
+       }
+
+       /* interior block */
+       pp = XFS_RMAP_PTR_ADDR(block, 1, mp->m_rmap_mxr[1]);
+       if (numrecs > mp->m_rmap_mxr[1])  {
+               numrecs = mp->m_rmap_mxr[1];
+               hdr_errors++;
+       }
+       if (isroot == 0 && numrecs < mp->m_rmap_mnr[1])  {
+               numrecs = mp->m_rmap_mnr[1];
+               hdr_errors++;
+       }
+
+       /*
+        * don't pass bogus tree flag down further if this block
+        * looked ok.  bail out if two levels in a row look bad.
+        */
+       if (hdr_errors)  {
+               do_warn(
+       _("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"),
+                       be16_to_cpu(block->bb_numrecs),
+                       mp->m_rmap_mnr[1], mp->m_rmap_mxr[1],
+                       name, agno, bno);
+               if (suspect)
+                       return;
+               suspect++;
+       } else if (suspect) {
+               suspect = 0;
+       }
+
+       for (i = 0; i < numrecs; i++)  {
+               xfs_agblock_t           child = be32_to_cpu(pp[i]);
+
+               /*
+                * XXX - put sibling detection right here.  Our sibling chain
+                * is known good, so check the entries before and after each
+                * one; if either references a different block, check the
+                * sibling pointer and on mismatch salvage what data we can.
+                */
+               if (child != 0 && verify_agbno(mp, agno, child)) {
+                       scan_sbtree(child, level, agno, suspect, scan_rmapbt, 0,
+                                   magic, priv, &xfs_rmapbt_buf_ops);
+               }
+       }
+}
 static int
 scan_single_ino_chunk(
        xfs_agnumber_t          agno,
@@ -814,20 +1067,27 @@ _("bad ending inode # (%" PRIu64 " (0x%x 0x%zx)) in ino rec, skipping rec\n"),
                        agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
 
                        state = get_bmap(agno, agbno);
-                       if (state == XR_E_UNKNOWN)  {
-                               set_bmap(agno, agbno, XR_E_INO);
-                       } else if (state == XR_E_INUSE_FS && agno == 0 &&
-                                  ino + j >= first_prealloc_ino &&
-                                  ino + j < last_prealloc_ino)  {
+                       switch (state) {
+                       case XR_E_INO:
+                               break;
+                       case XR_E_UNKNOWN:
+                       case XR_E_INO1: /* seen by rmap */
                                set_bmap(agno, agbno, XR_E_INO);
-                       } else  {
+                               break;
+                       case XR_E_INUSE_FS:
+                       case XR_E_INUSE_FS1:
+                               if (agno == 0 &&
+                                   ino + j >= first_prealloc_ino &&
+                                   ino + j < last_prealloc_ino) {
+                                       set_bmap(agno, agbno, XR_E_INO);
+                                       break;
+                               }
+                               /* fall through */
+                       default:
+                               /* XXX - maybe should mark block a duplicate */
                                do_warn(
 _("inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n"),
                                        agno, agbno, mp->m_sb.sb_inopblock);
-                               /*
-                                * XXX - maybe should mark
-                                * block a duplicate
-                                */
                                return ++suspect;
                        }
                }
@@ -973,19 +1233,35 @@ _("bad ending inode # (%" PRIu64 " (0x%x 0x%zx)) in finobt rec, skipping rec\n")
                        agbno = XFS_AGINO_TO_AGBNO(mp, ino + j);
 
                        state = get_bmap(agno, agbno);
-                       if (state == XR_E_INO) {
-                               continue;
-                       } else if ((state == XR_E_UNKNOWN) ||
-                                  (state == XR_E_INUSE_FS && agno == 0 &&
-                                   ino + j >= first_prealloc_ino &&
-                                   ino + j < last_prealloc_ino)) {
+                       switch (state) {
+                       case XR_E_INO:
+                               break;
+                       case XR_E_INO1: /* seen by rmap */
+                               set_bmap(agno, agbno, XR_E_INO);
+                               break;
+                       case XR_E_UNKNOWN:
                                do_warn(
 _("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb %d\n"),
                                        agno, agbno, mp->m_sb.sb_inopblock);
 
                                set_bmap(agno, agbno, XR_E_INO);
                                suspect++;
-                       } else {
+                               break;
+                       case XR_E_INUSE_FS:
+                       case XR_E_INUSE_FS1:
+                               if (agno == 0 &&
+                                   ino + j >= first_prealloc_ino &&
+                                   ino + j < last_prealloc_ino) {
+                                       do_warn(
+_("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb %d\n"),
+                                               agno, agbno, mp->m_sb.sb_inopblock);
+
+                                       set_bmap(agno, agbno, XR_E_INO);
+                                       suspect++;
+                                       break;
+                               }
+                               /* fall through */
+                       default:
                                do_warn(
 _("inode chunk claims used block, finobt block - agno %d, bno %d, inopb %d\n"),
                                        agno, agbno, mp->m_sb.sb_inopblock);
@@ -1163,6 +1439,7 @@ scan_inobt(
         */
        state = get_bmap(agno, bno);
        switch (state)  {
+       case XR_E_FS_MAP1: /* already been seen by an rmap scan */
        case XR_E_UNKNOWN:
        case XR_E_FREE1:
        case XR_E_FREE:
@@ -1296,7 +1573,7 @@ scan_freelist(
        if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
            XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) &&
            XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp))
-               set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_FS_MAP);
+               set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_INUSE_FS);
 
        if (be32_to_cpu(agf->agf_flcount) == 0)
                return;
@@ -1381,6 +1658,19 @@ validate_agf(
                        bno, agno);
        }
 
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+               bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]);
+               if (bno != 0 && verify_agbno(mp, agno, bno)) {
+                       scan_sbtree(bno,
+                                   be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]),
+                                   agno, 0, scan_rmapbt, 1, XFS_RMAP_CRC_MAGIC,
+                                   agcnts, &xfs_rmapbt_buf_ops);
+               } else  {
+                       do_warn(_("bad agbno %u for rmapbt root, agno %d\n"),
+                               bno, agno);
+               }
+       }
+
        if (be32_to_cpu(agf->agf_freeblks) != agcnts->agffreeblks) {
                do_warn(_("agf_freeblks %u, counted %u in ag %u\n"),
                        be32_to_cpu(agf->agf_freeblks), agcnts->agffreeblks, agno);
@@ -1396,6 +1686,7 @@ validate_agf(
                do_warn(_("agf_btreeblks %u, counted %" PRIu64 " in ag %u\n"),
                        be32_to_cpu(agf->agf_btreeblks), agcnts->agfbtreeblks, agno);
        }
+
 }
 
 static void
@@ -1635,6 +1926,7 @@ scan_ags(
        __uint64_t      fdblocks = 0;
        __uint64_t      icount = 0;
        __uint64_t      ifreecount = 0;
+       __uint64_t      usedblocks = 0;
        xfs_agnumber_t  i;
        work_queue_t    wq;
 
@@ -1657,6 +1949,7 @@ scan_ags(
                fdblocks += agcnts[i].fdblocks;
                icount += agcnts[i].icount;
                ifreecount += agcnts[i].ifreecount;
+               usedblocks += agcnts[i].usedblocks;
        }
 
        free(agcnts);
@@ -1678,5 +1971,11 @@ scan_ags(
                do_warn(_("sb_fdblocks %" PRIu64 ", counted %" PRIu64 "\n"),
                        mp->m_sb.sb_fdblocks, fdblocks);
        }
+
+       if (usedblocks &&
+           usedblocks != mp->m_sb.sb_dblocks - fdblocks) {
+               do_warn(_("used blocks %" PRIu64 ", counted %" PRIu64 "\n"),
+                       mp->m_sb.sb_dblocks - fdblocks, usedblocks);
+       }
 }
 
index 11a6069aca71af5f7a006023c7f26a35a1994152..07ddd00457b4fae4f71a6a732903eaf32106d526 100644 (file)
@@ -411,6 +411,8 @@ calc_mkfs(xfs_mount_t *mp)
        fino_bno = inobt_root + XFS_MIN_FREELIST_RAW(1, 1, mp) + 1;
        if (xfs_sb_version_hasfinobt(&mp->m_sb))
                fino_bno++;
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+               fino_bno++;
 
        /*
         * If the log is allocated in the first allocation group we need to