www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
xfs: validate metadata LSNs against log on v5 superblocks
author Brian Foster <bfoster@redhat.com>
Mon, 12 Oct 2015 04:59:25 +0000 (15:59 +1100)
committer Chuck Anderson <chuck.anderson@oracle.com>
Fri, 20 Jan 2017 09:49:03 +0000 (01:49 -0800)
From a45086e27dfa21a4b39134f7505c8f60a3ecdec4 Mon Sep 17 00:00:00 2001

Since the onset of v5 superblocks, the LSN of the last modification has
been included in a variety of on-disk data structures. This LSN is used
to provide log recovery ordering guarantees (e.g., to ensure an older
log recovery item is not replayed over a newer target data structure).

While this works correctly from the point a filesystem is formatted and
mounted, userspace tools have some problematic behaviors that defeat
this mechanism. For example, xfs_repair historically zeroes out the log
unconditionally (regardless of whether corruption is detected). If this
occurs, the LSN of the filesystem is reset and the log is now in a
problematic state with respect to on-disk metadata structures that might
have a larger LSN. Until either the log catches up to the highest
previously used metadata LSN or each affected data structure is modified
and written out without incident (which resets the metadata LSN), log
recovery is susceptible to filesystem corruption.

This problem is ultimately addressed and repaired in the associated
userspace tools. The kernel is still responsible to detect the problem
and notify the user that something is wrong. Check the superblock LSN at
mount time and fail the mount if it is invalid. From that point on,
trigger verifier failure on any metadata I/O where an invalid LSN is
detected. This results in a filesystem shutdown and guarantees that we
do not log metadata changes with invalid LSNs on disk. Since this is a
known issue with a known recovery path, present a warning to instruct
the user how to recover.

Orabug: 25062171

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
15 files changed:
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_attr_leaf.c
fs/xfs/libxfs/xfs_btree.c
fs/xfs/libxfs/xfs_da_btree.c
fs/xfs/libxfs/xfs_dir2_block.c
fs/xfs/libxfs/xfs_dir2_data.c
fs/xfs/libxfs/xfs_dir2_leaf.c
fs/xfs/libxfs/xfs_dir2_node.c
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_sb.c
fs/xfs/libxfs/xfs_symlink_remote.c
fs/xfs/xfs_log.c
fs/xfs/xfs_log.h
fs/xfs/xfs_log_priv.h
fs/xfs/xfs_log_recover.c

index 516162be1398404a18e32909f8ff25c065364605..3112b71ea0e6b74bbc419eb8f3a68a0e56b182a2 100644 (file)
@@ -468,7 +468,9 @@ xfs_agfl_verify(
                    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
                        return false;
        }
-       return true;
+
+       return xfs_log_check_lsn(mp,
+                                be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
 }
 
 static void
@@ -2204,7 +2206,14 @@ xfs_agf_verify(
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
            !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid))
+               return false;
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid))
                        return false;
+               if (!xfs_log_check_lsn(mp,
+                               be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
+                       return false;
+       }
 
        if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
              XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
index e9d401ce93bb19d822a2ec9b475dae7ad5d279c1..57f36d16bbaca5d12dc55d9c5827922d6ee3fdd6 100644 (file)
@@ -41,6 +41,7 @@
 #include "xfs_buf_item.h"
 #include "xfs_cksum.h"
 #include "xfs_dir2.h"
+#include "xfs_log.h"
 
 
 /*
@@ -266,6 +267,8 @@ xfs_attr3_leaf_verify(
                        return false;
                if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
                        return false;
+               if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
+                       return false;
        } else {
                if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
                        return false;
index c72283dd8d44b6327e420355864ea8571068c9f4..80ea308a80cb8f688ba40dfcd30f58c19b1478ae 100644 (file)
@@ -32,6 +32,7 @@
 #include "xfs_trace.h"
 #include "xfs_cksum.h"
 #include "xfs_alloc.h"
+#include "xfs_log.h"
 
 /*
  * Cursor allocation zone.
@@ -241,8 +242,14 @@ bool
 xfs_btree_lblock_verify_crc(
        struct xfs_buf          *bp)
 {
-       if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
+                       return false;
                return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
+       }
 
        return true;
 }
@@ -273,8 +280,14 @@ bool
 xfs_btree_sblock_verify_crc(
        struct xfs_buf          *bp)
 {
-       if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
+
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
+                       return false;
                return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
+       }
 
        return true;
 }
index 2385f8cd08ab9f1cdf19ad3a9a8473a5e4e34eb3..d5e13dfbed364233464eb97ea9a46c1bcec46f38 100644 (file)
@@ -39,6 +39,7 @@
 #include "xfs_trace.h"
 #include "xfs_cksum.h"
 #include "xfs_buf_item.h"
+#include "xfs_log.h"
 
 /*
  * xfs_da_btree.c
@@ -150,6 +151,8 @@ xfs_da3_node_verify(
                        return false;
                if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
                        return false;
+               if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
+                       return false;
        } else {
                if (ichdr.magic != XFS_DA_NODE_MAGIC)
                        return false;
@@ -321,6 +324,7 @@ xfs_da3_node_create(
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
 
+               memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
                ichdr.magic = XFS_DA3_NODE_MAGIC;
                hdr3->info.blkno = cpu_to_be64(bp->b_bn);
                hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
index 9354e190b82e9e9e4f33a38e2254309159622a97..6e0a2892831b4de976d56f8754f8fb335f963dd9 100644 (file)
@@ -34,6 +34,7 @@
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_cksum.h"
+#include "xfs_log.h"
 
 /*
  * Local function prototypes.
@@ -71,6 +72,8 @@ xfs_dir3_block_verify(
                        return false;
                if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
                        return false;
+               if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
+                       return false;
        } else {
                if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
                        return false;
index de1ea16f57485ccc03143196eb2aced9147aed5b..5f6978f957e95990aa537184cf53449acbfa359f 100644 (file)
@@ -31,6 +31,7 @@
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_cksum.h"
+#include "xfs_log.h"
 
 /*
  * Check the consistency of the data block.
@@ -224,6 +225,8 @@ xfs_dir3_data_verify(
                        return false;
                if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
                        return false;
+               if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
+                       return false;
        } else {
                if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
                        return false;
index 106119955400a6d06cc73ff93cad2055a8e4bc1e..d00e1c27b2609cddc291e047bf840d7cef7f9125 100644 (file)
@@ -33,6 +33,7 @@
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_cksum.h"
+#include "xfs_log.h"
 
 /*
  * Local function declarations.
@@ -164,6 +165,8 @@ xfs_dir3_leaf_verify(
                        return false;
                if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
                        return false;
+               if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
+                       return false;
        } else {
                if (leaf->hdr.info.magic != cpu_to_be16(magic))
                        return false;
index 41b80d3d38772b100903bdcf7c604adad6417fd4..c0b92c79a702b282299daaf5c9816398d4f79c55 100644 (file)
@@ -33,6 +33,7 @@
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
 #include "xfs_cksum.h"
+#include "xfs_log.h"
 
 /*
  * Function declarations.
@@ -97,6 +98,8 @@ xfs_dir3_free_verify(
                        return false;
                if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
                        return false;
+               if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
+                       return false;
        } else {
                if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC))
                        return false;
index 1c9e75521250ecf606639578ce79696b6ff4a682..3b61d4108bfb19fe4cf8430721ecdedab36c7769 100644 (file)
@@ -38,6 +38,7 @@
 #include "xfs_icreate_item.h"
 #include "xfs_icache.h"
 #include "xfs_trace.h"
+#include "xfs_log.h"
 
 
 /*
@@ -2051,9 +2052,14 @@ xfs_agi_verify(
        struct xfs_mount *mp = bp->b_target->bt_mount;
        struct xfs_agi  *agi = XFS_BUF_TO_AGI(bp);
 
-       if (xfs_sb_version_hascrc(&mp->m_sb) &&
-           !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_uuid))
+       if (xfs_sb_version_hascrc(&mp->m_sb)) {
+               if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_uuid))
+                       return false;
+               if (!xfs_log_check_lsn(mp,
+                               be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn)))
                        return false;
+       }
+
        /*
         * Validate the magic number of the agi block.
         */
index dc4bfc5d88fccf221609cf2ac13c80370de07221..be211c7af307dd03226254c2dc217d050b13a975 100644 (file)
@@ -35,6 +35,7 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
+#include "xfs_log.h"
 
 /*
  * Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -159,6 +160,15 @@ xfs_mount_validate_sb(
                                                XFS_SB_FEAT_INCOMPAT_UNKNOWN));
                        return -EINVAL;
                }
+       } else if (xfs_sb_version_hascrc(sbp)) {
+               /*
+                * We can't read verify the sb LSN because the read verifier is
+                * called before the log is allocated and processed. We know the
+                * log is set up before write verifier (!check_version) calls,
+                * so just check it here.
+                */
+               if (!xfs_log_check_lsn(mp, sbp->sb_lsn))
+                       return -EFSCORRUPTED;
        }
 
        if (xfs_sb_version_has_pquotino(sbp)) {
index e7e26bd6468fdd218e3390a9913487f52e9576a2..7eaa841c3615f727a6ddfdaa958ec7f97b9c2253 100644 (file)
@@ -31,6 +31,7 @@
 #include "xfs_cksum.h"
 #include "xfs_trans.h"
 #include "xfs_buf_item.h"
+#include "xfs_log.h"
 
 
 /*
@@ -60,6 +61,7 @@ xfs_symlink_hdr_set(
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                return 0;
 
+       memset(dsl, 0, sizeof(struct xfs_dsymlink_hdr));
        dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
        dsl->sl_offset = cpu_to_be32(offset);
        dsl->sl_bytes = cpu_to_be32(size);
@@ -116,6 +118,8 @@ xfs_symlink_verify(
                return false;
        if (dsl->sl_owner == 0)
                return false;
+       if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn)))
+               return false;
 
        return true;
 }
index 3dfa2dbca48dde83924cc1b41890dbf554f9430d..abf0ab852c1ecce4296088a673042ffc716640a9 100644 (file)
@@ -3150,11 +3150,19 @@ xlog_state_switch_iclogs(
        }
 
        if (log->l_curr_block >= log->l_logBBsize) {
+               /*
+                * Rewind the current block before the cycle is bumped to make
+                * sure that the combined LSN never transiently moves forward
+                * when the log wraps to the next cycle. This is to support the
+                * unlocked sample of these fields from xlog_valid_lsn(). Most
+                * other cases should acquire l_icloglock.
+                */
+               log->l_curr_block -= log->l_logBBsize;
+               ASSERT(log->l_curr_block >= 0);
+               smp_wmb();
                log->l_curr_cycle++;
                if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM)
                        log->l_curr_cycle++;
-               log->l_curr_block -= log->l_logBBsize;
-               ASSERT(log->l_curr_block >= 0);
        }
        ASSERT(iclog == log->l_iclog);
        log->l_iclog = iclog->ic_next;
@@ -4010,3 +4018,45 @@ xlog_iclogs_empty(
        return 1;
 }
 
+/*
+ * Verify that an LSN stamped into a piece of metadata is valid. This is
+ * intended for use in read verifiers on v5 superblocks.
+ */
+bool
+xfs_log_check_lsn(
+       struct xfs_mount        *mp,
+       xfs_lsn_t               lsn)
+{
+       struct xlog             *log = mp->m_log;
+       bool                    valid;
+
+       /*
+        * norecovery mode skips mount-time log processing and unconditionally
+        * resets the in-core LSN. We can't validate in this mode, but
+        * modifications are not allowed anyways so just return true.
+        */
+       if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+               return true;
+
+       /*
+        * Some metadata LSNs are initialized to NULL (e.g., the agfl). This is
+        * handled by recovery and thus safe to ignore here.
+        */
+       if (lsn == NULLCOMMITLSN)
+               return true;
+
+       valid = xlog_valid_lsn(mp->m_log, lsn);
+
+       /* warn the user about what's gone wrong before verifier failure */
+       if (!valid) {
+               spin_lock(&log->l_icloglock);
+               xfs_warn(mp,
+"Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). "
+"Please unmount and run xfs_repair (>= v4.3) to resolve.",
+                        CYCLE_LSN(lsn), BLOCK_LSN(lsn),
+                        log->l_curr_cycle, log->l_curr_block);
+               spin_unlock(&log->l_icloglock);
+       }
+
+       return valid;
+}
index 84e0deb95abda6181b5a10b2a3f790df9c6f04ff..81422004df9cf2c1e756be0a689b4f2c39217ead 100644 (file)
@@ -189,5 +189,6 @@ bool        xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
 void   xfs_log_work_queue(struct xfs_mount *mp);
 void   xfs_log_worker(struct work_struct *work);
 void   xfs_log_quiesce(struct xfs_mount *mp);
+bool   xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
 
 #endif /* __XFS_LOG_H__ */
index db7cbdeb2b42c766914682862f3179866393111b..fcc11b614d298f634c3e524f80a526362d7658a4 100644 (file)
@@ -558,4 +558,55 @@ static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
        remove_wait_queue(wq, &wait);
 }
 
+/*
+ * The LSN is valid so long as it is behind the current LSN. If it isn't, this
+ * means that the next log record that includes this metadata could have a
+ * smaller LSN. In turn, this means that the modification in the log would not
+ * replay.
+ */
+static inline bool
+xlog_valid_lsn(
+       struct xlog     *log,
+       xfs_lsn_t       lsn)
+{
+       int             cur_cycle;
+       int             cur_block;
+       bool            valid = true;
+
+       /*
+        * First, sample the current lsn without locking to avoid added
+        * contention from metadata I/O. The current cycle and block are updated
+        * (in xlog_state_switch_iclogs()) and read here in a particular order
+        * to avoid false negatives (e.g., thinking the metadata LSN is valid
+        * when it is not).
+        *
+        * The current block is always rewound before the cycle is bumped in
+        * xlog_state_switch_iclogs() to ensure the current LSN is never seen in
+        * a transiently forward state. Instead, we can see the LSN in a
+        * transiently behind state if we happen to race with a cycle wrap.
+        */
+       cur_cycle = ACCESS_ONCE(log->l_curr_cycle);
+       smp_rmb();
+       cur_block = ACCESS_ONCE(log->l_curr_block);
+
+       if ((CYCLE_LSN(lsn) > cur_cycle) ||
+           (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) {
+               /*
+                * If the metadata LSN appears invalid, it's possible the check
+                * above raced with a wrap to the next log cycle. Grab the lock
+                * to check for sure.
+                */
+               spin_lock(&log->l_icloglock);
+               cur_cycle = log->l_curr_cycle;
+               cur_block = log->l_curr_block;
+               spin_unlock(&log->l_icloglock);
+
+               if ((CYCLE_LSN(lsn) > cur_cycle) ||
+                   (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block))
+                       valid = false;
+       }
+
+       return valid;
+}
+
 #endif /* __XFS_LOG_PRIV_H__ */
index 4f5784f85a5b22e4bad0946106dcb1302f817ace..5b4d35faf5f7b01e32c64d4cf92e67c2b2afe80a 100644 (file)
@@ -4488,9 +4488,19 @@ xlog_recover(
        int             error;
 
        /* find the tail of the log */
-       if ((error = xlog_find_tail(log, &head_blk, &tail_blk)))
+       error = xlog_find_tail(log, &head_blk, &tail_blk);
+       if (error)
                return error;
 
+       /*
+        * The superblock was read before the log was available and thus the LSN
+        * could not be verified. Check the superblock LSN against the current
+        * LSN now that it's known.
+        */
+       if (xfs_sb_version_hascrc(&log->l_mp->m_sb) &&
+           !xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn))
+               return -EINVAL;
+
        if (tail_blk != head_blk) {
                /* There used to be a comment here:
                 *