#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
+#include "xfs_rtalloc.h"
#include "xfs_inode.h"
#include "xfs_bit.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_exchmaps.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/xfile.h"
+#include "scrub/tempfile.h"
+#include "scrub/tempexch.h"
+#include "scrub/reap.h"
#include "scrub/rtbitmap.h"
-/* Set up to repair the realtime bitmap file metadata. */
+/* rt bitmap content repairs */
+
+/*
+ * Set up to repair the realtime bitmap for this group.
+ *
+ * Creates the repair tempfile and an xfile big enough to hold
+ * sb_rbmblocks filesystem blocks of reconstructed bitmap, adds a
+ * speculative block reservation to rtb->resblks, and switches the
+ * requested rtglock flags from shared to exclusive bitmap locking.
+ *
+ * Returns 0 on success or a negative errno; -EOPNOTSUPP is returned if
+ * the computed reservation does not fit in an unsigned int.
+ */
int
xrep_setup_rtbitmap(
struct xfs_scrub *sc,
struct xchk_rtbitmap *rtb)
{
struct xfs_mount *mp = sc->mp;
- unsigned long long blocks = 0;
+ char *descr;
+ unsigned long long blocks = mp->m_sb.sb_rbmblocks;
+ int error;
+
+ error = xrep_tempfile_create(sc, S_IFREG);
+ if (error)
+ return error;
+
+ /*
+ * Create an xfile to hold our reconstructed bitmap. The xfile size is
+ * given in bytes and the xfbmp_* helpers address the xfile by byte
+ * offset, so convert the bitmap block count to bytes here.
+ */
+ descr = xchk_xfile_rtgroup_descr(sc, "bitmap file");
+ error = xfile_create(descr, blocks * mp->m_sb.sb_blocksize, &sc->xfile);
+ kfree(descr);
+ if (error)
+ return error;
/*
- * Reserve enough blocks to write out a completely new bmbt for a
- * maximally fragmented bitmap file. We do not hold the rtbitmap
- * ILOCK yet, so this is entirely speculative.
+ * Reserve enough blocks to write out a completely new bitmap file,
+ * plus twice as many blocks as we would need if we can only allocate
+ * one block per data fork mapping. This should cover the
+ * preallocation of the temporary file and exchanging the extent
+ * mappings.
+ *
+ * We cannot use xfs_exchmaps_estimate because we have not yet
+ * constructed the replacement bitmap and therefore do not know how
+ * many extents it will use. By the time we do, we will have a dirty
+ * transaction (which we cannot drop because we cannot drop the
+ * rtbitmap ILOCK) and cannot ask for more reservation.
*/
- blocks = xfs_bmbt_calc_size(mp, mp->m_sb.sb_rbmblocks);
+ blocks += xfs_bmbt_calc_size(mp, blocks) * 2;
if (blocks > UINT_MAX)
return -EOPNOTSUPP;
rtb->resblks += blocks;
+
+ /*
+ * We must hold rbmip with ILOCK_EXCL to use the file mapping exchange
+ * at the end of the repair function. Change the desired rtglock
+ * flags.
+ */
+ rtb->rtglock_flags &= ~XFS_RTGLOCK_BITMAP_SHARED;
+ rtb->rtglock_flags |= XFS_RTGLOCK_BITMAP;
+ return 0;
+}
+
+/*
+ * Translate an rt extent number into the index of the bitmap word that
+ * carries its bit.  @mp is accepted but not consulted.
+ */
+static inline xrep_wordoff_t
+rtx_to_wordoff(
+	struct xfs_mount	*mp,
+	xfs_rtxnum_t		rtx)
+{
+	xrep_wordoff_t		wordoff = rtx >> XFS_NBWORDLOG;
+
+	return wordoff;
+}
+
+/* Translate a length in rt extents into a count of whole bitmap words. */
+static inline xrep_wordcnt_t
+rtxlen_to_wordcnt(
+	xfs_rtxlen_t		rtxlen)
+{
+	xrep_wordcnt_t		nr_words = rtxlen >> XFS_NBWORDLOG;
+
+	return nr_words;
+}
+
+/* Helper functions to record rtwords in an xfile. */
+
+/*
+ * Read the bitmap word at index @wordoff out of the xfile and hand it back
+ * through @word in CPU byte order.  Returns 0 or the xfile_load error, in
+ * which case @word is left untouched.
+ */
+static inline int
+xfbmp_load(
+	struct xchk_rtbitmap	*rtb,
+	xrep_wordoff_t		wordoff,
+	xfs_rtword_t		*word)
+{
+	union xfs_rtword_raw	raw;
+	int			ret;
+
+	ASSERT(xfs_has_rtgroups(rtb->sc->mp));
+
+	/* Words are addressed by byte position inside the xfile. */
+	ret = xfile_load(rtb->sc->xfile, &raw, sizeof(raw),
+			wordoff << XFS_WORDLOG);
+	if (!ret)
+		*word = be32_to_cpu(raw.rtg);
+
+	return ret;
+}
+
+/*
+ * Write @word, converted to big-endian, into the xfile at word index
+ * @wordoff.  Returns whatever xfile_store returns.
+ */
+static inline int
+xfbmp_store(
+	struct xchk_rtbitmap	*rtb,
+	xrep_wordoff_t		wordoff,
+	const xfs_rtword_t	word)
+{
+	union xfs_rtword_raw	raw;
+
+	ASSERT(xfs_has_rtgroups(rtb->sc->mp));
+
+	raw.rtg = cpu_to_be32(word);
+
+	/* Words are addressed by byte position inside the xfile. */
+	return xfile_store(rtb->sc->xfile, &raw, sizeof(raw),
+			wordoff << XFS_WORDLOG);
+}
+
+/*
+ * Copy @nr_words raw bitmap words from the buffer at @word into the xfile,
+ * starting at word index @wordoff.  No byte swapping is done.
+ */
+static inline int
+xfbmp_copyin(
+	struct xchk_rtbitmap		*rtb,
+	xrep_wordoff_t			wordoff,
+	const union xfs_rtword_raw	*word,
+	xrep_wordcnt_t			nr_words)
+{
+	struct xfile			*xf = rtb->sc->xfile;
+
+	return xfile_store(xf, word, nr_words << XFS_WORDLOG,
+			wordoff << XFS_WORDLOG);
+}
+
+/*
+ * Copy @nr_words raw bitmap words out of the xfile, starting at word index
+ * @wordoff, into the buffer at @word.  No byte swapping is done.
+ */
+static inline int
+xfbmp_copyout(
+	struct xchk_rtbitmap	*rtb,
+	xrep_wordoff_t		wordoff,
+	union xfs_rtword_raw	*word,
+	xrep_wordcnt_t		nr_words)
+{
+	struct xfile		*xf = rtb->sc->xfile;
+
+	return xfile_load(xf, word, nr_words << XFS_WORDLOG,
+			wordoff << XFS_WORDLOG);
+}
+
+/*
+ * OR @mask into the word at index @wordoff of the incore bitmap.  The word
+ * is read from the xfile, traced together with the mask, and the merged
+ * value is written back.  Returns 0 or a negative errno from the xfile
+ * helpers.
+ */
+static int
+xrep_rtbitmap_or(
+	struct xchk_rtbitmap	*rtb,
+	xrep_wordoff_t		wordoff,
+	xfs_rtword_t		mask)
+{
+	xfs_rtword_t		cur;
+	int			ret;
+
+	ret = xfbmp_load(rtb, wordoff, &cur);
+	if (ret)
+		return ret;
+
+	/* The trace sees the value as it was before the merge. */
+	trace_xrep_rtbitmap_or(rtb->sc->mp, wordoff, mask, cur);
+
+	cur |= mask;
+	return xfbmp_store(rtb, wordoff, cur);
+}
+
+/*
+ * Mark as free every rt extent between the next rt block we expected to see
+ * in the rtrmap records and the given rt block.
+ *
+ * The range starts at rtb->next_rgbno and ends just before @rgbno. Both ends
+ * must sit on rt extent boundaries; if they do not, or if the block range
+ * fails xfs_verify_rgbext, -EFSCORRUPTED is returned. Bits are set in the
+ * xfile copy of the bitmap in three steps: a partial first word, a partial
+ * last word, and then whole words copied in bulk from rtb->words.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+STATIC int
+xrep_rtbitmap_mark_free(
+ struct xchk_rtbitmap *rtb,
+ xfs_rgblock_t rgbno)
+{
+ struct xfs_mount *mp = rtb->sc->mp;
+ struct xfs_rtgroup *rtg = rtb->sc->sr.rtg;
+ xfs_rtxnum_t startrtx;
+ xfs_rtxnum_t nextrtx;
+ xrep_wordoff_t wordoff, nextwordoff;
+ unsigned int bit;
+ unsigned int bufwsize;
+ xfs_extlen_t mod;
+ xfs_rtword_t mask;
+ int error;
+
+ if (!xfs_verify_rgbext(rtg, rtb->next_rgbno, rgbno - rtb->next_rgbno))
+ return -EFSCORRUPTED;
+
+ /*
+ * Convert rt blocks to rt extents. The block range we find must be
+ * aligned to an rtextent boundary on both ends: the first block must
+ * have offset zero within its rt extent, and the last block
+ * (rgbno - 1) must have offset sb_rextsize - 1.
+ */
+ startrtx = xfs_rgbno_to_rtx(mp, rtb->next_rgbno);
+ mod = xfs_rtb_to_rtxoff(mp, rtb->next_rgbno);
+ if (mod)
+ return -EFSCORRUPTED;
+
+ nextrtx = xfs_rgbno_to_rtx(mp, rgbno - 1) + 1;
+ mod = xfs_rtb_to_rtxoff(mp, rgbno - 1);
+ if (mod != mp->m_sb.sb_rextsize - 1)
+ return -EFSCORRUPTED;
+
+ trace_xrep_rtbitmap_record_free(mp, startrtx, nextrtx - 1);
+
+ /*
+ * Set bits as needed to round startrtx up to the nearest word. The
+ * mask covers bits [bit, lastbit) of the first word. If the whole
+ * range fits inside that one word (lastbit - bit == len) we are done.
+ */
+ bit = startrtx & XREP_RTBMP_WORDMASK;
+ if (bit) {
+ xfs_rtblock_t len = nextrtx - startrtx;
+ unsigned int lastbit;
+
+ lastbit = min(bit + len, XFS_NBWORD);
+ mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
+
+ error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, startrtx),
+ mask);
+ if (error || lastbit - bit == len)
+ return error;
+ startrtx += XFS_NBWORD - bit;
+ }
+
+ /*
+ * Set bits as needed to round nextrtx down to the nearest word. The
+ * mask covers the low @bit bits of the last word. If nothing lies
+ * between startrtx and that last word we are done.
+ */
+ bit = nextrtx & XREP_RTBMP_WORDMASK;
+ if (bit) {
+ mask = ((xfs_rtword_t)1 << bit) - 1;
+
+ error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, nextrtx),
+ mask);
+ if (error || startrtx + bit == nextrtx)
+ return error;
+ nextrtx -= bit;
+ }
+
+ trace_xrep_rtbitmap_record_free_bulk(mp, startrtx, nextrtx - 1);
+
+ /*
+ * Set all the words in between, up to a whole fs block at once.
+ * rtb->words is the source buffer for every copy;
+ * xrep_rtbitmap_find_freespace fills it with ones before the rmap
+ * walk starts.
+ */
+ wordoff = rtx_to_wordoff(mp, startrtx);
+ nextwordoff = rtx_to_wordoff(mp, nextrtx);
+ bufwsize = mp->m_sb.sb_blocksize >> XFS_WORDLOG;
+
+ while (wordoff < nextwordoff) {
+ xrep_wordoff_t rem;
+ xrep_wordcnt_t wordcnt;
+
+ wordcnt = min_t(xrep_wordcnt_t, nextwordoff - wordoff,
+ bufwsize);
+
+ /*
+ * Try to keep us aligned to the rtwords buffer to reduce the
+ * number of xfile writes. When wordoff is not a multiple of
+ * bufwsize, this copy is shortened so it stops at the next
+ * multiple.
+ */
+ rem = wordoff & (bufwsize - 1);
+ if (rem)
+ wordcnt = min_t(xrep_wordcnt_t, wordcnt,
+ bufwsize - rem);
+
+ error = xfbmp_copyin(rtb, wordoff, rtb->words, wordcnt);
+ if (error)
+ return error;
+
+ wordoff += wordcnt;
+ }
+
+ return 0;
+}
+
+/*
+ * Per-record callback for the rtrmapbt walk.  Any gap between the block we
+ * expected next (rtb->next_rgbno) and the start of @rec is marked free in
+ * the bitmap under construction, then the expected block is advanced past
+ * the end of @rec if the record reaches further than what we have seen.
+ *
+ * @priv is the struct xchk_rtbitmap.  Returns 0 to continue the walk or a
+ * negative errno to stop it.
+ */
+STATIC int
+xrep_rtbitmap_walk_rtrmap(
+	struct xfs_btree_cur		*cur,
+	const struct xfs_rmap_irec	*rec,
+	void				*priv)
+{
+	struct xchk_rtbitmap		*rtb = priv;
+	int				ret = 0;
+
+	if (xchk_should_terminate(rtb->sc, &ret))
+		return ret;
+
+	/* Unmapped space ahead of this record is free space. */
+	if (rec->rm_startblock > rtb->next_rgbno) {
+		ret = xrep_rtbitmap_mark_free(rtb, rec->rm_startblock);
+		if (ret)
+			return ret;
+	}
+
+	/* Never move backwards, whatever this record covers. */
+	rtb->next_rgbno = max(rtb->next_rgbno,
+			rec->rm_startblock + rec->rm_blockcount);
+	return 0;
+}
+
+/*
+ * Walk the rtrmapbt to find all the gaps between records, and mark the gaps
+ * in the realtime bitmap that we're computing.
+ *
+ * rtb->words is first filled with 0xFF bytes so that
+ * xrep_rtbitmap_mark_free can copy whole words of set bits from it. After the
+ * walk, anything between the last block seen and the end of the group
+ * (rtg_extents * sb_rextsize blocks) is marked free as well. The rtgroup
+ * btree cursors are released on every exit path.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+STATIC int
+xrep_rtbitmap_find_freespace(
+ struct xchk_rtbitmap *rtb)
+{
+ struct xfs_scrub *sc = rtb->sc;
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_rtgroup *rtg = sc->sr.rtg;
+ uint64_t blockcount;
+ int error;
+
+ /* Prepare a buffer of ones so that we can accelerate bulk setting. */
+ memset(rtb->words, 0xFF, mp->m_sb.sb_blocksize);
+
+ xrep_rtgroup_btcur_init(sc, &sc->sr);
+ error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_rtbitmap_walk_rtrmap,
+ rtb);
+ if (error)
+ goto out;
+
+ /*
+ * Mark as free every possible rt extent from the last one we saw to
+ * the end of the rt group.
+ */
+ blockcount = rtg->rtg_extents * mp->m_sb.sb_rextsize;
+ if (rtb->next_rgbno < blockcount) {
+ error = xrep_rtbitmap_mark_free(rtb, blockcount);
+ if (error)
+ goto out;
+ }
+
+out:
+ xchk_rtgroup_btcur_free(&sc->sr);
+ return error;
+}
+/*
+ * Fill one bitmap block buffer from the xfile; passed as the callback to
+ * xrep_tempfile_copyin. @data is the struct xchk_rtbitmap.
+ *
+ * mp->m_blockwsize words are copied out of the xfile, starting at
+ * rtb->prep_wordoff, to the first word position of @bp as located by
+ * xfs_rbmblock_wordptr. On rtgroups filesystems the block header fields are
+ * written too. rtb->prep_wordoff is then advanced by mp->m_blockwsize so the
+ * next call continues where this one stopped.
+ *
+ * Returns 0 on success or the error from the xfile copy.
+ */
+static int
+xrep_rtbitmap_prep_buf(
+ struct xfs_scrub *sc,
+ struct xfs_buf *bp,
+ void *data)
+{
+ struct xchk_rtbitmap *rtb = data;
+ struct xfs_mount *mp = sc->mp;
+ union xfs_rtword_raw *ondisk;
+ int error;
+
+ rtb->args.mp = sc->mp;
+ rtb->args.tp = sc->tp;
+ rtb->args.rbmbp = bp; /* set only for the wordptr lookup, cleared right after */
+ ondisk = xfs_rbmblock_wordptr(&rtb->args, 0);
+ rtb->args.rbmbp = NULL;
+
+ error = xfbmp_copyout(rtb, rtb->prep_wordoff, ondisk,
+ mp->m_blockwsize);
+ if (error)
+ return error;
+
+ if (xfs_has_rtgroups(sc->mp)) {
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+
+ hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC);
+ hdr->rt_owner = cpu_to_be64(sc->ip->i_ino);
+ hdr->rt_blkno = cpu_to_be64(xfs_buf_daddr(bp));
+ hdr->rt_lsn = 0;
+ uuid_copy(&hdr->rt_uuid, &sc->mp->m_sb.sb_meta_uuid);
+ bp->b_ops = &xfs_rtbitmap_buf_ops;
+ } else {
+ bp->b_ops = &xfs_rtbuf_ops;
+ }
+
+ rtb->prep_wordoff += mp->m_blockwsize;
+ xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTBITMAP_BUF);
return 0;
}
return error;
/* Fix inconsistent bitmap geometry */
- return xrep_rtbitmap_geometry(sc, rtb);
+ error = xrep_rtbitmap_geometry(sc, rtb);
+ if (error)
+ return error;
+
+ /* We require the realtime rmapbt to rebuild anything. */
+ if (!xfs_has_rtrmapbt(sc->mp))
+ return -EOPNOTSUPP;
+ /* We require atomic file exchange range to rebuild anything. */
+ if (!xfs_has_exchange_range(sc->mp))
+ return -EOPNOTSUPP;
+
+ /*
+ * Generate the new rtbitmap data. We don't need the rtbmp information
+ * once this call is finished.
+ */
+ error = xrep_rtbitmap_find_freespace(rtb);
+ if (error)
+ return error;
+
+ /*
+ * Try to take ILOCK_EXCL of the temporary file. We had better be the
+ * only ones holding onto this inode, but we can't block while holding
+ * the rtbitmap file's ILOCK_EXCL.
+ */
+ while (!xrep_tempfile_ilock_nowait(sc)) {
+ if (xchk_should_terminate(sc, &error))
+ return error;
+ delay(1);
+ }
+
+ /*
+ * Make sure we have space allocated for the part of the bitmap
+ * file that corresponds to this group.
+ */
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+ xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+ error = xrep_tempfile_prealloc(sc, 0, rtb->rbmblocks);
+ if (error)
+ return error;
+
+ /* Last chance to abort before we start committing fixes. */
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ /* Copy the bitmap file that we generated. */
+ error = xrep_tempfile_copyin(sc, 0, rtb->rbmblocks,
+ xrep_rtbitmap_prep_buf, rtb);
+ if (error)
+ return error;
+ error = xrep_tempfile_set_isize(sc,
+ XFS_FSB_TO_B(sc->mp, sc->mp->m_sb.sb_rbmblocks));
+ if (error)
+ return error;
+
+ /*
+ * Now exchange the data fork contents. We're done with the temporary
+ * buffer, so we can reuse it for the tempfile exchmaps information.
+ */
+ error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, 0,
+ rtb->rbmblocks, &rtb->tempexch);
+ if (error)
+ return error;
+
+ error = xrep_tempexch_contents(sc, &rtb->tempexch);
+ if (error)
+ return error;
+
+ /* Free the old bitmap blocks if they are free. */
+ return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
}
+
DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_unlink);
DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_link);
+#ifdef CONFIG_XFS_RT
+DECLARE_EVENT_CLASS(xrep_rtbitmap_class, /* events that log a range of rt extents */
+ TP_PROTO(struct xfs_mount *mp, xfs_rtxnum_t start, xfs_rtxnum_t end),
+ TP_ARGS(mp, start, end),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(xfs_rtxnum_t, start)
+ __field(xfs_rtxnum_t, end)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->start = start;
+ __entry->end = end;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d startrtx 0x%llx endrtx 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->start,
+ __entry->end)
+);
+#define DEFINE_REPAIR_RGBITMAP_EVENT(name) \
+DEFINE_EVENT(xrep_rtbitmap_class, name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_rtxnum_t start, \
+ xfs_rtxnum_t end), \
+ TP_ARGS(mp, start, end))
+DEFINE_REPAIR_RGBITMAP_EVENT(xrep_rtbitmap_record_free); /* full range; the caller passes the last rtx as end */
+DEFINE_REPAIR_RGBITMAP_EVENT(xrep_rtbitmap_record_free_bulk); /* what is left after the partial words are set */
+/*
+ * Traces an OR into one word of the bitmap being rebuilt: the word index, the
+ * mask about to be merged, and the word value as loaded before the merge.
+ */
+TRACE_EVENT(xrep_rtbitmap_or,
+ TP_PROTO(struct xfs_mount *mp, unsigned long long wordoff,
+ xfs_rtword_t mask, xfs_rtword_t word),
+ TP_ARGS(mp, wordoff, mask, word),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(unsigned long long, wordoff)
+ __field(unsigned int, mask)
+ __field(unsigned int, word)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->wordoff = wordoff;
+ __entry->mask = mask;
+ __entry->word = word;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d wordoff 0x%llx mask 0x%x word 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->wordoff,
+ __entry->mask,
+ __entry->word)
+);
+/*
+ * Records an rtgroup number, an offset into the bitmap file, a starting rt
+ * extent and an rt extent count. NOTE(review): no caller is visible in this
+ * chunk, so when it fires has to be confirmed against the repair code.
+ */
+TRACE_EVENT(xrep_rtbitmap_load,
+ TP_PROTO(struct xfs_rtgroup *rtg, xfs_fileoff_t rbmoff,
+ xfs_rtxnum_t rtx, xfs_rtxnum_t len),
+ TP_ARGS(rtg, rbmoff, rtx, len),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(xfs_rgnumber_t, rgno)
+ __field(xfs_fileoff_t, rbmoff)
+ __field(xfs_rtxnum_t, rtx)
+ __field(xfs_rtxnum_t, len)
+ ),
+ TP_fast_assign(
+ __entry->dev = rtg->rtg_mount->m_super->s_dev;
+ __entry->rtdev = rtg->rtg_mount->m_rtdev_targp->bt_dev;
+ __entry->rgno = rtg->rtg_rgno;
+ __entry->rbmoff = rbmoff;
+ __entry->rtx = rtx;
+ __entry->len = len;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d rgno 0x%x rbmoff 0x%llx rtx 0x%llx rtxcount 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->rgno,
+ __entry->rbmoff,
+ __entry->rtx,
+ __entry->len)
+);
+/*
+ * Records an offset into the bitmap file together with a word offset and a
+ * word count. NOTE(review): no caller is visible in this chunk.
+ */
+TRACE_EVENT(xrep_rtbitmap_load_words,
+ TP_PROTO(struct xfs_mount *mp, xfs_fileoff_t rbmoff,
+ unsigned long long wordoff, unsigned int wordcnt),
+ TP_ARGS(mp, rbmoff, wordoff, wordcnt),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(xfs_fileoff_t, rbmoff)
+ __field(unsigned long long, wordoff)
+ __field(unsigned int, wordcnt)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->rbmoff = rbmoff;
+ __entry->wordoff = wordoff;
+ __entry->wordcnt = wordcnt;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d rbmoff 0x%llx wordoff 0x%llx wordcnt 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->rbmoff,
+ __entry->wordoff,
+ __entry->wordcnt)
+);
+/*
+ * Records one word's ondisk value, its xfile value and a mask. The printed
+ * line shows each raw value followed in parentheses by its masked form
+ * (ondisk & mask, xfile & ~mask), then the OR of those two masked forms as
+ * "result", then the mask itself. NOTE(review): no caller is visible in this
+ * chunk.
+ */
+TRACE_EVENT(xrep_rtbitmap_load_word,
+ TP_PROTO(struct xfs_mount *mp, unsigned long long wordoff,
+ unsigned int bit, xfs_rtword_t ondisk_word,
+ xfs_rtword_t xfile_word, xfs_rtword_t word_mask),
+ TP_ARGS(mp, wordoff, bit, ondisk_word, xfile_word, word_mask),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(unsigned long long, wordoff)
+ __field(unsigned int, bit)
+ __field(xfs_rtword_t, ondisk_word)
+ __field(xfs_rtword_t, xfile_word)
+ __field(xfs_rtword_t, word_mask)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->wordoff = wordoff;
+ __entry->bit = bit;
+ __entry->ondisk_word = ondisk_word;
+ __entry->xfile_word = xfile_word;
+ __entry->word_mask = word_mask;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d wordoff 0x%llx bit %u ondisk 0x%x(0x%x) inmem 0x%x(0x%x) result 0x%x mask 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->wordoff,
+ __entry->bit,
+ __entry->ondisk_word,
+ __entry->ondisk_word & __entry->word_mask,
+ __entry->xfile_word,
+ __entry->xfile_word & ~__entry->word_mask,
+ (__entry->xfile_word & ~__entry->word_mask) |
+ (__entry->ondisk_word & __entry->word_mask),
+ __entry->word_mask)
+);
+#endif /* CONFIG_XFS_RT */
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */