www.infradead.org Git - users/hch/xfs.git/commitdiff
xfs: use dax_direct_access for log writes xfs-log-dax
author Christoph Hellwig <hch@lst.de>
Sun, 6 Oct 2019 14:18:04 +0000 (16:18 +0200)
committer Christoph Hellwig <hch@lst.de>
Mon, 3 Feb 2020 17:52:47 +0000 (18:52 +0100)
If the XFS log is on a DAX capable device we can trivially skip all
the block layer overhead and just copy the data directly to the NVDIMM.

Signed-off-by: Christoph Hellwig <hch@lst.de>
fs/xfs/xfs_log.c
fs/xfs/xfs_log_priv.h

index f6006d94a581e9d2f7d3a6a7c6613d2568d66ccd..c549bac3933f3e7899b4d0fdba188fb0ce0ffff0 100644 (file)
@@ -1271,18 +1271,14 @@ xlog_space_left(
        return free_bytes;
 }
 
-
 static void
-xlog_ioend_work(
-       struct work_struct      *work)
+__xlog_ioend(
+       struct xlog_in_core     *iclog,
+       int                     error)
 {
-       struct xlog_in_core     *iclog =
-               container_of(work, struct xlog_in_core, ic_end_io_work);
        struct xlog             *log = iclog->ic_log;
        bool                    aborted = false;
-       int                     error;
 
-       error = blk_status_to_errno(iclog->ic_bio.bi_status);
 #ifdef DEBUG
        /* treat writes with injected CRC errors as failed */
        if (iclog->ic_fail_crc)
@@ -1306,6 +1302,16 @@ xlog_ioend_work(
        }
 
        xlog_state_done_syncing(iclog, aborted);
+}
+
+static void
+xlog_ioend_work(
+       struct work_struct      *work)
+{
+       struct xlog_in_core     *iclog =
+               container_of(work, struct xlog_in_core, ic_end_io_work);
+
+       __xlog_ioend(iclog, blk_status_to_errno(iclog->ic_bio.bi_status));
        bio_uninit(&iclog->ic_bio);
 
        /*
@@ -1426,6 +1432,16 @@ xlog_alloc_log(
        log->l_flags       |= XLOG_ACTIVE_RECOVERY;
        INIT_DELAYED_WORK(&log->l_work, xfs_log_worker);
 
+       /* try to setup direct DAX access to the log if possible */
+       if (IS_ENABLED(CONFIG_FS_DAX) && log->l_targ->bt_daxdev) {
+               if (bdev_dax_pgoff(log->l_targ->bt_bdev, blk_offset,
+                                  BBTOB(num_bblks), &log->l_dax_offset)) {
+                       /* give up if the log is not suitably aligned */
+                       fs_put_dax(log->l_targ->bt_daxdev);
+                       log->l_targ->bt_daxdev = NULL;
+               }
+       }
+
        log->l_prev_block  = -1;
        /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
        xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
@@ -1821,6 +1837,62 @@ xlog_write_iclog(
        submit_bio(&iclog->ic_bio);
 }
 
+/*
+ * Copy @count bytes of log data directly into the DAX mapping of the log
+ * device, starting at byte @offset into the log.  Returns 0 on success or
+ * -EIO if the log range could not be mapped in one contiguous extent.
+ */
+static int
+xlog_copy_to_dax(
+	struct xlog		*log,
+	uint64_t		offset,
+	void			*data,
+	unsigned int		count)
+{
+	uint64_t		log_size = BBTOB(log->l_logBBsize);
+	long			nr_pages = howmany(log_size, PAGE_SIZE);
+	int			id, ret = -EIO;
+	void			*pmem_addr;
+
+	/*
+	 * XXX: do we need to do anything about badblocks or clearing poison?
+	 */
+	id = dax_read_lock();
+	/*
+	 * Require a direct mapping for the entire log: if dax_direct_access
+	 * returns fewer than nr_pages we give up rather than copy in pieces.
+	 * NOTE(review): dax_read_lock presumably guards against device
+	 * removal while we hold pmem_addr — confirm against DAX core docs.
+	 */
+	if (likely(dax_direct_access(log->l_targ->bt_daxdev, log->l_dax_offset,
+			nr_pages, &pmem_addr, NULL) == nr_pages)) {
+		/* flushcache variant so the data is durable, not just cached */
+		memcpy_flushcache(pmem_addr + offset, data, count);
+		ret = 0;
+	}
+	dax_read_unlock(id);
+	return ret;
+}
+
+/*
+ * DAX counterpart of xlog_write_iclog: write an in-core log buffer to the
+ * log device by copying it straight into persistent memory, bypassing the
+ * block layer.  @offset and @count are in bytes; completion processing is
+ * run synchronously via __xlog_ioend with 0 or -EIO.
+ */
+static void
+xlog_write_iclog_dax(
+	struct xlog		*log,
+	struct xlog_in_core	*iclog,
+	uint64_t		offset,
+	unsigned int		count)
+{
+	uint64_t		log_size = BBTOB(log->l_logBBsize);
+	void			*data = iclog->ic_data;
+	int			error = -EIO;
+
+	/* the log has already been shut down: fail the write immediately */
+	if (unlikely(iclog->ic_state == XLOG_STATE_IOERROR))
+		goto out;
+
+	/*
+	 * Flush the data device first so that all metadata writeback from
+	 * the AIL is on disk before the new log entry can reference it
+	 * (same ordering requirement as the REQ_PREFLUSH in xlog_sync).
+	 */
+	xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
+	if (offset + count > log_size) {
+		/* write wraps past the physical end of the log: split it */
+		unsigned int split = log_size - offset;
+
+		if (xlog_copy_to_dax(log, offset, data, split) ||
+		    xlog_copy_to_dax(log, 0, data + split, count - split))
+			goto out;
+	} else {
+		if (xlog_copy_to_dax(log, offset, data, count))
+			goto out;
+	}
+	/* flush the log device itself so the copied range is durable */
+	xfs_blkdev_issue_flush(log->l_targ);
+	error = 0;
+out:
+	/* synchronous completion: no bio, so no ioend workqueue bounce */
+	__xlog_ioend(iclog, error);
+}
+
 /*
  * We need to bump cycle number for the part of the iclog that is
  * written to the start of the log. Watch out for the header magic
@@ -1960,6 +2032,13 @@ xlog_sync(
        }
 #endif
 
+       xlog_verify_iclog(log, iclog, count);
+
+       if (IS_ENABLED(CONFIG_FS_DAX) && log->l_targ->bt_daxdev) {
+               xlog_write_iclog_dax(log, iclog, BBTOB(bno), count);
+               return;
+       }
+
        /*
         * Flush the data device before flushing the log to make sure all meta
         * data written back from the AIL actually made it to disk before
@@ -1973,7 +2052,6 @@ xlog_sync(
                need_flush = false;
        }
 
-       xlog_verify_iclog(log, iclog, count);
        xlog_write_iclog(log, iclog, bno, count, need_flush);
 }
 
index b192c5a9f9fda00b15e0d063cf74b5addd613319..4429bb5eac71d0e04100a1cccaef304fa37be5fa 100644 (file)
@@ -361,6 +361,7 @@ struct xlog {
        xfs_daddr_t             l_logBBstart;   /* start block of log */
        int                     l_logsize;      /* size of log in bytes */
        int                     l_logBBsize;    /* size of log in BB chunks */
+       pgoff_t                 l_dax_offset;   /* offset for DAX access */
 
        /* The following block of fields are changed while holding icloglock */
        wait_queue_head_t       l_flush_wait ____cacheline_aligned_in_smp;