From 44d8873139728476867da4bfc9e01bac898e2e1a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 6 Oct 2019 16:18:04 +0200 Subject: [PATCH] xfs: use dax_direct_access for log writes If the XFS log is on a DAX-capable device we can trivially skip all the block layer overhead and just copy the data directly to the NVDIMM. Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_log.c | 94 +++++++++++++++++++++++++++++++++++++++---- fs/xfs/xfs_log_priv.h | 1 + 2 files changed, 87 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f6006d94a581..c549bac3933f 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1271,18 +1271,14 @@ xlog_space_left( return free_bytes; } - static void -xlog_ioend_work( - struct work_struct *work) +__xlog_ioend( + struct xlog_in_core *iclog, + int error) { - struct xlog_in_core *iclog = - container_of(work, struct xlog_in_core, ic_end_io_work); struct xlog *log = iclog->ic_log; bool aborted = false; - int error; - error = blk_status_to_errno(iclog->ic_bio.bi_status); #ifdef DEBUG /* treat writes with injected CRC errors as failed */ if (iclog->ic_fail_crc) @@ -1306,6 +1302,16 @@ xlog_ioend_work( } xlog_state_done_syncing(iclog, aborted); +} + +static void +xlog_ioend_work( + struct work_struct *work) +{ + struct xlog_in_core *iclog = + container_of(work, struct xlog_in_core, ic_end_io_work); + + __xlog_ioend(iclog, blk_status_to_errno(iclog->ic_bio.bi_status)); bio_uninit(&iclog->ic_bio); /* @@ -1426,6 +1432,16 @@ xlog_alloc_log( log->l_flags |= XLOG_ACTIVE_RECOVERY; INIT_DELAYED_WORK(&log->l_work, xfs_log_worker); + /* try to setup direct DAX access to the log if possible */ + if (IS_ENABLED(CONFIG_FS_DAX) && log->l_targ->bt_daxdev) { + if (bdev_dax_pgoff(log->l_targ->bt_bdev, blk_offset, + BBTOB(num_bblks), &log->l_dax_offset)) { + /* give up if the log is not suitably aligned */ + fs_put_dax(log->l_targ->bt_daxdev); + log->l_targ->bt_daxdev = NULL; + } + } + log->l_prev_block = -1; /* log->l_tail_lsn 
= 0x100000000LL; cycle = 1; current block = 0 */ xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0); @@ -1821,6 +1837,62 @@ xlog_write_iclog( submit_bio(&iclog->ic_bio); } +static int +xlog_copy_to_dax( + struct xlog *log, + uint64_t offset, + void *data, + unsigned int count) +{ + uint64_t log_size = BBTOB(log->l_logBBsize); + long nr_pages = howmany(log_size, PAGE_SIZE); + int id, ret = -EIO; + void *pmem_addr; + + /* + * XXX: do we need to do anything about badblocks or clearing poison? + */ + id = dax_read_lock(); + if (likely(dax_direct_access(log->l_targ->bt_daxdev, log->l_dax_offset, + nr_pages, &pmem_addr, NULL) == nr_pages)) { + memcpy_flushcache(pmem_addr + offset, data, count); + ret = 0; + } + dax_read_unlock(id); + return ret; +} + +static void +xlog_write_iclog_dax( + struct xlog *log, + struct xlog_in_core *iclog, + uint64_t offset, + unsigned int count) +{ + uint64_t log_size = BBTOB(log->l_logBBsize); + void *data = iclog->ic_data; + int error = -EIO; + + if (unlikely(iclog->ic_state == XLOG_STATE_IOERROR)) + goto out; + + xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); + if (offset + count > log_size) { + unsigned int split = log_size - offset; + + if (xlog_copy_to_dax(log, offset, data, split) || + xlog_copy_to_dax(log, 0, data + split, count - split)) + goto out; + } else { + if (xlog_copy_to_dax(log, offset, data, count)) + goto out; + } + xfs_blkdev_issue_flush(log->l_targ); + error = 0; +out: + __xlog_ioend(iclog, error); +} + /* * We need to bump cycle number for the part of the iclog that is * written to the start of the log. 
Watch out for the header magic @@ -1960,6 +2032,13 @@ xlog_sync( } #endif + xlog_verify_iclog(log, iclog, count); + + if (IS_ENABLED(CONFIG_FS_DAX) && log->l_targ->bt_daxdev) { + xlog_write_iclog_dax(log, iclog, BBTOB(bno), count); + return; + } + /* * Flush the data device before flushing the log to make sure all meta * data written back from the AIL actually made it to disk before @@ -1973,7 +2052,6 @@ xlog_sync( need_flush = false; } - xlog_verify_iclog(log, iclog, count); xlog_write_iclog(log, iclog, bno, count, need_flush); } diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index b192c5a9f9fd..4429bb5eac71 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -361,6 +361,7 @@ struct xlog { xfs_daddr_t l_logBBstart; /* start block of log */ int l_logsize; /* size of log in bytes */ int l_logBBsize; /* size of log in BB chunks */ + pgoff_t l_dax_offset; /* offset for DAX access */ /* The following block of fields are changed while holding icloglock */ wait_queue_head_t l_flush_wait ____cacheline_aligned_in_smp; -- 2.50.1