From 42b0a7dcef6983766567d0c010a2c88db6d1091c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Nov 2024 09:14:33 +0100 Subject: [PATCH] iomap: optionally use ioends for direct I/O struct iomap_ioend currently tracks outstanding buffered writes and has some really nice code in core iomap and XFS to merge contiguous I/Os and defer them to userspace for completion in a very efficient way. For zoned writes we'll also need a per-bio user context completion to record the written blocks, and the infrastructure for that would look basically like the ioend handling for buffered I/O. So instead of reinventing the wheel, reuse the existing infrastructure. Signed-off-by: Christoph Hellwig --- fs/iomap/buffered-io.c | 7 +++++- fs/iomap/direct-io.c | 50 +++++++++++++++++++++++++++++++++++++++++- fs/iomap/ioend.c | 7 ++++-- include/linux/iomap.h | 5 +++-- 4 files changed, 63 insertions(+), 6 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index d74330698c14..cddd62e76f84 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -17,6 +17,7 @@ #include #include #include +#include "internal.h" #include "trace.h" #include "../internal.h" @@ -1595,6 +1596,8 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error) if (!atomic_dec_and_test(&ioend->io_remaining)) return 0; + if (ioend->io_isdirect) + return iomap_finish_ioend_direct(ioend); return iomap_finish_ioend_buffered(ioend); } @@ -1644,6 +1647,8 @@ iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next) if (ioend->io_bio.bi_status != next->io_bio.bi_status) return false; + if (ioend->io_isdirect != next->io_isdirect) + return false; if (next->io_flags & IOMAP_F_BOUNDARY) return false; if ((ioend->io_flags & (IOMAP_F_SHARED | IOMAP_F_PRIVATE)) != @@ -1763,7 +1768,7 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, if (pos > wpc->iomap.offset) wpc->iomap.flags &= ~IOMAP_F_BOUNDARY; return iomap_init_ioend(inode, bio, pos, 
wpc->iomap.type, - wpc->iomap.flags); + wpc->iomap.flags, false); } static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 536ce7e0613b..566d9eb0429c 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -12,6 +12,7 @@ #include #include #include +#include "internal.h" #include "trace.h" #include "../internal.h" @@ -20,6 +21,7 @@ * Private flags for iomap_dio, must not overlap with the public ones in * iomap.h: */ +#define IOMAP_DIO_NO_INVALIDATE (1U << 25) #define IOMAP_DIO_CALLER_COMP (1U << 26) #define IOMAP_DIO_INLINE_COMP (1U << 27) #define IOMAP_DIO_WRITE_THROUGH (1U << 28) @@ -117,7 +119,8 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) * ->end_io() when necessary, otherwise a racing buffer read would cache * zeros from unwritten extents. */ - if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE)) + if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE) && + !(dio->flags & IOMAP_DIO_NO_INVALIDATE)) kiocb_invalidate_post_direct_write(iocb, dio->size); inode_dio_end(file_inode(iocb->ki_filp)); @@ -163,6 +166,51 @@ static inline void iomap_dio_set_error(struct iomap_dio *dio, int ret) cmpxchg(&dio->error, 0, ret); } +u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend) +{ + struct iomap_dio *dio = ioend->io_bio.bi_private; + bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY); + struct kiocb *iocb = dio->iocb; + u32 vec_count = ioend->io_bio.bi_vcnt; + + if (ioend->io_error) + iomap_dio_set_error(dio, ioend->io_error); + + if (atomic_dec_and_test(&dio->ref)) { + struct inode *inode = file_inode(iocb->ki_filp); + + if (dio->wait_for_completion) { + struct task_struct *waiter = dio->submit.waiter; + + WRITE_ONCE(dio->submit.waiter, NULL); + blk_wake_io_task(waiter); + } else if (!inode->i_mapping->nrpages) { + WRITE_ONCE(iocb->private, NULL); + + /* + * We must never invalidate pages from this thread to + * avoid deadlocks with buffered I/O 
completions. + * Tough luck if you hit the tiny race with someone + * dirtying the range now. + */ + dio->flags |= IOMAP_DIO_NO_INVALIDATE; + iomap_dio_complete_work(&dio->aio.work); + } else { + INIT_WORK(&dio->aio.work, iomap_dio_complete_work); + queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work); + } + } + + if (should_dirty) { + bio_check_pages_dirty(&ioend->io_bio); + } else { + bio_release_pages(&ioend->io_bio, false); + bio_put(&ioend->io_bio); + } + + return vec_count; +} + void iomap_dio_bio_end_io(struct bio *bio) { struct iomap_dio *dio = bio->bi_private; diff --git a/fs/iomap/ioend.c b/fs/iomap/ioend.c index eeb2aabe4ce6..f8222eb0cbaa 100644 --- a/fs/iomap/ioend.c +++ b/fs/iomap/ioend.c @@ -5,7 +5,8 @@ #include struct iomap_ioend *iomap_init_ioend(struct inode *inode, - struct bio *bio, loff_t file_offset, u8 type, u16 flags) + struct bio *bio, loff_t file_offset, u8 type, u16 flags, + bool isdirect) { struct iomap_ioend *ioend = iomap_ioend_from_bio(bio); @@ -15,6 +16,7 @@ struct iomap_ioend *iomap_init_ioend(struct inode *inode, INIT_LIST_HEAD(&ioend->io_list); ioend->io_type = type; ioend->io_flags = flags; + ioend->io_isdirect = isdirect; ioend->io_inode = inode; ioend->io_offset = file_offset; ioend->io_size = bio->bi_iter.bi_size; @@ -58,7 +60,8 @@ struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend, bool is_append, split->bi_end_io = bio->bi_end_io; split_ioend = iomap_init_ioend(ioend->io_inode, split, - ioend->io_offset, ioend->io_type, ioend->io_flags); + ioend->io_offset, ioend->io_type, ioend->io_flags, + ioend->io_isdirect); split_ioend->io_parent = ioend; atomic_inc(&ioend->io_remaining); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index c928ef3fca44..020af176365c 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -282,7 +282,7 @@ static inline loff_t iomap_last_written_block(struct inode *inode, loff_t pos, } struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio, - loff_t 
file_offset, u8 type, u16 flags); + loff_t file_offset, u8 type, u16 flags, bool isdirect); struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend, bool is_append, unsigned int *alloc_len); @@ -325,7 +325,8 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, */ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ - u16 io_type; + u8 io_type; + bool io_isdirect; /* is direct I/O */ u16 io_flags; /* IOMAP_F_* */ struct inode *io_inode; /* file being written to */ atomic_t io_remaining; -- 2.50.1