From 7919292a961421bfdb22f83c16657684c96076b3 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 5 Feb 2025 11:36:42 +0000 Subject: [PATCH 01/16] io_uring/kbuf: remove legacy kbuf bulk allocation Legacy provided buffers are slow and discouraged in favour of the ring variant. Remove the bulk allocation to keep it simpler as we don't care about performance. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/a064d70370e590efed8076e9501ae4cfc20fe0ca.1738724373.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 8e72de7712ac..f152afdf0bc7 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -501,12 +501,9 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe return 0; } -#define IO_BUFFER_ALLOC_BATCH 64 - static int io_refill_buffer_cache(struct io_ring_ctx *ctx) { - struct io_buffer *bufs[IO_BUFFER_ALLOC_BATCH]; - int allocated; + struct io_buffer *buf; /* * Completions that don't happen inline (eg not under uring_lock) will @@ -524,27 +521,10 @@ static int io_refill_buffer_cache(struct io_ring_ctx *ctx) spin_unlock(&ctx->completion_lock); } - /* - * No free buffers and no completion entries either. Allocate a new - * batch of buffer entries and add those to our freelist. - */ - - allocated = kmem_cache_alloc_bulk(io_buf_cachep, GFP_KERNEL_ACCOUNT, - ARRAY_SIZE(bufs), (void **) bufs); - if (unlikely(!allocated)) { - /* - * Bulk alloc is all-or-nothing. If we fail to get a batch, - * retry single alloc to be on the safe side. - */ - bufs[0] = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL); - if (!bufs[0]) - return -ENOMEM; - allocated = 1; - } - - while (allocated) - list_add_tail(&bufs[--allocated]->list, &ctx->io_buffers_cache); - + buf = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL); + if (!buf) + return -ENOMEM; + list_add_tail(&buf->list, &ctx->io_buffers_cache); return 0; } -- 2.51.0 From 9afe6847cff78e7f3aa8f4c920265cf298033251 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 5 Feb 2025 11:36:43 +0000 Subject: [PATCH 02/16] io_uring/kbuf: remove legacy kbuf kmem cache Remove the kmem cache used by legacy provided buffers. 
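For context, "legacy" provided buffers are the ones registered with IORING_OP_PROVIDE_BUFFERS rather than with a mapped buffer ring; each buffer handed over this way is tracked by a kernel-side struct io_buffer, which is the allocation path this patch and the previous one simplify. A minimal userspace sketch (liburing; buffer group 7 and the 8 x 4KiB sizing are illustrative, not taken from the patches):

#include <liburing.h>

static char bufs[8][4096];

/* hand 8 x 4KiB buffers to the kernel under buffer group 7, bids 0..7 */
static int provide_legacy_buffers(struct io_uring *ring)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int ret;

	io_uring_prep_provide_buffers(sqe, bufs, 4096, 8, 7, 0);
	io_uring_submit(ring);
	if (io_uring_wait_cqe(ring, &cqe))
		return -1;
	ret = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	return ret < 0 ? ret : 0;
}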
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/8195c207d8524d94e972c0c82de99282289f7f5c.1738724373.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 2 -- io_uring/io_uring.h | 1 - io_uring/kbuf.c | 6 ++---- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index e34a92c73a5d..6fa1e88e40fb 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3927,8 +3927,6 @@ static int __init io_uring_init(void) req_cachep = kmem_cache_create("io_kiocb", sizeof(struct io_kiocb), &kmem_args, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT | SLAB_TYPESAFE_BY_RCU); - io_buf_cachep = KMEM_CACHE(io_buffer, - SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT); iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64); BUG_ON(!iou_wq); diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index ab619e63ef39..85bc8f76ca19 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -418,7 +418,6 @@ static inline bool io_req_cache_empty(struct io_ring_ctx *ctx) } extern struct kmem_cache *req_cachep; -extern struct kmem_cache *io_buf_cachep; static inline struct io_kiocb *io_extract_req(struct io_ring_ctx *ctx) { diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index f152afdf0bc7..2e1561c9220f 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -20,8 +20,6 @@ /* BIDs are addressed by a 16-bit field in a CQE */ #define MAX_BIDS_PER_BGID (1 << 16) -struct kmem_cache *io_buf_cachep; - struct io_provide_buf { struct file *file; __u64 addr; @@ -411,7 +409,7 @@ void io_destroy_buffers(struct io_ring_ctx *ctx) list_for_each_safe(item, tmp, &ctx->io_buffers_cache) { buf = list_entry(item, struct io_buffer, list); - kmem_cache_free(io_buf_cachep, buf); + kfree(buf); } } @@ -521,7 +519,7 @@ static int io_refill_buffer_cache(struct io_ring_ctx *ctx) spin_unlock(&ctx->completion_lock); } - buf = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL); + buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); if (!buf) return -ENOMEM; list_add_tail(&buf->list, &ctx->io_buffers_cache); return 0; } -- 2.51.0 From dd4fbb11e7ccc15dbb197a5bbfb2ca8bfda89fcd Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 5 Feb 2025 11:36:44 +0000 Subject: [PATCH 03/16] io_uring/kbuf: move locking into io_kbuf_drop() Move the burden of locking out of the caller into io_kbuf_drop(), which will help with further refactoring. 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/530f0cf1f06963029399f819a9a58b1a34bebef3.1738724373.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 5 +---- io_uring/kbuf.h | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 6fa1e88e40fb..ed7c9081352a 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -398,11 +398,8 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq) static void io_clean_op(struct io_kiocb *req) { - if (req->flags & REQ_F_BUFFER_SELECTED) { - spin_lock(&req->ctx->completion_lock); + if (unlikely(req->flags & REQ_F_BUFFER_SELECTED)) io_kbuf_drop(req); - spin_unlock(&req->ctx->completion_lock); - } if (req->flags & REQ_F_NEED_CLEANUP) { const struct io_cold_def *def = &io_cold_defs[req->opcode]; diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index bd80c44c5af1..310f94a0727a 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -174,13 +174,13 @@ static inline void __io_put_kbuf_list(struct io_kiocb *req, int len, static inline void io_kbuf_drop(struct io_kiocb *req) { - lockdep_assert_held(&req->ctx->completion_lock); - if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))) return; + spin_lock(&req->ctx->completion_lock); /* len == 0 is fine here, non-ring will always drop all of it */ __io_put_kbuf_list(req, 0, &req->ctx->io_buffers_comp); + spin_unlock(&req->ctx->completion_lock); } static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len, -- 2.51.0 From dc39fb1093ea33019f192c93b77b863282e10162 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 5 Feb 2025 11:36:45 +0000 Subject: [PATCH 04/16] io_uring/kbuf: simplify __io_put_kbuf As a preparation step remove an optimisation from __io_put_kbuf() trying to use the locked cache. With that __io_put_kbuf_list() is only used with ->io_buffers_comp, and we remove the explicit list argument. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/1b7f1394ec4afc7f96b35a61f5992e27c49fd067.1738724373.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 26 +++----------------------- io_uring/kbuf.h | 7 +++---- 2 files changed, 6 insertions(+), 27 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 2e1561c9220f..3a43af9f7061 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -70,29 +70,9 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags) { - /* - * We can add this buffer back to two lists: - * - * 1) The io_buffers_cache list. This one is protected by the - * ctx->uring_lock. If we already hold this lock, add back to this - * list as we can grab it from issue as well. - * 2) The io_buffers_comp list. This one is protected by the - * ctx->completion_lock. - * - * We migrate buffers from the comp_list to the issue cache list - * when we need one. 
- */ - if (issue_flags & IO_URING_F_UNLOCKED) { - struct io_ring_ctx *ctx = req->ctx; - - spin_lock(&ctx->completion_lock); - __io_put_kbuf_list(req, len, &ctx->io_buffers_comp); - spin_unlock(&ctx->completion_lock); - } else { - lockdep_assert_held(&req->ctx->uring_lock); - - __io_put_kbuf_list(req, len, &req->ctx->io_buffers_cache); - } + spin_lock(&req->ctx->completion_lock); + __io_put_kbuf_list(req, len); + spin_unlock(&req->ctx->completion_lock); } static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len, diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 310f94a0727a..1f2877064829 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -160,14 +160,13 @@ static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr) return ret; } -static inline void __io_put_kbuf_list(struct io_kiocb *req, int len, - struct list_head *list) +static inline void __io_put_kbuf_list(struct io_kiocb *req, int len) { if (req->flags & REQ_F_BUFFER_RING) { __io_put_kbuf_ring(req, len, 1); } else { req->buf_index = req->kbuf->bgid; - list_add(&req->kbuf->list, list); + list_add(&req->kbuf->list, &req->ctx->io_buffers_comp); req->flags &= ~REQ_F_BUFFER_SELECTED; } } @@ -179,7 +178,7 @@ static inline void io_kbuf_drop(struct io_kiocb *req) spin_lock(&req->ctx->completion_lock); /* len == 0 is fine here, non-ring will always drop all of it */ - __io_put_kbuf_list(req, 0, &req->ctx->io_buffers_comp); + __io_put_kbuf_list(req, 0); spin_unlock(&req->ctx->completion_lock); } -- 2.51.0 From 13ee854e7c04236a47a5beaacdcf51eb0bc7a8fa Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 5 Feb 2025 11:36:46 +0000 Subject: [PATCH 05/16] io_uring/kbuf: remove legacy kbuf caching Remove all struct io_buffer caches. It makes it a fair bit simpler. Apart from from killing a bunch of lines and juggling between lists, __io_put_kbuf_list() doesn't need ->completion_lock locking now. 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/18287217466ee2576ea0b1e72daccf7b22c7e856.1738724373.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 -- io_uring/io_uring.c | 2 -- io_uring/kbuf.c | 57 +++++----------------------------- io_uring/kbuf.h | 5 ++- 4 files changed, 9 insertions(+), 58 deletions(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 3def525a1da3..e2fef264ff8b 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -360,7 +360,6 @@ struct io_ring_ctx { spinlock_t completion_lock; - struct list_head io_buffers_comp; struct list_head cq_overflow_list; struct hlist_head waitid_list; @@ -379,8 +378,6 @@ struct io_ring_ctx { unsigned int file_alloc_start; unsigned int file_alloc_end; - struct list_head io_buffers_cache; - /* Keep this last, we don't need it for the fast path */ struct wait_queue_head poll_wq; struct io_restriction restrictions; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ed7c9081352a..969caaccce9d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -323,7 +323,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) init_waitqueue_head(&ctx->sqo_sq_wait); INIT_LIST_HEAD(&ctx->sqd_list); INIT_LIST_HEAD(&ctx->cq_overflow_list); - INIT_LIST_HEAD(&ctx->io_buffers_cache); ret = io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX, sizeof(struct async_poll), 0); ret |= io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX, @@ -348,7 +347,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) spin_lock_init(&ctx->completion_lock); raw_spin_lock_init(&ctx->timeout_lock); INIT_WQ_LIST(&ctx->iopoll_list); - INIT_LIST_HEAD(&ctx->io_buffers_comp); INIT_LIST_HEAD(&ctx->defer_list); INIT_LIST_HEAD(&ctx->timeout_list); INIT_LIST_HEAD(&ctx->ltimeout_list); diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 3a43af9f7061..caf5b9bb2aec 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -70,9 +70,7 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags) { - spin_lock(&req->ctx->completion_lock); __io_put_kbuf_list(req, len); - spin_unlock(&req->ctx->completion_lock); } static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len, @@ -345,7 +343,9 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *nxt; nxt = list_first_entry(&bl->buf_list, struct io_buffer, list); - list_move(&nxt->list, &ctx->io_buffers_cache); + list_del(&nxt->list); + kfree(nxt); + if (++i == nbufs) return i; cond_resched(); @@ -363,8 +363,6 @@ static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) void io_destroy_buffers(struct io_ring_ctx *ctx) { struct io_buffer_list *bl; - struct list_head *item, *tmp; - struct io_buffer *buf; while (1) { unsigned long index = 0; @@ -378,19 +376,6 @@ void io_destroy_buffers(struct io_ring_ctx *ctx) break; io_put_bl(ctx, bl); } - - /* - * Move deferred locked entries to cache before pruning - */ - spin_lock(&ctx->completion_lock); - if (!list_empty(&ctx->io_buffers_comp)) - list_splice_init(&ctx->io_buffers_comp, &ctx->io_buffers_cache); - spin_unlock(&ctx->completion_lock); - - list_for_each_safe(item, tmp, &ctx->io_buffers_cache) { - buf = list_entry(item, struct io_buffer, list); - kfree(buf); - } } static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) @@ -479,33 +464,6 @@ 
int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe return 0; } -static int io_refill_buffer_cache(struct io_ring_ctx *ctx) -{ - struct io_buffer *buf; - - /* - * Completions that don't happen inline (eg not under uring_lock) will - * add to ->io_buffers_comp. If we don't have any free buffers, check - * the completion list and splice those entries first. - */ - if (!list_empty_careful(&ctx->io_buffers_comp)) { - spin_lock(&ctx->completion_lock); - if (!list_empty(&ctx->io_buffers_comp)) { - list_splice_init(&ctx->io_buffers_comp, - &ctx->io_buffers_cache); - spin_unlock(&ctx->completion_lock); - return 0; - } - spin_unlock(&ctx->completion_lock); - } - - buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); - if (!buf) - return -ENOMEM; - list_add_tail(&buf->list, &ctx->io_buffers_cache); - return 0; -} - static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf, struct io_buffer_list *bl) { @@ -514,12 +472,11 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf, int i, bid = pbuf->bid; for (i = 0; i < pbuf->nbufs; i++) { - if (list_empty(&ctx->io_buffers_cache) && - io_refill_buffer_cache(ctx)) + buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); + if (!buf) break; - buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer, - list); - list_move_tail(&buf->list, &bl->buf_list); + + list_add_tail(&buf->list, &bl->buf_list); buf->addr = addr; buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT); buf->bid = bid; diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 1f2877064829..c0b9636c5c4a 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -166,8 +166,9 @@ static inline void __io_put_kbuf_list(struct io_kiocb *req, int len) __io_put_kbuf_ring(req, len, 1); } else { req->buf_index = req->kbuf->bgid; - list_add(&req->kbuf->list, &req->ctx->io_buffers_comp); req->flags &= ~REQ_F_BUFFER_SELECTED; + kfree(req->kbuf); + req->kbuf = NULL; } } @@ -176,10 +177,8 @@ static inline void io_kbuf_drop(struct io_kiocb *req) if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))) return; - spin_lock(&req->ctx->completion_lock); /* len == 0 is fine here, non-ring will always drop all of it */ __io_put_kbuf_list(req, 0); - spin_unlock(&req->ctx->completion_lock); } static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len, -- 2.51.0 From e150e70fce425e1cdfc227974893cad9fb90a0d3 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 5 Feb 2025 11:36:47 +0000 Subject: [PATCH 06/16] io_uring/kbuf: open code __io_put_kbuf() __io_put_kbuf() is a trivial wrapper, open code it into __io_put_kbufs(). 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/9dc17380272b48d56c95992c6f9eaacd5546e1d3.1738724373.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 5 ----- io_uring/kbuf.h | 4 +--- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index caf5b9bb2aec..d612e4c15b0e 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -68,11 +68,6 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) return true; } -void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags) -{ - __io_put_kbuf_list(req, len); -} - static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len, struct io_buffer_list *bl) { diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index c0b9636c5c4a..055b7a672f2e 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -74,8 +74,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg); int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg); int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg); -void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags); - bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx, @@ -194,7 +192,7 @@ static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len, if (!__io_put_kbuf_ring(req, len, nbufs)) ret |= IORING_CQE_F_BUF_MORE; } else { - __io_put_kbuf(req, len, issue_flags); + __io_put_kbuf_list(req, len); } return ret; } -- 2.51.0 From 54e00d9a612ab93f37f612a5ccd7c0c4f8a31cea Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 5 Feb 2025 11:36:48 +0000 Subject: [PATCH 07/16] io_uring/kbuf: introduce io_kbuf_drop_legacy() io_kbuf_drop() is only used for legacy provided buffers, and so __io_put_kbuf_list() is never called for REQ_F_BUFFER_RING. Remove the dead branch out of __io_put_kbuf_list(), rename it into io_kbuf_drop_legacy() and use it directly instead of io_kbuf_drop(). 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/c8cc73e2272f09a86ecbdad9ebdd8304f8e583c0.1738724373.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 2 +- io_uring/kbuf.c | 10 ++++++++++ io_uring/kbuf.h | 24 ++---------------------- 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 969caaccce9d..ec98a0ec6f34 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -397,7 +397,7 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq) static void io_clean_op(struct io_kiocb *req) { if (unlikely(req->flags & REQ_F_BUFFER_SELECTED)) - io_kbuf_drop(req); + io_kbuf_drop_legacy(req); if (req->flags & REQ_F_NEED_CLEANUP) { const struct io_cold_def *def = &io_cold_defs[req->opcode]; diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index d612e4c15b0e..815fb58da3ba 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -50,6 +50,16 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx, return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL)); } +void io_kbuf_drop_legacy(struct io_kiocb *req) +{ + if (WARN_ON_ONCE(!(req->flags & REQ_F_BUFFER_SELECTED))) + return; + req->buf_index = req->kbuf->bgid; + req->flags &= ~REQ_F_BUFFER_SELECTED; + kfree(req->kbuf); + req->kbuf = NULL; +} + bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) { struct io_ring_ctx *ctx = req->ctx; diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 055b7a672f2e..3e18c916afc6 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -75,6 +75,7 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg); int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg); bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); +void io_kbuf_drop_legacy(struct io_kiocb *req); struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx, unsigned int bgid); @@ -158,27 +159,6 @@ static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr) return ret; } -static inline void __io_put_kbuf_list(struct io_kiocb *req, int len) -{ - if (req->flags & REQ_F_BUFFER_RING) { - __io_put_kbuf_ring(req, len, 1); - } else { - req->buf_index = req->kbuf->bgid; - req->flags &= ~REQ_F_BUFFER_SELECTED; - kfree(req->kbuf); - req->kbuf = NULL; - } -} - -static inline void io_kbuf_drop(struct io_kiocb *req) -{ - if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))) - return; - - /* len == 0 is fine here, non-ring will always drop all of it */ - __io_put_kbuf_list(req, 0); -} - static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs, unsigned issue_flags) { @@ -192,7 +172,7 @@ static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len, if (!__io_put_kbuf_ring(req, len, nbufs)) ret |= IORING_CQE_F_BUF_MORE; } else { - __io_put_kbuf_list(req, len); + io_kbuf_drop_legacy(req); } return ret; } -- 2.51.0 From 5d3e51240d89678b87b5dc6987ea572048a0f0eb Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 5 Feb 2025 11:36:49 +0000 Subject: [PATCH 08/16] io_uring/kbuf: uninline __io_put_kbufs __io_put_kbufs() and other helper functions are too large to be inlined, compilers would normally refuse to do so. Uninline it and move together with io_kbuf_commit into kbuf.c. 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/3dade7f55ad590e811aff83b1ec55c9c04e17b2b.1738724373.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 60 +++++++++++++++++++++++++++++++++++++++ io_uring/kbuf.h | 74 +++++++------------------------------------ 2 files changed, 70 insertions(+), 64 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 815fb58da3ba..3478be6d02ab 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -20,6 +20,9 @@ /* BIDs are addressed by a 16-bit field in a CQE */ #define MAX_BIDS_PER_BGID (1 << 16) +/* Mapped buffer ring, return io_uring_buf from head */ +#define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)] + struct io_provide_buf { struct file *file; __u64 addr; @@ -29,6 +32,34 @@ struct io_provide_buf { __u16 bid; }; +bool io_kbuf_commit(struct io_kiocb *req, + struct io_buffer_list *bl, int len, int nr) +{ + if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT))) + return true; + + req->flags &= ~REQ_F_BUFFERS_COMMIT; + + if (unlikely(len < 0)) + return true; + + if (bl->flags & IOBL_INC) { + struct io_uring_buf *buf; + + buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask); + if (WARN_ON_ONCE(len > buf->len)) + len = buf->len; + buf->len -= len; + if (buf->len) { + buf->addr += len; + return false; + } + } + + bl->head += nr; + return true; +} + static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, unsigned int bgid) { @@ -323,6 +354,35 @@ int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg) return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs); } +static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr) +{ + struct io_buffer_list *bl = req->buf_list; + bool ret = true; + + if (bl) { + ret = io_kbuf_commit(req, bl, len, nr); + req->buf_index = bl->bgid; + } + req->flags &= ~REQ_F_BUFFER_RING; + return ret; +} + +unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs) +{ + unsigned int ret; + + ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT); + + if (unlikely(!(req->flags & REQ_F_BUFFER_RING))) { + io_kbuf_drop_legacy(req); + return ret; + } + + if (!__io_put_kbuf_ring(req, len, nbufs)) + ret |= IORING_CQE_F_BUF_MORE; + return ret; +} + static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer_list *bl, unsigned nbufs) { diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 3e18c916afc6..2ec0b983ce24 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -77,6 +77,10 @@ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg); bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); void io_kbuf_drop_legacy(struct io_kiocb *req); +unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs); +bool io_kbuf_commit(struct io_kiocb *req, + struct io_buffer_list *bl, int len, int nr); + struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx, unsigned int bgid); @@ -115,77 +119,19 @@ static inline bool io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) return false; } -/* Mapped buffer ring, return io_uring_buf from head */ -#define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)] - -static inline bool io_kbuf_commit(struct io_kiocb *req, - struct io_buffer_list *bl, int len, int nr) -{ - if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT))) - return true; - - req->flags &= ~REQ_F_BUFFERS_COMMIT; - - if (unlikely(len < 0)) - return true; - - 
if (bl->flags & IOBL_INC) { - struct io_uring_buf *buf; - - buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask); - if (WARN_ON_ONCE(len > buf->len)) - len = buf->len; - buf->len -= len; - if (buf->len) { - buf->addr += len; - return false; - } - } - - bl->head += nr; - return true; -} - -static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr) -{ - struct io_buffer_list *bl = req->buf_list; - bool ret = true; - - if (bl) { - ret = io_kbuf_commit(req, bl, len, nr); - req->buf_index = bl->bgid; - } - req->flags &= ~REQ_F_BUFFER_RING; - return ret; -} - -static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len, - int nbufs, unsigned issue_flags) -{ - unsigned int ret; - - if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED))) - return 0; - - ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT); - if (req->flags & REQ_F_BUFFER_RING) { - if (!__io_put_kbuf_ring(req, len, nbufs)) - ret |= IORING_CQE_F_BUF_MORE; - } else { - io_kbuf_drop_legacy(req); - } - return ret; -} - static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags) { - return __io_put_kbufs(req, len, 1, issue_flags); + if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED))) + return 0; + return __io_put_kbufs(req, len, 1); } static inline unsigned int io_put_kbufs(struct io_kiocb *req, int len, int nbufs, unsigned issue_flags) { - return __io_put_kbufs(req, len, nbufs, issue_flags); + if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED))) + return 0; + return __io_put_kbufs(req, len, nbufs); } #endif -- 2.51.0 From 1533376b131f5d76f8739e89efc78c4687d96bd3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Feb 2025 12:48:56 -0700 Subject: [PATCH 09/16] io_uring/cancel: add generic remove_all helper Any opcode that is cancelable ends up defining its own remove all helper, which iterates the pending list and cancels matches. Add a generic helper for it, which can be used by them. 
Signed-off-by: Jens Axboe --- io_uring/cancel.c | 21 +++++++++++++++++++++ io_uring/cancel.h | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/io_uring/cancel.c b/io_uring/cancel.c index 484193567839..4feacc57be63 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -341,3 +341,24 @@ out: fput(file); return ret; } + +bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx, + struct hlist_head *list, bool cancel_all, + bool (*cancel)(struct io_kiocb *)) +{ + struct hlist_node *tmp; + struct io_kiocb *req; + bool found = false; + + lockdep_assert_held(&ctx->uring_lock); + + hlist_for_each_entry_safe(req, tmp, list, hash_node) { + if (!io_match_task_safe(req, tctx, cancel_all)) + continue; + hlist_del_init(&req->hash_node); + if (cancel(req)) + found = true; + } + + return found; +} diff --git a/io_uring/cancel.h b/io_uring/cancel.h index bbfea2cd00ea..80734a0a2b26 100644 --- a/io_uring/cancel.h +++ b/io_uring/cancel.h @@ -24,6 +24,10 @@ int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd, int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg); bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd); +bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx, + struct hlist_head *list, bool cancel_all, + bool (*cancel)(struct io_kiocb *)); + static inline bool io_cancel_match_sequence(struct io_kiocb *req, int sequence) { if (req->cancel_seq_set && sequence == req->work.cancel_seq) -- 2.51.0 From e855b9138470da9c1d2fe340acf653bd2af03922 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Feb 2025 12:51:26 -0700 Subject: [PATCH 10/16] io_uring/futex: convert to io_cancel_remove_all() Use the generic helper for cancelations. Signed-off-by: Jens Axboe --- io_uring/futex.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/io_uring/futex.c b/io_uring/futex.c index 43e2143255f5..47b8c229a2e3 100644 --- a/io_uring/futex.c +++ b/io_uring/futex.c @@ -90,7 +90,7 @@ static bool io_futexv_claim(struct io_futex *iof) return true; } -static bool __io_futex_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) +static bool __io_futex_cancel(struct io_kiocb *req) { /* futex wake already done or in progress */ if (req->opcode == IORING_OP_FUTEX_WAIT) { @@ -128,7 +128,7 @@ int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, if (req->cqe.user_data != cd->data && !(cd->flags & IORING_ASYNC_CANCEL_ANY)) continue; - if (__io_futex_cancel(ctx, req)) + if (__io_futex_cancel(req)) nr++; if (!(cd->flags & IORING_ASYNC_CANCEL_ALL)) break; @@ -144,21 +144,7 @@ int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, bool io_futex_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx, bool cancel_all) { - struct hlist_node *tmp; - struct io_kiocb *req; - bool found = false; - - lockdep_assert_held(&ctx->uring_lock); - - hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) { - if (!io_match_task_safe(req, tctx, cancel_all)) - continue; - hlist_del_init(&req->hash_node); - __io_futex_cancel(ctx, req); - found = true; - } - - return found; + return io_cancel_remove_all(ctx, tctx, &ctx->futex_list, cancel_all, __io_futex_cancel); } int io_futex_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -- 2.51.0 From 7d9944f5061e49cab5ee0e1c9507c2e8f94d41b8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Feb 2025 12:52:46 -0700 Subject: [PATCH 11/16] io_uring/waitid: convert to io_cancel_remove_all() Use the generic helper for 
cancelations. Signed-off-by: Jens Axboe --- io_uring/waitid.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/io_uring/waitid.c b/io_uring/waitid.c index 15a7daf3ff4f..87d19710d68a 100644 --- a/io_uring/waitid.c +++ b/io_uring/waitid.c @@ -132,7 +132,7 @@ static void io_waitid_complete(struct io_kiocb *req, int ret) io_req_set_res(req, ret, 0); } -static bool __io_waitid_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) +static bool __io_waitid_cancel(struct io_kiocb *req) { struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); struct io_waitid_async *iwa = req->async_data; @@ -170,7 +170,7 @@ int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, if (req->cqe.user_data != cd->data && !(cd->flags & IORING_ASYNC_CANCEL_ANY)) continue; - if (__io_waitid_cancel(ctx, req)) + if (__io_waitid_cancel(req)) nr++; if (!(cd->flags & IORING_ASYNC_CANCEL_ALL)) break; @@ -186,21 +186,7 @@ int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx, bool cancel_all) { - struct hlist_node *tmp; - struct io_kiocb *req; - bool found = false; - - lockdep_assert_held(&ctx->uring_lock); - - hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) { - if (!io_match_task_safe(req, tctx, cancel_all)) - continue; - hlist_del_init(&req->hash_node); - __io_waitid_cancel(ctx, req); - found = true; - } - - return found; + return io_cancel_remove_all(ctx, tctx, &ctx->waitid_list, cancel_all, __io_waitid_cancel); } static inline bool io_waitid_drop_issue_ref(struct io_kiocb *req) -- 2.51.0 From 8fa374f90b721ae5f56605ff67fc94a8b583e10c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Feb 2025 13:13:58 -0700 Subject: [PATCH 12/16] io_uring/cancel: add generic cancel helper Any opcode that is cancelable ends up defining its own cancel helper for finding and canceling a specific request. Add a generic helper that can be used for this purpose. 
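Combined with io_cancel_remove_all() from the earlier patch, a cancelable opcode now only has to provide the per-request cancel callback; both of its cancel hooks collapse to one-liners. A hypothetical sketch (the "foo" opcode, ctx->foo_list and the helpers below are invented for illustration and are not part of this series):

static bool __io_foo_cancel(struct io_kiocb *req)
{
	/* opcode-specific teardown would go here; if the request is already
	 * completing, return false instead of touching it */
	hlist_del_init(&req->hash_node);
	io_req_task_queue_fail(req, -ECANCELED);
	return true;
}

int io_foo_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		  unsigned int issue_flags)
{
	/* targeted cancel: the generic walk matches user_data/ANY/ALL flags */
	return io_cancel_remove(ctx, cd, issue_flags, &ctx->foo_list,
				__io_foo_cancel);
}

bool io_foo_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
		       bool cancel_all)
{
	/* ring/task teardown: cancel every matching pending request */
	return io_cancel_remove_all(ctx, tctx, &ctx->foo_list, cancel_all,
				    __io_foo_cancel);
}

The futex and waitid conversions in the surrounding patches follow exactly this shape.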
Signed-off-by: Jens Axboe --- io_uring/cancel.c | 21 +++++++++++++++++++++ io_uring/cancel.h | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/io_uring/cancel.c b/io_uring/cancel.c index 4feacc57be63..0870060bac7c 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -362,3 +362,24 @@ bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx, return found; } + +int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd, + unsigned int issue_flags, struct hlist_head *list, + bool (*cancel)(struct io_kiocb *)) +{ + struct hlist_node *tmp; + struct io_kiocb *req; + int nr = 0; + + io_ring_submit_lock(ctx, issue_flags); + hlist_for_each_entry_safe(req, tmp, list, hash_node) { + if (!io_cancel_req_match(req, cd)) + continue; + if (cancel(req)) + nr++; + if (!(cd->flags & IORING_ASYNC_CANCEL_ALL)) + break; + } + io_ring_submit_unlock(ctx, issue_flags); + return nr ?: -ENOENT; +} diff --git a/io_uring/cancel.h b/io_uring/cancel.h index 80734a0a2b26..43e9bb74e9d1 100644 --- a/io_uring/cancel.h +++ b/io_uring/cancel.h @@ -28,6 +28,10 @@ bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx, struct hlist_head *list, bool cancel_all, bool (*cancel)(struct io_kiocb *)); +int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd, + unsigned int issue_flags, struct hlist_head *list, + bool (*cancel)(struct io_kiocb *)); + static inline bool io_cancel_match_sequence(struct io_kiocb *req, int sequence) { if (req->cancel_seq_set && sequence == req->work.cancel_seq) -- 2.51.0 From 2eaa2fac4704d59f2fd07e496114c39303e54a18 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Feb 2025 13:15:57 -0700 Subject: [PATCH 13/16] io_uring/futex: use generic io_cancel_remove() helper Don't implement our own loop rolling and checking, just use the generic helper to find and cancel requests. Signed-off-by: Jens Axboe --- io_uring/futex.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/io_uring/futex.c b/io_uring/futex.c index 47b8c229a2e3..ede6279cadc6 100644 --- a/io_uring/futex.c +++ b/io_uring/futex.c @@ -116,29 +116,7 @@ static bool __io_futex_cancel(struct io_kiocb *req) int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, unsigned int issue_flags) { - struct hlist_node *tmp; - struct io_kiocb *req; - int nr = 0; - - if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED)) - return -ENOENT; - - io_ring_submit_lock(ctx, issue_flags); - hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) { - if (req->cqe.user_data != cd->data && - !(cd->flags & IORING_ASYNC_CANCEL_ANY)) - continue; - if (__io_futex_cancel(req)) - nr++; - if (!(cd->flags & IORING_ASYNC_CANCEL_ALL)) - break; - } - io_ring_submit_unlock(ctx, issue_flags); - - if (nr) - return nr; - - return -ENOENT; + return io_cancel_remove(ctx, cd, issue_flags, &ctx->futex_list, __io_futex_cancel); } bool io_futex_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx, -- 2.51.0 From 932de5e35fda2844aa258c640d198dbbce74bb8f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Feb 2025 13:16:29 -0700 Subject: [PATCH 14/16] io_uring/waitid: use generic io_cancel_remove() helper Don't implement our own loop rolling and checking, just use the generic helper to find and cancel requests. 
Signed-off-by: Jens Axboe --- io_uring/waitid.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/io_uring/waitid.c b/io_uring/waitid.c index 87d19710d68a..5c443e5f6d92 100644 --- a/io_uring/waitid.c +++ b/io_uring/waitid.c @@ -158,29 +158,7 @@ static bool __io_waitid_cancel(struct io_kiocb *req) int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, unsigned int issue_flags) { - struct hlist_node *tmp; - struct io_kiocb *req; - int nr = 0; - - if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED)) - return -ENOENT; - - io_ring_submit_lock(ctx, issue_flags); - hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) { - if (req->cqe.user_data != cd->data && - !(cd->flags & IORING_ASYNC_CANCEL_ANY)) - continue; - if (__io_waitid_cancel(req)) - nr++; - if (!(cd->flags & IORING_ASYNC_CANCEL_ALL)) - break; - } - io_ring_submit_unlock(ctx, issue_flags); - - if (nr) - return nr; - - return -ENOENT; + return io_cancel_remove(ctx, cd, issue_flags, &ctx->waitid_list, __io_waitid_cancel); } bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx, -- 2.51.0 From 7c71a0af81ba72de9b2c501065e4e718aba9a271 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 8 Feb 2025 10:50:34 -0700 Subject: [PATCH 15/16] io_uring/net: improve recv bundles Current recv bundles are only supported for multishot receives, and additionally they also always post at least 2 CQEs if more data is available than what a buffer will hold. This happens because the initial bundle recv will do a single buffer, and then do the rest of what is in the socket as a followup receive. As shown in a test program, if 1k buffers are available and 32k is available to receive in the socket, you'd get the following completions: bundle=1, mshot=0 cqe res 1024 cqe res 1024 [...] cqe res 1024 bundle=1, mshot=1 cqe res 1024 cqe res 31744 where bundle=1 && mshot=0 will post 32 1k completions, and bundle=1 && mshot=1 will post a 1k completion and then a 31k completion. To support bundle recv without multishot, it's possible to simply retry the recv immediately and post a single completion, rather than split it into two completions. With the below patch, the same test looks as follows: bundle=1, mshot=0 cqe res 32768 bundle=1, mshot=1 cqe res 32768 where mshot=0 works fine for bundles, and both of them post just a single 32k completion rather than split it into separate completions. Posting fewer completions is always a nice win, and not needing multishot for proper bundle efficiency is nice for cases that can't necessarily use multishot. 
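A minimal userspace sketch of the case this patch improves: a single-shot (non-multishot) bundle recv against a provided buffer ring, which with this change completes with one CQE whose res covers all bytes pulled from the ring. liburing is assumed; BGID, buffer count and size are arbitrary:

#include <liburing.h>

#define BGID		1
#define NR_BUFS		32	/* must be a power of two */
#define BUF_SIZE	1024

static int recv_bundle(struct io_uring *ring, int sockfd)
{
	static char bufs[NR_BUFS][BUF_SIZE];
	struct io_uring_buf_ring *br;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int i, ret;

	br = io_uring_setup_buf_ring(ring, NR_BUFS, BGID, 0, &ret);
	if (!br)
		return ret;
	for (i = 0; i < NR_BUFS; i++)
		io_uring_buf_ring_add(br, bufs[i], BUF_SIZE, i,
				      io_uring_buf_ring_mask(NR_BUFS), i);
	io_uring_buf_ring_advance(br, NR_BUFS);

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_recv(sqe, sockfd, NULL, 0, 0);
	sqe->flags |= IOSQE_BUFFER_SELECT;	/* select from group BGID */
	sqe->buf_group = BGID;
	sqe->ioprio |= IORING_RECVSEND_BUNDLE;	/* bundle into multiple bufs */

	io_uring_submit(ring);
	if (io_uring_wait_cqe(ring, &cqe))
		return -1;
	ret = cqe->res;	/* total bytes received across the bundled buffers */
	io_uring_cqe_seen(ring, cqe);
	return ret;
}

The starting buffer id still comes back in cqe->flags (IORING_CQE_F_BUFFER with the id in the upper bits); the received data then occupies that buffer and the following ones in bid order.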
Reported-by: Norman Maurer Link: https://lore.kernel.org/r/184f9f92-a682-4205-a15d-89e18f664502@kernel.dk Fixes: 2f9c9515bdfd ("io_uring/net: support bundles for recv") Signed-off-by: Jens Axboe --- io_uring/net.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/io_uring/net.c b/io_uring/net.c index 17852a6616ff..10344b3a6d89 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -76,6 +76,7 @@ struct io_sr_msg { /* initialised and used only by !msg send variants */ u16 buf_group; u16 buf_index; + bool retry; void __user *msg_control; /* used only for send zerocopy */ struct io_kiocb *notif; @@ -187,6 +188,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req, req->flags &= ~REQ_F_BL_EMPTY; sr->done_io = 0; + sr->retry = false; sr->len = 0; /* get from the provided buffer */ req->buf_index = sr->buf_group; } @@ -402,6 +404,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); sr->done_io = 0; + sr->retry = false; if (req->opcode != IORING_OP_SEND) { if (sqe->addr2 || sqe->file_index) @@ -785,6 +788,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); sr->done_io = 0; + sr->retry = false; if (unlikely(sqe->file_index || sqe->addr2)) return -EINVAL; @@ -833,6 +837,9 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return io_recvmsg_prep_setup(req); } +/* bits to clear in old and inherit in new cflags on bundle retry */ +#define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE) + /* * Finishes io_recv and io_recvmsg. * @@ -852,9 +859,19 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, if (sr->flags & IORING_RECVSEND_BUNDLE) { cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags); + if (sr->retry) + cflags = req->cqe.flags | (cflags & CQE_F_MASK); /* bundle with no more immediate buffers, we're done */ if (req->flags & REQ_F_BL_EMPTY) goto finish; + /* if more is available, retry and append to this one */ + if (!sr->retry && kmsg->msg.msg_inq > 0 && *ret > 0) { + req->cqe.flags = cflags & ~CQE_F_MASK; + sr->len = kmsg->msg.msg_inq; + sr->done_io += *ret; + sr->retry = true; + return false; + } } else { cflags |= io_put_kbuf(req, *ret, issue_flags); } @@ -1233,6 +1250,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_kiocb *notif; zc->done_io = 0; + zc->retry = false; req->flags |= REQ_F_POLL_NO_LAZY; if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3))) -- 2.51.0 From 0e8934724f78602635d6e11c97ef48caa693cb65 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Tue, 11 Feb 2025 13:19:56 -0700 Subject: [PATCH 16/16] io_uring: use IO_REQ_LINK_FLAGS more Replace the 2 instances of REQ_F_LINK | REQ_F_HARDLINK with the more commonly used IO_REQ_LINK_FLAGS. 
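For reference, REQ_F_LINK and REQ_F_HARDLINK are the kernel-side counterparts of the IOSQE_IO_LINK and IOSQE_IO_HARDLINK SQE flags. A minimal userspace sketch of a two-request link (liburing; the write-then-fsync pairing is just an example):

#include <liburing.h>

static int write_then_fsync(struct io_uring *ring, int fd,
			    const void *buf, unsigned len)
{
	struct io_uring_sqe *sqe;

	/* the fsync is only started once the linked write succeeds */
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_write(sqe, fd, buf, len, 0);
	sqe->flags |= IOSQE_IO_LINK;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_fsync(sqe, fd, 0);

	return io_uring_submit(ring);
}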
Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250211202002.3316324-1-csander@purestorage.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ec98a0ec6f34..8bb8c099c3e1 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -110,11 +110,13 @@ #define SQE_VALID_FLAGS (SQE_COMMON_FLAGS | IOSQE_BUFFER_SELECT | \ IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS) +#define IO_REQ_LINK_FLAGS (REQ_F_LINK | REQ_F_HARDLINK) + #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \ REQ_F_ASYNC_DATA) -#define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\ +#define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | IO_REQ_LINK_FLAGS | \ REQ_F_REISSUE | IO_REQ_CLEAN_FLAGS) #define IO_TCTX_REFS_CACHE_NR (1U << 10) @@ -131,7 +133,6 @@ struct io_defer_entry { /* requests with any of those set should undergo io_disarm_next() */ #define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL) -#define IO_REQ_LINK_FLAGS (REQ_F_LINK | REQ_F_HARDLINK) /* * No waiters. It's larger than any valid value of the tw counter @@ -1157,7 +1158,7 @@ static inline void io_req_local_work_add(struct io_kiocb *req, * We don't know how many reuqests is there in the link and whether * they can even be queued lazily, fall back to non-lazy. */ - if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) + if (req->flags & IO_REQ_LINK_FLAGS) flags &= ~IOU_F_TWQ_LAZY_WAKE; guard(rcu)(); -- 2.51.0