From 7e2fe01a69f6be3e284b38cfd2e4e0598a3b0a8f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:11 +0800 Subject: [PATCH 01/16] ublk: comment on ubq->canceling handling in ublk_queue_rq() In ublk_queue_rq(), ubq->canceling has to be handled after ->fail_io and ->force_abort are dealt with, otherwise the request may not be failed when deleting disk. Add comment on this usage. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index fbcb7c2ff851..5b0c885dc38f 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1310,6 +1310,11 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) return BLK_STS_IOERR; + /* + * ->canceling has to be handled after ->force_abort and ->fail_io + * is dealt with, otherwise this request may not be failed in case + * of recovery, and cause hang when deleting disk + */ if (unlikely(ubq->canceling)) { __ublk_abort_rq(ubq, rq); return BLK_STS_OK; -- 2.51.0 From 705b80841eda212df79a43371f5ccb3bcadbb893 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:12 +0800 Subject: [PATCH 02/16] ublk: remove two unused fields from 'struct ublk_queue' Remove two unused fields(`io_addr` & `max_io_sz`) from `struct ublk_queue`. 
Reviewed-by: Caleb Sander Mateos Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 5b0c885dc38f..b60f4bd647a1 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -143,8 +143,6 @@ struct ublk_queue { struct task_struct *ubq_daemon; char *io_cmd_buf; - unsigned long io_addr; /* mapped vm address */ - unsigned int max_io_sz; bool force_abort; bool timeout; bool canceling; -- 2.51.0 From 1d781c0de08c0b35948ad4aaf609a4cc9995d9f6 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:13 +0800 Subject: [PATCH 03/16] ublk: add helper of ublk_need_map_io() ublk_need_map_io() is more readable. Reviewed-by: Caleb Sander Mateos Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index b60f4bd647a1..200504dd67a9 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -596,6 +596,11 @@ static inline bool ublk_support_user_copy(const struct ublk_queue *ubq) return ubq->flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY); } +static inline bool ublk_need_map_io(const struct ublk_queue *ubq) +{ + return !ublk_support_user_copy(ubq); +} + static inline bool ublk_need_req_ref(const struct ublk_queue *ubq) { /* @@ -923,7 +928,7 @@ static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req, { const unsigned int rq_bytes = blk_rq_bytes(req); - if (ublk_support_user_copy(ubq)) + if (!ublk_need_map_io(ubq)) return rq_bytes; /* @@ -947,7 +952,7 @@ static int ublk_unmap_io(const struct ublk_queue *ubq, { const unsigned int rq_bytes = blk_rq_bytes(req); - if 
(ublk_support_user_copy(ubq)) + if (!ublk_need_map_io(ubq)) return rq_bytes; if (ublk_need_unmap_req(req)) { @@ -1867,7 +1872,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) goto out; - if (!ublk_support_user_copy(ubq)) { + if (ublk_need_map_io(ubq)) { /* * FETCH_RQ has to provide IO buffer if NEED GET * DATA is not enabled @@ -1889,7 +1894,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) goto out; - if (!ublk_support_user_copy(ubq)) { + if (ublk_need_map_io(ubq)) { /* * COMMIT_AND_FETCH_REQ has to provide IO buffer if * NEED GET DATA is not enabled or it is Read IO. -- 2.51.0 From b460f328e257db6af0d127fc8a2437f64ad01d3a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:14 +0800 Subject: [PATCH 04/16] ublk: call io_uring_cmd_to_pdu to get uring_cmd pdu Call io_uring_cmd_to_pdu() to get uring_cmd pdu, and one big benefit is the automatic pdu size build check. 
Suggested-by: Uday Shankar Signed-off-by: Ming Lei Reviewed-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250327095123.179113-6-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 200504dd67a9..1e11816d0b90 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1040,7 +1040,7 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req) static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu( struct io_uring_cmd *ioucmd) { - return (struct ublk_uring_cmd_pdu *)&ioucmd->pdu; + return io_uring_cmd_to_pdu(ioucmd, struct ublk_uring_cmd_pdu); } static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq) -- 2.51.0 From ebf695f129367ed4b26df6baec2ea7fc50c9e6f0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:15 +0800 Subject: [PATCH 05/16] ublk: add segment parameter IO split is usually bad in the io_uring world: it causes -EAGAIN, and IO handling may have to fall back to io-wq, which hurts performance. ublk recently started to support zero copy; to avoid unnecessary IO split, the ublk driver's segment limit should be aligned with the backend device's segment limit. Another reason is that io_buffer_register_bvec() needs to allocate bvecs, whose number is aligned with the ublk request's segment number, so a big memory allocation can be avoided by setting a reasonable max_segments limit. So add a segment parameter to give the ublk server a chance to align the segment limit with the backend, and to keep it reasonable from the implementation's viewpoint. 
Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-7-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 20 +++++++++++++++++++- include/uapi/linux/ublk_cmd.h | 25 +++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 1e11816d0b90..a5bcf3aa9d8c 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -74,7 +74,7 @@ #define UBLK_PARAM_TYPE_ALL \ (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \ UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED | \ - UBLK_PARAM_TYPE_DMA_ALIGN) + UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT) struct ublk_rq_data { struct kref ref; @@ -580,6 +580,18 @@ static int ublk_validate_params(const struct ublk_device *ub) return -EINVAL; } + if (ub->params.types & UBLK_PARAM_TYPE_SEGMENT) { + const struct ublk_param_segment *p = &ub->params.seg; + + if (!is_power_of_2(p->seg_boundary_mask + 1)) + return -EINVAL; + + if (p->seg_boundary_mask + 1 < UBLK_MIN_SEGMENT_SIZE) + return -EINVAL; + if (p->max_segment_size < UBLK_MIN_SEGMENT_SIZE) + return -EINVAL; + } + return 0; } @@ -2370,6 +2382,12 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) if (ub->params.types & UBLK_PARAM_TYPE_DMA_ALIGN) lim.dma_alignment = ub->params.dma.alignment; + if (ub->params.types & UBLK_PARAM_TYPE_SEGMENT) { + lim.seg_boundary_mask = ub->params.seg.seg_boundary_mask; + lim.max_segment_size = ub->params.seg.max_segment_size; + lim.max_segments = ub->params.seg.max_segments; + } + if (wait_for_completion_interruptible(&ub->completion) != 0) return -EINTR; diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 7255b36b5cf6..583b86681c93 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -410,6 +410,29 @@ struct ublk_param_dma_align { __u8 pad[4]; }; +#define UBLK_MIN_SEGMENT_SIZE 4096 +/* + * If any one of the three 
segment parameter is set as 0, the behavior is + * undefined. + */ +struct ublk_param_segment { + /* + * seg_boundary_mask + 1 needs to be power_of_2(), and the sum has + * to be >= UBLK_MIN_SEGMENT_SIZE(4096) + */ + __u64 seg_boundary_mask; + + /* + * max_segment_size could be override by virt_boundary_mask, so be + * careful when setting both. + * + * max_segment_size has to be >= UBLK_MIN_SEGMENT_SIZE(4096) + */ + __u32 max_segment_size; + __u16 max_segments; + __u8 pad[2]; +}; + struct ublk_params { /* * Total length of parameters, userspace has to set 'len' for both @@ -423,6 +446,7 @@ struct ublk_params { #define UBLK_PARAM_TYPE_DEVT (1 << 2) #define UBLK_PARAM_TYPE_ZONED (1 << 3) #define UBLK_PARAM_TYPE_DMA_ALIGN (1 << 4) +#define UBLK_PARAM_TYPE_SEGMENT (1 << 5) __u32 types; /* types of parameter included */ struct ublk_param_basic basic; @@ -430,6 +454,7 @@ struct ublk_params { struct ublk_param_devt devt; struct ublk_param_zoned zoned; struct ublk_param_dma_align dma; + struct ublk_param_segment seg; }; #endif -- 2.51.0 From 17970209167d521da2f48d45a4242a57fd39d223 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:16 +0800 Subject: [PATCH 06/16] ublk: document zero copy feature Add words to explain how zero copy feature works, and why it has to be trusted for handling IO read command. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-8-ming.lei@redhat.com Signed-off-by: Jens Axboe --- Documentation/block/ublk.rst | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst index 1e0e7358e14a..74c57488dc9a 100644 --- a/Documentation/block/ublk.rst +++ b/Documentation/block/ublk.rst @@ -309,18 +309,35 @@ with specified IO tag in the command data: ``UBLK_IO_COMMIT_AND_FETCH_REQ`` to the server, ublkdrv needs to copy the server buffer (pages) read to the IO request pages. 
-Future development -================== - Zero copy --------- -Zero copy is a generic requirement for nbd, fuse or similar drivers. A -problem [#xiaoguang]_ Xiaoguang mentioned is that pages mapped to userspace -can't be remapped any more in kernel with existing mm interfaces. This can -occurs when destining direct IO to ``/dev/ublkb*``. Also, he reported that -big requests (IO size >= 256 KB) may benefit a lot from zero copy. - +ublk zero copy relies on io_uring's fixed kernel buffer, which provides +two APIs: `io_buffer_register_bvec()` and `io_buffer_unregister_bvec`. + +ublk adds IO command of `UBLK_IO_REGISTER_IO_BUF` to call +`io_buffer_register_bvec()` for ublk server to register client request +buffer into io_uring buffer table, then ublk server can submit io_uring +IOs with the registered buffer index. IO command of `UBLK_IO_UNREGISTER_IO_BUF` +calls `io_buffer_unregister_bvec()` to unregister the buffer, which is +guaranteed to be live between calling `io_buffer_register_bvec()` and +`io_buffer_unregister_bvec()`. Any io_uring operation which supports this +kind of kernel buffer will grab one reference of the buffer until the +operation is completed. + +ublk server implementing zero copy or user copy has to be CAP_SYS_ADMIN and +be trusted, because it is ublk server's responsibility to make sure IO buffer +filled with data for handling read command, and ublk server has to return +correct result to ublk driver when handling READ command, and the result +has to match with how many bytes filled to the IO buffer. Otherwise, +uninitialized kernel IO buffer will be exposed to client application. + +ublk server needs to align the parameter of `struct ublk_param_dma_align` +with backend for zero copy to work correctly. + +For reaching best IO performance, ublk server should align its segment +parameter of `struct ublk_param_segment` with backend for avoiding +unnecessary IO split, which usually hurts io_uring performance. 
References ========== -- 2.51.0 From d796cea7b9f33b6315362f504b15fcc26d678493 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:17 +0800 Subject: [PATCH 07/16] ublk: implement ->queue_rqs() Implement ->queue_rqs() for improving perf in case of MQ. In this way, we just need to call io_uring_cmd_complete_in_task() once for whole IO batch, then both io_uring and ublk server can get exact batch from ublk frontend. Follows IOPS improvement: - tests tools/testing/selftests/ublk/kublk add -t null -q 2 [-z] fio/t/io_uring -p0 /dev/ublkb0 - results: more than 10% IOPS boost observed Pass all ublk selftests, especially the io dispatch order test. Cc: Uday Shankar Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-9-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 131 +++++++++++++++++++++++++++++++++------ 1 file changed, 111 insertions(+), 20 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index a5bcf3aa9d8c..f97919460515 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -81,6 +81,20 @@ struct ublk_rq_data { }; struct ublk_uring_cmd_pdu { + /* + * Store requests in same batch temporarily for queuing them to + * daemon context. 
+ * + * It should have been stored to request payload, but we do want + * to avoid extra pre-allocation, and uring_cmd payload is always + * free for us + */ + struct request *req_list; + + /* + * The following two are valid in this cmd whole lifetime, and + * setup in ublk uring_cmd handler + */ struct ublk_queue *ubq; u16 tag; }; @@ -1170,14 +1184,12 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, blk_mq_end_request(rq, BLK_STS_IOERR); } -static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, - unsigned int issue_flags) +static void ublk_dispatch_req(struct ublk_queue *ubq, + struct io_uring_cmd *cmd, + struct request *req, + unsigned int issue_flags) { - struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); - struct ublk_queue *ubq = pdu->ubq; - int tag = pdu->tag; - struct request *req = blk_mq_tag_to_rq( - ubq->dev->tag_set.tags[ubq->q_id], tag); + int tag = req->tag; struct ublk_io *io = &ubq->ios[tag]; unsigned int mapped_bytes; @@ -1252,6 +1264,18 @@ static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags); } +static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); + struct ublk_queue *ubq = pdu->ubq; + int tag = pdu->tag; + struct request *req = blk_mq_tag_to_rq( + ubq->dev->tag_set.tags[ubq->q_id], tag); + + ublk_dispatch_req(ubq, cmd, req, issue_flags); +} + static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) { struct ublk_io *io = &ubq->ios[rq->tag]; @@ -1259,6 +1283,35 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb); } +static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); + struct request *rq = pdu->req_list; + struct ublk_queue *ubq = rq->mq_hctx->driver_data; + struct 
request *next; + + while (rq) { + struct ublk_io *io = &ubq->ios[rq->tag]; + + next = rq->rq_next; + rq->rq_next = NULL; + ublk_dispatch_req(ubq, io->cmd, rq, issue_flags); + rq = next; + } +} + +static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l) +{ + struct request *rq = rq_list_peek(l); + struct ublk_io *io = &ubq->ios[rq->tag]; + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd); + + pdu->req_list = rq; + rq_list_init(l); + io_uring_cmd_complete_in_task(io->cmd, ublk_cmd_list_tw_cb); +} + static enum blk_eh_timer_return ublk_timeout(struct request *rq) { struct ublk_queue *ubq = rq->mq_hctx->driver_data; @@ -1297,21 +1350,12 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq) return BLK_EH_RESET_TIMER; } -static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, - const struct blk_mq_queue_data *bd) +static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq) { - struct ublk_queue *ubq = hctx->driver_data; - struct request *rq = bd->rq; blk_status_t res; - if (unlikely(ubq->fail_io)) { + if (unlikely(ubq->fail_io)) return BLK_STS_TARGET; - } - - /* fill iod to slot in io cmd buffer */ - res = ublk_setup_iod(ubq, rq); - if (unlikely(res != BLK_STS_OK)) - return BLK_STS_IOERR; /* With recovery feature enabled, force_abort is set in * ublk_stop_dev() before calling del_gendisk(). 
We have to @@ -1325,6 +1369,29 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort)) return BLK_STS_IOERR; + if (unlikely(ubq->canceling)) + return BLK_STS_IOERR; + + /* fill iod to slot in io cmd buffer */ + res = ublk_setup_iod(ubq, rq); + if (unlikely(res != BLK_STS_OK)) + return BLK_STS_IOERR; + + blk_mq_start_request(rq); + return BLK_STS_OK; +} + +static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct ublk_queue *ubq = hctx->driver_data; + struct request *rq = bd->rq; + blk_status_t res; + + res = ublk_prep_req(ubq, rq); + if (res != BLK_STS_OK) + return res; + /* * ->canceling has to be handled after ->force_abort and ->fail_io * is dealt with, otherwise this request may not be failed in case @@ -1335,12 +1402,35 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } - blk_mq_start_request(bd->rq); ublk_queue_cmd(ubq, rq); - return BLK_STS_OK; } +static void ublk_queue_rqs(struct rq_list *rqlist) +{ + struct rq_list requeue_list = { }; + struct rq_list submit_list = { }; + struct ublk_queue *ubq = NULL; + struct request *req; + + while ((req = rq_list_pop(rqlist))) { + struct ublk_queue *this_q = req->mq_hctx->driver_data; + + if (ubq && ubq != this_q && !rq_list_empty(&submit_list)) + ublk_queue_cmd_list(ubq, &submit_list); + ubq = this_q; + + if (ublk_prep_req(ubq, req) == BLK_STS_OK) + rq_list_add_tail(&submit_list, req); + else + rq_list_add_tail(&requeue_list, req); + } + + if (ubq && !rq_list_empty(&submit_list)) + ublk_queue_cmd_list(ubq, &submit_list); + *rqlist = requeue_list; +} + static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, unsigned int hctx_idx) { @@ -1353,6 +1443,7 @@ static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, static const struct blk_mq_ops ublk_mq_ops = { .queue_rq = ublk_queue_rq, + .queue_rqs = ublk_queue_rqs, .init_hctx = 
ublk_init_hctx, .timeout = ublk_timeout, }; -- 2.51.0 From daabfb50a56b11a4f15d2bdbfae129e61c08c0ac Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:18 +0800 Subject: [PATCH 08/16] ublk: rename ublk_rq_task_work_cb as ublk_cmd_tw_cb The new name is aligned with ublk_cmd_list_tw_cb(), and looks more readable. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-10-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index f97919460515..355a59c78539 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1264,8 +1264,8 @@ static void ublk_dispatch_req(struct ublk_queue *ubq, ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags); } -static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, - unsigned int issue_flags) +static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd, + unsigned int issue_flags) { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; @@ -1280,7 +1280,7 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) { struct ublk_io *io = &ubq->ios[rq->tag]; - io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb); + io_uring_cmd_complete_in_task(io->cmd, ublk_cmd_tw_cb); } static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, -- 2.51.0 From 8c778614361f288ef552fd6a52a17460a45b2f4f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:19 +0800 Subject: [PATCH 09/16] selftests: ublk: add more tests for covering MQ Add test test_generic_02.sh for covering IO dispatch order in case of MQ. This matters especially because ->queue_rqs() support was just added, which may affect IO dispatch order. Add test_loop_05.sh and test_stripe_03.sh for covering MQ. 
Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-11-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 3 ++ tools/testing/selftests/ublk/test_common.sh | 6 +++ .../testing/selftests/ublk/test_generic_02.sh | 44 +++++++++++++++++++ tools/testing/selftests/ublk/test_loop_01.sh | 14 +++--- tools/testing/selftests/ublk/test_loop_03.sh | 14 +++--- tools/testing/selftests/ublk/test_loop_05.sh | 28 ++++++++++++ .../testing/selftests/ublk/test_stress_01.sh | 6 +-- .../testing/selftests/ublk/test_stress_02.sh | 6 +-- .../testing/selftests/ublk/test_stripe_01.sh | 14 +++--- .../testing/selftests/ublk/test_stripe_03.sh | 30 +++++++++++++ 10 files changed, 132 insertions(+), 33 deletions(-) create mode 100755 tools/testing/selftests/ublk/test_generic_02.sh create mode 100755 tools/testing/selftests/ublk/test_loop_05.sh create mode 100755 tools/testing/selftests/ublk/test_stripe_03.sh diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index 7817afe29005..7a8c994de244 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -4,6 +4,7 @@ CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir) LDLIBS += -lpthread -lm -luring TEST_PROGS := test_generic_01.sh +TEST_PROGS += test_generic_02.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh @@ -11,8 +12,10 @@ TEST_PROGS += test_loop_01.sh TEST_PROGS += test_loop_02.sh TEST_PROGS += test_loop_03.sh TEST_PROGS += test_loop_04.sh +TEST_PROGS += test_loop_05.sh TEST_PROGS += test_stripe_01.sh TEST_PROGS += test_stripe_02.sh +TEST_PROGS += test_stripe_03.sh TEST_PROGS += test_stress_01.sh TEST_PROGS += test_stress_02.sh diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index 75f54ac6b1c4..a88b35943227 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -23,6 +23,12 @@ 
_get_disk_dev_t() { echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) )) } +_run_fio_verify_io() { + fio --name=verify --rw=randwrite --direct=1 --ioengine=libaio \ + --bs=8k --iodepth=32 --verify=crc32c --do_verify=1 \ + --verify_state_save=0 "$@" > /dev/null +} + _create_backfile() { local my_size=$1 local my_file diff --git a/tools/testing/selftests/ublk/test_generic_02.sh b/tools/testing/selftests/ublk/test_generic_02.sh new file mode 100755 index 000000000000..3e80121e3bf5 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_02.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_02" +ERR_CODE=0 + +if ! _have_program bpftrace; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "sequential io order for MQ" + +dev_id=$(_add_ublk_dev -t null -q 2) +_check_add_dev $TID $? + +dev_t=$(_get_disk_dev_t "$dev_id") +bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 & +btrace_pid=$! +sleep 2 + +if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then + _cleanup_test "null" + exit "$UBLK_SKIP_CODE" +fi + +# run fio over this ublk disk +fio --name=write_seq \ + --filename=/dev/ublkb"${dev_id}" \ + --ioengine=libaio --iodepth=16 \ + --rw=write \ + --size=512M \ + --direct=1 \ + --bs=4k > /dev/null 2>&1 +ERR_CODE=$? +kill "$btrace_pid" +wait +if grep -q "io_out_of_order" "$UBLK_TMP"; then + cat "$UBLK_TMP" + ERR_CODE=255 +fi +_cleanup_test "null" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_01.sh b/tools/testing/selftests/ublk/test_loop_01.sh index c882d2a08e13..1ef8b6044777 100755 --- a/tools/testing/selftests/ublk/test_loop_01.sh +++ b/tools/testing/selftests/ublk/test_loop_01.sh @@ -6,6 +6,10 @@ TID="loop_01" ERR_CODE=0 +if ! 
_have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "loop" "write and verify test" backfile_0=$(_create_backfile 256M) @@ -14,15 +18,7 @@ dev_id=$(_add_ublk_dev -t loop "$backfile_0") _check_add_dev $TID $? "${backfile_0}" # run fio over the ublk disk -fio --name=write_and_verify \ - --filename=/dev/ublkb"${dev_id}" \ - --ioengine=libaio --iodepth=16 \ - --rw=write \ - --size=256M \ - --direct=1 \ - --verify=crc32c \ - --do_verify=1 \ - --bs=4k > /dev/null 2>&1 +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M ERR_CODE=$? _cleanup_test "loop" diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh index 269c96787d7d..e9ca744de8b1 100755 --- a/tools/testing/selftests/ublk/test_loop_03.sh +++ b/tools/testing/selftests/ublk/test_loop_03.sh @@ -6,6 +6,10 @@ TID="loop_03" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "loop" "write and verify over zero copy" backfile_0=$(_create_backfile 256M) @@ -13,15 +17,7 @@ dev_id=$(_add_ublk_dev -t loop -z "$backfile_0") _check_add_dev $TID $? "$backfile_0" # run fio over the ublk disk -fio --name=write_and_verify \ - --filename=/dev/ublkb"${dev_id}" \ - --ioengine=libaio --iodepth=64 \ - --rw=write \ - --size=256M \ - --direct=1 \ - --verify=crc32c \ - --do_verify=1 \ - --bs=4k > /dev/null 2>&1 +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M ERR_CODE=$? _cleanup_test "loop" diff --git a/tools/testing/selftests/ublk/test_loop_05.sh b/tools/testing/selftests/ublk/test_loop_05.sh new file mode 100755 index 000000000000..2e6e2e6978fc --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_05.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="loop_05" +ERR_CODE=0 + +if ! 
_have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "loop" "write and verify test" + +backfile_0=$(_create_backfile 256M) + +dev_id=$(_add_ublk_dev -q 2 -t loop "$backfile_0") +_check_add_dev $TID $? "${backfile_0}" + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? + +_cleanup_test "loop" + +_remove_backfile "$backfile_0" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh index 7177f6c57bc5..a8be24532b24 100755 --- a/tools/testing/selftests/ublk/test_stress_01.sh +++ b/tools/testing/selftests/ublk/test_stress_01.sh @@ -27,20 +27,20 @@ ublk_io_and_remove() _prep_test "stress" "run IO and remove device" -ublk_io_and_remove 8G -t null +ublk_io_and_remove 8G -t null -q 4 ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi BACK_FILE=$(_create_backfile 256M) -ublk_io_and_remove 256M -t loop "${BACK_FILE}" +ublk_io_and_remove 256M -t loop -q 4 "${BACK_FILE}" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi -ublk_io_and_remove 256M -t loop -z "${BACK_FILE}" +ublk_io_and_remove 256M -t loop -q 4 -z "${BACK_FILE}" ERR_CODE=$? _cleanup_test "stress" _remove_backfile "${BACK_FILE}" diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh index 2a8e60579a06..2159e4cc8140 100755 --- a/tools/testing/selftests/ublk/test_stress_02.sh +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -27,20 +27,20 @@ ublk_io_and_kill_daemon() _prep_test "stress" "run IO and kill ublk server" -ublk_io_and_kill_daemon 8G -t null +ublk_io_and_kill_daemon 8G -t null -q 4 ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi BACK_FILE=$(_create_backfile 256M) -ublk_io_and_kill_daemon 256M -t loop "${BACK_FILE}" +ublk_io_and_kill_daemon 256M -t loop -q 4 "${BACK_FILE}" ERR_CODE=$? 
if [ ${ERR_CODE} -ne 0 ]; then _show_result $TID $ERR_CODE fi -ublk_io_and_kill_daemon 256M -t loop -z "${BACK_FILE}" +ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${BACK_FILE}" ERR_CODE=$? _cleanup_test "stress" _remove_backfile "${BACK_FILE}" diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh index c01f3dc325ab..7e387ef656ea 100755 --- a/tools/testing/selftests/ublk/test_stripe_01.sh +++ b/tools/testing/selftests/ublk/test_stripe_01.sh @@ -6,6 +6,10 @@ TID="stripe_01" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "stripe" "write and verify test" backfile_0=$(_create_backfile 256M) @@ -15,15 +19,7 @@ dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1") _check_add_dev $TID $? "${backfile_0}" # run fio over the ublk disk -fio --name=write_and_verify \ - --filename=/dev/ublkb"${dev_id}" \ - --ioengine=libaio --iodepth=32 \ - --rw=write \ - --size=512M \ - --direct=1 \ - --verify=crc32c \ - --do_verify=1 \ - --bs=4k > /dev/null 2>&1 +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M ERR_CODE=$? _cleanup_test "stripe" diff --git a/tools/testing/selftests/ublk/test_stripe_03.sh b/tools/testing/selftests/ublk/test_stripe_03.sh new file mode 100755 index 000000000000..c1b34af36145 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_03.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="stripe_03" +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stripe" "write and verify test" + +backfile_0=$(_create_backfile 256M) +backfile_1=$(_create_backfile 256M) + +dev_id=$(_add_ublk_dev -q 2 -t stripe "$backfile_0" "$backfile_1") +_check_add_dev $TID $? "${backfile_0}" + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M +ERR_CODE=$? 
+ +_cleanup_test "stripe" + +_remove_backfile "$backfile_0" +_remove_backfile "$backfile_1" + +_show_result $TID $ERR_CODE -- 2.51.0 From c78ae7b71ed66a180708377b45042ef77efc840e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 27 Mar 2025 17:51:20 +0800 Subject: [PATCH 10/16] selftests: ublk: add test for checking zero copy related parameter ublk zero copy usually requires to set dma and segment parameter correctly, so hard-code null target's dma & segment parameter in non-default value, and verify if they are setup correctly by ublk driver. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250327095123.179113-12-ming.lei@redhat.com Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/Makefile | 1 + tools/testing/selftests/ublk/null.c | 11 +++++++- .../testing/selftests/ublk/test_generic_03.sh | 28 +++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/ublk/test_generic_03.sh diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index 7a8c994de244..d98680d64a2f 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -5,6 +5,7 @@ LDLIBS += -lpthread -lm -luring TEST_PROGS := test_generic_01.sh TEST_PROGS += test_generic_02.sh +TEST_PROGS += test_generic_03.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index 899875ff50fe..91fec3690d4b 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -17,7 +17,8 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) dev->tgt.dev_size = dev_size; dev->tgt.params = (struct ublk_params) { - .types = UBLK_PARAM_TYPE_BASIC, + .types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN | + UBLK_PARAM_TYPE_SEGMENT, .basic = { .logical_bs_shift = 9, .physical_bs_shift = 12, @@ -26,6 +27,14 @@ static int 
ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) .max_sectors = info->max_io_buf_bytes >> 9, .dev_sectors = dev_size >> 9, }, + .dma = { + .alignment = 4095, + }, + .seg = { + .seg_boundary_mask = 4095, + .max_segment_size = 32 << 10, + .max_segments = 32, + }, }; if (info->flags & UBLK_F_SUPPORT_ZERO_COPY) diff --git a/tools/testing/selftests/ublk/test_generic_03.sh b/tools/testing/selftests/ublk/test_generic_03.sh new file mode 100755 index 000000000000..b551aa76cb0d --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_03.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_03" +ERR_CODE=0 + +_prep_test "null" "check dma & segment limits for zero copy" + +dev_id=$(_add_ublk_dev -t null -z) +_check_add_dev $TID $? + +sysfs_path=/sys/block/ublkb"${dev_id}" +dma_align=$(cat "$sysfs_path"/queue/dma_alignment) +max_segments=$(cat "$sysfs_path"/queue/max_segments) +max_segment_size=$(cat "$sysfs_path"/queue/max_segment_size) +if [ "$dma_align" != "4095" ]; then + ERR_CODE=255 +fi +if [ "$max_segments" != "32" ]; then + ERR_CODE=255 +fi +if [ "$max_segment_size" != "32768" ]; then + ERR_CODE=255 +fi +_cleanup_test "null" +_show_result $TID $ERR_CODE -- 2.51.0 From dfbce8b798fb848a42706e2e544b78b3db22aaae Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 28 Mar 2025 12:04:07 -0600 Subject: [PATCH 11/16] ublk: remove unused cmd argument to ublk_dispatch_req() ublk_dispatch_req() never uses its struct io_uring_cmd *cmd argument. Drop it so callers don't have to pass a value. 
Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250328180411.2696494-2-csander@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 355a59c78539..39efe443e235 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1185,7 +1185,6 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, } static void ublk_dispatch_req(struct ublk_queue *ubq, - struct io_uring_cmd *cmd, struct request *req, unsigned int issue_flags) { @@ -1273,7 +1272,7 @@ static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd, struct request *req = blk_mq_tag_to_rq( ubq->dev->tag_set.tags[ubq->q_id], tag); - ublk_dispatch_req(ubq, cmd, req, issue_flags); + ublk_dispatch_req(ubq, req, issue_flags); } static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) @@ -1292,11 +1291,9 @@ static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, struct request *next; while (rq) { - struct ublk_io *io = &ubq->ios[rq->tag]; - next = rq->rq_next; rq->rq_next = NULL; - ublk_dispatch_req(ubq, io->cmd, rq, issue_flags); + ublk_dispatch_req(ubq, rq, issue_flags); rq = next; } } -- 2.51.0 From 9d7fa99189709b80eb16094aad18f7e492b835de Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 28 Mar 2025 12:04:08 -0600 Subject: [PATCH 12/16] ublk: skip 1 NULL check in ublk_cmd_list_tw_cb() loop ublk_cmd_list_tw_cb() is always performed on a non-empty request list. So don't check whether rq is NULL on the first iteration of the loop, just on subsequent iterations. 
Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250328180411.2696494-3-csander@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 39efe443e235..8b9780c0feab 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1290,12 +1290,12 @@ static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, struct ublk_queue *ubq = rq->mq_hctx->driver_data; struct request *next; - while (rq) { + do { next = rq->rq_next; rq->rq_next = NULL; ublk_dispatch_req(ubq, rq, issue_flags); rq = next; - } + } while (rq); } static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l) -- 2.51.0 From 6a87fc437a034e4be2a63d8dfd4d2985c6c574bc Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 28 Mar 2025 12:04:09 -0600 Subject: [PATCH 13/16] ublk: get ubq from pdu in ublk_cmd_list_tw_cb() Save a few pointer dereferences by obtaining struct ublk_queue *ubq from the ublk_uring_cmd_pdu instead of the request's mq_hctx. 
Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250328180411.2696494-4-csander@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 8b9780c0feab..9276d1fcc100 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1287,7 +1287,7 @@ static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct request *rq = pdu->req_list; - struct ublk_queue *ubq = rq->mq_hctx->driver_data; + struct ublk_queue *ubq = pdu->ubq; struct request *next; do { -- 2.51.0 From 108d8aecaeeb52f5fbe98ac94da534954db1da44 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 28 Mar 2025 12:04:10 -0600 Subject: [PATCH 14/16] ublk: avoid redundant io->cmd in ublk_queue_cmd_list() ublk_queue_cmd_list() loads io->cmd twice. The intervening stores prevent the compiler from combining the loads. Since struct ublk_io *io is only used to compute io->cmd, replace the variable with io->cmd. 
Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250328180411.2696494-5-csander@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 9276d1fcc100..23250471562a 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1301,12 +1301,12 @@ static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l) { struct request *rq = rq_list_peek(l); - struct ublk_io *io = &ubq->ios[rq->tag]; - struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(io->cmd); + struct io_uring_cmd *cmd = ubq->ios[rq->tag].cmd; + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); pdu->req_list = rq; rq_list_init(l); - io_uring_cmd_complete_in_task(io->cmd, ublk_cmd_list_tw_cb); + io_uring_cmd_complete_in_task(cmd, ublk_cmd_list_tw_cb); } static enum blk_eh_timer_return ublk_timeout(struct request *rq) -- 2.51.0 From 00cfc05cf81f58b1bc2650e18228350a094b1f6d Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Fri, 28 Mar 2025 12:04:11 -0600 Subject: [PATCH 15/16] ublk: store req in ublk_uring_cmd_pdu for ublk_cmd_tw_cb() Pass struct request *rq to ublk_cmd_tw_cb() through ublk_uring_cmd_pdu, mirroring how it works for ublk_cmd_list_tw_cb(). This saves some pointer dereferences, as well as the bounds check in blk_mq_tag_to_rq(). 
Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250328180411.2696494-6-csander@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 23250471562a..466a23b89379 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -89,7 +89,10 @@ struct ublk_uring_cmd_pdu { * to avoid extra pre-allocation, and uring_cmd payload is always * free for us */ - struct request *req_list; + union { + struct request *req; + struct request *req_list; + }; /* * The following two are valid in this cmd whole lifetime, and @@ -1268,18 +1271,17 @@ static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd, { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; - int tag = pdu->tag; - struct request *req = blk_mq_tag_to_rq( - ubq->dev->tag_set.tags[ubq->q_id], tag); - ublk_dispatch_req(ubq, req, issue_flags); + ublk_dispatch_req(ubq, pdu->req, issue_flags); } static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) { - struct ublk_io *io = &ubq->ios[rq->tag]; + struct io_uring_cmd *cmd = ubq->ios[rq->tag].cmd; + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); - io_uring_cmd_complete_in_task(io->cmd, ublk_cmd_tw_cb); + pdu->req = rq; + io_uring_cmd_complete_in_task(cmd, ublk_cmd_tw_cb); } static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, -- 2.51.0 From a20b8631c8885cda45a331a151d29a83dfbfdefb Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 28 Mar 2025 23:10:54 +0000 Subject: [PATCH 16/16] io_uring/net: open code io_sendmsg_copy_hdr() io_sendmsg_setup() is trivial and io_sendmsg_copy_hdr() doesn't add any useful abstraction, so open code io_sendmsg_copy_hdr() into io_sendmsg_setup().
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/565318ce585665e88053663eeee5178d2c15692f.1743202294.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 228b4f13d34c..34d103f2469d 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -325,25 +325,6 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, return 0; } -static int io_sendmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct user_msghdr msg; - int ret; - - ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE, NULL); - if (unlikely(ret)) - return ret; - - if (!(req->flags & REQ_F_BUFFER_SELECT)) - ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen, - ITER_SOURCE); - /* save msg_control as sys_sendmsg() overwrites it */ - sr->msg_control = iomsg->msg.msg_control_user; - return ret; -} - void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req) { struct io_async_msghdr *io = req->async_data; @@ -392,10 +373,19 @@ static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); struct io_async_msghdr *kmsg = req->async_data; + struct user_msghdr msg; + int ret; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); + ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL); + if (unlikely(ret)) + return ret; + /* save msg_control as sys_sendmsg() overwrites it */ + sr->msg_control = kmsg->msg.msg_control_user; - return io_sendmsg_copy_hdr(req, kmsg); + if (req->flags & REQ_F_BUFFER_SELECT) + return 0; + return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE); } static int io_sendmsg_zc_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) -- 2.51.0