From: Linus Torvalds
Date: Mon, 23 May 2022 20:06:15 +0000 (-0700)
Subject: Merge tag 'for-5.19/io_uring-passthrough-2022-05-22' of git://git.kernel.dk/linux...
X-Git-Tag: v5.19-rc1~247
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=9836e93c0a7e031ac6a71c56171c229de1eea7cf;p=users%2Fhch%2Fxfs.git

Merge tag 'for-5.19/io_uring-passthrough-2022-05-22' of git://git.kernel.dk/linux-block

Pull io_uring NVMe command passthrough from Jens Axboe:
 "On top of everything else, this adds support for passthrough for
  io_uring.

  The initial feature for this is NVMe passthrough support, which allows
  non-filesystem based IO commands and admin commands.

  To support this, io_uring grows support for SQE and CQE members that
  are twice as big, allowing to pass in a full NVMe command without
  having to copy data around. And to complete with more than just a
  single 32-bit value as the output"

* tag 'for-5.19/io_uring-passthrough-2022-05-22' of git://git.kernel.dk/linux-block: (22 commits)
  io_uring: cleanup handling of the two task_work lists
  nvme: enable uring-passthrough for admin commands
  nvme: helper for uring-passthrough checks
  blk-mq: fix passthrough plugging
  nvme: add vectored-io support for uring-cmd
  nvme: wire-up uring-cmd support for io-passthru on char-device.
  nvme: refactor nvme_submit_user_cmd()
  block: wire-up support for passthrough plugging
  fs,io_uring: add infrastructure for uring-cmd
  io_uring: support CQE32 for nop operation
  io_uring: enable CQE32
  io_uring: support CQE32 in /proc info
  io_uring: add tracing for additional CQE32 fields
  io_uring: overflow processing for CQE32
  io_uring: flush completions for CQE32
  io_uring: modify io_get_cqe for CQE32
  io_uring: add CQE32 completion processing
  io_uring: add CQE32 setup processing
  io_uring: change ring size calculation for CQE32
  io_uring: store add. return values for CQE32
  ...
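To make the shape of the new interface concrete, here is a rough userspace sketch
(not part of this merge) of issuing a single NVMe read through IORING_OP_URING_CMD
on a big-SQE/big-CQE ring. The device path /dev/ng0n1, the 512-byte LBA size, and
the exact result reporting are assumptions; it needs a 5.19+ kernel, headers that
define struct nvme_uring_cmd and NVME_URING_CMD_IO, a liburing build that knows
about SQE128/CQE32 and sqe->cmd, and CAP_SYS_ADMIN.

/*
 * Hedged sketch (not from this merge): read one LBA from an NVMe
 * namespace via IORING_OP_URING_CMD on an SQE128/CQE32 ring.
 * Assumptions: /dev/ng0n1 exists, the namespace uses 512-byte LBAs,
 * and the caller has CAP_SYS_ADMIN.  Error handling is minimal.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <liburing.h>
#include <linux/nvme_ioctl.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct nvme_uring_cmd *cmd;
	void *buf;
	int fd;

	/* SQE128 leaves 80 bytes of sqe->cmd space, CQE32 doubles the CQE */
	if (io_uring_queue_init(4, &ring,
				IORING_SETUP_SQE128 | IORING_SETUP_CQE32) < 0)
		exit(1);

	fd = open("/dev/ng0n1", O_RDONLY);	/* NVMe generic char device */
	if (fd < 0)
		exit(1);
	if (posix_memalign(&buf, 4096, 512))
		exit(1);

	sqe = io_uring_get_sqe(&ring);
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->fd = fd;
	sqe->cmd_op = NVME_URING_CMD_IO;	/* I/O passthrough, as wired up above */

	/* the NVMe command itself lives in the big-SQE command area */
	cmd = (struct nvme_uring_cmd *)sqe->cmd;
	memset(cmd, 0, sizeof(*cmd));
	cmd->opcode = 0x02;			/* NVMe Read */
	cmd->nsid = 1;
	cmd->addr = (__u64)(uintptr_t)buf;
	cmd->data_len = 512;
	cmd->cdw10 = 0;				/* starting LBA (low 32 bits)  */
	cmd->cdw11 = 0;				/* starting LBA (high 32 bits) */
	cmd->cdw12 = 0;				/* number of LBAs minus one    */

	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);

	/* cqe->res is the status; big_cqe[0] is assumed to carry the NVMe result */
	printf("res=%d result=%llu\n", cqe->res,
	       (unsigned long long)cqe->big_cqe[0]);
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}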
---
9836e93c0a7e031ac6a71c56171c229de1eea7cf
diff --cc fs/io_uring.c
index 1015dd49e7e5,c5a476e6c068..9f1c682d7caf
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@@ -2462,10 -2541,21 +2597,23 @@@ static inline void __io_req_complete(st
  	io_req_complete_post(req, res, cflags);
  }
  
+ static inline void __io_req_complete32(struct io_kiocb *req,
+ 				       unsigned int issue_flags, s32 res,
+ 				       u32 cflags, u64 extra1, u64 extra2)
+ {
+ 	if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
+ 		io_req_complete_state(req, res, cflags);
+ 		req->extra1 = extra1;
+ 		req->extra2 = extra2;
+ 	} else {
+ 		io_req_complete_post32(req, res, cflags, extra1, extra2);
+ 	}
+ }
+ 
  static inline void io_req_complete(struct io_kiocb *req, s32 res)
  {
+ 	if (res < 0)
+ 		req_set_fail(req);
  	__io_req_complete(req, 0, res, 0);
  }
  
@@@ -4997,17 -5169,11 +5256,23 @@@ static int io_nop_prep(struct io_kiocb 
   */
  static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
  {
++	unsigned int cflags;
+ 	void __user *buf;
+ 
+ 	if (req->flags & REQ_F_BUFFER_SELECT) {
+ 		size_t len = 1;
+ 
+ 		buf = io_buffer_select(req, &len, issue_flags);
+ 		if (!buf)
+ 			return -ENOBUFS;
+ 	}
+ 
- 	__io_req_complete(req, issue_flags, 0, io_put_kbuf(req, issue_flags));
++	cflags = io_put_kbuf(req, issue_flags);
+ 	if (!(req->ctx->flags & IORING_SETUP_CQE32))
- 		__io_req_complete(req, issue_flags, 0, 0);
++		__io_req_complete(req, issue_flags, 0, cflags);
+ 	else
- 		__io_req_complete32(req, issue_flags, 0, 0, req->nop.extra1,
- 				    req->nop.extra2);
++		__io_req_complete32(req, issue_flags, 0, cflags,
++				    req->nop.extra1, req->nop.extra2);
  	return 0;
  }
  
@@@ -6750,11 -6776,10 +7013,11 @@@ static void __io_poll_execute(struct io
  		req->io_task_work.func = io_apoll_task_func;
  
  	trace_io_uring_task_add(req->ctx, req, req->cqe.user_data, req->opcode, mask);
- 	io_req_task_work_add(req, false);
+ 	io_req_task_work_add(req);
  }
  
 -static inline void io_poll_execute(struct io_kiocb *req, int res, int events)
 +static inline void io_poll_execute(struct io_kiocb *req, int res,
 +				   __poll_t events)
  {
  	if (io_poll_get_ownership(req))
  		__io_poll_execute(req, res, events);
diff --cc include/uapi/linux/io_uring.h
index cc9544629eee,23618be55dd2..53e7dae92e42
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@@ -61,19 -62,19 +62,28 @@@ struct io_uring_sqe
  		__s32	splice_fd_in;
  		__u32	file_index;
  	};
- 	__u64	addr3;
- 	__u64	__pad2[1];
+ 	union {
+ 		struct {
+ 			__u64	addr3;
+ 			__u64	__pad2[1];
+ 		};
+ 		/*
+ 		 * If the ring is initialized with IORING_SETUP_SQE128, then
+ 		 * this field is used for 80 bytes of arbitrary command data
+ 		 */
+ 		__u8	cmd[0];
+ 	};
  };
  
 +/*
 + * If sqe->file_index is set to this for opcodes that instantiate a new
 + * direct descriptor (like openat/openat2/accept), then io_uring will allocate
 + * an available direct descriptor instead of having the application pass one
 + * in. The picked direct descriptor will be returned in cqe->res, or -ENFILE
 + * if the space is full.
 + */
 +#define IORING_FILE_INDEX_ALLOC	(~0U)
 +
  enum {
  	IOSQE_FIXED_FILE_BIT,
  	IOSQE_IO_DRAIN_BIT,
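The IORING_FILE_INDEX_ALLOC comment added in the uapi header above can likewise be
illustrated with a small, hedged userspace sketch (not part of the merge): the sqe
is filled by hand so the raw sqe->file_index ABI stays visible, the path and table
size are placeholders, and io_uring_register_files_sparse() is assumed to be
available in the liburing build.

/*
 * Hedged sketch (not from this merge): use IORING_FILE_INDEX_ALLOC to
 * let io_uring pick a free slot in a sparse registered-file table for
 * an openat, instead of the application choosing the slot.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	if (io_uring_queue_init(4, &ring, 0) < 0)
		exit(1);

	/* a sparse table of 16 direct descriptors for the kernel to allocate from */
	if (io_uring_register_files_sparse(&ring, 16) < 0)
		exit(1);

	sqe = io_uring_get_sqe(&ring);
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_OPENAT;
	sqe->fd = AT_FDCWD;
	sqe->addr = (__u64)(uintptr_t)"/etc/hostname";	/* placeholder path */
	sqe->open_flags = O_RDONLY;
	sqe->file_index = IORING_FILE_INDEX_ALLOC;	/* kernel picks the slot */

	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);

	/* the allocated direct-descriptor index, or -ENFILE if the table is full */
	printf("direct descriptor slot: %d\n", cqe->res);
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}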