www.infradead.org Git - users/hch/block.git/commitdiff
fs,io_uring: add infrastructure for uring-cmd
authorJens Axboe <axboe@kernel.dk>
Thu, 5 May 2022 06:06:12 +0000 (11:36 +0530)
committerJens Axboe <axboe@kernel.dk>
Thu, 5 May 2022 23:32:55 +0000 (17:32 -0600)
file_operations->uring_cmd is a file private handler.
This is somewhat similar to ioctl but hopefully a lot more sane and
useful as it can be used to enable many io_uring capabilities for the
underlying operation.

IORING_OP_URING_CMD is a file private kind of request. io_uring doesn't
know what is in this command type; it's for the provider of ->uring_cmd()
to deal with. This operation can be issued only on a ring that is
set up with both the IORING_SETUP_SQE128 and IORING_SETUP_CQE32 flags.

Co-developed-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/20220505060616.803816-2-joshi.k@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io_uring.c
include/linux/fs.h
include/linux/io_uring.h
include/uapi/linux/io_uring.h

index 20cfc569c4d9a95681f0c82e443765a61c73e8b1..884f40f5153673d88a423f3a690a31709abde4d8 100644 (file)
@@ -202,13 +202,6 @@ struct io_rings {
        struct io_uring_cqe     cqes[] ____cacheline_aligned_in_smp;
 };
 
-enum io_uring_cmd_flags {
-       IO_URING_F_COMPLETE_DEFER       = 1,
-       IO_URING_F_UNLOCKED             = 2,
-       /* int's last bit, sign checks are usually faster than a bit test */
-       IO_URING_F_NONBLOCK             = INT_MIN,
-};
-
 struct io_mapped_ubuf {
        u64             ubuf;
        u64             ubuf_end;
@@ -972,6 +965,7 @@ struct io_kiocb {
                struct io_xattr         xattr;
                struct io_socket        sock;
                struct io_nop           nop;
+               struct io_uring_cmd     uring_cmd;
        };
 
        u8                              opcode;
@@ -1289,6 +1283,12 @@ static const struct io_op_def io_op_defs[] = {
        [IORING_OP_SOCKET] = {
                .audit_skip             = 1,
        },
+       [IORING_OP_URING_CMD] = {
+               .needs_file             = 1,
+               .plug                   = 1,
+               .async_size             = 2 * sizeof(struct io_uring_sqe) -
+                                         offsetof(struct io_uring_sqe, cmd),
+       },
 };
 
 /* requests with any of those set should undergo io_disarm_next() */
@@ -1428,6 +1428,8 @@ const char *io_uring_get_opcode(u8 opcode)
                return "GETXATTR";
        case IORING_OP_SOCKET:
                return "SOCKET";
+       case IORING_OP_URING_CMD:
+               return "URING_CMD";
        case IORING_OP_LAST:
                return "INVALID";
        }
@@ -4910,6 +4912,91 @@ static int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
        return 0;
 }
 
+static void io_uring_cmd_work(struct io_kiocb *req, bool *locked)
+{
+       req->uring_cmd.task_work_cb(&req->uring_cmd);
+}
+
+void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+                       void (*task_work_cb)(struct io_uring_cmd *))
+{
+       struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
+
+       req->uring_cmd.task_work_cb = task_work_cb;
+       req->io_task_work.func = io_uring_cmd_work;
+       io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
+}
+EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
+
+/*
+ * Called by consumers of io_uring_cmd, if they originally returned
+ * -EIOCBQUEUED upon receiving the command.
+ */
+void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2)
+{
+       struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
+
+       if (ret < 0)
+               req_set_fail(req);
+       __io_req_complete32(req, 0, ret, 0, res2, 0);
+}
+EXPORT_SYMBOL_GPL(io_uring_cmd_done);
+
+static int io_uring_cmd_prep_async(struct io_kiocb *req)
+{
+       struct io_uring_cmd *ioucmd = &req->uring_cmd;
+       struct io_ring_ctx *ctx = req->ctx;
+       size_t cmd_size = sizeof(struct io_uring_sqe) -
+                               offsetof(struct io_uring_sqe, cmd);
+
+       if (ctx->flags & IORING_SETUP_SQE128)
+               cmd_size += sizeof(struct io_uring_sqe);
+
+       memcpy(req->async_data, ioucmd->cmd, cmd_size);
+       return 0;
+}
+
+static int io_uring_cmd_prep(struct io_kiocb *req,
+                            const struct io_uring_sqe *sqe)
+{
+       struct io_uring_cmd *ioucmd = &req->uring_cmd;
+
+       if (sqe->ioprio || sqe->rw_flags)
+               return -EINVAL;
+       ioucmd->cmd = (void *) sqe->cmd;
+       ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
+       return 0;
+}
+
+static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
+{
+       struct io_uring_cmd *ioucmd = &req->uring_cmd;
+       struct io_ring_ctx *ctx = req->ctx;
+       struct file *file = req->file;
+       int ret;
+
+       if (!req->file->f_op->uring_cmd)
+               return -EOPNOTSUPP;
+
+       if (ctx->flags & IORING_SETUP_SQE128)
+               issue_flags |= IO_URING_F_SQE128;
+       if (ctx->flags & IORING_SETUP_CQE32)
+               issue_flags |= IO_URING_F_CQE32;
+       if (ctx->flags & IORING_SETUP_IOPOLL)
+               issue_flags |= IO_URING_F_IOPOLL;
+
+       if (req_has_async_data(req))
+               ioucmd->cmd = req->async_data;
+
+       ret = file->f_op->uring_cmd(ioucmd, issue_flags);
+       if (ret == -EAGAIN)
+               return -EAGAIN;
+       else if (ret != -EIOCBQUEUED)
+               io_uring_cmd_done(ioucmd, ret, 0);
+
+       return 0;
+}
+
 static int io_shutdown_prep(struct io_kiocb *req,
                            const struct io_uring_sqe *sqe)
 {
@@ -7756,6 +7843,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                return io_getxattr_prep(req, sqe);
        case IORING_OP_SOCKET:
                return io_socket_prep(req, sqe);
+       case IORING_OP_URING_CMD:
+               return io_uring_cmd_prep(req, sqe);
        }
 
        printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -7783,6 +7872,8 @@ static int io_req_prep_async(struct io_kiocb *req)
                return io_recvmsg_prep_async(req);
        case IORING_OP_CONNECT:
                return io_connect_prep_async(req);
+       case IORING_OP_URING_CMD:
+               return io_uring_cmd_prep_async(req);
        }
        printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n",
                    req->opcode);
@@ -8077,6 +8168,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
        case IORING_OP_SOCKET:
                ret = io_socket(req, issue_flags);
                break;
+       case IORING_OP_URING_CMD:
+               ret = io_uring_cmd(req, issue_flags);
+               break;
        default:
                ret = -EINVAL;
                break;
@@ -12695,6 +12789,8 @@ static int __init io_uring_init(void)
 
        BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32));
 
+       BUILD_BUG_ON(sizeof(struct io_uring_cmd) > 64);
+
        req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
                                SLAB_ACCOUNT);
        return 0;
index bbde95387a23af8daf91f0926069a2090c35bd0f..87b5af1d9fbe037dbcbe404547fac061544c3abb 100644 (file)
@@ -1953,6 +1953,7 @@ struct dir_context {
 #define REMAP_FILE_ADVISORY            (REMAP_FILE_CAN_SHORTEN)
 
 struct iov_iter;
+struct io_uring_cmd;
 
 struct file_operations {
        struct module *owner;
@@ -1995,6 +1996,7 @@ struct file_operations {
                                   struct file *file_out, loff_t pos_out,
                                   loff_t len, unsigned int remap_flags);
        int (*fadvise)(struct file *, loff_t, loff_t, int);
+       int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
 } __randomize_layout;
 
 struct inode_operations {
index 24651c229ed290f8df107be3d0f059556dd17208..b892b038de217e1327ada53d0e39652a483ac767 100644 (file)
@@ -5,7 +5,32 @@
 #include <linux/sched.h>
 #include <linux/xarray.h>
 
+enum io_uring_cmd_flags {
+       IO_URING_F_COMPLETE_DEFER       = 1,
+       IO_URING_F_UNLOCKED             = 2,
+       /* int's last bit, sign checks are usually faster than a bit test */
+       IO_URING_F_NONBLOCK             = INT_MIN,
+
+       /* ctx state flags, for URING_CMD */
+       IO_URING_F_SQE128               = 4,
+       IO_URING_F_CQE32                = 8,
+       IO_URING_F_IOPOLL               = 16,
+};
+
+struct io_uring_cmd {
+       struct file     *file;
+       void            *cmd;
+       /* callback to defer completions to task context */
+       void (*task_work_cb)(struct io_uring_cmd *cmd);
+       u32             cmd_op;
+       u32             pad;
+       u8              pdu[32]; /* available inline for free use */
+};
+
 #if defined(CONFIG_IO_URING)
+void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
+void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+                       void (*task_work_cb)(struct io_uring_cmd *));
 struct sock *io_uring_get_socket(struct file *file);
 void __io_uring_cancel(bool cancel_all);
 void __io_uring_free(struct task_struct *tsk);
@@ -30,6 +55,14 @@ static inline void io_uring_free(struct task_struct *tsk)
                __io_uring_free(tsk);
 }
 #else
+static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
+               ssize_t ret2)
+{
+}
+static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
+                       void (*task_work_cb)(struct io_uring_cmd *))
+{
+}
 static inline struct sock *io_uring_get_socket(struct file *file)
 {
        return NULL;
index ac2d90d669c36a985f7056460ee0958b7cd30cf8..23618be55dd2e6d7847f216541a66e3334d410a5 100644 (file)
@@ -22,6 +22,7 @@ struct io_uring_sqe {
        union {
                __u64   off;    /* offset into file */
                __u64   addr2;
+               __u32   cmd_op;
        };
        union {
                __u64   addr;   /* pointer to buffer or iovecs */
@@ -61,14 +62,17 @@ struct io_uring_sqe {
                __s32   splice_fd_in;
                __u32   file_index;
        };
-       __u64   addr3;
-       __u64   __pad2[1];
-
-       /*
-        * If the ring is initialized with IORING_SETUP_SQE128, then this field
-        * contains 64-bytes of padding, doubling the size of the SQE.
-        */
-       __u64   __big_sqe_pad[0];
+       union {
+               struct {
+                       __u64   addr3;
+                       __u64   __pad2[1];
+               };
+               /*
+                * If the ring is initialized with IORING_SETUP_SQE128, then
+                * this field is used for 80 bytes of arbitrary command data
+                */
+               __u8    cmd[0];
+       };
 };
 
 enum {
@@ -175,6 +179,7 @@ enum io_uring_op {
        IORING_OP_FGETXATTR,
        IORING_OP_GETXATTR,
        IORING_OP_SOCKET,
+       IORING_OP_URING_CMD,
 
        /* this goes last, obviously */
        IORING_OP_LAST,