www.infradead.org Git - nvme.git/commitdiff
io_uring/napi: use ktime in busy polling
authorPavel Begunkov <asml.silence@gmail.com>
Fri, 26 Jul 2024 14:24:30 +0000 (15:24 +0100)
committerJens Axboe <axboe@kernel.dk>
Fri, 26 Jul 2024 14:31:59 +0000 (08:31 -0600)
It's more natural to use ktime/ns instead of keeping around usec,
especially since we're comparing it against user provided timers,
so convert napi busy poll internal handling to ktime. It's also nicer
since the type (ktime_t vs unsigned long) now tells the unit of measure.

Keep everything as ktime, which we convert to/from micro seconds for
IORING_[UN]REGISTER_NAPI. The net/ busy polling code seems to work with
usec, however it's not real usec as shift by 10 is used to get it from
nsecs, see busy_loop_current_time(), so it's easy to get truncated nsec
back and we get back better precision.

Note, we can further improve it later by removing the truncation and
maybe convincing net/ to use ktime/ns instead.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/95e7ec8d095069a3ed5d40a4bc6f8b586698bc7e.1722003776.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
include/linux/io_uring_types.h
io_uring/io_uring.h
io_uring/napi.c
io_uring/napi.h

index e62aa9f0629f9ad1ab33eedde66e469f1dfbf610..3315005df117b8b1c5ddb1fab39db6c77bd7af81 100644 (file)
@@ -404,7 +404,7 @@ struct io_ring_ctx {
        spinlock_t              napi_lock;      /* napi_list lock */
 
        /* napi busy poll default timeout */
-       unsigned int            napi_busy_poll_to;
+       ktime_t                 napi_busy_poll_dt;
        bool                    napi_prefer_busy_poll;
        bool                    napi_enabled;
 
index e1ce908f067992f21db86c45753aaafccf7e6be0..c2acf6180845db760c3ae4917ff8227302139e95 100644 (file)
@@ -43,7 +43,7 @@ struct io_wait_queue {
        ktime_t timeout;
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
-       unsigned int napi_busy_poll_to;
+       ktime_t napi_busy_poll_dt;
        bool napi_prefer_busy_poll;
 #endif
 };
index 327e5f3a8abe01c481440081307a4f7d206b1da3..6bdb267e9c33c1d5239ac5450c779f41975c0d8e 100644 (file)
@@ -33,6 +33,12 @@ static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
        return NULL;
 }
 
+static inline ktime_t net_to_ktime(unsigned long t)
+{
+       /* napi approximating usecs, reverse busy_loop_current_time */
+       return ns_to_ktime(t << 10);
+}
+
 void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
 {
        struct hlist_head *hash_list;
@@ -102,14 +108,14 @@ static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
                __io_napi_remove_stale(ctx);
 }
 
-static inline bool io_napi_busy_loop_timeout(unsigned long start_time,
-                                            unsigned long bp_usec)
+static inline bool io_napi_busy_loop_timeout(ktime_t start_time,
+                                            ktime_t bp)
 {
-       if (bp_usec) {
-               unsigned long end_time = start_time + bp_usec;
-               unsigned long now = busy_loop_current_time();
+       if (bp) {
+               ktime_t end_time = ktime_add(start_time, bp);
+               ktime_t now = net_to_ktime(busy_loop_current_time());
 
-               return time_after(now, end_time);
+               return ktime_after(now, end_time);
        }
 
        return true;
@@ -124,7 +130,8 @@ static bool io_napi_busy_loop_should_end(void *data,
                return true;
        if (io_should_wake(iowq) || io_has_work(iowq->ctx))
                return true;
-       if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to))
+       if (io_napi_busy_loop_timeout(net_to_ktime(start_time),
+                                     iowq->napi_busy_poll_dt))
                return true;
 
        return false;
@@ -181,10 +188,12 @@ static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
  */
 void io_napi_init(struct io_ring_ctx *ctx)
 {
+       u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC;
+
        INIT_LIST_HEAD(&ctx->napi_list);
        spin_lock_init(&ctx->napi_lock);
        ctx->napi_prefer_busy_poll = false;
-       ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
+       ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
 }
 
 /*
@@ -217,7 +226,7 @@ void io_napi_free(struct io_ring_ctx *ctx)
 int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
 {
        const struct io_uring_napi curr = {
-               .busy_poll_to     = ctx->napi_busy_poll_to,
+               .busy_poll_to     = ktime_to_us(ctx->napi_busy_poll_dt),
                .prefer_busy_poll = ctx->napi_prefer_busy_poll
        };
        struct io_uring_napi napi;
@@ -232,7 +241,7 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
        if (copy_to_user(arg, &curr, sizeof(curr)))
                return -EFAULT;
 
-       WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to);
+       WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
        WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
        WRITE_ONCE(ctx->napi_enabled, true);
        return 0;
@@ -249,14 +258,14 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
 int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
 {
        const struct io_uring_napi curr = {
-               .busy_poll_to     = ctx->napi_busy_poll_to,
+               .busy_poll_to     = ktime_to_us(ctx->napi_busy_poll_dt),
                .prefer_busy_poll = ctx->napi_prefer_busy_poll
        };
 
        if (arg && copy_to_user(arg, &curr, sizeof(curr)))
                return -EFAULT;
 
-       WRITE_ONCE(ctx->napi_busy_poll_to, 0);
+       WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
        WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
        WRITE_ONCE(ctx->napi_enabled, false);
        return 0;
@@ -275,23 +284,20 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
 void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
                              struct timespec64 *ts)
 {
-       unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to);
+       ktime_t poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
 
        if (ts) {
                struct timespec64 poll_to_ts;
 
-               poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);
+               poll_to_ts = ns_to_timespec64(ktime_to_ns(poll_dt));
                if (timespec64_compare(ts, &poll_to_ts) < 0) {
                        s64 poll_to_ns = timespec64_to_ns(ts);
-                       if (poll_to_ns > 0) {
-                               u64 val = poll_to_ns + 999;
-                               do_div(val, 1000);
-                               poll_to = val;
-                       }
+                       if (poll_to_ns > 0)
+                               poll_dt = ns_to_ktime(poll_to_ns);
                }
        }
 
-       iowq->napi_busy_poll_to = poll_to;
+       iowq->napi_busy_poll_dt = poll_dt;
 }
 
 /*
@@ -320,7 +326,7 @@ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
        LIST_HEAD(napi_list);
        bool is_stale = false;
 
-       if (!READ_ONCE(ctx->napi_busy_poll_to))
+       if (!READ_ONCE(ctx->napi_busy_poll_dt))
                return 0;
        if (list_empty_careful(&ctx->napi_list))
                return 0;
index 6fc0393d0dbef799fa6eded131876d6c5c29625b..babbee36cd3eb6ff45b4fe9dc18f9626613fb1a3 100644 (file)
@@ -55,7 +55,7 @@ static inline void io_napi_add(struct io_kiocb *req)
        struct io_ring_ctx *ctx = req->ctx;
        struct socket *sock;
 
-       if (!READ_ONCE(ctx->napi_busy_poll_to))
+       if (!READ_ONCE(ctx->napi_busy_poll_dt))
                return;
 
        sock = sock_from_file(req->file);