www.infradead.org Git - users/hch/misc.git/commitdiff
io_uring/rsrc: send exact nr_segs for fixed buffer
author Nitesh Shetty <nj.shetty@samsung.com>
Thu, 17 Apr 2025 09:32:34 +0000 (10:32 +0100)
committer Jens Axboe <axboe@kernel.dk>
Thu, 17 Apr 2025 13:42:14 +0000 (07:42 -0600)
Sending the exact nr_segs avoids the bio split check and processing in
the block layer, which take around 5% [1] of overall CPU utilization.

In our setup, we see an overall improvement in IOPS from 7.15M to 7.65M [2]
and 5% less CPU utilization.

[1]
     3.52%  io_uring         [kernel.kallsyms]     [k] bio_split_rw_at
     1.42%  io_uring         [kernel.kallsyms]     [k] bio_split_rw
     0.62%  io_uring         [kernel.kallsyms]     [k] bio_submit_split

[2]
sudo taskset -c 0,1 ./t/io_uring -b512 -d128 -c32 -s32 -p1 -F1 -B1 -n2
-r4 /dev/nvme0n1 /dev/nvme1n1

Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
[Pavel: fixed for kbuf, rebased and reworked on top of cleanups]
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/7a1a49a8d053bd617c244291d63dbfbc07afde36.1744882081.git.asml.silence@gmail.com
[axboe: fold in fix factoring in buf reg offset]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
io_uring/rsrc.c

index 5cf854318b1dd07659887b52e1de988c132bfbad..0c6d7e7415c8b7e32762d3a0d49eaccf76f83647 100644 (file)
@@ -1037,6 +1037,7 @@ static int io_import_fixed(int ddir, struct iov_iter *iter,
                           u64 buf_addr, size_t len)
 {
        const struct bio_vec *bvec;
+       size_t folio_mask;
        unsigned nr_segs;
        size_t offset;
        int ret;
@@ -1067,6 +1068,7 @@ static int io_import_fixed(int ddir, struct iov_iter *iter,
         * 2) all bvecs are the same in size, except potentially the
         *    first and last bvec
         */
+       folio_mask = (1UL << imu->folio_shift) - 1;
        bvec = imu->bvec;
        if (offset >= bvec->bv_len) {
                unsigned long seg_skip;
@@ -1075,10 +1077,9 @@ static int io_import_fixed(int ddir, struct iov_iter *iter,
                offset -= bvec->bv_len;
                seg_skip = 1 + (offset >> imu->folio_shift);
                bvec += seg_skip;
-               offset &= (1UL << imu->folio_shift) - 1;
+               offset &= folio_mask;
        }
-
-       nr_segs = imu->nr_bvecs - (bvec - imu->bvec);
+       nr_segs = (offset + len + bvec->bv_offset + folio_mask) >> imu->folio_shift;
        iov_iter_bvec(iter, ddir, bvec, nr_segs, len);
        iter->iov_offset = offset;
        return 0;