static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
{
+ struct io_uring_region_desc rd;
struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL;
size_t size, sq_array_offset;
+ unsigned i, tail, old_head;
struct io_uring_params p;
- void *ptr;
- unsigned i, tail;
int ret;
/* for single issuer, must be owner resizing */
if (ctx->flags & IORING_SETUP_SINGLE_ISSUER &&
    current != ctx->submitter_task)
return -EEXIST;

if (size == SIZE_MAX)
return -EOVERFLOW;
- if (!(p.flags & IORING_SETUP_NO_MMAP))
- n.rings = io_pages_map(&n.ring_pages, &n.n_ring_pages, size);
- else
- n.rings = __io_uaddr_map(&n.ring_pages, &n.n_ring_pages,
- p.cq_off.user_addr, size);
- if (IS_ERR(n.rings))
- return PTR_ERR(n.rings);
+ memset(&rd, 0, sizeof(rd));
+ rd.size = PAGE_ALIGN(size);
+ if (p.flags & IORING_SETUP_NO_MMAP) {
+ rd.user_addr = p.cq_off.user_addr;
+ rd.flags |= IORING_MEM_REGION_TYPE_USER;
+ }
+ ret = io_create_region_mmap_safe(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING);
+ if (ret) {
+ io_register_free_rings(ctx, &p, &n);
+ return ret;
+ }
+ n.rings = io_region_get_ptr(&n.ring_region);
- n.rings->sq_ring_mask = p.sq_entries - 1;
- n.rings->cq_ring_mask = p.cq_entries - 1;
- n.rings->sq_ring_entries = p.sq_entries;
- n.rings->cq_ring_entries = p.cq_entries;
+ /*
+ * At this point n.rings is shared with userspace, just like o.rings
+ * is as well. While we don't expect userspace to modify it while
+ * a resize is in progress, and it's most likely that userspace will
+ * shoot itself in the foot if it does, we can't always assume good
+ * intent... Use read/write once helpers from here on to indicate the
+ * shared nature of it.
+ */
+ WRITE_ONCE(n.rings->sq_ring_mask, p.sq_entries - 1);
+ WRITE_ONCE(n.rings->cq_ring_mask, p.cq_entries - 1);
+ WRITE_ONCE(n.rings->sq_ring_entries, p.sq_entries);
+ WRITE_ONCE(n.rings->cq_ring_entries, p.cq_entries);
if (copy_to_user(arg, &p, sizeof(p))) {
- io_register_free_rings(&p, &n);
+ io_register_free_rings(ctx, &p, &n);
return -EFAULT;
}
/*
* Now copy SQ and CQ entries, if any. If either of the destination
* rings can't hold what is already there, then fail the operation.
*/
- n.sq_sqes = ptr;
- tail = o.rings->sq.tail;
- if (tail - o.rings->sq.head > p.sq_entries)
+ tail = READ_ONCE(o.rings->sq.tail);
+ old_head = READ_ONCE(o.rings->sq.head);
+ if (tail - old_head > p.sq_entries)
goto overflow;
- for (i = o.rings->sq.head; i < tail; i++) {
+ for (i = old_head; i < tail; i++) {
unsigned src_head = i & (ctx->sq_entries - 1);
- unsigned dst_head = i & n.rings->sq_ring_mask;
+ unsigned dst_head = i & (p.sq_entries - 1);
n.sq_sqes[dst_head] = o.sq_sqes[src_head];
}
- n.rings->sq.head = o.rings->sq.head;
- n.rings->sq.tail = o.rings->sq.tail;
+ WRITE_ONCE(n.rings->sq.head, old_head);
+ WRITE_ONCE(n.rings->sq.tail, tail);
- tail = o.rings->cq.tail;
- if (tail - o.rings->cq.head > p.cq_entries) {
+ tail = READ_ONCE(o.rings->cq.tail);
+ old_head = READ_ONCE(o.rings->cq.head);
+ if (tail - old_head > p.cq_entries) {
overflow:
/* restore old rings, and return -EOVERFLOW via cleanup path */
ctx->rings = o.rings;
ctx->sq_sqes = o.sq_sqes;
to_free = &n;
ret = -EOVERFLOW;
goto out;
}
- for (i = o.rings->cq.head; i < tail; i++) {
+ for (i = old_head; i < tail; i++) {
unsigned src_head = i & (ctx->cq_entries - 1);
- unsigned dst_head = i & n.rings->cq_ring_mask;
+ unsigned dst_head = i & (p.cq_entries - 1);
n.rings->cqes[dst_head] = o.rings->cqes[src_head];
}
- n.rings->cq.head = o.rings->cq.head;
- n.rings->cq.tail = o.rings->cq.tail;
+ WRITE_ONCE(n.rings->cq.head, old_head);
+ WRITE_ONCE(n.rings->cq.tail, tail);
/* invalidate cached cqe refill */
ctx->cqe_cached = ctx->cqe_sentinel = NULL;
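
For reference, a minimal userspace sketch of driving this resize path, assuming a ring created with IORING_SETUP_DEFER_TASKRUN (and, for IORING_SETUP_SINGLE_ISSUER rings, called from the submitter task). The resize_rings() helper and ring_fd below are illustrative and not part of this patch; liburing provides its own wrapper around the same opcode.

/* Hypothetical usage example, not part of the diff above. */
#include <linux/io_uring.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int resize_rings(int ring_fd, unsigned sq_entries, unsigned cq_entries)
{
        struct io_uring_params p;

        memset(&p, 0, sizeof(p));
        p.sq_entries = sq_entries;
        /* asking for an explicit CQ size requires IORING_SETUP_CQSIZE,
         * just as it does at io_uring_setup() time
         */
        p.flags = IORING_SETUP_CQSIZE;
        p.cq_entries = cq_entries;

        /* the opcode takes a single io_uring_params argument (nr_args == 1);
         * on success the kernel copies the effective values back into p
         */
        return syscall(__NR_io_uring_register, ring_fd,
                       IORING_REGISTER_RESIZE_RINGS, &p, 1);
}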