From: Pavel Begunkov Date: Fri, 18 Apr 2025 12:02:27 +0000 (+0100) Subject: io_uring/zcrx: fix late dma unmap for a dead dev X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=f12ecf5e1c5eca48b8652e893afcdb730384a6aa;p=nvme.git io_uring/zcrx: fix late dma unmap for a dead dev There is a problem with page pools not dma-unmapping immediately when the device is going down, and delaying it until the page pool is destroyed, which is not allowed (see links). That just got fixed for normal page pools, and we need to address memory providers as well. Unmap pages in the memory provider uninstall callback, and protect it with a new lock. There is also a gap between when a dma mapping is created and the mp is installed, so if the device is killed in between, io_uring would be holding on to dma mappings to a dead device with no one to call ->uninstall. Move it to page pool init and rely on ->is_mapped to make sure it's only done once. Link: https://lore.kernel.org/lkml/8067f204-1380-4d37-8ffd-007fc6f26738@kernel.org/T/ Link: https://lore.kernel.org/all/20250409-page-pool-track-dma-v9-0-6a9ef2e0cba8@redhat.com/ Fixes: 34a3e60821ab9 ("io_uring/zcrx: implement zerocopy receive pp memory provider") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/ef9b7db249b14f6e0b570a1bb77ff177389f881c.1744965853.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 5defbe8f95f9..fe86606b9f30 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -51,14 +51,21 @@ static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area) { + guard(mutex)(&ifq->dma_lock); + if (area->is_mapped) __io_zcrx_unmap_area(ifq, area, area->nia.num_niovs); + area->is_mapped = false; } static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area) { int i; + guard(mutex)(&ifq->dma_lock); + if (area->is_mapped) + return 0; + for (i = 0; i < area->nia.num_niovs; i++) { struct net_iov *niov = &area->nia.niovs[i]; dma_addr_t dma; @@ -280,6 +287,7 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx) ifq->ctx = ctx; spin_lock_init(&ifq->lock); spin_lock_init(&ifq->rq_lock); + mutex_init(&ifq->dma_lock); return ifq; } @@ -329,6 +337,7 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq) put_device(ifq->dev); io_free_rbuf_ring(ifq); + mutex_destroy(&ifq->dma_lock); kfree(ifq); } @@ -400,10 +409,6 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, goto err; get_device(ifq->dev); - ret = io_zcrx_map_area(ifq, ifq->area); - if (ret) - goto err; - mp_param.mp_ops = &io_uring_pp_zc_ops; mp_param.mp_priv = ifq; ret = net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param); @@ -624,6 +629,7 @@ static bool io_pp_zc_release_netmem(struct page_pool *pp, netmem_ref netmem) static int io_pp_zc_init(struct page_pool *pp) { struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp); + int ret; if (WARN_ON_ONCE(!ifq)) return -EINVAL; @@ -636,6 +642,10 @@ static int io_pp_zc_init(struct page_pool *pp) if (pp->p.dma_dir != DMA_FROM_DEVICE) return -EOPNOTSUPP; + ret = io_zcrx_map_area(ifq, ifq->area); + if (ret) + return ret; + percpu_ref_get(&ifq->ctx->refs); return 0; } @@ -671,6 +681,9 @@ static void io_pp_uninstall(void *mp_priv, struct netdev_rx_queue *rxq) struct io_zcrx_ifq *ifq = mp_priv; io_zcrx_drop_netdev(ifq); + if (ifq->area) + io_zcrx_unmap_area(ifq, ifq->area); + p->mp_ops = NULL; p->mp_priv = NULL; } diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h index 47f1c0e8c197..f2bc811f022c 100644 --- a/io_uring/zcrx.h +++ b/io_uring/zcrx.h @@ -38,6 +38,7 @@ struct io_zcrx_ifq { struct net_device *netdev; netdevice_tracker netdev_tracker; spinlock_t lock; + struct mutex dma_lock; }; #if defined(CONFIG_IO_URING_ZCRX)