From bbcacab2e8ee373eb8f4bc613912e7c203deb820 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 7 May 2025 08:06:35 +0200
Subject: [PATCH] brd: avoid extra xarray lookups on first write

The xarray can return the previous entry at a location.  Use this fact
to simplify the brd code when there is no existing page at a location.
This also slightly improves the handling of racy discards, as we now
always have a page under RCU protection by the time we are ready to
copy the data.

Signed-off-by: Christoph Hellwig
Reviewed-by: Yu Kuai
Link: https://lore.kernel.org/r/20250507060700.3929430-1-hch@lst.de
Signed-off-by: Jens Axboe
---
 drivers/block/brd.c | 76 ++++++++++++++++++++-------------------------
 1 file changed, 33 insertions(+), 43 deletions(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index a3725673cf16..b1be6c510372 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -54,32 +54,33 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
 /*
  * Insert a new page for a given sector, if one does not already exist.
  */
-static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
+static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
+		blk_opf_t opf)
+	__releases(rcu)
+	__acquires(rcu)
 {
-	pgoff_t idx = sector >> PAGE_SECTORS_SHIFT;
-	struct page *page;
-	int ret = 0;
-
-	page = brd_lookup_page(brd, sector);
-	if (page)
-		return 0;
+	gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
+	struct page *page, *ret;
 
+	rcu_read_unlock();
 	page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
+	rcu_read_lock();
 	if (!page)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	xa_lock(&brd->brd_pages);
-	ret = __xa_insert(&brd->brd_pages, idx, page, gfp);
-	if (!ret)
-		brd->brd_nr_pages++;
-	xa_unlock(&brd->brd_pages);
-
-	if (ret < 0) {
+	ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
+			page, gfp);
+	if (ret) {
+		xa_unlock(&brd->brd_pages);
 		__free_page(page);
-		if (ret == -EBUSY)
-			ret = 0;
+		if (xa_is_err(ret))
+			return ERR_PTR(xa_err(ret));
+		return ret;
 	}
-	return ret;
+	brd->brd_nr_pages++;
+	xa_unlock(&brd->brd_pages);
+	return page;
 }
 
 /*
@@ -114,36 +115,17 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
 
 	bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
 
-	if (op_is_write(opf)) {
-		int err;
-
-		/*
-		 * Must use NOIO because we don't want to recurse back into the
-		 * block or filesystem layers from page reclaim.
-		 */
-		err = brd_insert_page(brd, sector,
-				(opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO);
-		if (err) {
-			if (err == -ENOMEM && (opf & REQ_NOWAIT))
-				bio_wouldblock_error(bio);
-			else
-				bio_io_error(bio);
-			return false;
-		}
-	}
-
 	rcu_read_lock();
 	page = brd_lookup_page(brd, sector);
+	if (!page && op_is_write(opf)) {
+		page = brd_insert_page(brd, sector, opf);
+		if (IS_ERR(page))
+			goto out_error;
+	}
 
 	kaddr = bvec_kmap_local(&bv);
 	if (op_is_write(opf)) {
-		/*
-		 * Page can be removed by concurrent discard, it's fine to skip
-		 * the write and user will read zero data if page does not
-		 * exist.
-		 */
-		if (page)
-			memcpy_to_page(page, offset, kaddr, bv.bv_len);
+		memcpy_to_page(page, offset, kaddr, bv.bv_len);
 	} else {
 		if (page)
 			memcpy_from_page(kaddr, page, offset, bv.bv_len);
@@ -155,6 +137,14 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
 	bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len);
 
 	return true;
+
+out_error:
+	rcu_read_unlock();
+	if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT))
+		bio_wouldblock_error(bio);
+	else
+		bio_io_error(bio);
+	return false;
 }
 
 static void brd_free_one_page(struct rcu_head *head)
-- 
2.50.1
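
As background on the xarray behaviour the commit message refers to, below is a
minimal, hypothetical sketch of the lookup-or-insert idiom: the helper name
lookup_or_insert_page and its bare struct xarray parameter are illustrative
only, not brd code. The point is that xa_cmpxchg() returns the previous entry
at the index, so a single call both detects a racing insertion and hands back
the page that won the race.

	#include <linux/xarray.h>
	#include <linux/gfp.h>
	#include <linux/mm.h>
	#include <linux/err.h>

	static struct page *lookup_or_insert_page(struct xarray *pages,
			unsigned long index, gfp_t gfp)
	{
		struct page *page, *old;

		/* Fast path: the page already exists. */
		page = xa_load(pages, index);
		if (page)
			return page;

		page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
		if (!page)
			return ERR_PTR(-ENOMEM);

		/* NULL as "old" means: only store if the slot is still empty. */
		old = xa_cmpxchg(pages, index, NULL, page, gfp);
		if (old) {
			/* Either a real error or someone else inserted first. */
			__free_page(page);
			if (xa_is_err(old))
				return ERR_PTR(xa_err(old));
			return old;
		}
		return page;
	}

brd itself uses the __xa_cmpxchg() variant under xa_lock() so that
brd->brd_nr_pages can be incremented under the same lock.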