www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
brd: use page reference to protect page lifetime
author Yu Kuai <yukuai3@huawei.com>
Mon, 11 Aug 2025 06:56:28 +0000 (14:56 +0800)
committer Jens Axboe <axboe@kernel.dk>
Mon, 1 Sep 2025 14:37:29 +0000 (08:37 -0600)
As discussed in [1], holding the RCU read lock while copying data from/to
a page is too heavy. It is better to hold the RCU read lock only around the
page lookup, and then grab a page reference to prevent the page from being
freed by discard.

[1] https://lore.kernel.org/all/eb41cab3-5946-4fe3-a1be-843dd6fca159@kernel.dk/

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20250811065628.1829339-1-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/block/brd.c

index 0c2eabe14af3b923c1680b72c222b0b5f8feafeb..9778259b30d4aa3eea63f3204156ef779f3f0e9a 100644 (file)
@@ -44,45 +44,74 @@ struct brd_device {
 };
 
 /*
- * Look up and return a brd's page for a given sector.
+ * Look up and return a brd's page with reference grabbed for a given sector.
  */
 static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
 {
-       return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
+       struct page *page;
+       XA_STATE(xas, &brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
+
+       rcu_read_lock();
+repeat:
+       page = xas_load(&xas);
+       if (xas_retry(&xas, page)) {
+               xas_reset(&xas);
+               goto repeat;
+       }
+
+       if (!page)
+               goto out;
+
+       if (!get_page_unless_zero(page)) {
+               xas_reset(&xas);
+               goto repeat;
+       }
+
+       if (unlikely(page != xas_reload(&xas))) {
+               put_page(page);
+               xas_reset(&xas);
+               goto repeat;
+       }
+out:
+       rcu_read_unlock();
+
+       return page;
 }
 
 /*
  * Insert a new page for a given sector, if one does not already exist.
+ * The returned page will grab reference.
  */
 static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
                blk_opf_t opf)
-       __releases(rcu)
-       __acquires(rcu)
 {
        gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
        struct page *page, *ret;
 
-       rcu_read_unlock();
        page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
-       if (!page) {
-               rcu_read_lock();
+       if (!page)
                return ERR_PTR(-ENOMEM);
-       }
 
        xa_lock(&brd->brd_pages);
        ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
                        page, gfp);
-       rcu_read_lock();
-       if (ret) {
+       if (!ret) {
+               brd->brd_nr_pages++;
+               get_page(page);
+               xa_unlock(&brd->brd_pages);
+               return page;
+       }
+
+       if (!xa_is_err(ret)) {
+               get_page(ret);
                xa_unlock(&brd->brd_pages);
-               __free_page(page);
-               if (xa_is_err(ret))
-                       return ERR_PTR(xa_err(ret));
+               put_page(page);
                return ret;
        }
-       brd->brd_nr_pages++;
+
        xa_unlock(&brd->brd_pages);
-       return page;
+       put_page(page);
+       return ERR_PTR(xa_err(ret));
 }
 
 /*
@@ -95,7 +124,7 @@ static void brd_free_pages(struct brd_device *brd)
        pgoff_t idx;
 
        xa_for_each(&brd->brd_pages, idx, page) {
-               __free_page(page);
+               put_page(page);
                cond_resched();
        }
 
@@ -117,7 +146,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
 
        bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
 
-       rcu_read_lock();
        page = brd_lookup_page(brd, sector);
        if (!page && op_is_write(opf)) {
                page = brd_insert_page(brd, sector, opf);
@@ -135,13 +163,13 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
                        memset(kaddr, 0, bv.bv_len);
        }
        kunmap_local(kaddr);
-       rcu_read_unlock();
 
        bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len);
+       if (page)
+               put_page(page);
        return true;
 
 out_error:
-       rcu_read_unlock();
        if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT))
                bio_wouldblock_error(bio);
        else
@@ -149,13 +177,6 @@ out_error:
        return false;
 }
 
-static void brd_free_one_page(struct rcu_head *head)
-{
-       struct page *page = container_of(head, struct page, rcu_head);
-
-       __free_page(page);
-}
-
 static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
 {
        sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
@@ -170,7 +191,7 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
        while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) {
                page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
                if (page) {
-                       call_rcu(&page->rcu_head, brd_free_one_page);
+                       put_page(page);
                        brd->brd_nr_pages--;
                }
                aligned_sector += PAGE_SECTORS;