blk-mq: add scatterlist-less DMA mapping helpers
author Christoph Hellwig <hch@lst.de>
Sat, 5 Oct 2024 05:46:26 +0000 (07:46 +0200)
committer Christoph Hellwig <hch@lst.de>
Sat, 5 Oct 2024 17:19:34 +0000 (19:19 +0200)
Add a new blk_rq_dma_map / blk_rq_dma_unmap pair that does away with
the wasteful scatterlist structure.  Instead it uses the mapping iterator
to either add segments to the IOVA for IOMMU operations, or to map them
one by one for the direct mapping.  For the IOMMU case, instead of
a scatterlist with an entry for each segment, only a single [dma_addr,len]
pair needs to be stored for processing a request, and for the direct
mapping the per-segment allocation shrinks from
[page,offset,len,dma_addr,dma_len] to just [dma_addr,len].
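
For illustration, a rough driver-side sketch of how the new pair could
be consumed.  The foo_* names, foo_cmd layout and helpers are made up;
only blk_rq_dma_map / blk_rq_dma_unmap, struct blk_dma_mapping and
struct dma_iova_state come from this series:

/*
 * Hypothetical driver-side sketch: map the request, program one
 * DMA address/length pair per entry, and unmap on completion.
 * Assumes foo_cmd embeds a dma_iova_state and a blk_dma_mapping.
 */
static blk_status_t foo_queue_rq(struct foo_queue *fq, struct request *req)
{
	struct foo_cmd *cmd = blk_mq_rq_to_pdu(req);
	blk_status_t ret;
	unsigned int i;

	ret = blk_rq_dma_map(req, fq->dma_dev, &cmd->dma_state, &cmd->dma);
	if (ret != BLK_STS_OK)
		return ret;

	/* One entry with an IOMMU, one per physical segment otherwise. */
	for (i = 0; i < cmd->dma.nr_entries; i++)
		foo_set_sge(cmd, i, cmd->dma.map[i].addr, cmd->dma.map[i].len);

	foo_submit(fq, cmd);
	return BLK_STS_OK;
}

static void foo_complete_rq(struct request *req)
{
	struct foo_cmd *cmd = blk_mq_rq_to_pdu(req);

	blk_rq_dma_unmap(&cmd->dma_state, &cmd->dma);
	blk_mq_end_request(req, BLK_STS_OK);
}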

The major downside of this API is that the IOVA collapsing only works
when the driver sets a virt_boundary that matches the IOMMU granule.
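
For example, a driver could derive the boundary from the DMA layer
(a sketch assuming the queue_limits style setup; dma_get_merge_boundary()
reports the granule mask, or 0 when there is no IOMMU):

/*
 * Sketch: make the queue's virt_boundary match the IOMMU granule so
 * that all segments of a request can be collapsed into one IOVA range.
 */
static void foo_set_limits(struct queue_limits *lim, struct device *dma_dev)
{
	unsigned long boundary = dma_get_merge_boundary(dma_dev);

	if (boundary)
		lim->virt_boundary_mask = boundary;
}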

Note that struct blk_dma_vec, struct blk_dma_mapping and blk_rq_dma_unmap
aren't really block specific, but for now they are kept with the only
mapping routine to keep things simple.

Signed-off-by: Christoph Hellwig <hch@lst.de>
block/blk-merge.c
include/linux/blk-mq.h

index d77d62815c0fbbc7058f7bcaae88e49e6a9b12b0..3a22b62e42d0d13dc7b29df809db541348ef8f91 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -7,6 +7,7 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/blk-integrity.h>
+#include <linux/dma-mapping.h>
 #include <linux/scatterlist.h>
 #include <linux/part_stat.h>
 #include <linux/blk-cgroup.h>
@@ -521,6 +522,102 @@ static bool blk_map_iter_next(struct request *req,
        return true;
 }
 
+#define blk_phys_to_page(_paddr) \
+       (pfn_to_page(__phys_to_pfn(_paddr)))
+
+blk_status_t blk_rq_dma_map(struct request *req, struct device *dma_dev,
+               struct dma_iova_state *state, struct blk_dma_mapping *dma)
+{
+       unsigned short nr_segments = blk_rq_nr_phys_segments(req);
+       unsigned int total_len = blk_rq_payload_bytes(req);
+       DEFINE_REQ_ITERATOR(iter, req);
+       struct phys_vec vec;
+       int error;
+
+       dma_init_iova_state(state, dma_dev, rq_dma_dir(req));
+
+       dma->nr_entries = 0;
+       dma->map = &dma->single_map;
+       if (nr_segments > 1 && !dma_can_use_iova(state)) {
+               /*
+                * XXX: this needs to be backed by a mempool and move to common
+                * code.
+                */
+               dma->map = kmalloc_array(nr_segments, sizeof(*dma->map),
+                               GFP_ATOMIC);
+               if (!dma->map)
+                       return BLK_STS_RESOURCE;
+       }
+
+       /*
+        * Grab the first segment ASAP because we'll need it to check alignment
+        * for the IOVA allocation and to check for a P2P transfer.
+        */
+       if (!blk_map_iter_next(req, &iter, &vec))
+               return BLK_STS_OK;
+
+       /* XXX: how is passing the page without offset going to work here? */
+       dma_set_iova_state(state, blk_phys_to_page(vec.paddr), vec.len);
+
+       error = dma_start_range(state);
+       if (error)
+               return errno_to_blk_status(error);
+
+       /*
+        * XXX: For a single segment doing the simple dma_map_page is probably
+        * still going to be faster with an iommu.
+        */
+       if (dma_can_use_iova(state)) {
+               dma->map[0].len = total_len;
+               dma->map[0].addr = dma_alloc_iova(state, vec.paddr, total_len);
+               if (dma_mapping_error(state->dev, dma->map[0].addr))
+                       return BLK_STS_RESOURCE;
+
+               do {
+                       error = dma_link_range(state, vec.paddr, vec.len);
+                       if (error)
+                               return errno_to_blk_status(error);
+               } while (blk_map_iter_next(req, &iter, &vec));
+
+               dma->nr_entries++;
+       } else {
+               do {
+                       struct blk_dma_vec *ent = &dma->map[dma->nr_entries];
+                       struct page *page = blk_phys_to_page(vec.paddr);
+                       unsigned int offset = offset_in_page(vec.paddr);
+
+                       ent->addr = dma_map_page(state->dev, page, offset,
+                                       vec.len, rq_dma_dir(req));
+                       if (dma_mapping_error(state->dev, ent->addr))
+                               return BLK_STS_RESOURCE;
+                       ent->len = vec.len;
+                       dma->nr_entries++;
+               } while (blk_map_iter_next(req, &iter, &vec));
+       }
+
+       dma_end_range(state);
+       return BLK_STS_OK;
+}
+
+void blk_rq_dma_unmap(struct dma_iova_state *state, struct blk_dma_mapping *dma)
+{
+       if (dma_can_use_iova(state)) {
+               WARN_ON_ONCE(dma->nr_entries > 1);
+               dma_unlink_and_free_iova(state, dma->map[0].addr,
+                               dma->map[0].len);
+       } else {
+               unsigned int i;
+
+               for (i = 0; i < dma->nr_entries; i++) {
+                       dma_unmap_page(state->dev, dma->map[i].addr,
+                                       dma->map[i].len, state->dir);
+               }
+       }
+
+       if (dma->map != &dma->single_map)
+               kfree(dma->map);
+}
+
 static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
                struct scatterlist *sglist)
 {
@@ -550,7 +647,7 @@ int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
        int nsegs = 0;
 
        while (blk_map_iter_next(rq, &iter, &vec)) {
-               struct page *page = pfn_to_page(__phys_to_pfn(vec.paddr));
+               struct page *page = blk_phys_to_page(vec.paddr);
                unsigned int offset = offset_in_page(vec.paddr);
 
                *last_sg = blk_next_sg(last_sg, sglist);
index 4fecf46ef681b357d83e7c64433e1035269fb50c..6534a32ce79a7e367e37686171eeb79691e03865 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -12,6 +12,7 @@
 
 struct blk_mq_tags;
 struct blk_flush_queue;
+struct dma_iova_state;
 
 #define BLKDEV_MIN_RQ  4
 #define BLKDEV_DEFAULT_RQ      128
@@ -1165,6 +1166,21 @@ static inline unsigned short blk_rq_nr_discard_segments(struct request *rq)
        return max_t(unsigned short, rq->nr_phys_segments, 1);
 }
 
+struct blk_dma_vec {
+       dma_addr_t      addr;
+       u32             len;
+};
+
+struct blk_dma_mapping {
+       struct blk_dma_vec *map;
+       struct blk_dma_vec single_map;
+       unsigned int nr_entries;
+};
+
+blk_status_t blk_rq_dma_map(struct request *req, struct device *dma_dev,
+               struct dma_iova_state *state, struct blk_dma_mapping *dma);
+void blk_rq_dma_unmap(struct dma_iova_state *state, struct blk_dma_mapping *dma);
+
 int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
                struct scatterlist *sglist, struct scatterlist **last_sg);
 static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,