vfio/mlx5: Convert vfio to use DMA link API
author    Leon Romanovsky <leonro@nvidia.com>
          Sun, 21 Apr 2024 14:20:47 +0000 (17:20 +0300)
committer Leon Romanovsky <leon@kernel.org>
          Thu, 3 Oct 2024 16:05:53 +0000 (19:05 +0300)
Remove the intermediate scatter-gather table, which is no longer
needed once the DMA link API is used. This conversion drastically
reduces the memory used to manage that table.
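As a rough sketch (condensed from register_dma_pages() in the diff
below; the dma_set_iova_state() setup and error unwinding are
omitted), the new flow links every page into one contiguous IOVA
range when the device supports it, falls back to per-page
dma_map_page() otherwise, and writes the resulting DMA addresses
straight into the mkey MTT array:

	err = dma_alloc_iova(state, npages * PAGE_SIZE);
	if (err)
		return err;

	err = dma_start_range(state);
	if (err) {
		dma_free_iova(state);
		return err;
	}
	for (i = 0; i < npages; i++) {
		if (dma_can_use_iova(state))
			/* IOVA path: link the page into one range */
			addr = dma_link_range(state,
					      page_to_phys(page_list[i]),
					      PAGE_SIZE);
		else
			/* fallback: classic per-page streaming mapping */
			addr = dma_map_page(mdev->device, page_list[i], 0,
					    PAGE_SIZE, state->dir);
		*mtt++ = cpu_to_be64(addr);
	}
	dma_end_range(state);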

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
drivers/vfio/pci/mlx5/cmd.c
drivers/vfio/pci/mlx5/cmd.h
drivers/vfio/pci/mlx5/main.c

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 34ae3e299a9ed9d6faa929612f6466e1887c0871..2a846bb6d8bed0b53f64dae644be73c595ee297e 100644
@@ -345,25 +345,78 @@ static u32 *alloc_mkey_in(u32 npages, u32 pdn)
        return in;
 }
 
-static int create_mkey(struct mlx5_core_dev *mdev, u32 npages,
-                      struct mlx5_vhca_data_buffer *buf, u32 *mkey_in,
+static int create_mkey(struct mlx5_core_dev *mdev, u32 npages, u32 *mkey_in,
                       u32 *mkey)
 {
+       int inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
+               sizeof(__be64) * round_up(npages, 2);
+
+       return mlx5_core_create_mkey(mdev, mkey, mkey_in, inlen);
+}
+
+static void unregister_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
+                                u32 *mkey_in, struct dma_iova_state *state)
+{
+       dma_addr_t addr;
        __be64 *mtt;
-       int inlen;
+       int i;
 
-       mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, klm_pas_mtt);
-       if (buf) {
-               struct sg_dma_page_iter dma_iter;
+       WARN_ON_ONCE(state->dir == DMA_NONE);
 
-               for_each_sgtable_dma_page(&buf->table.sgt, &dma_iter, 0)
-                       *mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
+       if (dma_can_use_iova(state)) {
+               dma_unlink_range(state);
+       } else {
+               mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in,
+                                            klm_pas_mtt);
+               for (i = npages - 1; i >= 0; i--) {
+                       addr = be64_to_cpu(mtt[i]);
+                       dma_unmap_page(state->dev, addr, PAGE_SIZE, state->dir);
+               }
        }
+       dma_free_iova(state);
+}
 
-       inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
-               sizeof(__be64) * round_up(npages, 2);
+static int register_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
+                             struct page **page_list, u32 *mkey_in,
+                             struct dma_iova_state *state)
+{
+       dma_addr_t addr;
+       __be64 *mtt;
+       int i, err;
 
-       return mlx5_core_create_mkey(mdev, mkey, mkey_in, inlen);
+       WARN_ON_ONCE(state->dir == DMA_NONE);
+
+       err = dma_alloc_iova(state, npages * PAGE_SIZE);
+       if (err)
+               return err;
+
+       dma_set_iova_state(state, page_list[0], PAGE_SIZE);
+
+       mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, klm_pas_mtt);
+       err = dma_start_range(state);
+       if (err) {
+               dma_free_iova(state);
+               return err;
+       }
+       for (i = 0; i < npages; i++) {
+               if (dma_can_use_iova(state))
+                       addr = dma_link_range(state, page_to_phys(page_list[i]),
+                                             PAGE_SIZE);
+               else
+                       addr = dma_map_page(mdev->device, page_list[i], 0,
+                                           PAGE_SIZE, state->dir);
+               err = dma_mapping_error(mdev->device, addr);
+               if (err)
+                       goto error;
+               *mtt++ = cpu_to_be64(addr);
+       }
+       dma_end_range(state);
+
+       return 0;
+
+error:
+       unregister_dma_pages(mdev, i, mkey_in, state);
+       return err;
 }
 
 static int mlx5vf_dma_data_buffer(struct mlx5_vhca_data_buffer *buf)
@@ -379,50 +432,56 @@ static int mlx5vf_dma_data_buffer(struct mlx5_vhca_data_buffer *buf)
        if (buf->mkey_in || !buf->npages)
                return -EINVAL;
 
-       ret = dma_map_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);
-       if (ret)
-               return ret;
-
        buf->mkey_in = alloc_mkey_in(buf->npages, buf->migf->pdn);
-       if (!buf->mkey_in) {
-               ret = -ENOMEM;
-               goto err;
-       }
+       if (!buf->mkey_in)
+               return -ENOMEM;
 
-       ret = create_mkey(mdev, buf->npages, buf, buf->mkey_in, &buf->mkey);
+       ret = register_dma_pages(mdev, buf->npages, buf->page_list,
+                                buf->mkey_in, &buf->state);
+       if (ret)
+               goto err_register_dma;
+
+       ret = create_mkey(mdev, buf->npages, buf->mkey_in, &buf->mkey);
        if (ret)
                goto err_create_mkey;
 
        return 0;
 
 err_create_mkey:
+       unregister_dma_pages(mdev, buf->npages, buf->mkey_in, &buf->state);
+err_register_dma:
        kvfree(buf->mkey_in);
        buf->mkey_in = NULL;
-err:
-       dma_unmap_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);
        return ret;
 }
 
+static void free_page_list(u32 npages, struct page **page_list)
+{
+       int i;
+
+       /* Undo alloc_pages_bulk_array() */
+       for (i = npages - 1; i >= 0; i--)
+               __free_page(page_list[i]);
+
+       kvfree(page_list);
+}
+
 void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
 {
-       struct mlx5_vf_migration_file *migf = buf->migf;
-       struct sg_page_iter sg_iter;
+       struct mlx5vf_pci_core_device *mvdev = buf->migf->mvdev;
+       struct mlx5_core_dev *mdev = mvdev->mdev;
 
-       lockdep_assert_held(&migf->mvdev->state_mutex);
-       WARN_ON(migf->mvdev->mdev_detach);
+       lockdep_assert_held(&mvdev->state_mutex);
+       WARN_ON(mvdev->mdev_detach);
 
        if (buf->mkey_in) {
-               mlx5_core_destroy_mkey(migf->mvdev->mdev, buf->mkey);
+               mlx5_core_destroy_mkey(mdev, buf->mkey);
+               unregister_dma_pages(mdev, buf->npages, buf->mkey_in,
+                                    &buf->state);
                kvfree(buf->mkey_in);
-               dma_unmap_sgtable(migf->mvdev->mdev->device, &buf->table.sgt,
-                                 buf->dma_dir, 0);
        }
 
-       /* Undo alloc_pages_bulk_array() */
-       for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
-               __free_page(sg_page_iter_page(&sg_iter));
-       sg_free_append_table(&buf->table);
-       kvfree(buf->page_list);
+       free_page_list(buf->npages, buf->page_list);
        kfree(buf);
 }
 
@@ -433,7 +492,6 @@ static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
        struct page **page_list;
        unsigned long filled;
        unsigned int to_fill;
-       int ret;
 
        to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*buf->page_list));
        page_list = kvzalloc(to_fill * sizeof(*buf->page_list), GFP_KERNEL_ACCOUNT);
@@ -443,22 +501,13 @@ static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
        buf->page_list = page_list;
 
        do {
-               filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
-                               buf->page_list + buf->npages);
+               filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_alloc,
+                                               buf->page_list + buf->npages);
                if (!filled)
                        return -ENOMEM;
 
                to_alloc -= filled;
-               ret = sg_alloc_append_table_from_pages(
-                       &buf->table, buf->page_list + buf->npages, filled, 0,
-                       filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
-                       GFP_KERNEL_ACCOUNT);
-
-               if (ret)
-                       return ret;
                buf->npages += filled;
-               to_fill = min_t(unsigned int, to_alloc,
-                               PAGE_SIZE / sizeof(*buf->page_list));
        } while (to_alloc > 0);
 
        return 0;
@@ -468,6 +517,7 @@ struct mlx5_vhca_data_buffer *
 mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
                         enum dma_data_direction dma_dir)
 {
+       struct mlx5_core_dev *mdev = migf->mvdev->mdev;
        struct mlx5_vhca_data_buffer *buf;
        int ret;
 
@@ -475,7 +525,7 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
        if (!buf)
                return ERR_PTR(-ENOMEM);
 
-       buf->dma_dir = dma_dir;
+       dma_init_iova_state(&buf->state, mdev->device, dma_dir);
        buf->migf = migf;
        if (npages) {
                ret = mlx5vf_add_migration_pages(buf, npages);
@@ -518,7 +568,7 @@ mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
 
        spin_lock_irq(&migf->list_lock);
        list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
-               if (buf->dma_dir == dma_dir) {
+               if (buf->state.dir == dma_dir) {
                        list_del_init(&buf->buf_elm);
                        if (buf->npages >= npages) {
                                spin_unlock_irq(&migf->list_lock);
@@ -1340,17 +1390,6 @@ static void mlx5vf_destroy_qp(struct mlx5_core_dev *mdev,
        kfree(qp);
 }
 
-static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf)
-{
-       int i;
-
-       /* Undo alloc_pages_bulk_array() */
-       for (i = 0; i < recv_buf->npages; i++)
-               __free_page(recv_buf->page_list[i]);
-
-       kvfree(recv_buf->page_list);
-}
-
 static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf,
                            unsigned int npages)
 {
@@ -1386,45 +1425,6 @@ err:
        kvfree(recv_buf->page_list);
        return -ENOMEM;
 }
-static void unregister_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
-                                u32 *mkey_in)
-{
-       dma_addr_t addr;
-       __be64 *mtt;
-       int i;
-
-       mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, klm_pas_mtt);
-       for (i = npages - 1; i >= 0; i--) {
-               addr = be64_to_cpu(mtt[i]);
-               dma_unmap_single(mdev->device, addr, PAGE_SIZE,
-                               DMA_FROM_DEVICE);
-       }
-}
-
-static int register_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
-                             struct page **page_list, u32 *mkey_in)
-{
-       dma_addr_t addr;
-       __be64 *mtt;
-       int i;
-
-       mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, klm_pas_mtt);
-
-       for (i = 0; i < npages; i++) {
-               addr = dma_map_page(mdev->device, page_list[i], 0, PAGE_SIZE,
-                                   DMA_FROM_DEVICE);
-               if (dma_mapping_error(mdev->device, addr))
-                       goto error;
-
-               *mtt++ = cpu_to_be64(addr);
-       }
-
-       return 0;
-
-error:
-       unregister_dma_pages(mdev, i, mkey_in);
-       return -ENOMEM;
-}
 
 static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev,
                                          struct mlx5_vhca_qp *qp)
@@ -1432,9 +1432,10 @@ static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev,
        struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
 
        mlx5_core_destroy_mkey(mdev, recv_buf->mkey);
-       unregister_dma_pages(mdev, recv_buf->npages, recv_buf->mkey_in);
+       unregister_dma_pages(mdev, recv_buf->npages, recv_buf->mkey_in,
+                            &recv_buf->state);
        kvfree(recv_buf->mkey_in);
-       free_recv_pages(&qp->recv_buf);
+       free_page_list(recv_buf->npages, recv_buf->page_list);
 }
 
 static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
@@ -1455,25 +1456,25 @@ static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
                goto end;
        }
 
+       recv_buf->state.dir = DMA_FROM_DEVICE;
        err = register_dma_pages(mdev, npages, recv_buf->page_list,
-                                recv_buf->mkey_in);
+                                recv_buf->mkey_in, &recv_buf->state);
        if (err)
                goto err_register_dma;
 
-       err = create_mkey(mdev, npages, NULL, recv_buf->mkey_in,
-                         &recv_buf->mkey);
+       err = create_mkey(mdev, npages, recv_buf->mkey_in, &recv_buf->mkey);
        if (err)
                goto err_create_mkey;
 
        return 0;
 
 err_create_mkey:
-       unregister_dma_pages(mdev, npages, recv_buf->mkey_in);
+       unregister_dma_pages(mdev, npages, recv_buf->mkey_in, &recv_buf->state);
 err_register_dma:
        kvfree(recv_buf->mkey_in);
        recv_buf->mkey_in = NULL;
 end:
-       free_recv_pages(recv_buf);
+       free_page_list(npages, recv_buf->page_list);
        return err;
 }
 
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 5b764199db539fd5bd2cb2a907cbef7f8e8fefee..8b0cd0ee11a052aa3e6adf841eb36be889a9d505 100644
@@ -54,20 +54,15 @@ struct mlx5_vf_migration_header {
 
 struct mlx5_vhca_data_buffer {
        struct page **page_list;
-       struct sg_append_table table;
+       struct dma_iova_state state;
        loff_t start_pos;
        u64 length;
        u32 npages;
        u32 mkey;
        u32 *mkey_in;
-       enum dma_data_direction dma_dir;
        u8 stop_copy_chunk_num;
        struct list_head buf_elm;
        struct mlx5_vf_migration_file *migf;
-       /* Optimize mlx5vf_get_migration_page() for sequential access */
-       struct scatterlist *last_offset_sg;
-       unsigned int sg_last_entry;
-       unsigned long last_offset;
 };
 
 struct mlx5vf_async_data {
@@ -134,6 +129,7 @@ struct mlx5_vhca_cq {
 struct mlx5_vhca_recv_buf {
        u32 npages;
        struct page **page_list;
+       struct dma_iova_state state;
        u32 next_rq_offset;
        u32 *mkey_in;
        u32 mkey;
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index a1dbee3be1e0bde0cc141a3bccc9c3ea6eb595e8..8619660199a355ce06a5f02de7a3404ff32040b8 100644
@@ -34,35 +34,10 @@ static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev)
                            core_device);
 }
 
-struct page *
-mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
-                         unsigned long offset)
+struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
+                                      unsigned long offset)
 {
-       unsigned long cur_offset = 0;
-       struct scatterlist *sg;
-       unsigned int i;
-
-       /* All accesses are sequential */
-       if (offset < buf->last_offset || !buf->last_offset_sg) {
-               buf->last_offset = 0;
-               buf->last_offset_sg = buf->table.sgt.sgl;
-               buf->sg_last_entry = 0;
-       }
-
-       cur_offset = buf->last_offset;
-
-       for_each_sg(buf->last_offset_sg, sg,
-                       buf->table.sgt.orig_nents - buf->sg_last_entry, i) {
-               if (offset < sg->length + cur_offset) {
-                       buf->last_offset_sg = sg;
-                       buf->sg_last_entry += i;
-                       buf->last_offset = cur_offset;
-                       return nth_page(sg_page(sg),
-                                       (offset - cur_offset) / PAGE_SIZE);
-               }
-               cur_offset += sg->length;
-       }
-       return NULL;
+       return buf->page_list[offset / PAGE_SIZE];
 }
 
 static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
@@ -121,7 +96,7 @@ static void mlx5vf_buf_read_done(struct mlx5_vhca_data_buffer *vhca_buf)
        struct mlx5_vf_migration_file *migf = vhca_buf->migf;
 
        if (vhca_buf->stop_copy_chunk_num) {
-               bool is_header = vhca_buf->dma_dir == DMA_NONE;
+               bool is_header = vhca_buf->state.dir == DMA_NONE;
                u8 chunk_num = vhca_buf->stop_copy_chunk_num;
                size_t next_required_umem_size = 0;