www.infradead.org Git - users/hch/dma-mapping.git/commitdiff
net/smc: optimize for smc_sndbuf_sync_sg_for_device and smc_rmb_sync_sg_for_cpu
author: Guangguan Wang <guangguan.wang@linux.alibaba.com>
Thu, 14 Jul 2022 09:44:01 +0000 (17:44 +0800)
committer: David S. Miller <davem@davemloft.net>
Mon, 18 Jul 2022 10:19:17 +0000 (11:19 +0100)
Some CPUs, such as Xeon, guarantee DMA cache coherency, so there is
no need to use the DMA sync APIs to flush the cache on such CPUs.
To avoid calling the DMA sync APIs on the I/O path, use
dma_need_sync() to check whether an smc_buf_desc needs DMA sync
when the smc_buf_desc is created.

Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/smc/smc_core.c
net/smc/smc_core.h
net/smc/smc_ib.c
net/smc/smc_ib.h

index 1faa0cb661e4160e985be08f488dd7e5cb646550..fa3a7a851c602eead5d87e9575dfc0acfac633b6 100644 (file)
@@ -2016,6 +2016,9 @@ static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
                goto free_table;
        }
 
+       buf_desc->is_dma_need_sync |=
+               smc_ib_is_sg_need_sync(lnk, buf_desc) << lnk->link_idx;
+
        /* create a new memory region for the RMB */
        if (is_rmb) {
                rc = smc_ib_get_memory_region(lnk->roce_pd,
@@ -2234,6 +2237,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
                /* check for reusable slot in the link group */
                buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
                if (buf_desc) {
+                       buf_desc->is_dma_need_sync = 0;
                        SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
                        SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
                        break; /* found reusable slot */
@@ -2292,6 +2296,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
 {
+       if (!conn->sndbuf_desc->is_dma_need_sync)
+               return;
        if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
            !smc_link_active(conn->lnk))
                return;
@@ -2302,6 +2308,8 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
 {
        int i;
 
+       if (!conn->rmb_desc->is_dma_need_sync)
+               return;
        if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
                return;
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
index c441dfeefa02303409c9a81919fae1dd051cd0ec..46ddec5f1edc65c8bf738fda50c62d4984129a9c 100644 (file)
@@ -180,6 +180,7 @@ struct smc_buf_desc {
                                        /* mem region registered */
                        u8              is_map_ib[SMC_LINKS_PER_LGR_MAX];
                                        /* mem region mapped to lnk */
+                       u8              is_dma_need_sync;
                        u8              is_reg_err;
                                        /* buffer registration err */
                };
index dcda4165d107b76a868805f13d6ce8bcf3fac6dc..60e5095890b1e2c88b72be8183f2529683fe2996 100644 (file)
@@ -729,6 +729,29 @@ int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
        return 0;
 }
 
+bool smc_ib_is_sg_need_sync(struct smc_link *lnk,
+                           struct smc_buf_desc *buf_slot)
+{
+       struct scatterlist *sg;
+       unsigned int i;
+       bool ret = false;
+
+       /* for now there is just one DMA address */
+       for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
+                   buf_slot->sgt[lnk->link_idx].nents, i) {
+               if (!sg_dma_len(sg))
+                       break;
+               if (dma_need_sync(lnk->smcibdev->ibdev->dma_device,
+                                 sg_dma_address(sg))) {
+                       ret = true;
+                       goto out;
+               }
+       }
+
+out:
+       return ret;
+}
+
 /* synchronize buffer usage for cpu access */
 void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
                            struct smc_buf_desc *buf_slot,
@@ -737,6 +760,9 @@ void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
        struct scatterlist *sg;
        unsigned int i;
 
+       if (!(buf_slot->is_dma_need_sync & (1U << lnk->link_idx)))
+               return;
+
        /* for now there is just one DMA address */
        for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
                    buf_slot->sgt[lnk->link_idx].nents, i) {
@@ -757,6 +783,9 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk,
        struct scatterlist *sg;
        unsigned int i;
 
+       if (!(buf_slot->is_dma_need_sync & (1U << lnk->link_idx)))
+               return;
+
        /* for now there is just one DMA address */
        for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
                    buf_slot->sgt[lnk->link_idx].nents, i) {
index 5d8b49c57f507b62ab23a8067000757d0bfd0265..034295676e881039096a097160560fa1e8641a6f 100644 (file)
@@ -102,6 +102,8 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
 int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
                             struct smc_buf_desc *buf_slot, u8 link_idx);
 void smc_ib_put_memory_region(struct ib_mr *mr);
+bool smc_ib_is_sg_need_sync(struct smc_link *lnk,
+                           struct smc_buf_desc *buf_slot);
 void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
                            struct smc_buf_desc *buf_slot,
                            enum dma_data_direction data_direction);