www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
net/rds: reduce memory footprint during rds_sendmsg with IB transport
author Wei Lin Guay <wei.lin.guay@oracle.com>
Thu, 22 Jun 2017 19:52:35 +0000 (21:52 +0200)
committer Dhaval Giani <dhaval.giani@oracle.com>
Wed, 15 Nov 2017 06:18:11 +0000 (01:18 -0500)
The RDS IB large fragment size feature requires order-2 memory allocations,
which introduce memory pressure in the allocation system.  This patch
removes that dependency by using multiple SGEs in the IB work requests to
support large fragment sizes.

Orabug: 26770234

Signed-off-by: Wei Lin Guay <wei.lin.guay@oracle.com>
Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
Tested-by: Shih-Yu Huang <shih-yu.huang@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: Dhaval Giani <dhaval.giani@oracle.com>
net/rds/ib_send.c
net/rds/send.c

index de75c6cd3dd09cc0a16b8d1d46e4b0a11b88348a..b2a7827ebfc715ed6dce9e6a3b33e757d04fa254 100644 (file)
@@ -237,7 +237,9 @@ static struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic,
 void rds_ib_send_init_ring(struct rds_ib_connection *ic)
 {
        struct rds_ib_send_work *send;
+       u32 num_send_sge = ic->i_frag_pages;
        u32 i;
+       u32 j;
 
        for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
                struct ib_sge *sge;
@@ -253,7 +255,8 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)
                sge->length = sizeof(struct rds_header);
                sge->lkey = ic->i_mr->lkey;
 
-               send->s_sge[1].lkey = ic->i_mr->lkey;
+               for (j = 1; j <= num_send_sge; j++)
+                       send->s_sge[j].lkey = ic->i_mr->lkey;
        }
 }
 
@@ -561,6 +564,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
        struct rds_ib_send_work *prev;
        struct ib_send_wr *failed_wr;
        struct scatterlist *scat;
+       int remaining_sge = 0;
        u32 pos;
        u32 i;
        u32 work_alloc;
@@ -573,7 +577,6 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
        int flow_controlled = 0;
        int nr_sig = 0;
 
-       BUG_ON(off % ic->i_frag_sz);
        BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
 
        /* Do not send cong updates to IB loopback */
@@ -715,6 +718,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
        prev = NULL;
        scat = &ic->i_data_op->op_sg[rm->data.op_dmasg];
        i = 0;
+       remaining_sge = rm->data.op_count - sg;
        do {
                unsigned int len = 0;
 
@@ -735,20 +739,27 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
                /* Set up the data, if present */
                if (i < work_alloc
                    && scat != &rm->data.op_sg[rm->data.op_count]) {
-                       len = min((unsigned int)ic->i_frag_sz,
-                                 ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
-                       send->s_wr.num_sge = 2;
-
-                       send->s_sge[1].addr = ib_sg_dma_address(dev, scat);
-                       send->s_sge[1].addr += rm->data.op_dmaoff;
-                       send->s_sge[1].length = len;
-
-                       bytes_sent += len;
-                       rm->data.op_dmaoff += len;
-                       if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
-                               scat++;
-                               rm->data.op_dmasg++;
-                               rm->data.op_dmaoff = 0;
+                       unsigned int num_sge = min_t(unsigned long, remaining_sge,
+                                                    ic->i_frag_pages);
+                       unsigned int j = 1;
+
+                       send->s_wr.num_sge += num_sge;
+                       while (j <= num_sge) {
+                               len = min((unsigned int)PAGE_SIZE,
+                                         ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
+                               send->s_sge[j].addr = ib_sg_dma_address(dev, scat);
+                               send->s_sge[j].addr += rm->data.op_dmaoff;
+                               send->s_sge[j].length = len;
+
+                               bytes_sent += len;
+                               rm->data.op_dmaoff += len;
+                               if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
+                                       scat++;
+                                       rm->data.op_dmasg++;
+                                       rm->data.op_dmaoff = 0;
+                               }
+                               j++;
+                               remaining_sge--;
                        }
                }
 
index c657f43b2b293e0d78f05f05279a5f1a8fb676a2..96e44a4b0faf282158661ef45bbe669ee0661e4d 100644 (file)
@@ -1277,7 +1277,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
        if (payload_len) {
                rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
                ret = rds_message_copy_from_user(rm, &msg->msg_iter, GFP_KERNEL,
-                                                large_page);
+                                                false);
                if (ret)
                        goto out;
        }