RDS: IB: make fragment size (RDS_FRAG_SIZE) dynamic
Author:     Santosh Shilimkar <santosh.shilimkar@oracle.com>
AuthorDate: Wed, 21 Oct 2015 23:47:28 +0000 (16:47 -0700)
Commit:     Chuck Anderson <chuck.anderson@oracle.com>
CommitDate: Thu, 14 Apr 2016 00:58:07 +0000 (17:58 -0700)
The IB fabric is capable of fragmenting up to 4GB of data payload
into send_first, send_middle and send_last packets. Nevertheless,
RDS fragments each payload at RDS_FRAG_SIZE granularity, which is
PAGE_SIZE (usually 4KB). This patch makes RDS_FRAG_SIZE dynamic
for the RDS IB transport.

In preparation for subsequent patch(es), this patch adds
per-connection peer negotiation to determine the supported
fragment size for the IB transport.

Orabug: 21894138
Reviewed-by: Wei Lin Guay <wei.lin.guay@oracle.com>
Signed-off-by: Wei Lin Guay <wei.lin.guay@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
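
The peer negotiation mentioned above is fleshed out by subsequent
patches; this patch only introduces the per-connection i_frag_sz
field that the negotiated value lands in. As a rough sketch of the
idea, each peer advertises the largest fragment it supports and
both sides adopt the minimum. The helper and the peer_frag_sz value
below are illustrative assumptions, not part of this patch:

/* Illustrative sketch only: settle on a fragment size both peers
 * support. peer_frag_sz is assumed to arrive during connection
 * setup; 0 means the peer negotiated nothing, so fall back to the
 * classic PAGE_SIZE-based RDS_FRAG_SIZE.
 */
static void rds_ib_set_frag_size(struct rds_ib_connection *ic,
				 u16 local_frag_sz, u16 peer_frag_sz)
{
	if (peer_frag_sz)
		ic->i_frag_sz = min(local_frag_sz, peer_frag_sz);
	else
		ic->i_frag_sz = RDS_FRAG_SIZE;
}

Note that i_frag_sz is declared u16 in the hunk below, which bounds
a negotiated fragment at 64KB even though the fabric itself could
go far larger.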
net/rds/ib.h
net/rds/ib_recv.c
net/rds/ib_send.c

diff --git a/net/rds/ib.h b/net/rds/ib.h
index 1c24e3c33cf9f2c00c001763f011e4c3a2c6e928..8f3c967262e7c72c870f0eb430149e58459e949e 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -53,7 +53,7 @@ extern struct rw_semaphore rds_ib_devices_lock;
 extern struct list_head rds_ib_devices;
 
 /*
- * IB posts RDS_FRAG_SIZE fragments of pages to the receive queues to
+ * IB posts i_frag_sz fragments of pages to the receive queues to
  * try and minimize the amount of memory tied up both the device and
  * socket receive queues.
  */
@@ -215,6 +215,7 @@ struct rds_ib_connection {
 
        /* Protocol version specific information */
        unsigned int            i_flowctl:1;    /* enable/disable flow ctl */
+       u16                     i_frag_sz;      /* IB fragment size */
 
        /* Batched completions */
        unsigned int            i_unsignaled_wrs;
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index f1bdeaaf2c530b5bf6b5fe9794acabffe02cf167..f3425bf31c89823977dff98bfc8e3c9f9640a861 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -81,7 +81,7 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
 
                sge = &recv->r_sge[1];
                sge->addr = 0;
-               sge->length = RDS_FRAG_SIZE;
+               sge->length = ic->i_frag_sz;
                sge->lkey = ic->i_mr->lkey;
        }
 }
@@ -310,7 +310,7 @@ static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic
 
                sg_init_table(&frag->f_sg, 1);
                ret = rds_page_remainder_alloc(&frag->f_sg,
-                                              RDS_FRAG_SIZE, page_mask);
+                                              ic->i_frag_sz, page_mask);
                if (ret) {
                        kmem_cache_free(rds_ib_frag_slab, frag);
                        atomic_dec(&rds_ib_allocation);
@@ -481,7 +481,7 @@ static int rds_ib_srq_prefill_one(struct rds_ib_device *rds_ibdev,
                goto out;
        sg_init_table(&recv->r_frag->f_sg, 1);
        ret = rds_page_remainder_alloc(&recv->r_frag->f_sg,
-                       RDS_FRAG_SIZE, page_mask);
+                       recv->r_ic->i_frag_sz, page_mask);
        if (ret) {
                kmem_cache_free(rds_ib_frag_slab, recv->r_frag);
                goto out;
@@ -714,6 +714,7 @@ static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache
 
 int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
 {
+       struct rds_ib_connection *ic = inc->i_conn->c_transport_data;
        struct rds_ib_incoming *ibinc;
        struct rds_page_frag *frag;
        unsigned long to_copy;
@@ -727,13 +728,13 @@ int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
        len = be32_to_cpu(inc->i_hdr.h_len);
 
        while (iov_iter_count(to) && copied < len) {
-               if (frag_off == RDS_FRAG_SIZE) {
+               if (frag_off == ic->i_frag_sz) {
                        frag = list_entry(frag->f_item.next,
                                          struct rds_page_frag, f_item);
                        frag_off = 0;
                }
                to_copy = min_t(unsigned long, iov_iter_count(to),
-                               RDS_FRAG_SIZE - frag_off);
+                               ic->i_frag_sz - frag_off);
                to_copy = min_t(unsigned long, to_copy, len - copied);
 
                /* XXX needs + offset for multiple recvs per page */
@@ -963,6 +964,7 @@ u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic)
 static void rds_ib_cong_recv(struct rds_connection *conn,
                              struct rds_ib_incoming *ibinc)
 {
+       struct rds_ib_connection *ic = conn->c_transport_data;
        struct rds_cong_map *map;
        unsigned int map_off;
        unsigned int map_page;
@@ -990,7 +992,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
                uint64_t *src, *dst;
                unsigned int k;
 
-               to_copy = min(RDS_FRAG_SIZE - frag_off, RDS_CONG_PAGE_SIZE - map_off);
+               to_copy = min(ic->i_frag_sz - frag_off, RDS_CONG_PAGE_SIZE - map_off);
                BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
 
                addr = kmap_atomic(sg_page(&frag->f_sg));
@@ -1014,7 +1016,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
                }
 
                frag_off += to_copy;
-               if (frag_off == RDS_FRAG_SIZE) {
+               if (frag_off == ic->i_frag_sz) {
                        frag = list_entry(frag->f_item.next,
                                          struct rds_page_frag, f_item);
                        frag_off = 0;
@@ -1133,8 +1135,8 @@ static void rds_ib_process_recv(struct rds_connection *conn,
        list_add_tail(&recv->r_frag->f_item, &ibinc->ii_frags);
        recv->r_frag = NULL;
 
-       if (ic->i_recv_data_rem > RDS_FRAG_SIZE)
-               ic->i_recv_data_rem -= RDS_FRAG_SIZE;
+       if (ic->i_recv_data_rem > ic->i_frag_sz)
+               ic->i_recv_data_rem -= ic->i_frag_sz;
        else {
                ic->i_recv_data_rem = 0;
                ic->i_ibinc = NULL;
@@ -1230,8 +1232,8 @@ void rds_ib_srq_process_recv(struct rds_connection *conn,
 
        recv->r_frag = NULL;
 
-       if (ic->i_recv_data_rem > RDS_FRAG_SIZE)
-               ic->i_recv_data_rem -= RDS_FRAG_SIZE;
+       if (ic->i_recv_data_rem > ic->i_frag_sz)
+               ic->i_recv_data_rem -= ic->i_frag_sz;
        else {
                ic->i_recv_data_rem = 0;
                ic->i_ibinc = NULL;
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index f06de3df3e01949350d654b7058ed374a6d34913..2e2820937f726dca97178f7c8350613f007c4372 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -549,7 +549,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
        int flow_controlled = 0;
        int nr_sig = 0;
 
-       BUG_ON(off % RDS_FRAG_SIZE);
+       BUG_ON(off % ic->i_frag_sz);
        BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
 
        /* Do not send cong updates to IB loopback */
@@ -563,7 +563,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
        if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
                i = 1;
        else
-               i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);
+               i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), ic->i_frag_sz);
 
        work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
        if (work_alloc == 0) {
@@ -711,8 +711,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
                /* Set up the data, if present */
                if (i < work_alloc
                    && scat != &rm->data.op_sg[rm->data.op_count]) {
-                       len = min(RDS_FRAG_SIZE,
-                               ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
+                       len = min((unsigned int)ic->i_frag_sz,
+                                 ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
                        send->s_wr.num_sge = 2;
 
                        send->s_sge[1].addr = ib_sg_dma_address(dev, scat);
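
A note on this last hunk: the kernel's min() macro type-checks its
operands and warns when they differ, and the new i_frag_sz is a u16
while ib_sg_dma_len() returns unsigned int, hence the explicit cast
added above. An equivalent spelling, shown here only as an
alternative, uses min_t() to name the comparison type once:

	/* min_t() casts both operands to unsigned int, avoiding the
	 * explicit cast on i_frag_sz.
	 */
	len = min_t(unsigned int, ic->i_frag_sz,
		    ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);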