From 0f0f08915fde3f2bb6c5e79f93c0d37d087451d2 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Wed, 21 Oct 2015 16:47:28 -0700 Subject: [PATCH] RDS: IB: make fragment size (RDS_FRAG_SIZE) dynamic IB fabric is capable of fragmenting 4GB of data payload into send_first, send_middle and send_last. Nevertheless, RDS fragments each payload into PAGE_SIZE, which is usually 4KB. This patch makes the RDS_FRAG_SIZE for RDS IB transport dynamic. In preparation for subsequent patch(es), this patch adds per-connection peer negotiation to determine the supported fragment size for IB transport. Orabug: 21894138 Reviewed-by: Wei Lin Guay Signed-off-by: Wei Lin Guay Signed-off-by: Santosh Shilimkar --- net/rds/ib.h | 3 ++- net/rds/ib_recv.c | 24 +++++++++++++----------- net/rds/ib_send.c | 8 ++++---- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/net/rds/ib.h b/net/rds/ib.h index 1c24e3c33cf9..8f3c967262e7 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -53,7 +53,7 @@ extern struct rw_semaphore rds_ib_devices_lock; extern struct list_head rds_ib_devices; /* - * IB posts RDS_FRAG_SIZE fragments of pages to the receive queues to + * IB posts i_frag_sz fragments of pages to the receive queues to * try and minimize the amount of memory tied up both the device and * socket receive queues. 
*/ @@ -215,6 +215,7 @@ struct rds_ib_connection { /* Protocol version specific information */ unsigned int i_flowctl:1; /* enable/disable flow ctl */ + u16 i_frag_sz; /* IB fragment size */ /* Batched completions */ unsigned int i_unsignaled_wrs; diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index f1bdeaaf2c53..f3425bf31c89 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -81,7 +81,7 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic) sge = &recv->r_sge[1]; sge->addr = 0; - sge->length = RDS_FRAG_SIZE; + sge->length = ic->i_frag_sz; sge->lkey = ic->i_mr->lkey; } } @@ -310,7 +310,7 @@ static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic sg_init_table(&frag->f_sg, 1); ret = rds_page_remainder_alloc(&frag->f_sg, - RDS_FRAG_SIZE, page_mask); + ic->i_frag_sz, page_mask); if (ret) { kmem_cache_free(rds_ib_frag_slab, frag); atomic_dec(&rds_ib_allocation); @@ -481,7 +481,7 @@ static int rds_ib_srq_prefill_one(struct rds_ib_device *rds_ibdev, goto out; sg_init_table(&recv->r_frag->f_sg, 1); ret = rds_page_remainder_alloc(&recv->r_frag->f_sg, - RDS_FRAG_SIZE, page_mask); + recv->r_ic->i_frag_sz, page_mask); if (ret) { kmem_cache_free(rds_ib_frag_slab, recv->r_frag); goto out; @@ -714,6 +714,7 @@ static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to) { + struct rds_ib_connection *ic = inc->i_conn->c_transport_data; struct rds_ib_incoming *ibinc; struct rds_page_frag *frag; unsigned long to_copy; @@ -727,13 +728,13 @@ int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to) len = be32_to_cpu(inc->i_hdr.h_len); while (iov_iter_count(to) && copied < len) { - if (frag_off == RDS_FRAG_SIZE) { + if (frag_off == ic->i_frag_sz) { frag = list_entry(frag->f_item.next, struct rds_page_frag, f_item); frag_off = 0; } to_copy = min_t(unsigned long, iov_iter_count(to), - RDS_FRAG_SIZE - frag_off); + ic->i_frag_sz - 
frag_off); to_copy = min_t(unsigned long, to_copy, len - copied); /* XXX needs + offset for multiple recvs per page */ @@ -963,6 +964,7 @@ u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic) static void rds_ib_cong_recv(struct rds_connection *conn, struct rds_ib_incoming *ibinc) { + struct rds_ib_connection *ic = conn->c_transport_data; struct rds_cong_map *map; unsigned int map_off; unsigned int map_page; @@ -990,7 +992,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, uint64_t *src, *dst; unsigned int k; - to_copy = min(RDS_FRAG_SIZE - frag_off, RDS_CONG_PAGE_SIZE - map_off); + to_copy = min(ic->i_frag_sz - frag_off, RDS_CONG_PAGE_SIZE - map_off); BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ addr = kmap_atomic(sg_page(&frag->f_sg)); @@ -1014,7 +1016,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, } frag_off += to_copy; - if (frag_off == RDS_FRAG_SIZE) { + if (frag_off == ic->i_frag_sz) { frag = list_entry(frag->f_item.next, struct rds_page_frag, f_item); frag_off = 0; @@ -1133,8 +1135,8 @@ static void rds_ib_process_recv(struct rds_connection *conn, list_add_tail(&recv->r_frag->f_item, &ibinc->ii_frags); recv->r_frag = NULL; - if (ic->i_recv_data_rem > RDS_FRAG_SIZE) - ic->i_recv_data_rem -= RDS_FRAG_SIZE; + if (ic->i_recv_data_rem > ic->i_frag_sz) + ic->i_recv_data_rem -= ic->i_frag_sz; else { ic->i_recv_data_rem = 0; ic->i_ibinc = NULL; @@ -1230,8 +1232,8 @@ void rds_ib_srq_process_recv(struct rds_connection *conn, recv->r_frag = NULL; - if (ic->i_recv_data_rem > RDS_FRAG_SIZE) - ic->i_recv_data_rem -= RDS_FRAG_SIZE; + if (ic->i_recv_data_rem > ic->i_frag_sz) + ic->i_recv_data_rem -= ic->i_frag_sz; else { ic->i_recv_data_rem = 0; ic->i_ibinc = NULL; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index f06de3df3e01..2e2820937f72 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -549,7 +549,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, int flow_controlled = 0; int nr_sig = 0; - BUG_ON(off % 
RDS_FRAG_SIZE); + BUG_ON(off % ic->i_frag_sz); BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); /* Do not send cong updates to IB loopback */ @@ -563,7 +563,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) i = 1; else - i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE); + i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), ic->i_frag_sz); work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); if (work_alloc == 0) { @@ -711,8 +711,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, /* Set up the data, if present */ if (i < work_alloc && scat != &rm->data.op_sg[rm->data.op_count]) { - len = min(RDS_FRAG_SIZE, - ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff); + len = min((unsigned int)ic->i_frag_sz, + ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff); send->s_wr.num_sge = 2; send->s_sge[1].addr = ib_sg_dma_address(dev, scat); -- 2.49.0