From ea6e04f14569f6cc92b3a112eccbf20f4c711f34 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Mon, 16 Nov 2015 13:28:11 -0800 Subject: [PATCH] RDS: make congestion code independent of PAGE_SIZE RDS congestion map code is designed with base assumption of 4K page size. The map update as well as the transport code assumes it that way. Of course it breaks when a transport like IB starts supporting larger fragments than 4K. To overcome this limitation without too many changes to the core congestion map update logic, define an independent RDS_CONG_PAGE_SIZE and use it. While at it, we also move rds_message_map_pages() whose sole purpose is to map congestion pages to congestion code. Orabug: 21894138 Reviewed-by: Wei Lin Guay Signed-off-by: Santosh Shilimkar --- net/rds/cong.c | 46 ++++++++++++++++++++++++++++++++++------------ net/rds/ib_recv.c | 4 ++-- net/rds/message.c | 25 ------------------------- net/rds/rds.h | 7 ++++--- 4 files changed, 40 insertions(+), 42 deletions(-) diff --git a/net/rds/cong.c b/net/rds/cong.c index 6b0bf1cb2b8b..e71527776b68 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -138,6 +138,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr) unsigned long zp; unsigned long i; unsigned long flags; + gfp_t mask = GFP_KERNEL | __GFP_ZERO; map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL); if (!map) @@ -147,12 +148,12 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr) init_waitqueue_head(&map->m_waitq); INIT_LIST_HEAD(&map->m_conn_list); - for (i = 0; i < RDS_CONG_MAP_PAGES; i++) { - zp = get_zeroed_page(GFP_KERNEL); - if (zp == 0) - goto out; - map->m_page_addrs[i] = zp; - } + zp = __get_free_pages(mask, get_order(RDS_CONG_MAP_BYTES)); + if (zp == 0) + goto out; + + for (i = 0; i < RDS_CONG_MAP_PAGES; i++) + map->m_page_addrs[i] = zp + i * RDS_CONG_PAGE_SIZE; spin_lock_irqsave(&rds_cong_lock, flags); ret = rds_cong_tree_walk(addr, map); @@ -165,8 +166,9 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr) out: if 
(map) { - for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++) - free_page(map->m_page_addrs[i]); + if (zp) + __free_pages(virt_to_page(map->m_page_addrs[0]), + get_order(RDS_CONG_MAP_BYTES)); kfree(map); } @@ -175,6 +177,26 @@ out: return ret; } +static struct rds_message *rds_cong_map_pages(unsigned long *page_addrs, unsigned int total_len) +{ + struct rds_message *rm; + int num_sgs = RDS_CONG_MAP_SGE; + int extra_bytes = num_sgs * sizeof(struct scatterlist); + + rm = rds_message_alloc(extra_bytes, GFP_NOWAIT); + if (!rm) + return ERR_PTR(-ENOMEM); + + set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); + rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); + rm->data.op_nents = RDS_CONG_MAP_SGE; + rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); + + sg_set_page(&rm->data.op_sg[0], virt_to_page(page_addrs[0]), + total_len, 0); + return rm; +} + /* * Put the conn on its local map's list. This is called when the conn is * really added to the hash. It's nested under the rds_conn_lock, sadly. 
@@ -377,14 +399,14 @@ void rds_cong_exit(void) { struct rb_node *node; struct rds_cong_map *map; - unsigned long i; while ((node = rb_first(&rds_cong_tree))) { map = rb_entry(node, struct rds_cong_map, m_rb_node); rdsdebug("freeing map %p\n", map); rb_erase(&map->m_rb_node, &rds_cong_tree); - for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++) - free_page(map->m_page_addrs[i]); + if (map->m_page_addrs[0]) + __free_pages(virt_to_page(map->m_page_addrs[0]), + get_order(RDS_CONG_MAP_BYTES)); kfree(map); } } @@ -397,7 +419,7 @@ struct rds_message *rds_cong_update_alloc(struct rds_connection *conn) struct rds_cong_map *map = conn->c_lcong; struct rds_message *rm; - rm = rds_message_map_pages(map->m_page_addrs, RDS_CONG_MAP_BYTES); + rm = rds_cong_map_pages(map->m_page_addrs, RDS_CONG_MAP_BYTES); if (!IS_ERR(rm)) rm->m_inc.i_hdr.h_flags = RDS_FLAG_CONG_BITMAP; diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index d5d6700d938a..f1bdeaaf2c53 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -990,7 +990,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, uint64_t *src, *dst; unsigned int k; - to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); + to_copy = min(RDS_FRAG_SIZE - frag_off, RDS_CONG_PAGE_SIZE - map_off); BUG_ON(to_copy & 7); /* Must be 64bit aligned. 
*/ addr = kmap_atomic(sg_page(&frag->f_sg)); @@ -1008,7 +1008,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, copied += to_copy; map_off += to_copy; - if (map_off == PAGE_SIZE) { + if (map_off == RDS_CONG_PAGE_SIZE) { map_off = 0; map_page++; } diff --git a/net/rds/message.c b/net/rds/message.c index 164518d68abd..ab535e2cb184 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -287,31 +287,6 @@ struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents) return sg_ret; } -struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len) -{ - struct rds_message *rm; - unsigned int i; - int num_sgs = ceil(total_len, PAGE_SIZE); - int extra_bytes = num_sgs * sizeof(struct scatterlist); - - rm = rds_message_alloc(extra_bytes, GFP_NOWAIT); - if (!rm) - return ERR_PTR(-ENOMEM); - - set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); - rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); - rm->data.op_nents = ceil(total_len, PAGE_SIZE); - rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); - - for (i = 0; i < rm->data.op_nents; ++i) { - sg_set_page(&rm->data.op_sg[i], - virt_to_page(page_addrs[i]), - PAGE_SIZE, 0); - } - - return rm; -} - int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, gfp_t gfp) { diff --git a/net/rds/rds.h b/net/rds/rds.h index e9e81a814dd4..94abf90a2ddf 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -59,9 +59,11 @@ rdsdebug(char *fmt, ...) 
#define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) #define RDS_CONG_MAP_BYTES (65536 / 8) +#define RDS_CONG_PAGE_SIZE (1UL << 12) #define RDS_CONG_MAP_LONGS (RDS_CONG_MAP_BYTES / sizeof(unsigned long)) -#define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE) -#define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) +#define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / RDS_CONG_PAGE_SIZE) +#define RDS_CONG_MAP_PAGE_BITS (RDS_CONG_PAGE_SIZE * 8) +#define RDS_CONG_MAP_SGE 1 struct rds_cong_map { struct rb_node m_rb_node; @@ -768,7 +770,6 @@ struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents); int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, gfp_t gfp); -struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); void rds_message_populate_header(struct rds_header *hdr, __be16 sport, __be16 dport, u64 seq); int rds_message_add_extension(struct rds_header *hdr, -- 2.50.1