From 2d80dcbe382c54ba0c7ddb45682949d8602ea5ba Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Sat, 25 Jun 2016 14:56:18 -0700 Subject: [PATCH] RDS: avoid large pages for sg allocation for TCP transport To reduce SGEs, commit 23f90cc ("RDS: fix the sg allocation based on actual message size") used the buddy allocator to allocate large pages based on the message size. This change, though, seems to create an issue for the TCP transport, most likely triggering a memory leak somewhere in the RDS TCP driver path. The same core code with large pages seems to work just fine with the IB transport. This patch avoids the hugepage allocation for RDS TCP sockets. Orabug: 23635336 Reviewed-by: Sowmini Varadhan Signed-off-by: Santosh Shilimkar --- net/rds/af_rds.c | 1 + net/rds/bind.c | 3 +++ net/rds/ib_recv.c | 6 +++--- net/rds/message.c | 5 +++-- net/rds/page.c | 15 ++++++++++++--- net/rds/rds.h | 6 ++++-- net/rds/send.c | 5 ++++- 7 files changed, 30 insertions(+), 11 deletions(-) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index f3948435c1b9..2d864afc9eba 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -616,6 +616,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) rs->rs_conn = 0; rs->rs_netfilter_enabled = 0; rs->rs_rx_traces = 0; + rs->rs_large_page = true; if (rs->rs_bound_addr) printk(KERN_CRIT "bound addr %x at create\n", rs->rs_bound_addr); diff --git a/net/rds/bind.c b/net/rds/bind.c index da29cdf6644b..8ebbcb9bae24 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -220,6 +220,9 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } rs->rs_transport = trans; + if (rs->rs_transport->t_type == RDS_TRANS_TCP) + rs->rs_large_page = false; + ret = 0; out: diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 7739aaed9edb..5b2525a09a21 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -346,7 +346,7 @@ static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic sg_init_table(&frag->f_sg, 1); ret = 
rds_page_remainder_alloc(&frag->f_sg, - ic->i_frag_sz, page_mask); + ic->i_frag_sz, page_mask, true); if (ret) { kmem_cache_free(rds_ib_frag_slab, frag); atomic_sub(ic->i_frag_pages, &rds_ib_allocation); @@ -518,10 +518,10 @@ static int rds_ib_srq_prefill_one(struct rds_ib_device *rds_ibdev, sg_init_table(&recv->r_frag->f_sg, 1); if (recv->r_ic) ret = rds_page_remainder_alloc(&recv->r_frag->f_sg, - recv->r_ic->i_frag_sz, page_mask); + recv->r_ic->i_frag_sz, page_mask, true); else ret = rds_page_remainder_alloc(&recv->r_frag->f_sg, - RDS_FRAG_SIZE, page_mask); + RDS_FRAG_SIZE, page_mask, true); if (ret) { kmem_cache_free(rds_ib_frag_slab, recv->r_frag); goto out; diff --git a/net/rds/message.c b/net/rds/message.c index f0e540094ed3..70697a3942c9 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -291,7 +291,7 @@ struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents) } int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, - gfp_t gfp) + gfp_t gfp, bool large_page) { unsigned long to_copy, nbytes; unsigned long sg_off; @@ -310,7 +310,8 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, if (!sg_page(sg)) { ret = rds_page_remainder_alloc(sg, iov_iter_count(from), GFP_ATOMIC == gfp ? - gfp : GFP_HIGHUSER); + gfp : GFP_HIGHUSER, + large_page); if (ret) return ret; diff --git a/net/rds/page.c b/net/rds/page.c index 1dae84883291..59cd71cbb991 100644 --- a/net/rds/page.c +++ b/net/rds/page.c @@ -116,22 +116,31 @@ EXPORT_SYMBOL_GPL(rds_page_copy_user); * reference until they are done with the region. 
*/ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, - gfp_t gfp) + gfp_t gfp, bool large_page) { struct rds_page_remainder *rem; unsigned long flags; struct page *page; int ret; + unsigned int order, size; gfp |= __GFP_HIGHMEM; /* jump straight to allocation if we're trying for a huge page */ if (bytes >= PAGE_SIZE) { - page = alloc_pages(gfp, get_order(bytes)); + if (large_page) { + order = get_order(bytes); + size = bytes; + } else { + order = 0; + size = PAGE_SIZE; + } + + page = alloc_pages(gfp, order); if (!page) { ret = -ENOMEM; } else { - sg_set_page(scat, page, bytes, 0); + sg_set_page(scat, page, size, 0); ret = 0; } goto out; diff --git a/net/rds/rds.h b/net/rds/rds.h index a3d6284edd2a..43104d2613f8 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -717,6 +717,8 @@ struct rds_sock { /* Socket receive path trace points*/ u8 rs_rx_traces; u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; + + bool rs_large_page; }; static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) @@ -878,7 +880,7 @@ rds_conn_connecting(struct rds_connection *conn) struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents); int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, - gfp_t gfp); + gfp_t gfp, bool n); void rds_message_populate_header(struct rds_header *hdr, __be16 sport, __be16 dport, u64 seq); int rds_message_add_extension(struct rds_header *hdr, @@ -910,7 +912,7 @@ static inline int rds_message_verify_checksum(const struct rds_header *hdr) /* page.c */ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, - gfp_t gfp); + gfp_t gfp, bool n); int rds_page_copy_user(struct page *page, unsigned long offset, void __user *ptr, unsigned long bytes, int to_user); diff --git a/net/rds/send.c b/net/rds/send.c index 5bc6400ae368..edc3083e7a0e 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1189,6 +1189,7 @@ int 
rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) int nonblock = msg->msg_flags & MSG_DONTWAIT; long timeo = sock_sndtimeo(sk, nonblock); size_t total_payload_len = payload_len; + bool large_page; /* Mirror Linux UDP mirror of BSD error message compatibility */ /* XXX: Perhaps MSG_MORE someday */ @@ -1215,6 +1216,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) } lock_sock(sk); + large_page = rs->rs_large_page; if (daddr == 0 || rs->rs_bound_addr == 0) { release_sock(sk); ret = -ENOTCONN; /* XXX not a great errno */ @@ -1241,7 +1243,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* Attach data to the rm */ if (payload_len) { rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE)); - ret = rds_message_copy_from_user(rm, &msg->msg_iter, GFP_KERNEL); + ret = rds_message_copy_from_user(rm, &msg->msg_iter, GFP_KERNEL, + large_page); if (ret) goto out; } -- 2.50.1