]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: avoid large pages for sg allocation for TCP transport
authorSantosh Shilimkar <santosh.shilimkar@oracle.com>
Sat, 25 Jun 2016 21:56:18 +0000 (14:56 -0700)
committerChuck Anderson <chuck.anderson@oracle.com>
Fri, 8 Jul 2016 03:29:48 +0000 (20:29 -0700)
To reduce SGEs, commit 23f90cc ("RDS: fix the sg allocation based
on actual message size") used the buddy allocator to allocate large
pages based on message size.

This change, however, seems to create an issue for the TCP transport,
most likely triggering a memory leak somewhere in the RDS TCP driver path.
The same core code with large pages seems to work just fine with
IB transport.

This patch avoids huge page allocation for RDS TCP sockets.

Orabug: 23635336

Reviewed-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
net/rds/af_rds.c
net/rds/bind.c
net/rds/ib_recv.c
net/rds/message.c
net/rds/page.c
net/rds/rds.h
net/rds/send.c

index f3948435c1b91208fd1e5123613646876b4b241f..2d864afc9ebadf8a716cabda27aa8ffee06e7a2a 100644 (file)
@@ -616,6 +616,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
        rs->rs_conn = 0;
        rs->rs_netfilter_enabled = 0;
        rs->rs_rx_traces = 0;
+       rs->rs_large_page = true;
 
        if (rs->rs_bound_addr)
                printk(KERN_CRIT "bound addr %x at create\n", rs->rs_bound_addr);
index da29cdf6644b231ccb1c3b271f865a5e401a7524..8ebbcb9bae243ad4267192138b4fae78619f2071 100644 (file)
@@ -220,6 +220,9 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        }
 
        rs->rs_transport = trans;
+       if (rs->rs_transport->t_type == RDS_TRANS_TCP)
+               rs->rs_large_page = false;
+
        ret = 0;
 
 out:
index 7739aaed9edbe40c7606e14e72ac0ac0d5552e3f..5b2525a09a21d5800d25aa2ffbc561b5f5f33974 100644 (file)
@@ -346,7 +346,7 @@ static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic
 
                sg_init_table(&frag->f_sg, 1);
                ret = rds_page_remainder_alloc(&frag->f_sg,
-                                              ic->i_frag_sz, page_mask);
+                                              ic->i_frag_sz, page_mask, true);
                if (ret) {
                        kmem_cache_free(rds_ib_frag_slab, frag);
                        atomic_sub(ic->i_frag_pages, &rds_ib_allocation);
@@ -518,10 +518,10 @@ static int rds_ib_srq_prefill_one(struct rds_ib_device *rds_ibdev,
        sg_init_table(&recv->r_frag->f_sg, 1);
        if (recv->r_ic)
                ret = rds_page_remainder_alloc(&recv->r_frag->f_sg,
-                               recv->r_ic->i_frag_sz, page_mask);
+                               recv->r_ic->i_frag_sz, page_mask, true);
        else
                ret = rds_page_remainder_alloc(&recv->r_frag->f_sg,
-                               RDS_FRAG_SIZE, page_mask);
+                               RDS_FRAG_SIZE, page_mask, true);
        if (ret) {
                kmem_cache_free(rds_ib_frag_slab, recv->r_frag);
                goto out;
index f0e540094ed36ad191a7baeefc75db90958757ce..70697a3942c9720c29be6607e092fd1a05bcf4c5 100644 (file)
@@ -291,7 +291,7 @@ struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
 }
 
 int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
-                              gfp_t gfp)
+                              gfp_t gfp, bool large_page)
 {
        unsigned long to_copy, nbytes;
        unsigned long sg_off;
@@ -310,7 +310,8 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
                if (!sg_page(sg)) {
                        ret = rds_page_remainder_alloc(sg, iov_iter_count(from),
                                                       GFP_ATOMIC == gfp ?
-                                                      gfp : GFP_HIGHUSER);
+                                                      gfp : GFP_HIGHUSER,
+                                                      large_page);
 
                        if (ret)
                                return ret;
index 1dae848832918a5de712ecea6083e681194a5db4..59cd71cbb991f5a22ad2bb12dbbc83e99f1edb49 100644 (file)
@@ -116,22 +116,31 @@ EXPORT_SYMBOL_GPL(rds_page_copy_user);
  * reference until they are done with the region.
  */
 int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
-                            gfp_t gfp)
+                            gfp_t gfp, bool large_page)
 {
        struct rds_page_remainder *rem;
        unsigned long flags;
        struct page *page;
        int ret;
+       unsigned int order, size;
 
        gfp |= __GFP_HIGHMEM;
 
        /* jump straight to allocation if we're trying for a huge page */
        if (bytes >= PAGE_SIZE) {
-               page = alloc_pages(gfp, get_order(bytes));
+               if (large_page) {
+                       order =  get_order(bytes);
+                       size = bytes;
+               } else {
+                       order =  0;
+                       size = PAGE_SIZE;
+               }
+
+               page = alloc_pages(gfp, order);
                if (!page) {
                        ret = -ENOMEM;
                } else {
-                       sg_set_page(scat, page, bytes, 0);
+                       sg_set_page(scat, page, size, 0);
                        ret = 0;
                }
                goto out;
index a3d6284edd2a72e153759c018996adba701abc66..43104d2613f82cb65c5e2e335a53e6e9a5c3b39b 100644 (file)
@@ -717,6 +717,8 @@ struct rds_sock {
        /* Socket receive path trace points*/
        u8                      rs_rx_traces;
        u8                      rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
+
+       bool                    rs_large_page;
 };
 
 static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
@@ -878,7 +880,7 @@ rds_conn_connecting(struct rds_connection *conn)
 struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
 struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
 int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
-                              gfp_t gfp);
+                              gfp_t gfp, bool n);
 void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
                                 __be16 dport, u64 seq);
 int rds_message_add_extension(struct rds_header *hdr,
@@ -910,7 +912,7 @@ static inline int rds_message_verify_checksum(const struct rds_header *hdr)
 
 /* page.c */
 int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
-                            gfp_t gfp);
+                            gfp_t gfp, bool n);
 int rds_page_copy_user(struct page *page, unsigned long offset,
                       void __user *ptr, unsigned long bytes,
                       int to_user);
index 5bc6400ae3687ef4d43384320405b95fb5612185..edc3083e7a0ee627f520e96e3832736983f9241c 100644 (file)
@@ -1189,6 +1189,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
        int nonblock = msg->msg_flags & MSG_DONTWAIT;
        long timeo = sock_sndtimeo(sk, nonblock);
        size_t total_payload_len = payload_len;
+       bool large_page;
 
        /* Mirror Linux UDP mirror of BSD error message compatibility */
        /* XXX: Perhaps MSG_MORE someday */
@@ -1215,6 +1216,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
        }
 
        lock_sock(sk);
+       large_page = rs->rs_large_page;
        if (daddr == 0 || rs->rs_bound_addr == 0) {
                release_sock(sk);
                ret = -ENOTCONN; /* XXX not a great errno */
@@ -1241,7 +1243,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
        /* Attach data to the rm */
        if (payload_len) {
                rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
-               ret = rds_message_copy_from_user(rm, &msg->msg_iter, GFP_KERNEL);
+               ret = rds_message_copy_from_user(rm, &msg->msg_iter, GFP_KERNEL,
+                                                large_page);
                if (ret)
                        goto out;
        }