www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
IB/sdp: slow start recv buffer sizes, and try to resize if out of credits (bugzilla...
author Ami Perlmutter <amip@dev.mellanox.co.il>
Thu, 10 May 2007 12:23:13 +0000 (15:23 +0300)
committer Mukesh Kacker <mukesh.kacker@oracle.com>
Tue, 6 Oct 2015 12:04:07 +0000 (05:04 -0700)
Signed-off-by: Ami Perlmutter <amip@dev.mellanox.co.il>
drivers/infiniband/ulp/sdp/sdp.h
drivers/infiniband/ulp/sdp/sdp_bcopy.c
drivers/infiniband/ulp/sdp/sdp_cma.c
drivers/infiniband/ulp/sdp/sdp_main.c

diff --git a/drivers/infiniband/ulp/sdp/sdp.h b/drivers/infiniband/ulp/sdp/sdp.h
index 7cfc9faebaca77687fd99b84af741677f86636f3..6626811a073b7d7a7961d9eca6b42ae609f6a105 100644
@@ -56,6 +56,8 @@ enum sdp_mid {
        SDP_MID_HELLO = 0x0,
        SDP_MID_HELLO_ACK = 0x1,
        SDP_MID_DISCONN = 0x2,
+       SDP_MID_CHRCVBUF = 0xB,
+       SDP_MID_CHRCVBUF_ACK = 0xC,
        SDP_MID_DATA = 0xFF,
 };
 
@@ -138,12 +140,22 @@ struct sdp_sock {
 
        struct ib_sge ibsge[SDP_MAX_SEND_SKB_FRAGS + 1];
        struct ib_wc  ibwc[SDP_NUM_WC];
+
+       /* SDP slow start */
+       int rcvbuf_scale;
+       int sent_request;
+       int sent_request_head;
+       int recv_request_head;
+       int recv_request;
+       int recv_frags;
+       int send_frags;
 };
 
 extern struct proto sdp_proto;
 extern struct workqueue_struct *sdp_workqueue;
 
-extern atomic_t current_mem_usage;
+extern atomic_t sdp_current_mem_usage;
+extern spinlock_t sdp_large_sockets_lock;
 
 /* just like TCP fs */
 struct sdp_seq_afinfo {
@@ -206,5 +218,6 @@ struct sk_buff *sdp_send_completion(struct sdp_sock *ssk, int mseq);
 void sdp_urg(struct sdp_sock *ssk, struct sk_buff *skb);
 void sdp_add_sock(struct sdp_sock *ssk);
 void sdp_remove_sock(struct sdp_sock *ssk);
+void sdp_remove_large_sock(void);
 
 #endif
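
Note on the sdp.h changes above: the two new message IDs carry a receive-buffer resize handshake, and the new per-socket fields track its state (sent_request/sent_request_head on the requesting side, recv_request/recv_request_head on the granting side, recv_frags/send_frags for the current buffer geometry). Below is a minimal sketch of the requester-side bookkeeping, mirroring the SDP_MID_CHRCVBUF_ACK handling added later in sdp_bcopy.c; the function name and the page_size parameter are illustrative, not part of the patch:

#include <stdint.h>

/* Message IDs as added to enum sdp_mid in this patch. */
enum { MID_CHRCVBUF = 0xB, MID_CHRCVBUF_ACK = 0xC };

/* Payload mirrors struct sdp_chrecvbuf from sdp_bcopy.c. */
struct chrecvbuf { uint32_t size; };

/* Requester handling of the peer's ack: adopt the granted size only
 * if it improves on the current goal, as in sdp_handle_wc(). */
static void handle_resize_ack(uint32_t granted, uint32_t *xmit_size_goal,
                              uint32_t *send_frags, int *sent_request,
                              uint32_t page_size)
{
        if (granted > *xmit_size_goal) {
                *sent_request = -1;          /* resize succeeded */
                *xmit_size_goal = granted;
                *send_frags = (granted + page_size - 1) / page_size;
        } else {
                *sent_request = 0;           /* peer declined, may retry */
        }
}
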
diff --git a/drivers/infiniband/ulp/sdp/sdp_bcopy.c b/drivers/infiniband/ulp/sdp/sdp_bcopy.c
index 62206482a61be26b3c4dbef1fdc48d00a98552ac..6b132e2e7f2995b13202de281be78f13f979ec99 100644
 #include <rdma/rdma_cm.h>
 #include "sdp.h"
 
+#define SDP_RESIZE_WAIT 16
+
+struct sdp_chrecvbuf {
+       u32 size;
+};
+
 static int rcvbuf_scale = 0x10;
+
 module_param_named(rcvbuf_scale, rcvbuf_scale, int, 0644);
 MODULE_PARM_DESC(rcvbuf_scale, "Receive buffer size scale factor.");
 
@@ -45,7 +52,37 @@ static int top_mem_usage = 0;
 module_param_named(top_mem_usage, top_mem_usage, int, 0644);
 MODULE_PARM_DESC(top_mem_usage, "Top system wide sdp memory usage for recv (in MB).");
 
-atomic_t current_mem_usage;
+#ifdef CONFIG_PPC
+static int max_large_sockets = 100;
+#else
+static int max_large_sockets = 1000;
+#endif
+module_param_named(max_large_sockets, max_large_sockets, int, 0644);
+MODULE_PARM_DESC(max_large_sockets, "Max number of large sockets (32k buffers).");
+
+static int curr_large_sockets = 0;
+atomic_t sdp_current_mem_usage;
+spinlock_t sdp_large_sockets_lock;
+
+static int sdp_can_resize(void)
+{
+       int count, ret;
+       spin_lock_irq(&sdp_large_sockets_lock);
+       count = curr_large_sockets;
+       ret = curr_large_sockets < max_large_sockets;
+       if (ret)
+               curr_large_sockets++;
+       spin_unlock_irq(&sdp_large_sockets_lock);
+
+       return ret;
+}
+
+void sdp_remove_large_sock(void)
+{
+       spin_lock_irq(&sdp_large_sockets_lock);
+       curr_large_sockets--;
+       spin_unlock_irq(&sdp_large_sockets_lock);
+}
 
 /* Like tcp_fin */
 static void sdp_fin(struct sock *sk)
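
sdp_can_resize() and sdp_remove_large_sock() above enforce a system-wide cap on sockets using large (32k) receive buffers: the availability check and the increment happen under a single spinlock so two sockets cannot both claim the last slot. A userspace analogue of the same test-and-reserve pattern, with illustrative names only:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t slots_lock = PTHREAD_MUTEX_INITIALIZER;
static int curr_slots, max_slots = 1000;

/* Reserve a slot against the global cap; check and increment are
 * made atomic by the lock, like sdp_can_resize(). */
static bool slot_reserve(void)
{
        bool ok;
        pthread_mutex_lock(&slots_lock);
        ok = curr_slots < max_slots;
        if (ok)
                curr_slots++;
        pthread_mutex_unlock(&slots_lock);
        return ok;
}

/* Give the slot back, like sdp_remove_large_sock(). */
static void slot_release(void)
{
        pthread_mutex_lock(&slots_lock);
        curr_slots--;
        pthread_mutex_unlock(&slots_lock);
}
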
@@ -86,6 +123,7 @@ void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb, u8 mid)
                h->flags = SDP_OOB_PRES | SDP_OOB_PEND;
        else
                h->flags = 0;
+
        h->bufs = htons(ssk->rx_head - ssk->rx_tail);
        h->len = htonl(skb->len);
        h->mseq = htonl(mseq);
@@ -191,7 +229,7 @@ static void sdp_post_recv(struct sdp_sock *ssk)
        /* FIXME */
        BUG_ON(!skb);
        h = (struct sdp_bsdh *)skb->head;
-       for (i = 0; i < SDP_MAX_SEND_SKB_FRAGS; ++i) {
+       for (i = 0; i < ssk->recv_frags; ++i) {
                page = alloc_pages(GFP_HIGHUSER, 0);
                BUG_ON(!page);
                frag = &skb_shinfo(skb)->frags[i];
@@ -242,22 +280,22 @@ static void sdp_post_recv(struct sdp_sock *ssk)
                sdp_reset(&ssk->isk.sk);
        }
 
-       atomic_add(SDP_MAX_SEND_SKB_FRAGS, &current_mem_usage);
+       atomic_add(SDP_MAX_SEND_SKB_FRAGS, &sdp_current_mem_usage);
 }
 
 void sdp_post_recvs(struct sdp_sock *ssk)
 {
-       int scale = rcvbuf_scale;
+       int scale = ssk->rcvbuf_scale;
        if (unlikely(!ssk->id))
                return;
 
        if (top_mem_usage &&
-           (top_mem_usage * 0x100000) < atomic_read(&current_mem_usage) * PAGE_SIZE)
+           (top_mem_usage * 0x100000) < atomic_read(&sdp_current_mem_usage) * PAGE_SIZE)
                scale = 1;
 
        while ((likely(ssk->rx_head - ssk->rx_tail < SDP_RX_SIZE) &&
                (ssk->rx_head - ssk->rx_tail - SDP_MIN_BUFS) *
-               (SDP_HEAD_SIZE + SDP_MAX_SEND_SKB_FRAGS * PAGE_SIZE) +
+               (SDP_HEAD_SIZE + ssk->recv_frags * PAGE_SIZE) +
                ssk->rcv_nxt - ssk->copied_seq <
                ssk->isk.sk.sk_rcvbuf * scale) ||
               unlikely(ssk->rx_head - ssk->rx_tail < SDP_MIN_BUFS))
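
In the reworked sdp_post_recvs() above, the posting watermark now uses the per-socket recv_frags and rcvbuf_scale instead of compile-time constants, and the scale collapses to 1 once global receive memory (tracked in pages via sdp_current_mem_usage) exceeds the top_mem_usage module parameter, given in MB (0x100000 bytes each). A worked example of that throttle check, with assumed values:

#include <stdio.h>

int main(void)
{
        long top_mem_usage = 64;        /* module parameter, in MB      */
        long page_size = 4096;          /* assumed PAGE_SIZE            */
        long current_pages = 20000;     /* sdp_current_mem_usage        */

        /* 64 MB is 16384 pages; 20000 pages exceed it, so the posting
         * loop drops back to scale = 1. */
        int scale = (top_mem_usage * 0x100000 < current_pages * page_size)
                        ? 1 : 16;       /* rcvbuf_scale defaults to 0x10 */
        printf("scale = %d\n", scale);
        return 0;
}
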
@@ -362,6 +400,23 @@ void sdp_post_sends(struct sdp_sock *ssk, int nonagle)
                return;
        }
 
+       if (ssk->recv_request &&
+           ssk->rx_tail >= ssk->recv_request_head &&
+           ssk->bufs >= SDP_MIN_BUFS &&
+           ssk->tx_head - ssk->tx_tail < SDP_TX_SIZE) {
+               struct sdp_chrecvbuf *resp_size;
+               ssk->recv_request = 0;
+               skb = sk_stream_alloc_skb(&ssk->isk.sk,
+                                         sizeof(struct sdp_bsdh) +
+                                         sizeof(*resp_size),
+                                         GFP_KERNEL);
+               /* FIXME */
+               BUG_ON(!skb);
+               resp_size = (struct sdp_chrecvbuf *)skb_push(skb, sizeof *resp_size);
+               resp_size->size = htons(ssk->recv_frags * PAGE_SIZE);
+               sdp_post_send(ssk, skb, SDP_MID_CHRCVBUF_ACK);
+       }
+
        while (ssk->bufs > SDP_MIN_BUFS &&
               ssk->tx_head - ssk->tx_tail < SDP_TX_SIZE &&
               (skb = ssk->isk.sk.sk_send_head) &&
@@ -370,6 +425,25 @@ void sdp_post_sends(struct sdp_sock *ssk, int nonagle)
                __skb_dequeue(&ssk->isk.sk.sk_write_queue);
                sdp_post_send(ssk, skb, SDP_MID_DATA);
        }
+
+       if (ssk->bufs == SDP_MIN_BUFS &&
+           !ssk->sent_request &&
+           ssk->tx_head > ssk->sent_request_head + SDP_RESIZE_WAIT &&
+           ssk->tx_head - ssk->tx_tail < SDP_TX_SIZE) {
+               struct sdp_chrecvbuf *req_size;
+               skb = sk_stream_alloc_skb(&ssk->isk.sk,
+                                         sizeof(struct sdp_bsdh) +
+                                         sizeof(*req_size),
+                                         GFP_KERNEL);
+               /* FIXME */
+               BUG_ON(!skb);
+               ssk->sent_request = SDP_MAX_SEND_SKB_FRAGS * PAGE_SIZE;
+               ssk->sent_request_head = ssk->tx_head;
+               req_size = (struct sdp_chrecvbuf *)skb_push(skb, sizeof *req_size);
+               req_size->size = htons(ssk->sent_request);
+               sdp_post_send(ssk, skb, SDP_MID_CHRCVBUF);
+       }
+
        c = ssk->remote_credits;
        if (likely(c > SDP_MIN_BUFS))
                c *= 2;
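
The second block added to sdp_post_sends() above is the "resize if out of credits" trigger: a SDP_MID_CHRCVBUF request goes out only when credits have dropped to SDP_MIN_BUFS, no request is outstanding, at least SDP_RESIZE_WAIT (16) sends have completed since the last attempt, and the send ring has room. Restated as a stand-alone predicate; the parameterization is illustrative:

#include <stdbool.h>

#define SDP_RESIZE_WAIT 16

static bool should_request_resize(int bufs, int min_bufs,
                                  int sent_request, int sent_request_head,
                                  unsigned tx_head, unsigned tx_tail,
                                  unsigned tx_size)
{
        return bufs == min_bufs &&                   /* out of credits   */
               !sent_request &&                      /* none in flight   */
               tx_head > sent_request_head + SDP_RESIZE_WAIT && /* backoff */
               tx_head - tx_tail < tx_size;          /* room in the ring */
}
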
@@ -402,6 +476,13 @@ void sdp_post_sends(struct sdp_sock *ssk, int nonagle)
        }
 }
 
+static inline void sdp_resize(struct sdp_sock *ssk, u32 new_size)
+{
+       ssk->recv_frags = PAGE_ALIGN(new_size - SDP_HEAD_SIZE) / PAGE_SIZE;
+       if (ssk->recv_frags > SDP_MAX_SEND_SKB_FRAGS)
+               ssk->recv_frags = SDP_MAX_SEND_SKB_FRAGS;
+}
+
 static void sdp_handle_wc(struct sdp_sock *ssk, struct ib_wc *wc)
 {
        struct sk_buff *skb;
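
The sdp_resize() helper added above converts a requested buffer size into a page-fragment count, net of the SDP_HEAD_SIZE header area, clamped to SDP_MAX_SEND_SKB_FRAGS. A worked example of the arithmetic; PAGE_SIZE = 4096 and SDP_HEAD_SIZE = 2048 are assumptions for illustration only:

#include <stdio.h>

#define PAGE_SIZE      4096u
#define SDP_HEAD_SIZE  2048u
#define PAGE_ALIGN(x)  (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
        unsigned new_size = 34816;  /* peer asks for header + 32 KiB */
        unsigned frags = PAGE_ALIGN(new_size - SDP_HEAD_SIZE) / PAGE_SIZE;
        printf("recv_frags = %u\n", frags);   /* 8 pages of payload */
        return 0;
}
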
@@ -413,7 +494,7 @@ static void sdp_handle_wc(struct sdp_sock *ssk, struct ib_wc *wc)
                if (unlikely(!skb))
                        return;
 
-               atomic_sub(SDP_MAX_SEND_SKB_FRAGS, &current_mem_usage);
+               atomic_sub(SDP_MAX_SEND_SKB_FRAGS, &sdp_current_mem_usage);
 
                if (unlikely(wc->status)) {
                        if (wc->status != IB_WC_WR_FLUSH_ERR) {
@@ -424,6 +505,8 @@ static void sdp_handle_wc(struct sdp_sock *ssk, struct ib_wc *wc)
                        }
                        __kfree_skb(skb);
                } else {
+                       int frags;
+
                        sdp_dbg_data(&ssk->isk.sk,
                                     "Recv completion. ID %d Length %d\n",
                                     (int)wc->wr_id, wc->byte_len);
@@ -449,11 +532,12 @@ static void sdp_handle_wc(struct sdp_sock *ssk, struct ib_wc *wc)
                        ssk->bufs = ntohl(h->mseq_ack) - ssk->tx_head + 1 +
                                ntohs(h->bufs);
 
+                       frags = skb_shinfo(skb)->nr_frags;
                        pagesz = PAGE_ALIGN(skb->data_len);
                        skb_shinfo(skb)->nr_frags = pagesz / PAGE_SIZE;
 
                        for (i = skb_shinfo(skb)->nr_frags;
-                            i < SDP_MAX_SEND_SKB_FRAGS; ++i) {
+                            i < frags; ++i) {
                                put_page(skb_shinfo(skb)->frags[i].page);
                                skb->truesize -= PAGE_SIZE;
                        }
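
The hunk above adjusts the cleanup of over-posted receive pages: the posted fragment count is saved in frags before nr_frags is overwritten with the page-aligned actual length, so the loop releases only the pages this buffer was actually posted with rather than the old hardcoded SDP_MAX_SEND_SKB_FRAGS. The counting, with an assumed PAGE_SIZE of 4096:

#include <stdio.h>

#define PAGE_SIZE     4096u
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
        unsigned posted_frags = 8;       /* ssk->recv_frags at post time */
        unsigned data_len = 9000;        /* actual payload, skb->data_len */
        unsigned kept = PAGE_ALIGN(data_len) / PAGE_SIZE;
        printf("keep %u frags, release %u\n", kept, posted_frags - kept);
        return 0;
}
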
@@ -475,6 +559,30 @@ static void sdp_handle_wc(struct sdp_sock *ssk, struct ib_wc *wc)
                                /* this will wake recvmsg */
                                sdp_sock_queue_rcv_skb(&ssk->isk.sk, skb);
                                sdp_fin(&ssk->isk.sk);
+                       } else if (h->mid == SDP_MID_CHRCVBUF) {
+                               u32 new_size = *(u32 *)skb->data;
+
+                               if (ssk->recv_request || sdp_can_resize()) {
+                                       ssk->rcvbuf_scale = rcvbuf_scale;
+                                       sdp_resize(ssk, ntohs(new_size));
+                                       ssk->recv_request_head = ssk->rx_head + 1;
+                               } else
+                                       ssk->recv_request_head = ssk->rx_tail;
+                               ssk->recv_request = 1;
+                               __kfree_skb(skb);
+                       } else if (h->mid == SDP_MID_CHRCVBUF_ACK) {
+                               u32 new_size = *(u32 *)skb->data;
+                               new_size = ntohs(new_size);
+
+                               if (new_size > ssk->xmit_size_goal) {
+                                       ssk->sent_request = -1;
+                                       ssk->xmit_size_goal = new_size;
+                                       ssk->send_frags =
+                                               PAGE_ALIGN(ssk->xmit_size_goal) /
+                                               PAGE_SIZE;
+                               } else
+                                       ssk->sent_request = 0;
+                               __kfree_skb(skb);
                        } else {
                                /* TODO: Handle other messages */
                                printk("SDP: FIXME MID %d\n", h->mid);
diff --git a/drivers/infiniband/ulp/sdp/sdp_cma.c b/drivers/infiniband/ulp/sdp/sdp_cma.c
index 9700e15f0b9f51672bbb354e530ad8c09388ffe2..9594c8e7384d84f035e03b820dea9ef040af247d 100644
@@ -175,6 +175,8 @@ int sdp_init_qp(struct sock *sk, struct rdma_cm_id *id)
 
        init_waitqueue_head(&sdp_sk(sk)->wq);
 
+       sdp_sk(sk)->recv_frags = 0;
+       sdp_sk(sk)->rcvbuf_scale = 1;
        sdp_post_recvs(sdp_sk(sk));
 
        sdp_dbg(sk, "%s done\n", __func__);
@@ -235,6 +237,8 @@ int sdp_connect_handler(struct sock *sk, struct rdma_cm_id *id,
        sdp_sk(child)->bufs = ntohs(h->bsdh.bufs);
        sdp_sk(child)->xmit_size_goal = ntohl(h->localrcvsz) -
                sizeof(struct sdp_bsdh);
+       sdp_sk(child)->send_frags = PAGE_ALIGN(sdp_sk(child)->xmit_size_goal) /
+               PAGE_SIZE;
 
        sdp_dbg(child, "%s bufs %d xmit_size_goal %d\n", __func__,
                sdp_sk(child)->bufs,
@@ -272,6 +276,8 @@ static int sdp_response_handler(struct sock *sk, struct rdma_cm_id *id,
        sdp_sk(sk)->bufs = ntohs(h->bsdh.bufs);
        sdp_sk(sk)->xmit_size_goal = ntohl(h->actrcvsz) -
                sizeof(struct sdp_bsdh);
+       sdp_sk(sk)->send_frags = PAGE_ALIGN(sdp_sk(sk)->xmit_size_goal) /
+               PAGE_SIZE;
 
        sdp_dbg(sk, "%s bufs %d xmit_size_goal %d\n", __func__,
                sdp_sk(sk)->bufs,
@@ -387,7 +393,7 @@ int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
                hh.bsdh.len = htonl(sizeof(struct sdp_bsdh) + SDP_HH_SIZE);
                hh.max_adverts = 1;
                hh.majv_minv = SDP_MAJV_MINV;
-               hh.localrcvsz = hh.desremrcvsz = htonl(SDP_MAX_SEND_SKB_FRAGS *
+               hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_frags *
                                                       PAGE_SIZE + SDP_HEAD_SIZE);
                hh.max_adverts = 0x1;
                inet_sk(sk)->saddr = inet_sk(sk)->rcv_saddr =
@@ -421,7 +427,7 @@ int sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
                hah.majv_minv = SDP_MAJV_MINV;
                hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
                                            but just in case */
-               hah.actrcvsz = htonl(SDP_MAX_SEND_SKB_FRAGS * PAGE_SIZE + SDP_HEAD_SIZE);
+               hah.actrcvsz = htonl(sdp_sk(child)->recv_frags * PAGE_SIZE + SDP_HEAD_SIZE);
                memset(&conn_param, 0, sizeof conn_param);
                conn_param.private_data_len = sizeof hah;
                conn_param.private_data = &hah;
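
With the sdp_cma.c changes above, the hello and hello-ack headers advertise the locally chosen receive size, recv_frags * PAGE_SIZE + SDP_HEAD_SIZE, instead of the compile-time maximum, and each side derives send_frags from what the peer advertised. A numeric walk-through; PAGE_SIZE, SDP_HEAD_SIZE, and the BSDH header size are assumed values here:

#include <stdio.h>

#define PAGE_SIZE     4096u
#define SDP_HEAD_SIZE 2048u
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
        unsigned recv_frags = 4;                 /* local choice         */
        unsigned advertised = recv_frags * PAGE_SIZE + SDP_HEAD_SIZE;

        /* The peer turns the advertisement into its send-side limits,
         * as in sdp_connect_handler()/sdp_response_handler(); 16 is an
         * assumed sizeof(struct sdp_bsdh). */
        unsigned bsdh = 16;
        unsigned xmit_size_goal = advertised - bsdh;
        unsigned send_frags = PAGE_ALIGN(xmit_size_goal) / PAGE_SIZE;

        printf("advertise %u -> goal %u, send_frags %u\n",
               advertised, xmit_size_goal, send_frags);
        return 0;
}
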
diff --git a/drivers/infiniband/ulp/sdp/sdp_main.c b/drivers/infiniband/ulp/sdp/sdp_main.c
index 321ba92c4b2120f9c4a9d7bfc2644210b303d4c0..ee6721dc22cda85f5fe3aaacd6703bcf260d7afa 100644
@@ -122,8 +122,6 @@ struct workqueue_struct *sdp_workqueue;
 static struct list_head sock_list;
 static spinlock_t sock_list_lock;
 
-extern atomic_t current_mem_usage;
-
 DEFINE_RWLOCK(device_removal_lock);
 
 inline void sdp_add_sock(struct sdp_sock *ssk)
@@ -195,7 +193,7 @@ static void sdp_destroy_qp(struct sdp_sock *ssk)
                        skb = sdp_recv_completion(ssk, ssk->rx_tail);
                        if (!skb)
                                break;
-                       atomic_sub(SDP_MAX_SEND_SKB_FRAGS, &current_mem_usage);
+                       atomic_sub(SDP_MAX_SEND_SKB_FRAGS, &sdp_current_mem_usage);
                        __kfree_skb(skb);
                }
                while (ssk->tx_head != ssk->tx_tail) {
@@ -216,6 +214,9 @@ static void sdp_destroy_qp(struct sdp_sock *ssk)
        if (pd)
                ib_dealloc_pd(pd);
 
+       if (ssk->recv_frags)
+               sdp_remove_large_sock();
+
        kfree(ssk->rx_ring);
        kfree(ssk->tx_ring);
 }
@@ -1160,7 +1161,7 @@ new_segment:
                                        /* We can extend the last page
                                         * fragment. */
                                        merge = 1;
-                               } else if (i == SDP_MAX_SEND_SKB_FRAGS ||
+                               } else if (i == ssk->send_frags ||
                                           (!i &&
                                           !(sk->sk_route_caps & NETIF_F_SG))) {
                                        /* Need to add new fragment and cannot
@@ -1856,6 +1857,9 @@ static int __init sdp_proc_init(void)
        sdp_seq_afinfo.seq_fops->llseek        = seq_lseek;
        sdp_seq_afinfo.seq_fops->release       = seq_release_private;
 
+       p = proc_net_fops_create(sdp_seq_afinfo.name, S_IRUGO, sdp_seq_afinfo.seq_fops);
+       if (p)
+               p->data = &sdp_seq_afinfo;
@@ -1912,6 +1916,7 @@ static int __init sdp_init(void)
 
        INIT_LIST_HEAD(&sock_list);
        spin_lock_init(&sock_list_lock);
+       spin_lock_init(&sdp_large_sockets_lock);
 
        sdp_workqueue = create_singlethread_workqueue("sdp");
        if (!sdp_workqueue) {
@@ -1935,7 +1940,7 @@ static int __init sdp_init(void)
 
        sdp_proc_init();
 
-       atomic_set(&current_mem_usage, 0);
+       atomic_set(&sdp_current_mem_usage, 0);
 
        ib_register_client(&sdp_client);
 
@@ -1955,9 +1960,9 @@ static void __exit sdp_exit(void)
 
        BUG_ON(!list_empty(&sock_list));
 
-       if (atomic_read(&current_mem_usage))
+       if (atomic_read(&sdp_current_mem_usage))
                printk(KERN_WARNING "%s: current mem usage %d\n", __func__,
-                      atomic_read(&current_mem_usage));
+                      atomic_read(&sdp_current_mem_usage));
 
        sdp_proc_unregister();