]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: update IB flow control algorithm
author: Wei Lin Guay <wei.lin.guay@oracle.com>
Thu, 17 Dec 2015 08:34:33 +0000 (09:34 +0100)
committer: Chuck Anderson <chuck.anderson@oracle.com>
Sun, 17 Apr 2016 21:49:50 +0000 (14:49 -0700)
The current algorithm, which uses a hard-coded threshold of 16
in rds_ib_advertise_credits(), does not serve its purpose:
because post_recvs() are performed in bulk, the test
condition (posted credits >= 16) will always be true.

This patch moves rds_ib_advertise_credits() into the
post_recvs() loop. Instead of updating the post_recv credits
only after all the post_recvs() have completed, the post_recv
credits are updated incrementally at exponentially growing
intervals: the update threshold starts at 16 and is shifted
left by 4 bits (i.e. multiplied by 16) after each update.
The proposed exponential algorithm serves as a good compromise
between letting the peer start early and, at the same time,
reducing the number of explicit ACKs. The explicit credit-update
ACKs will be generated after batches of 16, 256, 4096, ...
posted receives.

The performance numbers below show that this new flow
control algorithm has minimal impact on performance even though
it requires additional explicit ACKs.

4 QPs, 32t, 16d -q 8448 -a 256 (rds-parameter)

HCAs flow_ctl no flow_ctl
Mellanox CX3 744K 742K
Oracle QDR M4 819K 831K

Orabug: 22306628

Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
Reviewed-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: Wei Lin Guay <wei.lin.guay@oracle.com>
net/rds/ib_recv.c
net/rds/ib_send.c

index 6eb1b3f795ec705147fdb0628df7665829b081c4..22a8e2bd028a5fc72d8411a2e4eef0f599d17a44 100644 (file)
@@ -578,6 +578,11 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
        struct rds_ib_recv_work *recv;
        struct ib_recv_wr *failed_wr;
        unsigned int posted = 0;
+       unsigned int flowctl_credits = 0;
+       /* For the time being, 16 seems to be a good starting number to
+        * perform flow control update.
+        */
+       unsigned int flow_cntl_log2_cnt = 16;
        int ret = 0;
        int can_wait = gfp & __GFP_WAIT;
        int must_wake = 0;
@@ -637,6 +642,27 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
                }
 
                posted++;
+               if (ic->i_flowctl) {
+                       flowctl_credits++;
+                       /* Decide whether to send an update to the peer now.
+                        * If we would send a credit update for every single
+                        * buffer we post, we would end up with an ACK
+                        * storm (ACK arrives,consumes buffer, we refill
+                        * the ring, send ACK to remote advertising the
+                        * newly posted buffer... ad inf)
+                        *
+                        * Performance pretty much depends on how often we send
+                        * credit updates - too frequent updates mean lots of
+                        * ACKs. Too infrequent updates, and the peer will run
+                        * out of credits and has to throttle.
+                        * For the time being, incremental cnt << 4 is used.
+                        */
+                       if (flowctl_credits == flow_cntl_log2_cnt) {
+                               rds_ib_advertise_credits(conn, flowctl_credits);
+                               flow_cntl_log2_cnt <<= 4;
+                               flowctl_credits = 0;
+                       }
+               }
 
                if ((posted > 128 && need_resched()) || posted > 8192) {
                        must_wake = 1;
@@ -649,8 +675,8 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
        ring_empty = rds_ib_ring_empty(&ic->i_recv_ring);
 
        /* We're doing flow control - update the window. */
-       if (ic->i_flowctl && posted)
-               rds_ib_advertise_credits(conn, posted);
+       if (ic->i_flowctl && flowctl_credits)
+               rds_ib_advertise_credits(conn, flowctl_credits);
 
        if (ret)
                rds_ib_ring_unalloc(&ic->i_recv_ring, 1);
index a17d5ccacf7f3dacafc61802bbf3e306f2477f9f..621860fb091730c2e767e62fadb7d479447bb3fa 100644 (file)
@@ -498,8 +498,7 @@ void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted)
         */
        rds_rtd(RDS_RTD_FLOW_CNTRL, "ic->i_credits %u\n",
                IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)));
-       if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16)
-               set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
+       set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
 }
 
 static inline int rds_ib_set_wr_signal_state(struct rds_ib_connection *ic,