]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
rds: ib: Use a delay when reconnecting to the very same IP address
author Håkon Bugge <haakon.bugge@oracle.com>
Wed, 2 Jan 2019 13:59:35 +0000 (14:59 +0100)
committer Brian Maly <brian.maly@oracle.com>
Tue, 22 Jan 2019 19:13:30 +0000 (14:13 -0500)
An RDS IB connection may be formed from the very same IB port using
HCA level internal loop-back. If this connection attempt is performed
after RDS has cleared the ARP cache of the same IP address, an ARP IB
multicast is sent out on the IPoIB interface.

If the above scenario is performed on IPoIB interfaces that are
members of an IB Limited Partition, the ARP multicast will be dropped
by the HCA port. A corresponding PKey Violation is counted and a
corresponding PKey Violation Trap is sent to the OpenSM, subject to
rate control.

Now, due to a bug in RDS connection management, where it was not
anticipated that the peers of a connection could actually be the very
same port and have the same IP address, the reconnect attempts happen
with zero delay.

This leads to about 7700 connection attempts per second, about
4400 PKey Violations per second, and 8500 ARP multicasts per second.

This commit reduces the reconnect rate to about one attempt per
second. This is because RDS uses exponential backoff to calculate the
delay, which quickly reaches rds_sysctl_reconnect_max_jiffies, which by
default is HZ; in other words, a delay of one second after the first
10 reconnects.

Orabug: 29138813

Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
Reviewed-by: Ka-cheong Poon <ka-cheong.poon@oracle.com>
---

v1 -> v2:
   * Amended commit message as per Ka-Cheong's suggestions

Signed-off-by: Brian Maly <brian.maly@oracle.com>
net/rds/threads.c

index 12ac53c360cbda2a6ada09d87b8988cda9961cab..d828f1be63f7d37c5da165639b0a55788a5dd214 100644 (file)
@@ -134,7 +134,6 @@ EXPORT_SYMBOL_GPL(rds_connect_complete);
  */
 void rds_queue_reconnect(struct rds_conn_path *cp)
 {
-       unsigned long rand;
        struct rds_connection *conn = cp->cp_conn;
        bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP;
 
@@ -154,17 +153,16 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
                return;
        }
 
-       get_random_bytes(&rand, sizeof(rand));
        rds_rtd_ptr(RDS_RTD_CM_EXT,
-                   "%lu delay %lu ceil conn %p for %pI6c -> %pI6c tos %d\n",
-                   rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
-                   conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
+                   "delay %lu conn %p for %pI6c -> %pI6c tos %d\n",
+                   cp->cp_reconnect_jiffies, conn, &conn->c_laddr,
+                   &conn->c_faddr, conn->c_tos);
 
-       if (rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) >= 0)
+       if (rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) > 0)
                queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
        else
                queue_delayed_work(cp->cp_wq, &cp->cp_conn_w,
-                                  msecs_to_jiffies(100));
+                                  cp->cp_reconnect_jiffies);
 
        cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
                                        rds_sysctl_reconnect_max_jiffies);