www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
net/rds: determine active/passive connection with IP addresses
author Wei Lin Guay <wei.lin.guay@oracle.com>
Mon, 15 May 2017 11:42:56 +0000 (13:42 +0200)
committer Chuck Anderson <chuck.anderson@oracle.com>
Fri, 23 Jun 2017 04:30:08 +0000 (21:30 -0700)
This patch changes RDS to use randomized backoff only on the first attempt
to reconnect. This means both ends try to be active by sending a REQ to
their peer after a random t seconds. If the connection can't be established
due to a race, a comparison of the peers' IP addresses is used to determine
active/passive connection establishment (e.g., IP_A > IP_B).

The following diagram illustrates the connection establishment:

t1randA: 192.168.1.A (active)  --------------> 192.168.1.B (passive)
t1randB: 192.168.1.A (passive) <-------------  192.168.1.B (active)
t2     : 192.168.1.A (active) ---------------> REJ
t3     : 192.168.1.B (active) ---------------> REJ
t4     : Connection between A,B is not up.
t5     : 192.168.1.A (active) --------------> 192.168.1.B (passive)

Orabug: 25521901

Signed-off-by: Wei Lin Guay <wei.lin.guay@oracle.com>
Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
Tested-by: Dib Chatterjee <dib.chatterjee@oracle.com>
Tested-by: Rosa Isela Lopez Romero <rosa.lopez@oracle.com>
net/rds/ib_cm.c
net/rds/rds.h
net/rds/threads.c

index 4485f02d8119b03368fc85cc3cd8c96f0e389011..6ca5ce32e1de816044423ad8df922f5d28f2d47a 100644 (file)
@@ -914,6 +914,14 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
         * see the comment above rds_queue_reconnect()
         */
        mutex_lock(&conn->c_cm_lock);
+       if (rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN) &&
+           (conn->c_laddr < conn->c_faddr)) {
+               rds_rtd(RDS_RTD_CM_EXT_P,
+                       "incoming passive connection is trying to connect %p\n",
+                       conn);
+               rds_conn_drop(conn, DR_IB_CONN_DROP_RACE);
+               goto out;
+       }
        if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
                /*
                 * in both of the cases below, the conn is half setup.
@@ -944,10 +952,39 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                                        NIPQUAD(conn->c_laddr),
                                        NIPQUAD(conn->c_faddr),
                                        conn->c_tos);
+                               rds_rtd(RDS_RTD_CM, "RDS/IB: connection "
+                                       " id %p conn %p "
+                                       "<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
+                                       "racing for 15s, forcing reset\n",
+                                       cm_id, conn,
+                                       NIPQUAD(conn->c_laddr),
+                                       NIPQUAD(conn->c_faddr),
+                                       conn->c_tos);
                                rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING);
                                rds_ib_stats_inc(s_ib_listen_closed_stale);
                        } else {
                                /* Wait and see - our connect may still be succeeding */
+                               rds_rtd(RDS_RTD_CM, "RDS/IB: connection "
+                                       " id %p conn %p "
+                                       "<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
+                                       " will be rejected\n",
+                                       cm_id, conn,
+                                       NIPQUAD(conn->c_laddr),
+                                       NIPQUAD(conn->c_faddr),
+                                       conn->c_tos);
+                               if (test_and_clear_bit(RDS_INITIAL_RECONNECT, &conn->c_flags) ||
+                                   (conn->c_laddr > conn->c_faddr) ||
+                                   rds_conn_self_loopback_passive(conn)) {
+                                       rds_rtd(RDS_RTD_CM, "RDS/IB: connection "
+                                               " id %p conn %p "
+                                               "<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
+                                               " will be rejected as passive conn\n",
+                                               cm_id, conn,
+                                               NIPQUAD(conn->c_laddr),
+                                               NIPQUAD(conn->c_faddr),
+                                               conn->c_tos);
+                                       rds_conn_drop(conn, DR_IB_CONN_DROP_RACE);
+                               }
                                rds_ib_stats_inc(s_ib_connect_raced);
                        }
                }
index 53e71e1b41b3f6c9aeeeff7ac42547b5ef3dd576..b7a0227b20ba666908ff83df615ab07fb3fb56cd 100644 (file)
@@ -136,6 +136,7 @@ enum {
 #define RDS_IN_XMIT            2
 #define RDS_RECV_REFILL                3
 #define RDS_DESTROY_PENDING    4
+#define RDS_INITIAL_RECONNECT  5
 
 #define RDS_RDMA_RESOLVE_TO_MAX_INDEX   5
 #define RDS_ADDR_RES_TM_INDEX_MAX 5
index c8a3861052caba57f9effb0348bb6e484f569e8c..7a04853fd4a1b373debcd3a4e9199fc2f807f3a4 100644 (file)
@@ -134,6 +134,7 @@ EXPORT_SYMBOL_GPL(rds_connect_complete);
  */
 void rds_queue_reconnect(struct rds_conn_path *cp)
 {
+       unsigned long delay = 0;
        unsigned long rand;
        struct rds_connection *conn = cp->cp_conn;
        bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP;
@@ -149,20 +150,22 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
 
        set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
        if (cp->cp_reconnect_jiffies == 0) {
+               set_bit(RDS_INITIAL_RECONNECT, &cp->cp_flags);
                cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
-               queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
+               queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, rand % conn->c_reconnect_jiffies);
                return;
        }
 
-       get_random_bytes(&rand, sizeof(rand));
+       clear_bit(RDS_INITIAL_RECONNECT, &conn->c_flags);
+       if ((conn->c_laddr > conn->c_faddr) ||
+           rds_conn_self_loopback_passive(conn))
+               delay = msecs_to_jiffies(15000);
        rds_rtd(RDS_RTD_CM_EXT,
                "%lu delay %lu ceil conn %p for %pI4 -> %pI4 tos %d\n",
-               rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
+               delay, cp->cp_reconnect_jiffies,
                conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
 
-       queue_delayed_work(cp->cp_wq, &cp->cp_conn_w,
-                          rand % cp->cp_reconnect_jiffies);
-
+       queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, delay);
        cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
                                        rds_sysctl_reconnect_max_jiffies);
 }