www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
rds: Fix one-sided connect
author: Håkon Bugge <haakon.bugge@oracle.com>
Thu, 7 Mar 2019 11:39:22 +0000 (12:39 +0100)
committer: Brian Maly <brian.maly@oracle.com>
Tue, 21 May 2019 22:00:26 +0000 (18:00 -0400)
The decision to designate a peer to be the active side did not take
loopback connections into account. Further, fix a bug in
rds_shutdown_worker where the passive side did not attempt to restart
the connection when no reconnect race had been detected.

Orabug: 29391909

Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
Tested-by: Rosa Lopez <rosa.lopez@oracle.com>
Reviewed-by: Dag Moxnes <dag.moxnes@oracle.com>
---

v1 -> v2:
   * Incorporated review comments from Dag
   * Split the commit in two

Signed-off-by: Brian Maly <brian.maly@oracle.com>
net/rds/ib_cm.c
net/rds/rdma_transport.c
net/rds/threads.c

index b3137db27735e9153d4b00c00b0f842671df00b6..766b0e100c12eaedcbc1ce8a6fa439822a1f50b5 100644 (file)
@@ -1112,6 +1112,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                                conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
                        rds_conn_drop(conn, DR_IB_REQ_WHILE_CONN_UP);
                        rds_ib_stats_inc(s_ib_listen_closed_stale);
+                       conn->c_reconnect_racing++;
                } else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
                        unsigned long now = get_seconds();
                        conn->c_reconnect_racing++;
index d7a68a21b4ac265066758d0d6eaabefb9ebac292..08372651c2747330735eaeaede855bb04fdd924a 100644 (file)
@@ -117,13 +117,15 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
                /* If the connection is being shut down, bail out
                 * right away. We return 0 so cm_id doesn't get
                 * destroyed prematurely */
-               if (rds_conn_state(conn) == RDS_CONN_DISCONNECTING) {
+               if (rds_conn_state(conn) == RDS_CONN_DISCONNECTING ||
+                   rds_conn_state(conn) == RDS_CONN_ERROR) {
                        /* Reject incoming connections while we're tearing
                         * down an existing one. */
                        if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
                                ret = 1;
                        rds_rtd(RDS_RTD_CM, "Bailing, conn %p being shut down, ret: %d\n",
                                conn, ret);
+                       conn->c_reconnect_racing = 1;
                        goto out;
                }
        }
@@ -202,10 +204,10 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
                break;
 
        case RDMA_CM_EVENT_ROUTE_ERROR:
-               /* IP might have been moved so flush the ARP entry and retry */
-               rds_ib_flush_arp_entry(&conn->c_faddr);
-
                if (conn) {
+                       /* IP might have been moved so flush the ARP entry and retry */
+                       rds_ib_flush_arp_entry(&conn->c_faddr);
+
                        rds_rtd_ptr(RDS_RTD_ERR,
                                    "ROUTE_ERROR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
                                    conn, &conn->c_laddr,
@@ -313,7 +315,8 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
                conn->c_reconnect_racing = 0;
                /* reset route resolution flag */
                conn->c_route = 1;
-               rds_conn_drop(conn, DR_IB_DISCONNECTED_EVENT);
+               if (!rds_conn_self_loopback_passive(conn))
+                       rds_conn_drop(conn, DR_IB_DISCONNECTED_EVENT);
                break;
 
        case RDMA_CM_EVENT_TIMEWAIT_EXIT:
index 3acc7814632872ee94200c6fc17cff33ebf6947a..db4660dde8c3bee67352ed3ebe7c7f36795c4c63 100644 (file)
@@ -99,6 +99,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
        queue_delayed_work(cp->cp_wq, &cp->cp_send_w, 0);
        queue_delayed_work(cp->cp_wq, &cp->cp_recv_w, 0);
        queue_delayed_work(cp->cp_wq, &cp->cp_hb_w, 0);
+       cancel_delayed_work(&cp->cp_reconn_w);
        cp->cp_hb_start = 0;
 
        cp->cp_connection_start = get_seconds();
@@ -115,6 +116,15 @@ void rds_connect_complete(struct rds_connection *conn)
 }
 EXPORT_SYMBOL_GPL(rds_connect_complete);
 
+static bool rds_conn_is_active_peer(struct rds_connection *conn)
+{
+       bool greater_ip = rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) > 0;
+       bool self_loopback = rds_conn_self_loopback_passive(conn);
+       bool passive = greater_ip || self_loopback;
+
+       return !passive;
+}
+
 /*
  * This random exponential backoff is relied on to eventually resolve racing
  * connects.
@@ -137,36 +147,41 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
 {
        struct rds_connection *conn = cp->cp_conn;
        bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP;
+       bool active = rds_conn_is_active_peer(conn);
+       uint64_t delay = 0;
 
        rds_rtd_ptr(RDS_RTD_CM_EXT,
                    "conn %p:%d <%pI6c,%pI6c,%d> reconnect jiffies %lu\n",
-                   conn, !!conn->c_passive, &conn->c_laddr, &conn->c_faddr, conn->c_tos,
+                   conn, active, &conn->c_laddr, &conn->c_faddr, conn->c_tos,
                    cp->cp_reconnect_jiffies);
 
        /* let peer with smaller addr initiate reconnect, to avoid duels */
        if (is_tcp && rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) >= 0)
                return;
 
-       set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
-       if (cp->cp_reconnect_jiffies == 0) {
-               cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
-               queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
+       /* If we're the passive initiator and we're racing, let the
+        * active peer drive the reconnect
+        */
+       if (!active && cp->cp_reconnect_racing)
                return;
-       }
-
-       rds_rtd_ptr(RDS_RTD_CM_EXT,
-                   "delay %lu conn %p <%pI6c,%pI6c,%d>\n",
-                   cp->cp_reconnect_jiffies, conn, &conn->c_laddr,
-                   &conn->c_faddr, conn->c_tos);
 
-       if (rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) > 0)
-               queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
+       if (cp->cp_reconnect_jiffies == 0)
+               cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
        else
-               queue_delayed_work(cp->cp_wq, &cp->cp_conn_w,
-                                  cp->cp_reconnect_jiffies);
+               delay = cp->cp_reconnect_jiffies;
+
+       if (!active)
+               delay = rds_sysctl_reconnect_max_jiffies;
 
+       rds_rtd_ptr(RDS_RTD_CM_EXT,
+                   "conn %p:%d <%pI6c,%pI6c,%d> delay %llu reconnect jiffies %lu\n",
+                   conn, active, &conn->c_laddr, &conn->c_faddr, conn->c_tos,
+                   delay, cp->cp_reconnect_jiffies);
+
+       set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
+       queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, delay);
        cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
-                                       rds_sysctl_reconnect_max_jiffies);
+                                      rds_sysctl_reconnect_max_jiffies);
 }
 
 void rds_connect_worker(struct work_struct *work)
@@ -325,6 +340,7 @@ void rds_shutdown_worker(struct work_struct *work)
        unsigned long now = get_seconds();
        bool is_tcp = cp->cp_conn->c_trans->t_type == RDS_TRANS_TCP;
        struct rds_connection *conn = cp->cp_conn;
+       bool restart = true;
 
        if ((now - cp->cp_reconnect_start >
                rds_sysctl_shutdown_trace_start_time) &&
@@ -340,21 +356,17 @@ void rds_shutdown_worker(struct work_struct *work)
        /* If racing is detected, the bigger IP backs off and lets the
         * smaller IP drive the reconnect (one-sided reconnect).
         */
-       if ((rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) < 0 ||
-            rds_conn_self_loopback_passive(conn)) &&
-           cp->cp_reconnect_racing) {
-               rds_rtd_ptr(RDS_RTD_CM,
-                           "calling rds_conn_shutdown, conn %p:0 <%pI6c,%pI6c,%d>\n",
-                           conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
-               rds_conn_shutdown(cp, 0);
+       if (cp->cp_reconnect_racing)
+               restart = rds_conn_is_active_peer(conn);
+
+       rds_rtd_ptr(RDS_RTD_CM,
+                   "calling rds_conn_shutdown, conn %p restart: %d racing: %d <%pI6c,%pI6c,%d>\n",
+                   conn, restart, cp->cp_reconnect_racing,
+                   &conn->c_laddr, &conn->c_faddr, conn->c_tos);
+       rds_conn_shutdown(cp, restart);
+       if (!restart)
                queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w,
                                   msecs_to_jiffies(RDS_RECONNECT_RETRY_MS));
-       } else {
-               rds_rtd_ptr(RDS_RTD_CM,
-                           "calling rds_conn_shutdown, conn %p:1 <%pI6c,%pI6c,%d>\n",
-                           conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
-               rds_conn_shutdown(cp, 1);
-       }
 }
 
 void rds_threads_exit(void)