www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
net/rds: use one sided reconnection during a race
author Wei Lin Guay <wei.lin.guay@oracle.com>
Fri, 9 Jun 2017 12:20:56 +0000 (14:20 +0200)
committer Brian Maly <brian.maly@oracle.com>
Tue, 12 Jun 2018 00:37:12 +0000 (20:37 -0400)
This commit reverts commit 812c02791add ("RDS: restore the exponential
back-off scheme") to use one sided reconnection when a race is
detected. When a race is detected, the active side reconnects as fast
as possible, whereas the passive side waits for 15s.

Orabug: 28068627

Signed-off-by: Wei Lin Guay <wei.lin.guay@oracle.com>
Tested-by: Dib Chatterjee <dib.chatterjee@oracle.com>
(cherry picked from commit 464c84386ab55a2700d963619a470a55e53a1b66
repo https://linux-git.us.oracle.com/UEK/linux-wguay-public)

Signed-off-by: Brian Maly <brian.maly@oracle.com>
Conflicts:
net/rds/ib_cm.c
net/rds/rdma_transport.c
net/rds/threads.c

Made it checkpatch clean.

Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
Reviewed-by: Shannon Nelson <shannon.nelson@oracle.com>
Reviewed-by: Zhu Yanjun <yanjun.zhu@oracle.com>
Signed-off-by: Brian Maly <brian.maly@oracle.com>
net/rds/connection.c
net/rds/ib_cm.c
net/rds/rdma_transport.c
net/rds/rdma_transport.h
net/rds/rds.h
net/rds/threads.c

index e6417d8122933610eb40bd73f0d384cf53242b14..89f2539c9be4334933e4950dce7789af452a4ff4 100644 (file)
@@ -378,7 +378,7 @@ struct rds_connection *rds_conn_find(struct net *net, struct in6_addr *laddr,
 }
 EXPORT_SYMBOL_GPL(rds_conn_find);
 
-void rds_conn_shutdown(struct rds_conn_path *cp)
+void rds_conn_shutdown(struct rds_conn_path *cp, int restart)
 {
        struct rds_connection *conn = cp->cp_conn;
 
@@ -447,7 +447,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp)
         * conn - the reconnect is always triggered by the active peer. */
        cancel_delayed_work_sync(&cp->cp_conn_w);
        rcu_read_lock();
-       if (!hlist_unhashed(&conn->c_hash_node)) {
+       if (!hlist_unhashed(&conn->c_hash_node) && restart) {
                rcu_read_unlock();
                rds_queue_reconnect(cp);
        } else {
index d1e44efa0d8229870d90f973daadac3999d657b7..603b295987bd35667168172cb275702dbb9b23c8 100644 (file)
@@ -1071,13 +1071,27 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                        rds_ib_stats_inc(s_ib_listen_closed_stale);
                } else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
                        unsigned long now = get_seconds();
+                       conn->c_reconnect_racing++;
 
+                       /* When a race is detected, one side should fall back
+                        * to passive and let the active side to reconnect.
+                        * If the connection is in CONNECTING and still receive
+                        * multiple back-to-back REQ, it means something is
+                        * horribly wrong. Thus, drop the connection.
+                        */
+                       if (conn->c_reconnect_racing > 5) {
+                               rds_rtd_ptr(RDS_RTD_CM,
+                                           "RDS/IB: conn <%pI6c,%pI6c,%d> back-to-back REQ, reset\n",
+                                           &conn->c_laddr, &conn->c_faddr,
+                                           conn->c_tos);
+                               conn->c_reconnect_racing = 0;
+                               rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING);
                        /* After 15 seconds, give up on existing connection
                         * attempts and make them try again.  At this point
                         * it's no longer a race but something has gone
-                        * horribly wrong
+                        * horribly wrong.
                         */
-                       if (now > conn->c_connection_start &&
+                       } else if (now > conn->c_connection_start &&
                            now - conn->c_connection_start > 15) {
                                rds_rtd_ptr(RDS_RTD_CM,
                                            "RDS/IB: connection <%pI6c,%pI6c,%d> racing for 15s, forcing reset",
index ebf336b3e4ae7a6b48e8472c137ddf3a906125b7..6ae9aa0670f64c31be2359a1fe7862f8cc33920f 100644 (file)
@@ -203,6 +203,7 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
                                    "ROUTE_ERROR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
                                    conn, &conn->c_laddr,
                                    &conn->c_faddr, conn->c_tos);
+                       conn->c_reconnect_racing = 0;
                        rds_conn_drop(conn, DR_IB_ROUTE_ERR);
                }
                break;
@@ -217,6 +218,7 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
                                    "ADDR_ERROR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
                                    conn, &conn->c_laddr,
                                    &conn->c_faddr, conn->c_tos);
+                       conn->c_reconnect_racing = 0;
                        rds_conn_drop(conn, DR_IB_ADDR_ERR);
                }
                break;
@@ -229,6 +231,7 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
                                    "CONN/UNREACHABLE/RMVAL ERR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
                                    conn, &conn->c_laddr,
                                    &conn->c_faddr, conn->c_tos);
+                       conn->c_reconnect_racing = 0;
                        rds_conn_drop(conn, DR_IB_CONNECT_ERR);
                }
                break;
@@ -236,6 +239,15 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
        case RDMA_CM_EVENT_REJECTED:
                err = (int *)event->param.conn.private_data;
 
+               if (conn && event->status == RDS_REJ_CONSUMER_DEFINED &&
+                   *err <= 1) {
+                       conn->c_reconnect_racing++;
+                       rds_rtd_ptr(RDS_RTD_ERR,
+                                   "conn %p, reconnect racing (%d) rds_conn_drop <%pI6c,%pI6c,%d>\n",
+                                   conn, conn->c_reconnect_racing, &conn->c_laddr,
+                                   &conn->c_faddr, conn->c_tos);
+               }
+
                if (conn) {
                        if (event->status == RDS_REJ_CONSUMER_DEFINED &&
                            (*err) == 0) {
@@ -278,6 +290,7 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
                                    "ADDR_CHANGE: calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
                                    &conn->c_laddr, &conn->c_faddr,
                                    conn->c_tos);
+                       conn->c_reconnect_racing = 0;
                        if (!rds_conn_self_loopback_passive(conn))
                                rds_conn_drop(conn, DR_IB_ADDR_CHANGE);
                }
@@ -287,6 +300,7 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
                rds_rtd_ptr(RDS_RTD_CM,
                            "DISCONNECT event - dropping connection %pI6c->%pI6c tos %d\n",
                            &conn->c_laddr, &conn->c_faddr, conn->c_tos);
+               conn->c_reconnect_racing = 0;
                rds_conn_drop(conn, DR_IB_DISCONNECTED_EVENT);
                break;
 
index ed08a41f1699bfc8b2bd395222c1491ebb781305..8b95f1d446818763d75cca72ae4071b6e39a6fd9 100644 (file)
@@ -5,7 +5,7 @@
 #include <rdma/rdma_cm.h>
 #include "rds.h"
 
-#define RDS_RDMA_RESOLVE_TIMEOUT_MS     5000
+#define RDS_RDMA_RESOLVE_TIMEOUT_MS     RDS_RECONNECT_RETRY_MS
 
 /* Per IB specification 7.7.3, service level is a 4-bit field. */
 #define TOS_TO_SL(tos) ((tos) & 0xF)
index 3be84f04df8340ea2b991cc9f9c0595e0f4fbc3d..59f6063c3032b5abf6c3c258eee78dcb1ac3ff0d 100644 (file)
@@ -48,6 +48,8 @@
 #define KERNEL_HAS_ATOMIC64
 #endif
 
+#define RDS_RECONNECT_RETRY_MS 15000
+
 #ifdef RDS_DEBUG
 #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
 #else
@@ -333,7 +335,6 @@ struct rds_connection {
        wait_queue_head_t       c_hs_waitq; /* handshake waitq */
 
 
-       /* used by RDS_CONN_RESET */
        struct list_head        c_laddr_node;
 
        u32                     c_my_gen_num;
@@ -917,7 +918,7 @@ struct rds_connection *rds_conn_find(struct net *net, struct in6_addr *laddr,
                                     struct in6_addr *faddr,
                                     struct rds_transport *trans, u8 tos,
                                     int dev_if);
-void rds_conn_shutdown(struct rds_conn_path *cp);
+void rds_conn_shutdown(struct rds_conn_path *cp, int restart);
 void rds_conn_destroy(struct rds_connection *conn, int shutdown);
 void rds_conn_reset(struct rds_connection *conn);
 void rds_conn_drop(struct rds_connection *conn, int reason);
index bd1e64a71fc4b1ac038662cb91639d6c2d4f4dea..cc58563c6ba97b23bf1f69c044f24dd6b15cee4f 100644 (file)
@@ -158,8 +158,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
                    rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
                    conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
 
-       queue_delayed_work(cp->cp_wq, &cp->cp_conn_w,
-                          rand % cp->cp_reconnect_jiffies);
+       if (rds_addr_cmp(&conn->c_laddr, &conn->c_faddr))
+               queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
+       else
+               queue_delayed_work(cp->cp_wq, &cp->cp_conn_w,
+                                  msecs_to_jiffies(100));
 
        cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
                                        rds_sysctl_reconnect_max_jiffies);
@@ -308,6 +311,7 @@ void rds_reconnect_timeout(struct work_struct *work)
                            "conn <%pI6c,%pI6c,%d> not up, retry(%d)\n",
                            &conn->c_laddr, &conn->c_faddr, conn->c_tos,
                            cp->cp_reconnect_retry_count);
+               cp->cp_reconnect_racing = 0;
                rds_conn_path_drop(cp, DR_RECONNECT_TIMEOUT);
        }
 }
@@ -332,7 +336,24 @@ void rds_shutdown_worker(struct work_struct *work)
                        conn->c_tos,
                        conn_drop_reason_str(cp->cp_drop_source));
 
-       rds_conn_shutdown(cp);
+       /* if racing is detected, lower IP backs off and let the higher IP
+        * drives the reconnect (one-sided reconnect)
+        */
+       if ((rds_addr_cmp(&conn->c_faddr, &conn->c_laddr) ||
+            rds_conn_self_loopback_passive(conn)) &&
+           cp->cp_reconnect_racing) {
+               rds_rtd_ptr(RDS_RTD_CM,
+                           "calling rds_conn_shutdown, conn %p:0 <%pI6c,%pI6c,%d>\n",
+                           conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
+               rds_conn_shutdown(cp, 0);
+               queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w,
+                                  msecs_to_jiffies(RDS_RECONNECT_RETRY_MS));
+       } else {
+               rds_rtd_ptr(RDS_RTD_CM,
+                           "calling rds_conn_shutdown, conn %p:1 <%pI6c,%pI6c,%d>\n",
+                           conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
+               rds_conn_shutdown(cp, 1);
+       }
 }
 
 void rds_threads_exit(void)