]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
Revert "Revert "net/rds: Revert "RDS: add reconnect retry scheme for stalled"
authorHåkon Bugge <Haakon.Bugge@oracle.com>
Wed, 23 May 2018 09:21:08 +0000 (11:21 +0200)
committerBrian Maly <brian.maly@oracle.com>
Tue, 12 Jun 2018 00:36:19 +0000 (20:36 -0400)
This reverts commit fde8284e499b ("Revert "net/rds: Revert "RDS: add
reconnect retry scheme for stalled connections""").

Here is the commit message for the first revert:

<quote>
Commit 54312602fb26 ("RDS: add reconnect retry scheme for stalled
connections") introduces the rds_reconnect_timeout to retry the
connection establishment after sysctl_reconnect_retry_ms (default is
1000ms). Nevertheless, this proactive mechanism is overkilled and it
is causing long brownout time in the virtualized environment. In
short, below are the justifications why said commit is reverted.

a) The retry counter starts ticking after RDS received an ADDR_CHANGE
event. After receiving an ADDR_CHANGE event, RDS needs to perform shutdown
via shutdown_worker. Then, initiate a new connection via connect_worker.
Eventually, a CM REQ is only sent out after RDS received
RDMA_CM_EVENT_ADDR_RESOLVED and RDMA_CM_EVENT_ROUTE_RESOLVED events. If the
retry is made to cater for stalled connection due to missing CM messages,
the retry should only happen after a CM REQ is sent.  With the current
retry scheme (and with the default 1000 ms) that happens after ADDR_CHANGE
event, it introduces congestion in the single threaded workqueue.

b) Assuming that we modify the retry counter to start ticking after a
CM REQ message is sent out. By introducing another retry timeout, it
complicates the system tuning. Why? First, the
sysctl_reconnect_retry_ms relies on the underlying
cma_response_timeout. Any modication of cma_response_timeout requires
to tune sysctl_reconnect_retry_ms. Second, it is hard to find a
universal timing that fits all configurations (bare-metal,
virtualized, mixed environment, and homo/heterogenous system).
</quote>

Signed-off-by: Brian Maly <brian.maly@oracle.com>
Conflicts:
net/rds/ib_cm.c
net/rds/rdma_transport.c
net/rds/threads.c

Fixed checkpatch warnings.

Orabug: 28068627

Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
Reviewed-by: Shannon Nelson <shannon.nelson@oracle.com>
Reviewed-by: Zhu Yanjun <yanjun.zhu@oracle.com>
Signed-off-by: Brian Maly <brian.maly@oracle.com>
net/rds/connection.c
net/rds/ib_cm.c
net/rds/rdma_transport.c
net/rds/rds.h
net/rds/rds_single_path.h
net/rds/sysctl.c
net/rds/threads.c

index cd0817022abdb1a4e43ecbcfdd591c1b1f760d10..e6417d8122933610eb40bd73f0d384cf53242b14 100644 (file)
@@ -268,9 +268,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 
                __rds_conn_path_init(conn, cp, is_outgoing);
                cp->cp_index = i;
-               cp->cp_reconnect_retry = rds_sysctl_reconnect_retry_ms;
-               cp->cp_reconnect_retry_count = 0;
-
                if (conn->c_loopback)
                        cp->cp_wq = rds_local_wq;
                else
index ff08c1a17a9e21eaa2340d79440e98a015e05c54..d1e44efa0d8229870d90f973daadac3999d657b7 100644 (file)
@@ -1071,23 +1071,20 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                        rds_ib_stats_inc(s_ib_listen_closed_stale);
                } else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
                        unsigned long now = get_seconds();
-                       unsigned long retry = conn->c_reconnect_retry;
 
-
-                       /* after retry seconds, give up on
-                        * existing connection attempts and try again.
-                        * At this point it's no longer backoff race but
-                        * something has gone horribly wrong.
+                       /* After 15 seconds, give up on existing connection
+                        * attempts and make them try again.  At this point
+                        * it's no longer a race but something has gone
+                        * horribly wrong
                         */
-                       retry = DIV_ROUND_UP(retry, 1000);
                        if (now > conn->c_connection_start &&
-                           now - conn->c_connection_start > retry) {
-                               pr_info("RDS/IB: conn <%pI6c,%pI6c,%d> racing for more than %lus, retry\n",
-                                       &conn->c_laddr, &conn->c_faddr,
-                                       conn->c_tos, retry);
-                               set_bit(RDS_RECONNECT_TIMEDOUT,
-                                       &conn->c_reconn_flags);
-                               rds_conn_drop(conn, DR_RECONNECT_TIMEOUT);
+                           now - conn->c_connection_start > 15) {
+                               rds_rtd_ptr(RDS_RTD_CM,
+                                           "RDS/IB: connection <%pI6c,%pI6c,%d> racing for 15s, forcing reset",
+                                           &conn->c_laddr,
+                                           &conn->c_faddr,
+                                           conn->c_tos);
+                               rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING);
                                rds_ib_stats_inc(s_ib_listen_closed_stale);
                        } else {
                                /* Wait and see - our connect may still be succeeding */
index 80f6a05517d76829dba3bab0ff99e2cea6298b89..ebf336b3e4ae7a6b48e8472c137ddf3a906125b7 100644 (file)
@@ -278,12 +278,8 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
                                    "ADDR_CHANGE: calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
                                    &conn->c_laddr, &conn->c_faddr,
                                    conn->c_tos);
-                       if (!rds_conn_self_loopback_passive(conn)) {
-                               queue_delayed_work(conn->c_path[0].cp_wq,
-                                                  &conn->c_reconn_w,
-                                                  msecs_to_jiffies(conn->c_reconnect_retry));
+                       if (!rds_conn_self_loopback_passive(conn))
                                rds_conn_drop(conn, DR_IB_ADDR_CHANGE);
-                       }
                }
                break;
 
index b6011ce79e50cdce3803b4742174622a6a0bf907..3be84f04df8340ea2b991cc9f9c0595e0f4fbc3d 100644 (file)
@@ -149,8 +149,6 @@ enum {
 #define RDS_MPATH_WORKERS       8
 #define RDS_MPATH_HASH(rs, n) (jhash_1word((rs)->rs_bound_port, \
                               (rs)->rs_hash_initval) & ((n) - 1))
-/* Bits for c_reconn_flags */
-#define RDS_RECONNECT_TIMEDOUT 0
 enum rds_conn_drop_src {
        /* rds-core */
        DR_DEFAULT,
@@ -319,6 +317,15 @@ struct rds_connection {
        unsigned int            c_version;
        struct net              *c_net;
 
+       /* Re-connect stall diagnostics */
+       unsigned long           c_reconnect_start;
+       unsigned int            c_reconnect_drops;
+       int                     c_reconnect_warn;
+       int                     c_reconnect_err;
+       int                     c_to_index;
+
+       unsigned int            c_reconnect;
+
        /* Qos support */
        u8                      c_tos;
 
@@ -1139,8 +1146,6 @@ extern unsigned long rds_sysctl_trace_flags;
 extern unsigned int  rds_sysctl_trace_level;
 extern unsigned int  rds_sysctl_shutdown_trace_start_time;
 extern unsigned int  rds_sysctl_shutdown_trace_end_time;
-extern unsigned long rds_sysctl_reconnect_retry_ms;
-extern unsigned int rds_sysctl_reconnect_max_retries;
 
 /* threads.c */
 int rds_threads_init(void);
index 4b6f6398eb8808f994bd4732e46424cf4e1dc452..b9051724feff2d4e2074fce71e01c1725d12f45d 100644 (file)
@@ -21,7 +21,6 @@
 #define c_send_w               c_path[0].cp_send_w
 #define c_recv_w               c_path[0].cp_recv_w
 #define c_conn_w               c_path[0].cp_conn_w
-#define c_reconn_w             c_path[0].cp_reconn_w
 #define c_down_w               c_path[0].cp_down_w
 #define c_cm_lock              c_path[0].cp_cm_lock
 #define c_waitq                        c_path[0].cp_waitq
@@ -32,8 +31,6 @@
 #define c_acl_init             c_path[0].cp_acl_init
 #define c_connection_start     c_path[0].cp_connection_start
 #define c_reconnect_racing     c_path[0].cp_reconnect_racing
-#define c_reconnect_retry      c_path[0].cp_reconnect_retry
-#define c_reconn_flags         c_path[0].cp_reconn_flags
 #define c_reconnect            c_path[0].cp_reconnect
 #define c_to_index             c_path[0].cp_to_index
 #define c_acl_en               c_path[0].cp_acl_en
index 64b4e77f785530c9c9f2012d68ee52440362764f..b22e8b8b6b89dd48ba5674b4acd5113b64292998 100644 (file)
@@ -52,13 +52,6 @@ unsigned int rds_sysctl_ping_enable = 1;
 unsigned int rds_sysctl_shutdown_trace_start_time;
 unsigned int rds_sysctl_shutdown_trace_end_time;
 
-unsigned long rds_sysctl_reconnect_retry_ms = 1000;
-static unsigned long reconnect_retry_ms_min = 100;
-static unsigned long reconnect_retry_ms_max = 15000;
-
-unsigned int rds_sysctl_reconnect_max_retries = 60;
-static unsigned long reconnect_min_retries = 15;
-
 /*
  * We have official values, but must maintain the sysctl interface for existing
  * software that expects to find these values here.
@@ -133,25 +126,6 @@ static struct ctl_table rds_sysctl_rds_table[] = {
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
-
-       },
-       {
-               .procname       = "reconnect_retry_ms",
-               .data           = &rds_sysctl_reconnect_retry_ms,
-               .maxlen         = sizeof(unsigned long),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &reconnect_retry_ms_min,
-               .extra2         = &reconnect_retry_ms_max,
-       },
-       {
-               .procname       = "reconnect_max_retries",
-               .data           = &rds_sysctl_reconnect_max_retries,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &reconnect_min_retries,
-               .extra2         = &rds_sysctl_reconnect_max_retries,
        },
        { }
 };
index 854c2a0900a317e83e3f61a1e4e18dd856fbcaad..bd1e64a71fc4b1ac038662cb91639d6c2d4f4dea 100644 (file)
@@ -94,8 +94,6 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
                    conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
 
        cp->cp_reconnect_jiffies = 0;
-       cp->cp_reconnect_retry = rds_sysctl_reconnect_retry_ms;
-       cp->cp_reconnect_retry_count = 0;
        set_bit(0, &conn->c_map_queued);
        queue_delayed_work(cp->cp_wq, &cp->cp_send_w, 0);
        queue_delayed_work(cp->cp_wq, &cp->cp_recv_w, 0);
@@ -148,8 +146,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
                return;
 
        set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
-       if (cp->cp_reconnect_jiffies == 0 ||
-           test_and_clear_bit(RDS_RECONNECT_TIMEDOUT, &cp->cp_reconn_flags)) {
+       if (cp->cp_reconnect_jiffies == 0) {
                cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
                queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
                return;
@@ -306,28 +303,12 @@ void rds_reconnect_timeout(struct work_struct *work)
                                                cp_reconn_w.work);
        struct rds_connection *conn = cp->cp_conn;
 
-       if (cp->cp_reconnect_retry_count > rds_sysctl_reconnect_max_retries) {
-               pr_info("RDS: connection <%pI6c,%pI6c,%d> reconnect retries(%d) exceeded, stop retry\n",
-                       &conn->c_laddr, &conn->c_faddr, conn->c_tos,
-                       cp->cp_reconnect_retry_count);
-               return;
-       }
-
-       if (!rds_conn_up(conn)) {
-               if (rds_conn_state(conn) == RDS_CONN_DISCONNECTING) {
-                       queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w,
-                                          msecs_to_jiffies(100));
-               } else {
-                       cp->cp_reconnect_retry_count++;
-                       rds_rtd_ptr(RDS_RTD_CM,
-                                   "conn <%pI6c,%pI6c,%d> not up, retry(%d)\n",
-                                   &conn->c_laddr, &conn->c_faddr, conn->c_tos,
-                                   cp->cp_reconnect_retry_count);
-                       queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w,
-                                          msecs_to_jiffies(cp->cp_reconnect_retry));
-                       set_bit(RDS_RECONNECT_TIMEDOUT, &cp->cp_reconn_flags);
-                       rds_conn_drop(conn, DR_RECONNECT_TIMEOUT);
-               }
+       if (!rds_conn_path_up(cp)) {
+               rds_rtd_ptr(RDS_RTD_CM,
+                           "conn <%pI6c,%pI6c,%d> not up, retry(%d)\n",
+                           &conn->c_laddr, &conn->c_faddr, conn->c_tos,
+                           cp->cp_reconnect_retry_count);
+               rds_conn_path_drop(cp, DR_RECONNECT_TIMEOUT);
        }
 }