]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
Revert "net/rds: Revert "RDS: add reconnect retry scheme for stalled
author Wei Lin Guay <wei.lin.guay@oracle.com>
Mon, 11 Sep 2017 13:17:28 +0000 (15:17 +0200)
committer Chuck Anderson <chuck.anderson@oracle.com>
Wed, 13 Sep 2017 04:52:40 +0000 (21:52 -0700)
connections""

This commit restores commit 5acb959ad599 ("RDS: add reconnect retry scheme
for stalled connections").  Even though this retry scheme "workaround"
causes a long brownout time in the OVM configuration, it is needed to avoid
RDS loopback connection stalls after a switch reboot on bare-metal
systems. For now, the plan agreed with Exadata is to put this commit back
first and to keep a similar code path across QU6, QU5 and QU4.

Orabug: 26497333

Signed-off-by: Wei Lin Guay <wei.lin.guay@oracle.com>
Reviewed-by: Ajaykumar Hotchandani <ajaykumar.hotchandani@oracle.com>
net/rds/connection.c
net/rds/ib_cm.c
net/rds/rdma_transport.c
net/rds/rds.h
net/rds/rds_single_path.h
net/rds/sysctl.c
net/rds/threads.c

index 6a565fe9f324c008ae57ca678fe27675dbd5b60c..80ce8baa83b1c7de27137cf91e7845b8f051ee91 100644 (file)
@@ -260,6 +260,9 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 
                __rds_conn_path_init(conn, cp, is_outgoing);
                cp->cp_index = i;
+               cp->cp_reconnect_retry = rds_sysctl_reconnect_retry_ms;
+               cp->cp_reconnect_retry_count = 0;
+
                if (conn->c_loopback)
                        cp->cp_wq = rds_local_wq;
                else
index 3fcf0734920086e2828d59548642b662899de31b..6a47845872624f67e3aca0ff7936f655330faf57 100644 (file)
@@ -937,22 +937,23 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                        rds_ib_stats_inc(s_ib_listen_closed_stale);
                } else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
                        unsigned long now = get_seconds();
+                       unsigned long retry = conn->c_reconnect_retry;
 
-                       /*
-                        * after 15 seconds, give up on existing connection
-                        * attempts and make them try again.  At this point
-                        * it's no longer a race but something has gone
-                        * horribly wrong
+
+                       /* after retry seconds, give up on
+                        * existing connection attempts and try again.
+                        * At this point it's no longer backoff race but
+                        * something has gone horribly wrong.
                         */
+                       retry = DIV_ROUND_UP(retry, 1000);
                        if (now > conn->c_connection_start &&
-                           now - conn->c_connection_start > 15) {
-                               printk(KERN_CRIT "RDS/IB: connection "
-                                       "<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
-                                       "racing for 15s, forcing reset ",
-                                       NIPQUAD(conn->c_laddr),
-                                       NIPQUAD(conn->c_faddr),
-                                       conn->c_tos);
-                               rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING);
+                           now - conn->c_connection_start > retry) {
+                               pr_info("RDS/IB: conn <%pI4,%pI4,%d> racing for more than %lus, retry\n",
+                                       &conn->c_laddr, &conn->c_faddr,
+                                       conn->c_tos, retry);
+                               set_bit(RDS_RECONNECT_TIMEDOUT,
+                                       &conn->c_reconn_flags);
+                               rds_conn_drop(conn, DR_RECONNECT_TIMEOUT);
                                rds_ib_stats_inc(s_ib_listen_closed_stale);
                        } else {
                                /* Wait and see - our connect may still be succeeding */
index 2d673a1e09c3f14460dfcadbc03cf5f218d7b5a8..4dfd784a835516170e0903a2d9bb057d19475902 100644 (file)
@@ -308,8 +308,12 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                                "ADDR_CHANGE: calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
                                NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
                                conn->c_tos);
-                       if (!rds_conn_self_loopback_passive(conn))
+                       if (!rds_conn_self_loopback_passive(conn)) {
+                               queue_delayed_work(conn->c_path[0].cp_wq,
+                                                  &conn->c_reconn_w,
+                                                  msecs_to_jiffies(conn->c_reconnect_retry));
                                rds_conn_drop(conn, DR_IB_ADDR_CHANGE);
+                       }
                }
                break;
 
index 69ec690a99a5e31136980a4dd7cabaae6f03031f..0b74ca9934bb77511f09e64b41e405d26ddc2967 100644 (file)
@@ -145,6 +145,8 @@ enum {
 #define RDS_MPATH_WORKERS       8
 #define RDS_MPATH_HASH(rs, n) (jhash_1word((rs)->rs_bound_port, \
                               (rs)->rs_hash_initval) & ((n) - 1))
+/* Bits for c_reconn_flags */
+#define RDS_RECONNECT_TIMEDOUT 0
 enum rds_conn_drop_src {
        /* rds-core */
        DR_DEFAULT,
@@ -318,15 +320,6 @@ struct rds_connection {
        unsigned int            c_version;
        struct net              *c_net;
 
-       /* Re-connect stall diagnostics */
-       unsigned long           c_reconnect_start;
-       unsigned int            c_reconnect_drops;
-       int                     c_reconnect_warn;
-       int                     c_reconnect_err;
-       int                     c_to_index;
-
-       unsigned int            c_reconnect;
-
        /* Qos support */
        u8                      c_tos;
 
@@ -1135,6 +1128,8 @@ extern unsigned long rds_sysctl_trace_flags;
 extern unsigned int  rds_sysctl_trace_level;
 extern unsigned int  rds_sysctl_shutdown_trace_start_time;
 extern unsigned int  rds_sysctl_shutdown_trace_end_time;
+extern unsigned long rds_sysctl_reconnect_retry_ms;
+extern unsigned int rds_sysctl_reconnect_max_retries;
 
 /* threads.c */
 int rds_threads_init(void);
index 2f06ee548eed8f7342116325a84573f6024022e2..1757a78a0fa2d397269fe14d19ac84e6d39fabb7 100644 (file)
@@ -21,6 +21,7 @@
 #define c_send_w               c_path[0].cp_send_w
 #define c_recv_w               c_path[0].cp_recv_w
 #define c_conn_w               c_path[0].cp_conn_w
+#define c_reconn_w             c_path[0].cp_reconn_w
 #define c_down_w               c_path[0].cp_down_w
 #define c_cm_lock              c_path[0].cp_cm_lock
 #define c_waitq                        c_path[0].cp_waitq
@@ -31,6 +32,8 @@
 #define c_acl_init             c_path[0].cp_acl_init
 #define c_connection_start     c_path[0].cp_connection_start
 #define c_reconnect_racing     c_path[0].cp_reconnect_racing
+#define c_reconnect_retry      c_path[0].cp_reconnect_retry
+#define c_reconn_flags         c_path[0].cp_reconn_flags
 #define c_reconnect            c_path[0].cp_reconnect
 #define c_to_index             c_path[0].cp_to_index
 #define c_base_conn            c_path[0].cp_base_conn
index b22e8b8b6b89dd48ba5674b4acd5113b64292998..64b4e77f785530c9c9f2012d68ee52440362764f 100644 (file)
@@ -52,6 +52,13 @@ unsigned int rds_sysctl_ping_enable = 1;
 unsigned int rds_sysctl_shutdown_trace_start_time;
 unsigned int rds_sysctl_shutdown_trace_end_time;
 
+unsigned long rds_sysctl_reconnect_retry_ms = 1000;
+static unsigned long reconnect_retry_ms_min = 100;
+static unsigned long reconnect_retry_ms_max = 15000;
+
+unsigned int rds_sysctl_reconnect_max_retries = 60;
+static unsigned long reconnect_min_retries = 15;
+
 /*
  * We have official values, but must maintain the sysctl interface for existing
  * software that expects to find these values here.
@@ -126,6 +133,25 @@ static struct ctl_table rds_sysctl_rds_table[] = {
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
+
+       },
+       {
+               .procname       = "reconnect_retry_ms",
+               .data           = &rds_sysctl_reconnect_retry_ms,
+               .maxlen         = sizeof(unsigned long),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &reconnect_retry_ms_min,
+               .extra2         = &reconnect_retry_ms_max,
+       },
+       {
+               .procname       = "reconnect_max_retries",
+               .data           = &rds_sysctl_reconnect_max_retries,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &reconnect_min_retries,
+               .extra2         = &rds_sysctl_reconnect_max_retries,
        },
        { }
 };
index dc76c0c90df30760458f7ee19d47477b4951b9ee..28b2f03948320ef4bb979a97c9b40ac6e8b995ff 100644 (file)
@@ -93,6 +93,8 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
                conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
 
        cp->cp_reconnect_jiffies = 0;
+       cp->cp_reconnect_retry = rds_sysctl_reconnect_retry_ms;
+       cp->cp_reconnect_retry_count = 0;
        set_bit(0, &conn->c_map_queued);
        queue_delayed_work(cp->cp_wq, &cp->cp_send_w, 0);
        queue_delayed_work(cp->cp_wq, &cp->cp_recv_w, 0);
@@ -146,7 +148,8 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
                return;
 
        set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
-       if (cp->cp_reconnect_jiffies == 0) {
+       if (cp->cp_reconnect_jiffies == 0 ||
+           test_and_clear_bit(RDS_RECONNECT_TIMEDOUT, &cp->cp_reconn_flags)) {
                cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
                queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
                return;
@@ -316,12 +319,28 @@ void rds_reconnect_timeout(struct work_struct *work)
                                                cp_reconn_w.work);
        struct rds_connection *conn = cp->cp_conn;
 
-       if (!rds_conn_path_up(cp)) {
-               rds_rtd(RDS_RTD_CM,
-                       "conn <%pI4,%pI4,%d> not up, retry(%d)\n",
+       if (cp->cp_reconnect_retry_count > rds_sysctl_reconnect_max_retries) {
+               pr_info("RDS: connection <%pI4,%pI4,%d> reconnect retries(%d) exceeded, stop retry\n",
                        &conn->c_laddr, &conn->c_faddr, conn->c_tos,
                        cp->cp_reconnect_retry_count);
-               rds_conn_path_drop(cp, DR_RECONNECT_TIMEOUT);
+               return;
+       }
+
+       if (!rds_conn_up(conn)) {
+               if (rds_conn_state(conn) == RDS_CONN_DISCONNECTING) {
+                       queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w,
+                                          msecs_to_jiffies(100));
+               } else {
+                       cp->cp_reconnect_retry_count++;
+                       rds_rtd(RDS_RTD_CM,
+                               "conn <%pI4,%pI4,%d> not up, retry(%d)\n",
+                               &conn->c_laddr, &conn->c_faddr, conn->c_tos,
+                               cp->cp_reconnect_retry_count);
+                       queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w,
+                                          msecs_to_jiffies(cp->cp_reconnect_retry));
+                       set_bit(RDS_RECONNECT_TIMEDOUT, &cp->cp_reconn_flags);
+                       rds_conn_drop(conn, DR_RECONNECT_TIMEOUT);
+               }
        }
 }