]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: add reconnect retry scheme for stalled connections
author: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Tue, 12 Jul 2016 06:39:45 +0000 (23:39 -0700)
committer: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Wed, 12 Oct 2016 17:20:26 +0000 (10:20 -0700)
RDS IB connections get stalled at times, and the connections are then
left to take their sweet time to reconnect. On the passive side, we wait
15 seconds for such stalled connections, which is too slow given application
IO timeouts. IB connections are established in milliseconds, so we had better
drop these stuck connections early and retry.

The retry timeout is kept tunable via the reconnect_retry_ms sysctl. The
upper bound for retries is tunable via the reconnect_max_retries sysctl
(rds_sysctl_reconnect_max_retries).

Orabug: 22347191

Tested-by: Michael Nowak <michael.nowak@oracle.com>
Tested-by: Rafael Alejandro Peralez <rafael.peralez@oracle.com>
Tested-by: Liwen Huang <liwen.huang@oracle.com>
Tested-by: Hong Liu <hong.x.liu@oracle.com>
Reviewed-by: Mukesh Kacker <mukesh.kacker@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
net/rds/connection.c
net/rds/ib_cm.c
net/rds/rdma_transport.c
net/rds/rds.h
net/rds/sysctl.c
net/rds/threads.c

index 241df31bedab3bba3e9892bfd3fa837104415478..0d509765e383932a115da19010adfb4643c145fd 100644 (file)
@@ -215,6 +215,9 @@ static struct rds_connection *__rds_conn_create(struct net *net,
        }
 
        conn->c_trans = trans;
+       conn->c_reconnect_retry = rds_sysctl_reconnect_retry_ms;
+       conn->c_reconnect_retry_count = 0;
+
        if (conn->c_loopback)
                conn->c_wq = rds_local_wq;
        else
index 8ea47b6af17a3d39b00ac1bc0bb6a944260e833b..13a3ef4e54d7b739d239a0935263cbf4b7d097ae 100644 (file)
@@ -914,22 +914,23 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                        rds_ib_stats_inc(s_ib_listen_closed_stale);
                } else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
                        unsigned long now = get_seconds();
+                       unsigned long retry = conn->c_reconnect_retry;
 
-                       /*
-                        * after 15 seconds, give up on existing connection
-                        * attempts and make them try again.  At this point
-                        * it's no longer a race but something has gone
-                        * horribly wrong
+
+                       /* after retry seconds, give up on
+                        * existing connection attempts and try again.
+                        * At this point it's no longer backoff race but
+                        * something has gone horribly wrong.
                         */
+                       retry = DIV_ROUND_UP(retry, 1000);
                        if (now > conn->c_connection_start &&
-                           now - conn->c_connection_start > 15) {
-                               printk(KERN_CRIT "RDS/IB: connection "
-                                       "<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
-                                       "racing for 15s, forcing reset ",
-                                       NIPQUAD(conn->c_laddr),
-                                       NIPQUAD(conn->c_faddr),
-                                       conn->c_tos);
-                               rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING);
+                           now - conn->c_connection_start > retry) {
+                               pr_info("RDS/IB: conn <%pI4,%pI4,%d> racing for more than %lus, retry\n",
+                                       &conn->c_laddr, &conn->c_faddr,
+                                       conn->c_tos, retry);
+                               set_bit(RDS_RECONNECT_TIMEDOUT,
+                                       &conn->c_reconn_flags);
+                               rds_conn_drop(conn, DR_RECONNECT_TIMEOUT);
                                rds_ib_stats_inc(s_ib_listen_closed_stale);
                        } else {
                                /* Wait and see - our connect may still be succeeding */
index 6e1c49d059bf33fb7cd5864a33a7e1f0f0722a71..16aa421a7c8c9842b25e57c647e8f15a932b82f1 100644 (file)
@@ -305,8 +305,11 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                                "ADDR_CHANGE: calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
                                NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
                                conn->c_tos);
-                       if (!rds_conn_self_loopback_passive(conn))
+                       if (!rds_conn_self_loopback_passive(conn)) {
+                               queue_delayed_work(conn->c_wq, &conn->c_reconn_w,
+                                       msecs_to_jiffies(conn->c_reconnect_retry));
                                rds_conn_drop(conn, DR_IB_ADDR_CHANGE);
+                       }
                }
                break;
 
index 81e20b0252babdc9c69bedb3a44a86f032a55eb3..8db391d1c567a69aab05e2af44a8db5b95e0ac2b 100644 (file)
@@ -140,6 +140,9 @@ enum {
 #define RDS_RDMA_RESOLVE_TO_MAX_INDEX   5
 #define RDS_ADDR_RES_TM_INDEX_MAX 5
 
+/* Bits for c_reconn_flags */
+#define RDS_RECONNECT_TIMEDOUT 0
+
 enum rds_conn_drop_src {
        /* rds-core */
        DR_DEFAULT,
@@ -272,6 +275,9 @@ struct rds_connection {
        possible_net_t          c_net;
 
        /* Re-connect stall diagnostics */
+       unsigned long           c_reconn_flags;
+       unsigned long           c_reconnect_retry;
+       unsigned int            c_reconnect_retry_count;
        unsigned long           c_reconnect_start;
        unsigned int            c_reconnect_drops;
        int                     c_reconnect_warn;
@@ -1039,6 +1045,8 @@ extern unsigned long rds_sysctl_trace_flags;
 extern unsigned int  rds_sysctl_trace_level;
 extern unsigned int  rds_sysctl_shutdown_trace_start_time;
 extern unsigned int  rds_sysctl_shutdown_trace_end_time;
+extern unsigned long rds_sysctl_reconnect_retry_ms;
+extern unsigned int rds_sysctl_reconnect_max_retries;
 
 /* threads.c */
 int rds_threads_init(void);
index b22e8b8b6b89dd48ba5674b4acd5113b64292998..64b4e77f785530c9c9f2012d68ee52440362764f 100644 (file)
@@ -52,6 +52,13 @@ unsigned int rds_sysctl_ping_enable = 1;
 unsigned int rds_sysctl_shutdown_trace_start_time;
 unsigned int rds_sysctl_shutdown_trace_end_time;
 
+unsigned long rds_sysctl_reconnect_retry_ms = 1000;
+static unsigned long reconnect_retry_ms_min = 100;
+static unsigned long reconnect_retry_ms_max = 15000;
+
+unsigned int rds_sysctl_reconnect_max_retries = 60;
+static unsigned long reconnect_min_retries = 15;
+
 /*
  * We have official values, but must maintain the sysctl interface for existing
  * software that expects to find these values here.
@@ -126,6 +133,25 @@ static struct ctl_table rds_sysctl_rds_table[] = {
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
+
+       },
+       {
+               .procname       = "reconnect_retry_ms",
+               .data           = &rds_sysctl_reconnect_retry_ms,
+               .maxlen         = sizeof(unsigned long),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &reconnect_retry_ms_min,
+               .extra2         = &reconnect_retry_ms_max,
+       },
+       {
+               .procname       = "reconnect_max_retries",
+               .data           = &rds_sysctl_reconnect_max_retries,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &reconnect_min_retries,
+               .extra2         = &rds_sysctl_reconnect_max_retries,
        },
        { }
 };
index 38b5ee2c8b980e9f02841348d897a4e4eacd8b64..68fc403077d42d0573e1b16e289e8827438326dc 100644 (file)
@@ -91,6 +91,8 @@ void rds_connect_path_complete(struct rds_connection *conn, int curr)
                conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
 
        conn->c_reconnect_jiffies = 0;
+       conn->c_reconnect_retry = rds_sysctl_reconnect_retry_ms;
+       conn->c_reconnect_retry_count = 0;
        set_bit(0, &conn->c_map_queued);
        queue_delayed_work(conn->c_wq, &conn->c_send_w, 0);
        queue_delayed_work(conn->c_wq, &conn->c_recv_w, 0);
@@ -138,7 +140,8 @@ void rds_queue_reconnect(struct rds_connection *conn)
                conn->c_reconnect_jiffies);
 
        set_bit(RDS_RECONNECT_PENDING, &conn->c_flags);
-       if (conn->c_reconnect_jiffies == 0) {
+       if (conn->c_reconnect_jiffies == 0 ||
+           test_and_clear_bit(RDS_RECONNECT_TIMEDOUT, &conn->c_reconn_flags)) {
                conn->c_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
                queue_delayed_work(conn->c_wq, &conn->c_conn_w, 0);
                return;
@@ -284,15 +287,28 @@ void rds_reconnect_timeout(struct work_struct *work)
        struct rds_connection *conn =
                container_of(work, struct rds_connection, c_reconn_w.work);
 
-       /* if the higher IP has not reconnected, reset back to two-sided
-        * reconnect.
-        */
+       if (conn->c_reconnect_retry_count > rds_sysctl_reconnect_max_retries) {
+               pr_info("RDS: connection <%pI4,%pI4,%d> reconnect retries(%d) exceeded, stop retry\n",
+                       &conn->c_laddr, &conn->c_faddr, conn->c_tos,
+                       conn->c_reconnect_retry_count);
+               return;
+       }
+
        if (!rds_conn_up(conn)) {
-               rds_rtd(RDS_RTD_CM,
-                       "conn not up, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
-                       NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
-                       conn->c_tos);
-               rds_conn_drop(conn, DR_RECONNECT_TIMEOUT);
+               if (rds_conn_up(conn) == RDS_CONN_DISCONNECTING) {
+                       queue_delayed_work(conn->c_wq, &conn->c_reconn_w,
+                                          msecs_to_jiffies(100));
+               } else {
+                       conn->c_reconnect_retry_count++;
+                       rds_rtd(RDS_RTD_CM,
+                               "conn <%pI4,%pI4,%d> not up, retry(%d)\n",
+                               &conn->c_laddr, &conn->c_faddr, conn->c_tos,
+                               conn->c_reconnect_retry_count);
+                       queue_delayed_work(conn->c_wq, &conn->c_reconn_w,
+                                          msecs_to_jiffies(conn->c_reconnect_retry));
+                       set_bit(RDS_RECONNECT_TIMEDOUT, &conn->c_reconn_flags);
+                       rds_conn_drop(conn, DR_RECONNECT_TIMEOUT);
+               }
        }
 }