]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
net/rds: Revert "RDS: add reconnect retry scheme for stalled connections"
authorWei Lin Guay <wei.lin.guay@oracle.com>
Fri, 28 Apr 2017 23:18:58 +0000 (01:18 +0200)
committerChuck Anderson <chuck.anderson@oracle.com>
Fri, 23 Jun 2017 04:29:51 +0000 (21:29 -0700)
This reverts commit 5acb959ad59966b0b6905802ed720d26c560c3c5.

Commit "RDS: add reconnect retry scheme for stalled connections" introduces
the rds_reconnect_timeout to retry the connection establishment after
sysctl_reconnect_retry_ms (default is 1000ms). Nevertheless, this proactive
mechanism is overkilled and it is causing long brownout time in the
virtualized environment. In short, below are the justifications why commit
5acb959ad59966b0b6905802ed720d26c560c3c5 is reverted.

a) The retry counter starts ticking after RDS received an ADDR_CHANGE
event. After receiving an ADDR_CHANGE event, RDS needs to perform shutdown
via shutdown_worker. Then, initiate a new connection via connect_worker.
Eventually, a CM REQ is only sent out after rds received
RDMA_CM_EVENT_ADDR_RESOLVED and RDMA_CM_EVENT_ROUTE_RESOLVED events. If the
retry is made to cater for stalled connection due to missing CM messages,
the retry should only happen after a CM REQ is sent.  With the current
retry scheme (and with the default 1000 ms) that happens after ADDR_CHANGE
event, it introduces congestion in the single threaded workqueue.

b) Assuming that we modify the retry counter to start ticking after a CM
REQ message is sent out. By introducing another retry timeout, it
complicates the system tuning. Why? First, the sysctl_reconnect_retry_ms
relies on the underlying cma_response_timeout. Any modication of
cma_response_timeout requires to tune sysctl_reconnect_retry_ms. Second, it
is hard to find a universal timing that fits all configurations
(bare-metal, virtualized, mixed environment, and homo/heterogenous system).

Orabug: 25521901

Signed-off-by: Wei Lin Guay <wei.lin.guay@oracle.com>
Reviewed-by: HÃ¥kon Bugge <haakon.bugge@oracle.com>
Tested-by: Dib Chatterjee <dib.chatterjee@oracle.com>
Tested-by: Rosa Isela Lopez Romero <rosa.lopez@oracle.com>
Acked-by: Ajaykumar Hotchandani <ajaykumar.hotchandani@oracle.com>
net/rds/connection.c
net/rds/ib_cm.c
net/rds/rdma_transport.c
net/rds/rds.h
net/rds/sysctl.c
net/rds/threads.c

index 97639fb0d5ee17328477f33a5f3d6e7666a822ec..19e259945d38048ac532e49bcecde40eaef70dd4 100644 (file)
@@ -251,9 +251,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
                cp = &conn->c_path[i];
                __rds_conn_path_init(conn, cp, is_outgoing);
                cp->cp_index = i;
-               cp->cp_reconnect_retry = rds_sysctl_reconnect_retry_ms;
-               cp->cp_reconnect_retry_count = 0;
-
                if (conn->c_loopback)
                        cp->cp_wq = rds_local_wq;
                else
index c4103f4c0ad057c19b434f1796e386ecfc65665b..4485f02d8119b03368fc85cc3cd8c96f0e389011 100644 (file)
@@ -206,7 +206,7 @@ static int rds_ib_match_acl(struct rdma_cm_id *cm_id, __be32 saddr)
        __be64 fguid = cm_id->route.path_rec->dgid.global.interface_id;
        __be64 fsubnet = cm_id->route.path_rec->dgid.global.subnet_prefix;
        struct ib_cm_dpp dpp;
-       u32 addr; 
+       u32 addr;
 
        ib_cm_dpp_init(&dpp, cm_id->device, cm_id->port_num,
                       ntohs(cm_id->route.path_rec->pkey));
@@ -929,24 +929,22 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                        rds_ib_stats_inc(s_ib_listen_closed_stale);
                } else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
                        unsigned long now = get_seconds();
-                       unsigned long retry =
-                                       conn->c_path[0].cp_reconnect_retry;
 
-
-                       /* after retry seconds, give up on
-                        * existing connection attempts and try again.
-                        * At this point it's no longer backoff race but
-                        * something has gone horribly wrong.
+                       /*
+                        * after 15 seconds, give up on existing connection
+                        * attempts and make them try again.  At this point
+                        * it's no longer a race but something has gone
+                        * horribly wrong
                         */
-                       retry = DIV_ROUND_UP(retry, 1000);
                        if (now > conn->c_connection_start &&
-                           now - conn->c_connection_start > retry) {
-                               pr_info("RDS/IB: conn <%pI4,%pI4,%d> racing for more than %lus, retry\n",
-                                       &conn->c_laddr, &conn->c_faddr,
-                                       conn->c_tos, retry);
-                               set_bit(RDS_RECONNECT_TIMEDOUT,
-                                       &conn->c_path[0].cp_reconn_flags);
-                               rds_conn_drop(conn, DR_RECONNECT_TIMEOUT);
+                           now - conn->c_connection_start > 15) {
+                               printk(KERN_CRIT "RDS/IB: connection "
+                                       "<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
+                                       "racing for 15s, forcing reset ",
+                                       NIPQUAD(conn->c_laddr),
+                                       NIPQUAD(conn->c_faddr),
+                                       conn->c_tos);
+                               rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING);
                                rds_ib_stats_inc(s_ib_listen_closed_stale);
                        } else {
                                /* Wait and see - our connect may still be succeeding */
index 8b472640f82d3cca42ddd2c83cd174cb2bc7edbb..4b4e442aef1d4790302a5f1f9ea9a1d6cdfbfaa3 100644 (file)
@@ -306,12 +306,8 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
                                "ADDR_CHANGE: calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
                                NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
                                conn->c_tos);
-                       if (!rds_conn_self_loopback_passive(conn)) {
-                               queue_delayed_work(conn->c_path[0].cp_wq,
-                                                  &conn->c_path[0].cp_reconn_w,
-                                                  msecs_to_jiffies(conn->c_path[0].cp_reconnect_retry));
+                       if (!rds_conn_self_loopback_passive(conn))
                                rds_conn_drop(conn, DR_IB_ADDR_CHANGE);
-                       }
                }
                break;
 
index 5cb7109da4bd99dde363a8c67dcd00b896eb55bd..9c2ec849d22e3853f6459d4411d0942cccdccc58 100644 (file)
@@ -140,14 +140,10 @@ enum {
 #define RDS_RDMA_RESOLVE_TO_MAX_INDEX   5
 #define RDS_ADDR_RES_TM_INDEX_MAX 5
 
-/* Bits for c_reconn_flags */
-#define RDS_RECONNECT_TIMEDOUT 0
-
 /* Max number of multipaths per RDS connection. Must be a power of 2 */
 #define RDS_MPATH_WORKERS       8
 #define RDS_MPATH_HASH(rs, n) (jhash_1word((rs)->rs_bound_port, \
                               (rs)->rs_hash_initval) & ((n) - 1))
-
 enum rds_conn_drop_src {
        /* rds-core */
        DR_DEFAULT,
@@ -320,6 +316,15 @@ struct rds_connection {
        unsigned int            c_version;
        struct net              *c_net;
 
+       /* Re-connect stall diagnostics */
+       unsigned long           c_reconnect_start;
+       unsigned int            c_reconnect_drops;
+       int                     c_reconnect_warn;
+       int                     c_reconnect_err;
+       int                     c_to_index;
+
+       unsigned int            c_reconnect;
+
        /* Qos support */
        u8                      c_tos;
 
@@ -1126,8 +1131,6 @@ extern unsigned long rds_sysctl_trace_flags;
 extern unsigned int  rds_sysctl_trace_level;
 extern unsigned int  rds_sysctl_shutdown_trace_start_time;
 extern unsigned int  rds_sysctl_shutdown_trace_end_time;
-extern unsigned long rds_sysctl_reconnect_retry_ms;
-extern unsigned int rds_sysctl_reconnect_max_retries;
 
 /* threads.c */
 int rds_threads_init(void);
index 64b4e77f785530c9c9f2012d68ee52440362764f..b22e8b8b6b89dd48ba5674b4acd5113b64292998 100644 (file)
@@ -52,13 +52,6 @@ unsigned int rds_sysctl_ping_enable = 1;
 unsigned int rds_sysctl_shutdown_trace_start_time;
 unsigned int rds_sysctl_shutdown_trace_end_time;
 
-unsigned long rds_sysctl_reconnect_retry_ms = 1000;
-static unsigned long reconnect_retry_ms_min = 100;
-static unsigned long reconnect_retry_ms_max = 15000;
-
-unsigned int rds_sysctl_reconnect_max_retries = 60;
-static unsigned long reconnect_min_retries = 15;
-
 /*
  * We have official values, but must maintain the sysctl interface for existing
  * software that expects to find these values here.
@@ -133,25 +126,6 @@ static struct ctl_table rds_sysctl_rds_table[] = {
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
-
-       },
-       {
-               .procname       = "reconnect_retry_ms",
-               .data           = &rds_sysctl_reconnect_retry_ms,
-               .maxlen         = sizeof(unsigned long),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &reconnect_retry_ms_min,
-               .extra2         = &reconnect_retry_ms_max,
-       },
-       {
-               .procname       = "reconnect_max_retries",
-               .data           = &rds_sysctl_reconnect_max_retries,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &reconnect_min_retries,
-               .extra2         = &rds_sysctl_reconnect_max_retries,
        },
        { }
 };
index dda33cda1331212c3c27e8e418bd68ccac0bc30d..dc76c0c90df30760458f7ee19d47477b4951b9ee 100644 (file)
@@ -93,8 +93,6 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
                conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
 
        cp->cp_reconnect_jiffies = 0;
-       cp->cp_reconnect_retry = rds_sysctl_reconnect_retry_ms;
-       cp->cp_reconnect_retry_count = 0;
        set_bit(0, &conn->c_map_queued);
        queue_delayed_work(cp->cp_wq, &cp->cp_send_w, 0);
        queue_delayed_work(cp->cp_wq, &cp->cp_recv_w, 0);
@@ -148,8 +146,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
                return;
 
        set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
-       if (cp->cp_reconnect_jiffies == 0 ||
-           test_and_clear_bit(RDS_RECONNECT_TIMEDOUT, &cp->cp_reconn_flags)) {
+       if (cp->cp_reconnect_jiffies == 0) {
                cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
                queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
                return;
@@ -319,29 +316,12 @@ void rds_reconnect_timeout(struct work_struct *work)
                                                cp_reconn_w.work);
        struct rds_connection *conn = cp->cp_conn;
 
-       if (cp->cp_reconnect_retry_count > rds_sysctl_reconnect_max_retries) {
-               pr_info("RDS: connection <%pI4,%pI4,%d> reconnect retries(%d) exceeded, stop retry\n",
+       if (!rds_conn_path_up(cp)) {
+               rds_rtd(RDS_RTD_CM,
+                       "conn <%pI4,%pI4,%d> not up, retry(%d)\n",
                        &conn->c_laddr, &conn->c_faddr, conn->c_tos,
                        cp->cp_reconnect_retry_count);
-               return;
-       }
-
-       if (!rds_conn_path_up(cp)) {
-               if (rds_conn_path_state(cp) == RDS_CONN_DISCONNECTING) {
-                       queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w,
-                                          msecs_to_jiffies(100));
-               } else {
-                       cp->cp_reconnect_retry_count++;
-                       rds_rtd(RDS_RTD_CM,
-                               "conn <%pI4,%pI4,%d> not up, retry(%d)\n",
-                               &conn->c_laddr, &conn->c_faddr, conn->c_tos,
-                               cp->cp_reconnect_retry_count);
-                       queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w,
-                                          msecs_to_jiffies(
-                                          cp->cp_reconnect_retry));
-                       set_bit(RDS_RECONNECT_TIMEDOUT, &cp->cp_reconn_flags);
-                       rds_conn_path_drop(cp, DR_RECONNECT_TIMEOUT);
-               }
+               rds_conn_path_drop(cp, DR_RECONNECT_TIMEOUT);
        }
 }