From d8bd5dfb5de44f079d3d4858d5aafbf092987f78 Mon Sep 17 00:00:00 2001 From: Wei Lin Guay Date: Fri, 9 Jun 2017 14:20:56 +0200 Subject: [PATCH] net/rds: use one sided reconnection during a race MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This commit reverts commit 812c02791add ("RDS: restore the exponential back-off scheme") to use one sided reconnection when a race is detected. When a race is detected, the active side reconnects as fast as possible, whereas the passive side waits for 15s. Orabug: 28068627 Signed-off-by: Wei Lin Guay Tested-by: Dib Chatterjee (cherry picked from commit 464c84386ab55a2700d963619a470a55e53a1b66 repo https://linux-git.us.oracle.com/UEK/linux-wguay-public) Signed-off-by: Brian Maly Conflicts: net/rds/ib_cm.c net/rds/rdma_transport.c net/rds/threads.c Made it checkpatch clean. Signed-off-by: Håkon Bugge Reviewed-by: Shannon Nelson Reviewed-by: Zhu Yanjun Signed-off-by: Brian Maly --- net/rds/connection.c | 4 ++-- net/rds/ib_cm.c | 18 ++++++++++++++++-- net/rds/rdma_transport.c | 14 ++++++++++++++ net/rds/rdma_transport.h | 2 +- net/rds/rds.h | 5 +++-- net/rds/threads.c | 27 ++++++++++++++++++++++++--- 6 files changed, 60 insertions(+), 10 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index e6417d8122933..89f2539c9be43 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -378,7 +378,7 @@ struct rds_connection *rds_conn_find(struct net *net, struct in6_addr *laddr, } EXPORT_SYMBOL_GPL(rds_conn_find); -void rds_conn_shutdown(struct rds_conn_path *cp) +void rds_conn_shutdown(struct rds_conn_path *cp, int restart) { struct rds_connection *conn = cp->cp_conn; @@ -447,7 +447,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp) * conn - the reconnect is always triggered by the active peer. 
*/ cancel_delayed_work_sync(&cp->cp_conn_w); rcu_read_lock(); - if (!hlist_unhashed(&conn->c_hash_node)) { + if (!hlist_unhashed(&conn->c_hash_node) && restart) { rcu_read_unlock(); rds_queue_reconnect(cp); } else { diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index d1e44efa0d822..603b295987bd3 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -1071,13 +1071,27 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, rds_ib_stats_inc(s_ib_listen_closed_stale); } else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) { unsigned long now = get_seconds(); + conn->c_reconnect_racing++; + /* When a race is detected, one side should fall back + * to passive and let the active side to reconnect. + * If the connection is in CONNECTING and still receive + * multiple back-to-back REQ, it means something is + * horribly wrong. Thus, drop the connection. + */ + if (conn->c_reconnect_racing > 5) { + rds_rtd_ptr(RDS_RTD_CM, + "RDS/IB: conn <%pI6c,%pI6c,%d> back-to-back REQ, reset\n", + &conn->c_laddr, &conn->c_faddr, + conn->c_tos); + conn->c_reconnect_racing = 0; + rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING); /* After 15 seconds, give up on existing connection * attempts and make them try again. At this point * it's no longer a race but something has gone - * horribly wrong + * horribly wrong. 
*/ - if (now > conn->c_connection_start && + } else if (now > conn->c_connection_start && now - conn->c_connection_start > 15) { rds_rtd_ptr(RDS_RTD_CM, "RDS/IB: connection <%pI6c,%pI6c,%d> racing for 15s, forcing reset", diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index ebf336b3e4ae7..6ae9aa0670f64 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -203,6 +203,7 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, "ROUTE_ERROR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n", conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos); + conn->c_reconnect_racing = 0; rds_conn_drop(conn, DR_IB_ROUTE_ERR); } break; @@ -217,6 +218,7 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, "ADDR_ERROR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n", conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos); + conn->c_reconnect_racing = 0; rds_conn_drop(conn, DR_IB_ADDR_ERR); } break; @@ -229,6 +231,7 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, "CONN/UNREACHABLE/RMVAL ERR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n", conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos); + conn->c_reconnect_racing = 0; rds_conn_drop(conn, DR_IB_CONNECT_ERR); } break; @@ -236,6 +239,15 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_REJECTED: err = (int *)event->param.conn.private_data; + if (conn && event->status == RDS_REJ_CONSUMER_DEFINED && + *err <= 1) { + conn->c_reconnect_racing++; + rds_rtd_ptr(RDS_RTD_ERR, + "conn %p, reconnect racing (%d) rds_conn_drop <%pI6c,%pI6c,%d>\n", + conn, conn->c_reconnect_racing, &conn->c_laddr, + &conn->c_faddr, conn->c_tos); + } + if (conn) { if (event->status == RDS_REJ_CONSUMER_DEFINED && (*err) == 0) { @@ -278,6 +290,7 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, "ADDR_CHANGE: calling rds_conn_drop <%pI6c,%pI6c,%d>\n", &conn->c_laddr, &conn->c_faddr, conn->c_tos); + conn->c_reconnect_racing = 0; if 
(!rds_conn_self_loopback_passive(conn)) rds_conn_drop(conn, DR_IB_ADDR_CHANGE); } @@ -287,6 +300,7 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, rds_rtd_ptr(RDS_RTD_CM, "DISCONNECT event - dropping connection %pI6c->%pI6c tos %d\n", &conn->c_laddr, &conn->c_faddr, conn->c_tos); + conn->c_reconnect_racing = 0; rds_conn_drop(conn, DR_IB_DISCONNECTED_EVENT); break; diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h index ed08a41f1699b..8b95f1d446818 100644 --- a/net/rds/rdma_transport.h +++ b/net/rds/rdma_transport.h @@ -5,7 +5,7 @@ #include #include "rds.h" -#define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000 +#define RDS_RDMA_RESOLVE_TIMEOUT_MS RDS_RECONNECT_RETRY_MS /* Per IB specification 7.7.3, service level is a 4-bit field. */ #define TOS_TO_SL(tos) ((tos) & 0xF) diff --git a/net/rds/rds.h b/net/rds/rds.h index 3be84f04df834..59f6063c3032b 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -48,6 +48,8 @@ #define KERNEL_HAS_ATOMIC64 #endif +#define RDS_RECONNECT_RETRY_MS 15000 + #ifdef RDS_DEBUG #define rdsdebug(fmt, args...) 
pr_debug("%s(): " fmt, __func__ , ##args) #else @@ -333,7 +335,6 @@ struct rds_connection { wait_queue_head_t c_hs_waitq; /* handshake waitq */ - /* used by RDS_CONN_RESET */ struct list_head c_laddr_node; u32 c_my_gen_num; @@ -917,7 +918,7 @@ struct rds_connection *rds_conn_find(struct net *net, struct in6_addr *laddr, struct in6_addr *faddr, struct rds_transport *trans, u8 tos, int dev_if); -void rds_conn_shutdown(struct rds_conn_path *cp); +void rds_conn_shutdown(struct rds_conn_path *cp, int restart); void rds_conn_destroy(struct rds_connection *conn, int shutdown); void rds_conn_reset(struct rds_connection *conn); void rds_conn_drop(struct rds_connection *conn, int reason); diff --git a/net/rds/threads.c b/net/rds/threads.c index bd1e64a71fc4b..cc58563c6ba97 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -158,8 +158,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp) rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies, conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos); - queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, - rand % cp->cp_reconnect_jiffies); + if (rds_addr_cmp(&conn->c_laddr, &conn->c_faddr)) + queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0); + else + queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, + msecs_to_jiffies(100)); cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2, rds_sysctl_reconnect_max_jiffies); @@ -308,6 +311,7 @@ void rds_reconnect_timeout(struct work_struct *work) "conn <%pI6c,%pI6c,%d> not up, retry(%d)\n", &conn->c_laddr, &conn->c_faddr, conn->c_tos, cp->cp_reconnect_retry_count); + cp->cp_reconnect_racing = 0; rds_conn_path_drop(cp, DR_RECONNECT_TIMEOUT); } } @@ -332,7 +336,24 @@ void rds_shutdown_worker(struct work_struct *work) conn->c_tos, conn_drop_reason_str(cp->cp_drop_source)); - rds_conn_shutdown(cp); + /* if racing is detected, lower IP backs off and let the higher IP + * drives the reconnect (one-sided reconnect) + */ + if ((rds_addr_cmp(&conn->c_faddr, &conn->c_laddr) || + 
rds_conn_self_loopback_passive(conn)) && + cp->cp_reconnect_racing) { + rds_rtd_ptr(RDS_RTD_CM, + "calling rds_conn_shutdown, conn %p:0 <%pI6c,%pI6c,%d>\n", + conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos); + rds_conn_shutdown(cp, 0); + queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w, + msecs_to_jiffies(RDS_RECONNECT_RETRY_MS)); + } else { + rds_rtd_ptr(RDS_RTD_CM, + "calling rds_conn_shutdown, conn %p:1 <%pI6c,%pI6c,%d>\n", + conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos); + rds_conn_shutdown(cp, 1); + } } void rds_threads_exit(void) -- 2.50.1