From 1f2ea7a020a1e2ecf4c732352fb348b62934bea9 Mon Sep 17 00:00:00 2001 From: Wei Lin Guay Date: Mon, 15 May 2017 13:42:56 +0200 Subject: [PATCH] net/rds: determine active/passive connection with IP addresses MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This patch changes RDS to use randomized backoff only in the first attempt to reconnect. This means both ends try to be active by sending a REQ to their peer after a random t seconds. If the connection can't be established due to a race, a comparison of the peers' IP addresses is used to determine active/passive connection establishment (e.g., IP_A > IP_B). The following description illustrates the connection establishment: t1randA: 192.168.1.A (active) --------------> 192.168.1.B (passive) t1randB: 192.168.1.A (passive) <------------- 192.168.1.B (active) t2 : 192.168.1.A (active) ---------------> REJ t3 : 192.168.1.B (active) ---------------> REJ t4 : Connection between A,B is not up. t5 : 192.168.1.A (active) --------------> 192.168.1.B (passive) Orabug: 25521901 Signed-off-by: Wei Lin Guay Reviewed-by: Håkon Bugge Suggested-by: Håkon Bugge Tested-by: Dib Chatterjee Tested-by: Rosa Isela Lopez Romero --- net/rds/ib_cm.c | 37 +++++++++++++++++++++++++++++++++++++ net/rds/rds.h | 1 + net/rds/threads.c | 15 +++++++++------ 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 4485f02d8119..6ca5ce32e1de 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -914,6 +914,14 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, * see the comment above rds_queue_reconnect() */ mutex_lock(&conn->c_cm_lock); + if (rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN) && + (conn->c_laddr < conn->c_faddr)) { + rds_rtd(RDS_RTD_CM_EXT_P, + "incoming passive connection is trying to connect %p\n", + conn); + rds_conn_drop(conn, DR_IB_CONN_DROP_RACE); + goto out; + } if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { /* * in 
both of the cases below, the conn is half setup. @@ -944,10 +952,39 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); + rds_rtd(RDS_RTD_CM, "RDS/IB: connection " + " id %p conn %p " + "<%u.%u.%u.%u,%u.%u.%u.%u,%d> " + "racing for 15s, forcing reset\n", + cm_id, conn, + NIPQUAD(conn->c_laddr), + NIPQUAD(conn->c_faddr), + conn->c_tos); rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING); rds_ib_stats_inc(s_ib_listen_closed_stale); } else { /* Wait and see - our connect may still be succeeding */ + rds_rtd(RDS_RTD_CM, "RDS/IB: connection " + " id %p conn %p " + "<%u.%u.%u.%u,%u.%u.%u.%u,%d> " + " will be rejected\n", + cm_id, conn, + NIPQUAD(conn->c_laddr), + NIPQUAD(conn->c_faddr), + conn->c_tos); + if (test_and_clear_bit(RDS_INITIAL_RECONNECT, &conn->c_flags) || + (conn->c_laddr > conn->c_faddr) || + rds_conn_self_loopback_passive(conn)) { + rds_rtd(RDS_RTD_CM, "RDS/IB: connection " + " id %p conn %p " + "<%u.%u.%u.%u,%u.%u.%u.%u,%d> " + " will be rejected as passive conn\n", + cm_id, conn, + NIPQUAD(conn->c_laddr), + NIPQUAD(conn->c_faddr), + conn->c_tos); + rds_conn_drop(conn, DR_IB_CONN_DROP_RACE); + } rds_ib_stats_inc(s_ib_connect_raced); } } diff --git a/net/rds/rds.h b/net/rds/rds.h index 53e71e1b41b3..b7a0227b20ba 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -136,6 +136,7 @@ enum { #define RDS_IN_XMIT 2 #define RDS_RECV_REFILL 3 #define RDS_DESTROY_PENDING 4 +#define RDS_INITIAL_RECONNECT 5 #define RDS_RDMA_RESOLVE_TO_MAX_INDEX 5 #define RDS_ADDR_RES_TM_INDEX_MAX 5 diff --git a/net/rds/threads.c b/net/rds/threads.c index c8a3861052ca..7a04853fd4a1 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -134,6 +134,7 @@ EXPORT_SYMBOL_GPL(rds_connect_complete); */ void rds_queue_reconnect(struct rds_conn_path *cp) { + unsigned long delay = 0; unsigned long rand; struct rds_connection *conn = cp->cp_conn; bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP; @@ -149,20 +150,22 @@ void 
rds_queue_reconnect(struct rds_conn_path *cp) set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); if (cp->cp_reconnect_jiffies == 0) { + set_bit(RDS_INITIAL_RECONNECT, &cp->cp_flags); cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; - queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0); + queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, rand % conn->c_reconnect_jiffies); return; } - get_random_bytes(&rand, sizeof(rand)); + clear_bit(RDS_INITIAL_RECONNECT, &conn->c_flags); + if ((conn->c_laddr > conn->c_faddr) || + rds_conn_self_loopback_passive(conn)) + delay = msecs_to_jiffies(15000); rds_rtd(RDS_RTD_CM_EXT, "%lu delay %lu ceil conn %p for %pI4 -> %pI4 tos %d\n", - rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies, + delay, cp->cp_reconnect_jiffies, conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos); - queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, - rand % cp->cp_reconnect_jiffies); - + queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, delay); cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2, rds_sysctl_reconnect_max_jiffies); } -- 2.50.1