From: Wei Lin Guay Date: Mon, 15 May 2017 11:42:56 +0000 (+0200) Subject: net/rds: determine active/passive connection with IP addresses X-Git-Tag: v4.1.12-105.0.20170622_2100~1 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=1f2ea7a020a1e2ecf4c732352fb348b62934bea9;p=users%2Fjedix%2Flinux-maple.git net/rds: determine active/passive connection with IP addresses This patch changes RDS to use a randomized backoff only on the first attempt to reconnect. This means both ends try to be active by sending a REQ to their peer after a random delay of t seconds. If the connection can't be established due to a race, a comparison of the peers' IP addresses is used to determine which end performs the active and which the passive connection establishment (e.g. IP_A > IP_B). The following timeline illustrates the connection establishment: t1randA: 192.168.1.A (active) --------------> 192.168.1.B (passive) t1randB: 192.168.1.A (passive) <------------- 192.168.1.B (active) t2 : 192.168.1.A (active) ---------------> REJ t3 : 192.168.1.B (active) ---------------> REJ t4 : Connection between A,B is not up. t5 : 192.168.1.A (active) --------------> 192.168.1.B (passive) Orabug: 25521901 Signed-off-by: Wei Lin Guay Reviewed-by: Håkon Bugge Suggested-by: Håkon Bugge Tested-by: Dib Chatterjee Tested-by: Rosa Isela Lopez Romero --- diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 4485f02d8119b..6ca5ce32e1de8 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -914,6 +914,14 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, * see the comment above rds_queue_reconnect() */ mutex_lock(&conn->c_cm_lock); + if (rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN) && + (conn->c_laddr < conn->c_faddr)) { + rds_rtd(RDS_RTD_CM_EXT_P, + "incoming passive connection is trying to connect %p\n", + conn); + rds_conn_drop(conn, DR_IB_CONN_DROP_RACE); + goto out; + } if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { /* * in both of the cases below, the conn is half setup. 
@@ -944,10 +952,39 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); + rds_rtd(RDS_RTD_CM, "RDS/IB: connection " + " id %p conn %p " + "<%u.%u.%u.%u,%u.%u.%u.%u,%d> " + "racing for 15s, forcing reset\n", + cm_id, conn, + NIPQUAD(conn->c_laddr), + NIPQUAD(conn->c_faddr), + conn->c_tos); rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING); rds_ib_stats_inc(s_ib_listen_closed_stale); } else { /* Wait and see - our connect may still be succeeding */ + rds_rtd(RDS_RTD_CM, "RDS/IB: connection " + " id %p conn %p " + "<%u.%u.%u.%u,%u.%u.%u.%u,%d> " + " will be rejected\n", + cm_id, conn, + NIPQUAD(conn->c_laddr), + NIPQUAD(conn->c_faddr), + conn->c_tos); + if (test_and_clear_bit(RDS_INITIAL_RECONNECT, &conn->c_flags) || + (conn->c_laddr > conn->c_faddr) || + rds_conn_self_loopback_passive(conn)) { + rds_rtd(RDS_RTD_CM, "RDS/IB: connection " + " id %p conn %p " + "<%u.%u.%u.%u,%u.%u.%u.%u,%d> " + " will be rejected as passive conn\n", + cm_id, conn, + NIPQUAD(conn->c_laddr), + NIPQUAD(conn->c_faddr), + conn->c_tos); + rds_conn_drop(conn, DR_IB_CONN_DROP_RACE); + } rds_ib_stats_inc(s_ib_connect_raced); } } diff --git a/net/rds/rds.h b/net/rds/rds.h index 53e71e1b41b3f..b7a0227b20ba6 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -136,6 +136,7 @@ enum { #define RDS_IN_XMIT 2 #define RDS_RECV_REFILL 3 #define RDS_DESTROY_PENDING 4 +#define RDS_INITIAL_RECONNECT 5 #define RDS_RDMA_RESOLVE_TO_MAX_INDEX 5 #define RDS_ADDR_RES_TM_INDEX_MAX 5 diff --git a/net/rds/threads.c b/net/rds/threads.c index c8a3861052cab..7a04853fd4a1b 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -134,6 +134,7 @@ EXPORT_SYMBOL_GPL(rds_connect_complete); */ void rds_queue_reconnect(struct rds_conn_path *cp) { + unsigned long delay = 0; unsigned long rand; struct rds_connection *conn = cp->cp_conn; bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP; @@ -149,20 +150,22 @@ void rds_queue_reconnect(struct rds_conn_path *cp) 
set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); if (cp->cp_reconnect_jiffies == 0) { + set_bit(RDS_INITIAL_RECONNECT, &cp->cp_flags); cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; - queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0); + queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, rand % conn->c_reconnect_jiffies); return; } - get_random_bytes(&rand, sizeof(rand)); + clear_bit(RDS_INITIAL_RECONNECT, &conn->c_flags); + if ((conn->c_laddr > conn->c_faddr) || + rds_conn_self_loopback_passive(conn)) + delay = msecs_to_jiffies(15000); rds_rtd(RDS_RTD_CM_EXT, "%lu delay %lu ceil conn %p for %pI4 -> %pI4 tos %d\n", - rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies, + delay, cp->cp_reconnect_jiffies, conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos); - queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, - rand % cp->cp_reconnect_jiffies); - + queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, delay); cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2, rds_sysctl_reconnect_max_jiffies); }