From 1bc87d23681aa69494c15915f5941e2b5d905bab Mon Sep 17 00:00:00 2001 From: Wei Lin Guay Date: Mon, 15 May 2017 13:52:47 +0200 Subject: [PATCH] net/rds: prioritize the base connection establishment MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit As of today, all the TOS connections can only be established after their base connections are up. This is due to the fact that TOS connections rely on their base connections to perform route resolution. Nevertheless, when all the connections drop/reconnect (e.g., ADDR_CHANGE event), the TOS connection establishment consumes CPU resources by constantly retrying the connection establishment until their base connections are up. Thus, this patch delays all the TOS connections if their associated base connections are not up. By doing so, the priority is given to the base connections establishment. Consequently, the base connections can be established faster and subsequently their associated TOS connections. Orabug: 25521901 Signed-off-by: Wei Lin Guay Reviewed-by: Håkon Bugge Reviewed-by: Ajaykumar Hotchandani Tested-by: Dib Chatterjee Tested-by: Rosa Isela Lopez Romero --- net/rds/connection.c | 19 +++++++++++++++---- net/rds/ib_cm.c | 17 ++++++++++++++++- net/rds/rds.h | 4 ++-- net/rds/threads.c | 22 +++++++++++++++------- 4 files changed, 48 insertions(+), 14 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index 00d349b4491a..c31758b90711 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -159,7 +159,7 @@ static void __rds_conn_path_init(struct rds_connection *conn, INIT_DELAYED_WORK(&cp->cp_hb_w, rds_hb_worker); INIT_DELAYED_WORK(&cp->cp_reconn_w, rds_reconnect_timeout); INIT_DELAYED_WORK(&cp->cp_reject_w, rds_reject_worker); - INIT_WORK(&cp->cp_down_w, rds_shutdown_worker); + INIT_DELAYED_WORK(&cp->cp_down_w, rds_shutdown_worker); mutex_init(&cp->cp_cm_lock); cp->cp_flags = 0; } @@ -421,7 +421,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp,
int restart) rcu_read_lock(); if (!hlist_unhashed(&conn->c_hash_node) && restart) { rcu_read_unlock(); - rds_queue_reconnect(cp); + rds_queue_reconnect(cp, DR_DEFAULT); } else { rcu_read_unlock(); } @@ -442,7 +442,7 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp, int shutdown) return; rds_conn_path_drop(cp, DR_CONN_DESTROY); - flush_work(&cp->cp_down_w); + flush_delayed_work(&cp->cp_down_w); /* now that conn down worker is flushed; there cannot be any * more posting of reconn timeout work. But cancel any already @@ -852,6 +852,7 @@ void rds_conn_path_drop(struct rds_conn_path *cp, int reason) unsigned long now = get_seconds(); struct rds_connection *conn = cp->cp_conn; + unsigned long delay = 0; cp->cp_drop_source = reason; if (rds_conn_path_state(cp) == RDS_CONN_UP) { cp->cp_reconnect_start = now; @@ -891,13 +892,16 @@ void rds_conn_path_drop(struct rds_conn_path *cp, int reason) atomic_set(&cp->cp_state, RDS_CONN_ERROR); + if ((conn->c_tos && reason == DR_IB_ADDR_CHANGE) || + reason == DR_IB_BASE_CONN_DOWN) + delay = msecs_to_jiffies(100); rds_rtd(RDS_RTD_CM_EXT, "RDS/%s: queueing shutdown work, conn %p, <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", conn->c_trans->t_type == RDS_TRANS_TCP ? 
"TCP" : "IB", conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); - queue_work(cp->cp_wq, &cp->cp_down_w); + queue_delayed_work(cp->cp_wq, &cp->cp_down_w, delay); } EXPORT_SYMBOL_GPL(rds_conn_path_drop); @@ -918,11 +922,18 @@ void rds_conn_path_connect_if_down(struct rds_conn_path *cp) if (rds_conn_path_state(cp) == RDS_CONN_DOWN && !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags)) { + if (conn->c_tos == 0 || + (conn->c_tos && rds_conn_state(cp->cp_base_conn) == RDS_CONN_UP)) { rds_rtd(RDS_RTD_CM_EXT, "queueing connect work, conn %p, <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0); + } else + rds_rtd(RDS_RTD_CM_EXT, + "skip, base conn %p down, conn %p, <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", + cp->cp_base_conn, conn, NIPQUAD(conn->c_laddr), + NIPQUAD(conn->c_faddr), conn->c_tos); } } EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 6ca5ce32e1de..a217d978c894 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -1144,9 +1144,24 @@ int rds_ib_conn_path_connect(struct rds_conn_path *cp) struct rds_connection *conn = cp->cp_conn; struct rds_ib_connection *ic = conn->c_transport_data; struct sockaddr_in src, dest; - int ret; + int ret = 0; conn->c_route_resolved = 0; + + if (conn->c_tos) { + mutex_lock(&conn->c_base_conn->c_cm_lock); + if (!rds_conn_transition(conn->c_base_conn, RDS_CONN_UP, + RDS_CONN_UP)) { + rds_rtd(RDS_RTD_CM_EXT, + "RDS/IB: base conn %p (%p) is not up\n", + conn->c_base_conn, conn); + ret = DR_IB_BASE_CONN_DOWN; + } + mutex_unlock(&conn->c_base_conn->c_cm_lock); + if (ret) + goto out; + } + /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, diff --git a/net/rds/rds.h b/net/rds/rds.h index b7a0227b20ba..61c78639958c 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ 
-253,7 +253,7 @@ struct rds_conn_path { struct delayed_work cp_reject_w; struct delayed_work cp_hb_w; struct delayed_work cp_reconn_w; - struct work_struct cp_down_w; + struct delayed_work cp_down_w; struct mutex cp_cm_lock; /* protect cp_state & cm */ wait_queue_head_t cp_waitq; @@ -1139,7 +1139,7 @@ void rds_threads_exit(void); extern struct workqueue_struct *rds_wq; extern struct workqueue_struct *rds_tos_wq; extern struct workqueue_struct *rds_local_wq; -void rds_queue_reconnect(struct rds_conn_path *cp); +void rds_queue_reconnect(struct rds_conn_path *cp, int reason); void rds_connect_worker(struct work_struct *); void rds_shutdown_worker(struct work_struct *); void rds_send_worker(struct work_struct *); diff --git a/net/rds/threads.c b/net/rds/threads.c index 7a04853fd4a1..bcf240e3dea1 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -132,7 +132,7 @@ EXPORT_SYMBOL_GPL(rds_connect_complete); * We should *always* start with a random backoff; otherwise a broken connection * will always take several iterations to be re-established. 
*/ -void rds_queue_reconnect(struct rds_conn_path *cp) +void rds_queue_reconnect(struct rds_conn_path *cp, int reason) { unsigned long delay = 0; unsigned long rand; @@ -140,23 +140,30 @@ void rds_queue_reconnect(struct rds_conn_path *cp) bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP; rds_rtd(RDS_RTD_CM_EXT, - "conn %p for %pI4 to %pI4 tos %d reconnect jiffies %lu\n", conn, + "conn %p for %pI4 to %pI4 tos %d reconnect jiffies %lu %s\n", conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos, - cp->cp_reconnect_jiffies); + cp->cp_reconnect_jiffies, conn_drop_reason_str(reason)); /* let peer with smaller addr initiate reconnect, to avoid duels */ if (is_tcp && !IS_CANONICAL(conn->c_laddr, conn->c_faddr)) return; + if (reason == DR_IB_BASE_CONN_DOWN) { + cp->cp_reconnect_jiffies = 0; + delay = msecs_to_jiffies(500); + } + set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); if (cp->cp_reconnect_jiffies == 0) { set_bit(RDS_INITIAL_RECONNECT, &cp->cp_flags); + get_random_bytes(&rand, sizeof(rand)); cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; - queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, rand % conn->c_reconnect_jiffies); + queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, + delay + (rand % cp->cp_reconnect_jiffies)); return; } - clear_bit(RDS_INITIAL_RECONNECT, &conn->c_flags); + clear_bit(RDS_INITIAL_RECONNECT, &cp->cp_flags); if ((conn->c_laddr > conn->c_faddr) || rds_conn_self_loopback_passive(conn)) delay = msecs_to_jiffies(15000); @@ -203,7 +210,8 @@ void rds_connect_worker(struct work_struct *work) RDS_CONN_DOWN)) { rds_rtd(RDS_RTD_CM_EXT, "reconnecting..., conn %p\n", conn); - rds_queue_reconnect(cp); + rds_queue_reconnect(cp, ret == DR_IB_BASE_CONN_DOWN ? 
+ DR_IB_BASE_CONN_DOWN : DR_DEFAULT); } else { rds_conn_path_drop(cp, DR_CONN_CONNECT_FAIL); } @@ -334,7 +342,7 @@ void rds_shutdown_worker(struct work_struct *work) { struct rds_conn_path *cp = container_of(work, struct rds_conn_path, - cp_down_w); + cp_down_w.work); unsigned long now = get_seconds(); bool is_tcp = cp->cp_conn->c_trans->t_type == RDS_TRANS_TCP; struct rds_connection *conn = cp->cp_conn; -- 2.50.1