From 5acb959ad59966b0b6905802ed720d26c560c3c5 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Mon, 11 Jul 2016 23:39:45 -0700 Subject: [PATCH] RDS: add reconnect retry scheme for stalled connections RDS IB connections get stalled at times, and we currently let such connections take their sweet time to reconnect. On the passive side, we wait 15 seconds for such stalled connections, which is too slow relative to application IO timeouts. IB connections are established in milliseconds, so we had better drop these stuck connections early and retry. The retry timeout is tunable via the reconnect_retry_ms sysctl. The upper bound on retries is tunable via the reconnect_max_retries sysctl (rds_sysctl_reconnect_max_retries). Orabug: 22347191 Tested-by: Michael Nowak Tested-by: Rafael Alejandro Peralez Tested-by: Liwen Huang Tested-by: Hong Liu Reviewed-by: Mukesh Kacker Signed-off-by: Santosh Shilimkar --- net/rds/connection.c | 3 +++ net/rds/ib_cm.c | 27 ++++++++++++++------------- net/rds/rdma_transport.c | 5 ++++- net/rds/rds.h | 8 ++++++++ net/rds/sysctl.c | 26 ++++++++++++++++++++++++++ net/rds/threads.c | 34 +++++++++++++++++++++++++--------- 6 files changed, 80 insertions(+), 23 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index 241df31bedab..0d509765e383 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -215,6 +215,9 @@ static struct rds_connection *__rds_conn_create(struct net *net, } conn->c_trans = trans; + conn->c_reconnect_retry = rds_sysctl_reconnect_retry_ms; + conn->c_reconnect_retry_count = 0; + if (conn->c_loopback) conn->c_wq = rds_local_wq; else diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 8ea47b6af17a..13a3ef4e54d7 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -914,22 +914,23 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, rds_ib_stats_inc(s_ib_listen_closed_stale); } else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) { unsigned long now = get_seconds(); + unsigned long retry = conn->c_reconnect_retry; - /* - * after 15 seconds, give 
up on existing connection - * attempts and make them try again. At this point - * it's no longer a race but something has gone - * horribly wrong + + /* after retry seconds, give up on + * existing connection attempts and try again. + * At this point it's no longer backoff race but + * something has gone horribly wrong. */ + retry = DIV_ROUND_UP(retry, 1000); if (now > conn->c_connection_start && - now - conn->c_connection_start > 15) { - printk(KERN_CRIT "RDS/IB: connection " - "<%u.%u.%u.%u,%u.%u.%u.%u,%d> " - "racing for 15s, forcing reset ", - NIPQUAD(conn->c_laddr), - NIPQUAD(conn->c_faddr), - conn->c_tos); - rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING); + now - conn->c_connection_start > retry) { + pr_info("RDS/IB: conn <%pI4,%pI4,%d> racing for more than %lus, retry\n", + &conn->c_laddr, &conn->c_faddr, + conn->c_tos, retry); + set_bit(RDS_RECONNECT_TIMEDOUT, + &conn->c_reconn_flags); + rds_conn_drop(conn, DR_RECONNECT_TIMEOUT); rds_ib_stats_inc(s_ib_listen_closed_stale); } else { /* Wait and see - our connect may still be succeeding */ diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 6e1c49d059bf..16aa421a7c8c 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -305,8 +305,11 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, "ADDR_CHANGE: calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); - if (!rds_conn_self_loopback_passive(conn)) + if (!rds_conn_self_loopback_passive(conn)) { + queue_delayed_work(conn->c_wq, &conn->c_reconn_w, + msecs_to_jiffies(conn->c_reconnect_retry)); rds_conn_drop(conn, DR_IB_ADDR_CHANGE); + } } break; diff --git a/net/rds/rds.h b/net/rds/rds.h index 81e20b0252ba..8db391d1c567 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -140,6 +140,9 @@ enum { #define RDS_RDMA_RESOLVE_TO_MAX_INDEX 5 #define RDS_ADDR_RES_TM_INDEX_MAX 5 +/* Bits for c_reconn_flags */ +#define RDS_RECONNECT_TIMEDOUT 0 + enum rds_conn_drop_src { 
/* rds-core */ DR_DEFAULT, @@ -272,6 +275,9 @@ struct rds_connection { possible_net_t c_net; /* Re-connect stall diagnostics */ + unsigned long c_reconn_flags; + unsigned long c_reconnect_retry; + unsigned int c_reconnect_retry_count; unsigned long c_reconnect_start; unsigned int c_reconnect_drops; int c_reconnect_warn; @@ -1039,6 +1045,8 @@ extern unsigned long rds_sysctl_trace_flags; extern unsigned int rds_sysctl_trace_level; extern unsigned int rds_sysctl_shutdown_trace_start_time; extern unsigned int rds_sysctl_shutdown_trace_end_time; +extern unsigned long rds_sysctl_reconnect_retry_ms; +extern unsigned int rds_sysctl_reconnect_max_retries; /* threads.c */ int rds_threads_init(void); diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index b22e8b8b6b89..64b4e77f7855 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -52,6 +52,13 @@ unsigned int rds_sysctl_ping_enable = 1; unsigned int rds_sysctl_shutdown_trace_start_time; unsigned int rds_sysctl_shutdown_trace_end_time; +unsigned long rds_sysctl_reconnect_retry_ms = 1000; +static unsigned long reconnect_retry_ms_min = 100; +static unsigned long reconnect_retry_ms_max = 15000; + +unsigned int rds_sysctl_reconnect_max_retries = 60; +static unsigned long reconnect_min_retries = 15; + /* * We have official values, but must maintain the sysctl interface for existing * software that expects to find these values here. 
@@ -126,6 +133,25 @@ static struct ctl_table rds_sysctl_rds_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, + + }, + { + .procname = "reconnect_retry_ms", + .data = &rds_sysctl_reconnect_retry_ms, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &reconnect_retry_ms_min, + .extra2 = &reconnect_retry_ms_max, + }, + { + .procname = "reconnect_max_retries", + .data = &rds_sysctl_reconnect_max_retries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &reconnect_min_retries, + .extra2 = &rds_sysctl_reconnect_max_retries, }, { } }; diff --git a/net/rds/threads.c b/net/rds/threads.c index 38b5ee2c8b98..68fc403077d4 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -91,6 +91,8 @@ void rds_connect_path_complete(struct rds_connection *conn, int curr) conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos); conn->c_reconnect_jiffies = 0; + conn->c_reconnect_retry = rds_sysctl_reconnect_retry_ms; + conn->c_reconnect_retry_count = 0; set_bit(0, &conn->c_map_queued); queue_delayed_work(conn->c_wq, &conn->c_send_w, 0); queue_delayed_work(conn->c_wq, &conn->c_recv_w, 0); @@ -138,7 +140,8 @@ void rds_queue_reconnect(struct rds_connection *conn) conn->c_reconnect_jiffies); set_bit(RDS_RECONNECT_PENDING, &conn->c_flags); - if (conn->c_reconnect_jiffies == 0) { + if (conn->c_reconnect_jiffies == 0 || + test_and_clear_bit(RDS_RECONNECT_TIMEDOUT, &conn->c_reconn_flags)) { conn->c_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; queue_delayed_work(conn->c_wq, &conn->c_conn_w, 0); return; @@ -284,15 +287,28 @@ void rds_reconnect_timeout(struct work_struct *work) struct rds_connection *conn = container_of(work, struct rds_connection, c_reconn_w.work); - /* if the higher IP has not reconnected, reset back to two-sided - * reconnect. 
- */ + if (conn->c_reconnect_retry_count > rds_sysctl_reconnect_max_retries) { + pr_info("RDS: connection <%pI4,%pI4,%d> reconnect retries(%d) exceeded, stop retry\n", + &conn->c_laddr, &conn->c_faddr, conn->c_tos, + conn->c_reconnect_retry_count); + return; + } + if (!rds_conn_up(conn)) { - rds_rtd(RDS_RTD_CM, - "conn not up, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", - NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), - conn->c_tos); - rds_conn_drop(conn, DR_RECONNECT_TIMEOUT); + if (rds_conn_up(conn) == RDS_CONN_DISCONNECTING) { + queue_delayed_work(conn->c_wq, &conn->c_reconn_w, + msecs_to_jiffies(100)); + } else { + conn->c_reconnect_retry_count++; + rds_rtd(RDS_RTD_CM, + "conn <%pI4,%pI4,%d> not up, retry(%d)\n", + &conn->c_laddr, &conn->c_faddr, conn->c_tos, + conn->c_reconnect_retry_count); + queue_delayed_work(conn->c_wq, &conn->c_reconn_w, + msecs_to_jiffies(conn->c_reconnect_retry)); + set_bit(RDS_RECONNECT_TIMEDOUT, &conn->c_reconn_flags); + rds_conn_drop(conn, DR_RECONNECT_TIMEOUT); + } } } -- 2.50.1