From: Wei Lin Guay Date: Mon, 11 Sep 2017 13:17:28 +0000 (+0200) Subject: Revert "net/rds: Revert "RDS: add reconnect retry scheme for stalled X-Git-Tag: v4.1.12-111.0.20170918_2215~202 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=50ee713e57e5c42933ad658de6855ba02bd96232;p=users%2Fjedix%2Flinux-maple.git Revert "net/rds: Revert "RDS: add reconnect retry scheme for stalled connections"" This commit restores commit 5acb959ad599 ("RDS: add reconnect retry scheme for stalled connections"). Even though this retry scheme "workaround" causes a long brownout time in the OVM configuration, it is needed to avoid RDS loopback connections stalls after switch reboot in the bare-metal system. As for now, the plan agreed with Exadata is to put back this commit first and have a similar code path among QU6, QU5 and QU4. Orabug: 26497333 Signed-off-by: Wei Lin Guay Reviewed-by: Ajaykumar Hotchandani --- diff --git a/net/rds/connection.c b/net/rds/connection.c index 6a565fe9f324c..80ce8baa83b1c 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -260,6 +260,9 @@ static struct rds_connection *__rds_conn_create(struct net *net, __rds_conn_path_init(conn, cp, is_outgoing); cp->cp_index = i; + cp->cp_reconnect_retry = rds_sysctl_reconnect_retry_ms; + cp->cp_reconnect_retry_count = 0; + if (conn->c_loopback) cp->cp_wq = rds_local_wq; else diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 3fcf073492008..6a47845872624 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -937,22 +937,23 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, rds_ib_stats_inc(s_ib_listen_closed_stale); } else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) { unsigned long now = get_seconds(); + unsigned long retry = conn->c_reconnect_retry; - /* - * after 15 seconds, give up on existing connection - * attempts and make them try again. At this point - * it's no longer a race but something has gone - * horribly wrong + + /* after retry seconds, give up on + * existing connection attempts and try again. + * At this point it's no longer backoff race but + * something has gone horribly wrong. */ + retry = DIV_ROUND_UP(retry, 1000); if (now > conn->c_connection_start && - now - conn->c_connection_start > 15) { - printk(KERN_CRIT "RDS/IB: connection " - "<%u.%u.%u.%u,%u.%u.%u.%u,%d> " - "racing for 15s, forcing reset ", - NIPQUAD(conn->c_laddr), - NIPQUAD(conn->c_faddr), - conn->c_tos); - rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING); + now - conn->c_connection_start > retry) { + pr_info("RDS/IB: conn <%pI4,%pI4,%d> racing for more than %lus, retry\n", + &conn->c_laddr, &conn->c_faddr, + conn->c_tos, retry); + set_bit(RDS_RECONNECT_TIMEDOUT, + &conn->c_reconn_flags); + rds_conn_drop(conn, DR_RECONNECT_TIMEOUT); rds_ib_stats_inc(s_ib_listen_closed_stale); } else { /* Wait and see - our connect may still be succeeding */ diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 2d673a1e09c3f..4dfd784a83551 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -308,8 +308,12 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, "ADDR_CHANGE: calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n", NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr), conn->c_tos); - if (!rds_conn_self_loopback_passive(conn)) + if (!rds_conn_self_loopback_passive(conn)) { + queue_delayed_work(conn->c_path[0].cp_wq, + &conn->c_reconn_w, + msecs_to_jiffies(conn->c_reconnect_retry)); rds_conn_drop(conn, DR_IB_ADDR_CHANGE); + } } break; diff --git a/net/rds/rds.h b/net/rds/rds.h index 69ec690a99a5e..0b74ca9934bb7 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -145,6 +145,8 @@ enum { #define RDS_MPATH_WORKERS 8 #define RDS_MPATH_HASH(rs, n) (jhash_1word((rs)->rs_bound_port, \ (rs)->rs_hash_initval) & ((n) - 1)) +/* Bits for c_reconn_flags */ +#define RDS_RECONNECT_TIMEDOUT 0 enum rds_conn_drop_src { /* rds-core */ DR_DEFAULT, @@ -318,15 +320,6 @@ struct rds_connection { unsigned int c_version; struct net *c_net; - /* Re-connect stall diagnostics */ - unsigned long c_reconnect_start; - unsigned int c_reconnect_drops; - int c_reconnect_warn; - int c_reconnect_err; - int c_to_index; - - unsigned int c_reconnect; - /* Qos support */ u8 c_tos; @@ -1135,6 +1128,8 @@ extern unsigned long rds_sysctl_trace_flags; extern unsigned int rds_sysctl_trace_level; extern unsigned int rds_sysctl_shutdown_trace_start_time; extern unsigned int rds_sysctl_shutdown_trace_end_time; +extern unsigned long rds_sysctl_reconnect_retry_ms; +extern unsigned int rds_sysctl_reconnect_max_retries; /* threads.c */ int rds_threads_init(void); diff --git a/net/rds/rds_single_path.h b/net/rds/rds_single_path.h index 2f06ee548eed8..1757a78a0fa2d 100644 --- a/net/rds/rds_single_path.h +++ b/net/rds/rds_single_path.h @@ -21,6 +21,7 @@ #define c_send_w c_path[0].cp_send_w #define c_recv_w c_path[0].cp_recv_w #define c_conn_w c_path[0].cp_conn_w +#define c_reconn_w c_path[0].cp_reconn_w #define c_down_w c_path[0].cp_down_w #define c_cm_lock c_path[0].cp_cm_lock #define c_waitq c_path[0].cp_waitq @@ -31,6 +32,8 @@ #define c_acl_init c_path[0].cp_acl_init #define c_connection_start c_path[0].cp_connection_start #define c_reconnect_racing c_path[0].cp_reconnect_racing +#define c_reconnect_retry c_path[0].cp_reconnect_retry +#define c_reconn_flags c_path[0].cp_reconn_flags #define c_reconnect c_path[0].cp_reconnect #define c_to_index c_path[0].cp_to_index #define c_base_conn c_path[0].cp_base_conn diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index b22e8b8b6b89d..64b4e77f78553 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -52,6 +52,13 @@ unsigned int rds_sysctl_ping_enable = 1; unsigned int rds_sysctl_shutdown_trace_start_time; unsigned int rds_sysctl_shutdown_trace_end_time; +unsigned long rds_sysctl_reconnect_retry_ms = 1000; +static unsigned long reconnect_retry_ms_min = 100; +static unsigned long reconnect_retry_ms_max = 15000; + +unsigned int rds_sysctl_reconnect_max_retries = 60; +static unsigned long reconnect_min_retries = 15; + /* * We have official values, but must maintain the sysctl interface for existing * software that expects to find these values here. @@ -126,6 +133,25 @@ static struct ctl_table rds_sysctl_rds_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, + + }, + { + .procname = "reconnect_retry_ms", + .data = &rds_sysctl_reconnect_retry_ms, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &reconnect_retry_ms_min, + .extra2 = &reconnect_retry_ms_max, + }, + { + .procname = "reconnect_max_retries", + .data = &rds_sysctl_reconnect_max_retries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &reconnect_min_retries, + .extra2 = &rds_sysctl_reconnect_max_retries, }, { } }; diff --git a/net/rds/threads.c b/net/rds/threads.c index dc76c0c90df30..28b2f03948320 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -93,6 +93,8 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr) conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos); cp->cp_reconnect_jiffies = 0; + cp->cp_reconnect_retry = rds_sysctl_reconnect_retry_ms; + cp->cp_reconnect_retry_count = 0; set_bit(0, &conn->c_map_queued); queue_delayed_work(cp->cp_wq, &cp->cp_send_w, 0); queue_delayed_work(cp->cp_wq, &cp->cp_recv_w, 0); @@ -146,7 +148,8 @@ void rds_queue_reconnect(struct rds_conn_path *cp) return; set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); - if (cp->cp_reconnect_jiffies == 0) { + if (cp->cp_reconnect_jiffies == 0 || + test_and_clear_bit(RDS_RECONNECT_TIMEDOUT, &cp->cp_reconn_flags)) { cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0); return; @@ -316,12 +319,28 @@ void rds_reconnect_timeout(struct work_struct *work) cp_reconn_w.work); struct rds_connection *conn = cp->cp_conn; - if (!rds_conn_path_up(cp)) { - rds_rtd(RDS_RTD_CM, - "conn <%pI4,%pI4,%d> not up, retry(%d)\n", + if (cp->cp_reconnect_retry_count > rds_sysctl_reconnect_max_retries) { + pr_info("RDS: connection <%pI4,%pI4,%d> reconnect retries(%d) exceeded, stop retry\n", &conn->c_laddr, &conn->c_faddr, conn->c_tos, cp->cp_reconnect_retry_count); - rds_conn_path_drop(cp, DR_RECONNECT_TIMEOUT); + return; + } + + if (!rds_conn_up(conn)) { + if (rds_conn_state(conn) == RDS_CONN_DISCONNECTING) { + queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w, + msecs_to_jiffies(100)); + } else { + cp->cp_reconnect_retry_count++; + rds_rtd(RDS_RTD_CM, + "conn <%pI4,%pI4,%d> not up, retry(%d)\n", + &conn->c_laddr, &conn->c_faddr, conn->c_tos, + cp->cp_reconnect_retry_count); + queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w, + msecs_to_jiffies(cp->cp_reconnect_retry)); + set_bit(RDS_RECONNECT_TIMEDOUT, &cp->cp_reconn_flags); + rds_conn_drop(conn, DR_RECONNECT_TIMEOUT); + } } }