From: Håkon Bugge Date: Thu, 7 Mar 2019 11:39:22 +0000 (+0100) Subject: rds: Fix one-sided connect X-Git-Tag: v4.1.12-124.31.3~114 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=f1e3a7b192da42bb2e2815e5eef2cd1e2c658cb2;p=users%2Fjedix%2Flinux-maple.git rds: Fix one-sided connect The decision to designate a peer to be the active side did not take loopback connections into account. Further, fix a bug in rds_shutdown_worker where the passive side, in case of no reconnect racing, did not attempt to restart the connection. Orabug: 29391909 Signed-off-by: Håkon Bugge Tested-by: Rosa Lopez Reviewed-by: Dag Moxnes --- v1 -> v2: * Incorporated review comments from Dag * Split the commit in two Signed-off-by: Brian Maly --- diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index b3137db27735..766b0e100c12 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -1112,6 +1112,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos); rds_conn_drop(conn, DR_IB_REQ_WHILE_CONN_UP); rds_ib_stats_inc(s_ib_listen_closed_stale); + conn->c_reconnect_racing++; } else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) { unsigned long now = get_seconds(); conn->c_reconnect_racing++; diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index d7a68a21b4ac..08372651c274 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -117,13 +117,15 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, /* If the connection is being shut down, bail out * right away. We return 0 so cm_id doesn't get * destroyed prematurely */ - if (rds_conn_state(conn) == RDS_CONN_DISCONNECTING) { + if (rds_conn_state(conn) == RDS_CONN_DISCONNECTING || + rds_conn_state(conn) == RDS_CONN_ERROR) { /* Reject incoming connections while we're tearing * down an existing one. 
*/ if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) ret = 1; rds_rtd(RDS_RTD_CM, "Bailing, conn %p being shut down, ret: %d\n", conn, ret); + conn->c_reconnect_racing = 1; goto out; } } @@ -202,10 +204,10 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_ROUTE_ERROR: - /* IP might have been moved so flush the ARP entry and retry */ - rds_ib_flush_arp_entry(&conn->c_faddr); - if (conn) { + /* IP might have been moved so flush the ARP entry and retry */ + rds_ib_flush_arp_entry(&conn->c_faddr); + rds_rtd_ptr(RDS_RTD_ERR, "ROUTE_ERROR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n", conn, &conn->c_laddr, @@ -313,7 +315,8 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, conn->c_reconnect_racing = 0; /* reset route resolution flag */ conn->c_route = 1; - rds_conn_drop(conn, DR_IB_DISCONNECTED_EVENT); + if (!rds_conn_self_loopback_passive(conn)) + rds_conn_drop(conn, DR_IB_DISCONNECTED_EVENT); break; case RDMA_CM_EVENT_TIMEWAIT_EXIT: diff --git a/net/rds/threads.c b/net/rds/threads.c index 3acc78146328..db4660dde8c3 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -99,6 +99,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr) queue_delayed_work(cp->cp_wq, &cp->cp_send_w, 0); queue_delayed_work(cp->cp_wq, &cp->cp_recv_w, 0); queue_delayed_work(cp->cp_wq, &cp->cp_hb_w, 0); + cancel_delayed_work(&cp->cp_reconn_w); cp->cp_hb_start = 0; cp->cp_connection_start = get_seconds(); @@ -115,6 +116,15 @@ void rds_connect_complete(struct rds_connection *conn) } EXPORT_SYMBOL_GPL(rds_connect_complete); +static bool rds_conn_is_active_peer(struct rds_connection *conn) +{ + bool greater_ip = rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) > 0; + bool self_loopback = rds_conn_self_loopback_passive(conn); + bool passive = greater_ip || self_loopback; + + return !passive; +} + /* * This random exponential backoff is relied on to eventually resolve racing * connects. 
@@ -137,36 +147,41 @@ void rds_queue_reconnect(struct rds_conn_path *cp) { struct rds_connection *conn = cp->cp_conn; bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP; + bool active = rds_conn_is_active_peer(conn); + uint64_t delay = 0; rds_rtd_ptr(RDS_RTD_CM_EXT, "conn %p:%d <%pI6c,%pI6c,%d> reconnect jiffies %lu\n", - conn, !!conn->c_passive, &conn->c_laddr, &conn->c_faddr, conn->c_tos, + conn, active, &conn->c_laddr, &conn->c_faddr, conn->c_tos, cp->cp_reconnect_jiffies); /* let peer with smaller addr initiate reconnect, to avoid duels */ if (is_tcp && rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) >= 0) return; - set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); - if (cp->cp_reconnect_jiffies == 0) { - cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; - queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0); + /* If we're the passive initiator and we're racing, let the + * active peer drive the reconnect + */ + if (!active && cp->cp_reconnect_racing) return; - } - - rds_rtd_ptr(RDS_RTD_CM_EXT, - "delay %lu conn %p <%pI6c,%pI6c,%d>\n", - cp->cp_reconnect_jiffies, conn, &conn->c_laddr, - &conn->c_faddr, conn->c_tos); - if (rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) > 0) - queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0); + if (cp->cp_reconnect_jiffies == 0) + cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; else - queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, - cp->cp_reconnect_jiffies); + delay = cp->cp_reconnect_jiffies; + + if (!active) + delay = rds_sysctl_reconnect_max_jiffies; + rds_rtd_ptr(RDS_RTD_CM_EXT, + "conn %p:%d <%pI6c,%pI6c,%d> delay %llu reconnect jiffies %lu\n", + conn, active, &conn->c_laddr, &conn->c_faddr, conn->c_tos, + delay, cp->cp_reconnect_jiffies); + + set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); + queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, delay); cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2, - rds_sysctl_reconnect_max_jiffies); + rds_sysctl_reconnect_max_jiffies); } void rds_connect_worker(struct 
work_struct *work) @@ -325,6 +340,7 @@ void rds_shutdown_worker(struct work_struct *work) unsigned long now = get_seconds(); bool is_tcp = cp->cp_conn->c_trans->t_type == RDS_TRANS_TCP; struct rds_connection *conn = cp->cp_conn; + bool restart = true; if ((now - cp->cp_reconnect_start > rds_sysctl_shutdown_trace_start_time) && @@ -340,21 +356,17 @@ void rds_shutdown_worker(struct work_struct *work) /* If racing is detected, the bigger IP backs off and lets the * smaller IP drive the reconnect (one-sided reconnect). */ - if ((rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) < 0 || - rds_conn_self_loopback_passive(conn)) && - cp->cp_reconnect_racing) { - rds_rtd_ptr(RDS_RTD_CM, - "calling rds_conn_shutdown, conn %p:0 <%pI6c,%pI6c,%d>\n", - conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos); - rds_conn_shutdown(cp, 0); + if (cp->cp_reconnect_racing) + restart = rds_conn_is_active_peer(conn); + + rds_rtd_ptr(RDS_RTD_CM, + "calling rds_conn_shutdown, conn %p restart: %d racing: %d <%pI6c,%pI6c,%d>\n", + conn, restart, cp->cp_reconnect_racing, + &conn->c_laddr, &conn->c_faddr, conn->c_tos); + rds_conn_shutdown(cp, restart); + if (!restart) queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w, msecs_to_jiffies(RDS_RECONNECT_RETRY_MS)); - } else { - rds_rtd_ptr(RDS_RTD_CM, - "calling rds_conn_shutdown, conn %p:1 <%pI6c,%pI6c,%d>\n", - conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos); - rds_conn_shutdown(cp, 1); - } } void rds_threads_exit(void)