/* If the connection is being shut down, bail out
* right away. We return 0 so cm_id doesn't get
* destroyed prematurely */
- if (rds_conn_state(conn) == RDS_CONN_DISCONNECTING) {
+ if (rds_conn_state(conn) == RDS_CONN_DISCONNECTING ||
+ rds_conn_state(conn) == RDS_CONN_ERROR) {
/* Reject incoming connections while we're tearing
* down an existing one. */
if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
ret = 1;
rds_rtd(RDS_RTD_CM, "Bailing, conn %p being shut down, ret: %d\n",
conn, ret);
+ conn->c_reconnect_racing = 1;
goto out;
}
}
break;
case RDMA_CM_EVENT_ROUTE_ERROR:
- /* IP might have been moved so flush the ARP entry and retry */
- rds_ib_flush_arp_entry(&conn->c_faddr);
-
if (conn) {
+ /* IP might have been moved so flush the ARP entry and retry */
+ rds_ib_flush_arp_entry(&conn->c_faddr);
+
rds_rtd_ptr(RDS_RTD_ERR,
"ROUTE_ERROR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
conn, &conn->c_laddr,
conn->c_reconnect_racing = 0;
/* reset route resolution flag */
conn->c_route = 1;
- rds_conn_drop(conn, DR_IB_DISCONNECTED_EVENT);
+ if (!rds_conn_self_loopback_passive(conn))
+ rds_conn_drop(conn, DR_IB_DISCONNECTED_EVENT);
break;
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
queue_delayed_work(cp->cp_wq, &cp->cp_send_w, 0);
queue_delayed_work(cp->cp_wq, &cp->cp_recv_w, 0);
queue_delayed_work(cp->cp_wq, &cp->cp_hb_w, 0);
+ cancel_delayed_work(&cp->cp_reconn_w);
cp->cp_hb_start = 0;
cp->cp_connection_start = get_seconds();
}
EXPORT_SYMBOL_GPL(rds_connect_complete);
+/*
+ * Tell whether the local end of @conn is the active peer.
+ *
+ * The local end is treated as passive when its address compares greater
+ * than the peer's address (rds_addr_cmp() > 0), or when
+ * rds_conn_self_loopback_passive() returns true for the connection.
+ * In every other case it is the active peer and true is returned.
+ */
+static bool rds_conn_is_active_peer(struct rds_connection *conn)
+{
+	bool addr_is_greater;
+	bool loopback_passive;
+
+	/* Both predicates are evaluated unconditionally. */
+	addr_is_greater = rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) > 0;
+	loopback_passive = rds_conn_self_loopback_passive(conn);
+
+	/* Active only if neither condition marks this end as passive. */
+	return !addr_is_greater && !loopback_passive;
+}
+
/*
* This random exponential backoff is relied on to eventually resolve racing
* connects.
{
struct rds_connection *conn = cp->cp_conn;
bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP;
+ bool active = rds_conn_is_active_peer(conn);
+ uint64_t delay = 0;
rds_rtd_ptr(RDS_RTD_CM_EXT,
"conn %p:%d <%pI6c,%pI6c,%d> reconnect jiffies %lu\n",
- conn, !!conn->c_passive, &conn->c_laddr, &conn->c_faddr, conn->c_tos,
+ conn, active, &conn->c_laddr, &conn->c_faddr, conn->c_tos,
cp->cp_reconnect_jiffies);
/* let peer with smaller addr initiate reconnect, to avoid duels */
if (is_tcp && rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) >= 0)
return;
- set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
- if (cp->cp_reconnect_jiffies == 0) {
- cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
- queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
+ /* If we're the passive initiator and we're racing, let the
+ * active peer drive the reconnect
+ */
+ if (!active && cp->cp_reconnect_racing)
return;
- }
-
- rds_rtd_ptr(RDS_RTD_CM_EXT,
- "delay %lu conn %p <%pI6c,%pI6c,%d>\n",
- cp->cp_reconnect_jiffies, conn, &conn->c_laddr,
- &conn->c_faddr, conn->c_tos);
- if (rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) > 0)
- queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
+ if (cp->cp_reconnect_jiffies == 0)
+ cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
else
- queue_delayed_work(cp->cp_wq, &cp->cp_conn_w,
- cp->cp_reconnect_jiffies);
+ delay = cp->cp_reconnect_jiffies;
+
+ if (!active)
+ delay = rds_sysctl_reconnect_max_jiffies;
+ rds_rtd_ptr(RDS_RTD_CM_EXT,
+ "conn %p:%d <%pI6c,%pI6c,%d> delay %llu reconnect jiffies %lu\n",
+ conn, active, &conn->c_laddr, &conn->c_faddr, conn->c_tos,
+ delay, cp->cp_reconnect_jiffies);
+
+ set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
+ queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, delay);
cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
- rds_sysctl_reconnect_max_jiffies);
+ rds_sysctl_reconnect_max_jiffies);
}
void rds_connect_worker(struct work_struct *work)
unsigned long now = get_seconds();
bool is_tcp = cp->cp_conn->c_trans->t_type == RDS_TRANS_TCP;
struct rds_connection *conn = cp->cp_conn;
+ bool restart = true;
if ((now - cp->cp_reconnect_start >
rds_sysctl_shutdown_trace_start_time) &&
/* If racing is detected, the bigger IP backs off and lets the
* smaller IP drive the reconnect (one-sided reconnect).
*/
- if ((rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) < 0 ||
- rds_conn_self_loopback_passive(conn)) &&
- cp->cp_reconnect_racing) {
- rds_rtd_ptr(RDS_RTD_CM,
- "calling rds_conn_shutdown, conn %p:0 <%pI6c,%pI6c,%d>\n",
- conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
- rds_conn_shutdown(cp, 0);
+ if (cp->cp_reconnect_racing)
+ restart = rds_conn_is_active_peer(conn);
+
+ rds_rtd_ptr(RDS_RTD_CM,
+ "calling rds_conn_shutdown, conn %p restart: %d racing: %d <%pI6c,%pI6c,%d>\n",
+ conn, restart, cp->cp_reconnect_racing,
+ &conn->c_laddr, &conn->c_faddr, conn->c_tos);
+ rds_conn_shutdown(cp, restart);
+ if (!restart)
queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w,
msecs_to_jiffies(RDS_RECONNECT_RETRY_MS));
- } else {
- rds_rtd_ptr(RDS_RTD_CM,
- "calling rds_conn_shutdown, conn %p:1 <%pI6c,%pI6c,%d>\n",
- conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
- rds_conn_shutdown(cp, 1);
- }
}
void rds_threads_exit(void)