*/
mutex_lock(&conn->c_cm_lock);
if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
+ /*
+ * In both of the cases below, the conn is only half set up, so
+ * we need to make sure the lower layers don't destroy it.
+ */
+ ic = conn->c_transport_data;
+ if (ic && ic->i_cm_id == cm_id)
+ destroy = 0;
if (rds_conn_state(conn) == RDS_CONN_UP) {
rdsdebug("incoming connect while connecting\n");
rds_conn_drop(conn);
rds_ib_stats_inc(s_ib_listen_closed_stale);
- } else
- if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
- /* Wait and see - our connect may still be succeeding */
- rds_ib_stats_inc(s_ib_connect_raced);
+ } else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
+ unsigned long now = get_seconds();
+
+ /*
+ * after 15 seconds, give up on existing connection
+ * attempts and make them try again. At this point
+ * it's no longer a race but something has gone
+ * horribly wrong
+ */
+ if (now > conn->c_connection_start &&
+ now - conn->c_connection_start > 15) {
+ printk(KERN_CRIT "rds connection racing for 15s, forcing reset "
+ "connection %u.%u.%u.%u->%u.%u.%u.%u\n",
+ NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr));
+ rds_conn_drop(conn);
+ rds_ib_stats_inc(s_ib_listen_closed_stale);
+ } else {
+ /* Wait and see - our connect may still be succeeding */
+ rds_ib_stats_inc(s_ib_connect_raced);
+ }
}
goto out;
}
ic = conn->c_transport_data;
+ /*
+ * record the time we started trying to connect so that we can
+ * drop the connection if it doesn't work out after a while
+ */
+ conn->c_connection_start = get_seconds();
+
rds_ib_set_protocol(conn, version);
rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
set_bit(0, &conn->c_map_queued);
queue_delayed_work(rds_wq, &conn->c_send_w, 0);
queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
+ conn->c_connection_start = get_seconds();
}
EXPORT_SYMBOL_GPL(rds_connect_complete);
clear_bit(RDS_RECONNECT_PENDING, &conn->c_flags);
if (rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
+ /*
+ * record the time we started trying to connect so that we can
+ * drop the connection if it doesn't work out after a while
+ */
+ conn->c_connection_start = get_seconds();
+
ret = conn->c_trans->conn_connect(conn);
rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n",
conn, &conn->c_laddr, &conn->c_faddr, ret);