}
EXPORT_SYMBOL_GPL(rds_conn_find);
-void rds_conn_shutdown(struct rds_conn_path *cp)
+void rds_conn_shutdown(struct rds_conn_path *cp, int restart)
{
struct rds_connection *conn = cp->cp_conn;
* conn - the reconnect is always triggered by the active peer. */
cancel_delayed_work_sync(&cp->cp_conn_w);
rcu_read_lock();
- if (!hlist_unhashed(&conn->c_hash_node)) {
+ if (!hlist_unhashed(&conn->c_hash_node) && restart) {
rcu_read_unlock();
rds_queue_reconnect(cp);
} else {
rds_ib_stats_inc(s_ib_listen_closed_stale);
} else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
unsigned long now = get_seconds();
+ conn->c_reconnect_racing++;
+ /* When a race is detected, one side should fall back
+ * to passive and let the active side reconnect.
+ * If the connection is in CONNECTING and still receives
+ * multiple back-to-back REQs, it means something is
+ * horribly wrong. Thus, drop the connection.
+ */
+ if (conn->c_reconnect_racing > 5) {
+ rds_rtd_ptr(RDS_RTD_CM,
+ "RDS/IB: conn <%pI6c,%pI6c,%d> back-to-back REQ, reset\n",
+ &conn->c_laddr, &conn->c_faddr,
+ conn->c_tos);
+ conn->c_reconnect_racing = 0;
+ rds_conn_drop(conn, DR_IB_REQ_WHILE_CONNECTING);
/* After 15 seconds, give up on existing connection
* attempts and make them try again. At this point
* it's no longer a race but something has gone
- * horribly wrong
+ * horribly wrong.
*/
- if (now > conn->c_connection_start &&
+ } else if (now > conn->c_connection_start &&
now - conn->c_connection_start > 15) {
rds_rtd_ptr(RDS_RTD_CM,
"RDS/IB: connection <%pI6c,%pI6c,%d> racing for 15s, forcing reset",
"ROUTE_ERROR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
conn, &conn->c_laddr,
&conn->c_faddr, conn->c_tos);
+ conn->c_reconnect_racing = 0;
rds_conn_drop(conn, DR_IB_ROUTE_ERR);
}
break;
"ADDR_ERROR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
conn, &conn->c_laddr,
&conn->c_faddr, conn->c_tos);
+ conn->c_reconnect_racing = 0;
rds_conn_drop(conn, DR_IB_ADDR_ERR);
}
break;
"CONN/UNREACHABLE/RMVAL ERR: conn %p, calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
conn, &conn->c_laddr,
&conn->c_faddr, conn->c_tos);
+ conn->c_reconnect_racing = 0;
rds_conn_drop(conn, DR_IB_CONNECT_ERR);
}
break;
case RDMA_CM_EVENT_REJECTED:
err = (int *)event->param.conn.private_data;
+ if (conn && event->status == RDS_REJ_CONSUMER_DEFINED &&
+ *err <= 1) {
+ conn->c_reconnect_racing++;
+ rds_rtd_ptr(RDS_RTD_ERR,
+ "conn %p, reconnect racing (%d) rds_conn_drop <%pI6c,%pI6c,%d>\n",
+ conn, conn->c_reconnect_racing, &conn->c_laddr,
+ &conn->c_faddr, conn->c_tos);
+ }
+
if (conn) {
if (event->status == RDS_REJ_CONSUMER_DEFINED &&
(*err) == 0) {
"ADDR_CHANGE: calling rds_conn_drop <%pI6c,%pI6c,%d>\n",
&conn->c_laddr, &conn->c_faddr,
conn->c_tos);
+ conn->c_reconnect_racing = 0;
if (!rds_conn_self_loopback_passive(conn))
rds_conn_drop(conn, DR_IB_ADDR_CHANGE);
}
rds_rtd_ptr(RDS_RTD_CM,
"DISCONNECT event - dropping connection %pI6c->%pI6c tos %d\n",
&conn->c_laddr, &conn->c_faddr, conn->c_tos);
+ conn->c_reconnect_racing = 0;
rds_conn_drop(conn, DR_IB_DISCONNECTED_EVENT);
break;
#include <rdma/rdma_cm.h>
#include "rds.h"
-#define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000
+#define RDS_RDMA_RESOLVE_TIMEOUT_MS RDS_RECONNECT_RETRY_MS
/* Per IB specification 7.7.3, service level is a 4-bit field. */
#define TOS_TO_SL(tos) ((tos) & 0xF)
#define KERNEL_HAS_ATOMIC64
#endif
+#define RDS_RECONNECT_RETRY_MS 15000
+
#ifdef RDS_DEBUG
#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
#else
wait_queue_head_t c_hs_waitq; /* handshake waitq */
- /* used by RDS_CONN_RESET */
struct list_head c_laddr_node;
u32 c_my_gen_num;
struct in6_addr *faddr,
struct rds_transport *trans, u8 tos,
int dev_if);
-void rds_conn_shutdown(struct rds_conn_path *cp);
+void rds_conn_shutdown(struct rds_conn_path *cp, int restart);
void rds_conn_destroy(struct rds_connection *conn, int shutdown);
void rds_conn_reset(struct rds_connection *conn);
void rds_conn_drop(struct rds_connection *conn, int reason);
rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
- queue_delayed_work(cp->cp_wq, &cp->cp_conn_w,
- rand % cp->cp_reconnect_jiffies);
+ if (rds_addr_cmp(&conn->c_laddr, &conn->c_faddr))
+ queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
+ else
+ queue_delayed_work(cp->cp_wq, &cp->cp_conn_w,
+ msecs_to_jiffies(100));
cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
rds_sysctl_reconnect_max_jiffies);
"conn <%pI6c,%pI6c,%d> not up, retry(%d)\n",
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
cp->cp_reconnect_retry_count);
+ cp->cp_reconnect_racing = 0;
rds_conn_path_drop(cp, DR_RECONNECT_TIMEOUT);
}
}
conn->c_tos,
conn_drop_reason_str(cp->cp_drop_source));
- rds_conn_shutdown(cp);
+ /* If racing is detected, the lower IP backs off and lets the higher IP
+ * drive the reconnect (one-sided reconnect).
+ */
+ if ((rds_addr_cmp(&conn->c_faddr, &conn->c_laddr) ||
+ rds_conn_self_loopback_passive(conn)) &&
+ cp->cp_reconnect_racing) {
+ rds_rtd_ptr(RDS_RTD_CM,
+ "calling rds_conn_shutdown, conn %p:0 <%pI6c,%pI6c,%d>\n",
+ conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
+ rds_conn_shutdown(cp, 0);
+ queue_delayed_work(cp->cp_wq, &cp->cp_reconn_w,
+ msecs_to_jiffies(RDS_RECONNECT_RETRY_MS));
+ } else {
+ rds_rtd_ptr(RDS_RTD_CM,
+ "calling rds_conn_shutdown, conn %p:1 <%pI6c,%pI6c,%d>\n",
+ conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
+ rds_conn_shutdown(cp, 1);
+ }
}
void rds_threads_exit(void)