"<%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
NIPQUAD(reset.src.s_addr),
NIPQUAD(reset.dst.s_addr), conn->c_tos);
+ conn->c_drop_source = 1;
rds_conn_drop(conn);
}
mutex_lock(&conn->c_cm_lock);
if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING)
&& !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
+ conn->c_drop_source = 2;
rds_conn_error(conn, "shutdown called in state %d\n",
atomic_read(&conn->c_state));
mutex_unlock(&conn->c_cm_lock);
* Quite reproduceable with loopback connections.
* Mostly harmless.
*/
+ conn->c_drop_source = 3;
rds_conn_error(conn,
"%s: failed to transition to state DOWN, "
"current state is %d\n",
synchronize_rcu();
/* shut the connection down */
+ conn->c_drop_source = 4;
rds_conn_drop(conn);
flush_work(&conn->c_down_w);
rds_conn_message_info_retrans);
}
+/**
+ * conn_drop_reason_str - translate a connection drop reason code into text
+ * @reason: drop reason code, i.e. the value a caller stored in
+ *          conn->c_drop_source right before dropping the connection
+ *
+ * Return: pointer to a string literal describing @reason.  The string must
+ * not be modified or freed by the caller.  Codes that have no entry in the
+ * table below (including 0) yield "unknown reason".
+ */
+char *conn_drop_reason_str(u8 reason)
+{
+	/*
+	 * Drop reason codes are partitioned by the layer that sets them:
+	 *
+	 * 0-19: rds-core
+	 *
+	 * 20-119: IB
+	 *	20-39: ib_cm
+	 *	40-59: event handling
+	 *	60-79: data path
+	 *	80-119: special features like active bonding
+	 *
+	 * 120-139: iWARP
+	 *
+	 * 140-159: TCP
+	 *
+	 * 160-255: any other future additions
+	 */
+	switch (reason) {
+	/* rds-core */
+	case 1: return "user reset";
+	case 2: return "invalid connection state";
+	case 3: return "failure to move to DOWN state";
+	case 4: return "connection destroy";
+	case 5: return "zero lane went down";
+	case 6: return "conn_connect failure";
+	case 7: return "hb timeout";
+	case 8: return "reconnect timeout";
+
+	/* IB: ib_cm */
+	case 20: return "race between ESTABLISHED event and drop";
+	case 21: return "conn is not in CONNECTING state";
+	case 22: return "qp event";
+	case 23: return "base conn down";
+	case 24: return "incoming REQ in CONN_UP state";
+	case 25: return "incoming REQ in CONNECTING state";
+	/*
+	 * 26 and 28 are both setup_qp failures but are raised from two
+	 * different call sites; keep the texts distinct so the log line
+	 * identifies which one fired.
+	 */
+	case 26: return "setup_qp failure before rdma_accept";
+	case 27: return "rdma_accept failure";
+	case 28: return "setup_qp failure before rdma_connect";
+	case 29: return "rdma_connect failure";
+
+	/* IB: rdma_cm event handling */
+	case 40: return "rdma_set_ib_paths failure";
+	case 41: return "resolve_route failure";
+	case 42: return "detected rdma_cm_id mismatch";
+	case 43: return "ROUTE_ERROR event";
+	case 44: return "ADDR_ERROR event";
+	case 45: return "CONNECT_ERROR or UNREACHABLE or DEVICE_REMOVE event";
+	case 46: return "CONSUMER_DEFINED reject";
+	case 47: return "REJECTED event";
+	case 48: return "ADDR_CHANGE event";
+	case 49: return "DISCONNECTED event";
+	case 50: return "TIMEWAIT_EXIT event";
+
+	/* IB: data path */
+	case 60: return "post_recv failure";
+	case 61: return "send_ack failure";
+	case 62: return "no header in incoming msg";
+	case 63: return "corrupted header in incoming msg";
+	case 64: return "fragment header mismatch";
+	case 65: return "recv completion error";
+	case 66: return "send completion error";
+	case 67: return "post_send failure";
+
+	/* IB: special features */
+	case 80: return "rds_rdma module unload";
+	case 81: return "active bonding failover";
+	case 82: return "corresponding loopback conn drop";
+	case 83: return "active bonding failback";
+
+	/* iWARP: connection management */
+	case 120: return "qp_event";
+	/*
+	 * 121 is set when an incoming connect finds the connection already
+	 * in RDS_CONN_UP, so describe that state (same wording as IB code 24).
+	 */
+	case 121: return "incoming REQ in CONN_UP state";
+	/* 122/124: same call-site distinction as IB codes 26/28 above. */
+	case 122: return "setup_qp failure before rdma_accept";
+	case 123: return "rdma_accept failure";
+	case 124: return "setup_qp failure before rdma_connect";
+	case 125: return "rdma_connect failure";
+
+	/* iWARP: data path */
+	case 130: return "post_recv failure";
+	case 131: return "send_ack failure";
+	case 132: return "no header in incoming msg";
+	case 133: return "corrupted header in incoming msg";
+	case 134: return "fragment header mismatch";
+	case 135: return "recv completion error";
+	case 136: return "send completion error";
+
+	/* TCP */
+	case 140: return "sk_state to TCP_CLOSE";
+	case 141: return "tcp_send failure";
+
+	default: return "unknown reason";
+	}
+}
+
static void rds_conn_probe_lanes(struct rds_connection *conn)
{
struct hlist_head *head =
NIPQUAD(tmp->c_faddr),
tmp->c_tos);
+ conn->c_drop_source = 5;
rds_conn_drop(tmp);
}
}
conn->c_reconnect_err = 0;
conn->c_reconnect_racing = 0;
printk(KERN_INFO "RDS/IB: connection "
- "<%u.%u.%u.%u,%u.%u.%u.%u,%d> dropped\n",
+ "<%u.%u.%u.%u,%u.%u.%u.%u,%d> dropped due to '%s'\n",
NIPQUAD(conn->c_laddr),
NIPQUAD(conn->c_faddr),
- conn->c_tos);
+ conn->c_tos,
+ conn_drop_reason_str(conn->c_drop_source));
if (conn->c_tos == 0)
rds_conn_probe_lanes(conn);
"calling rds_conn_drop to drop all connections.\n");
spin_lock_irqsave(&rds_ibdev->spinlock, flags);
- list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node)
+ list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node) {
+ ic->conn->c_drop_source = 80;
rds_conn_drop(ic->conn);
+ }
spin_unlock_irqrestore(&rds_ibdev->spinlock, flags);
}
rds_rtd(RDS_RTD_CM_EXT,
"conn: %p, calling rds_conn_drop\n", conn);
+ conn->c_drop_source = 81;
rds_conn_drop(conn);
kfree(work);
ic->conn->c_faddr &&
ic2->conn->c_faddr ==
ic->conn->c_laddr) {
- rds_rtd(RDS_RTD_CM_EXT_P,
- "conn:%p, tos %d, calling rds_conn_drop\n",
- ic2->conn,
- ic2->conn->c_tos);
+ rds_rtd(RDS_RTD_CM_EXT_P,
+ "conn:%p, tos %d, calling rds_conn_drop\n",
+ ic2->conn,
+ ic2->conn->c_tos);
+ ic2->conn->c_drop_source = 82;
rds_conn_drop(ic2->conn);
}
}
rds_rtd(RDS_RTD_CM_EXT,
"conn: %p, tos %d, calling rds_conn_drop\n",
ic->conn, ic->conn->c_tos);
+ ic->conn->c_drop_source = 83;
rds_conn_drop(ic->conn);
}
}
rds_rtd(RDS_RTD_CM,
"ic->i_cm_id is NULL, ic: %p, calling rds_conn_drop\n",
ic);
+ conn->c_drop_source = 20;
rds_conn_drop(conn);
return;
}
rds_rtd(RDS_RTD_CM,
"conn is in connecting state, conn: %p, calling rds_conn_drop\n",
conn);
+ conn->c_drop_source = 21;
rds_conn_drop(conn);
return;
}
"Fatal QP Event %u (%s) - connection %pI4->%pI4 tos %d, reconnecting\n",
event->event, rds_ib_event_str(event->event),
&conn->c_laddr, &conn->c_faddr, conn->c_tos);
+ conn->c_drop_source = 22;
rds_conn_drop(conn);
break;
}
NIPQUAD(conn->c_laddr),
NIPQUAD(conn->c_faddr),
conn->c_tos);
+ conn->c_drop_source = 23;
rds_conn_drop(conn);
}
if (rds_conn_state(conn) == RDS_CONN_UP) {
rds_rtd(RDS_RTD_CM_EXT_P,
"incoming connect while connecting\n");
+ conn->c_drop_source = 24;
rds_conn_drop(conn);
rds_ib_stats_inc(s_ib_listen_closed_stale);
} else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
NIPQUAD(conn->c_laddr),
NIPQUAD(conn->c_faddr),
conn->c_tos);
+ conn->c_drop_source = 25;
rds_conn_drop(conn);
rds_ib_stats_inc(s_ib_listen_closed_stale);
} else {
err = rds_ib_setup_qp(conn);
if (err) {
+ conn->c_drop_source = 26;
rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err);
goto out;
}
#endif
/* rdma_accept() calls rdma_reject() internally if it fails */
err = rdma_accept(cm_id, &conn_param);
- if (err)
+ if (err) {
+ conn->c_drop_source = 27;
rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err);
+ }
#if RDMA_RDS_APM_SUPPORTED
else if (rds_ib_apm_enabled && !conn->c_loopback) {
err = rdma_enable_apm(cm_id, RDMA_ALT_PATH_BEST);
ret = rds_ib_setup_qp(conn);
if (ret) {
+ conn->c_drop_source = 28;
rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", ret);
goto out;
}
rds_ib_cm_fill_conn_param(conn, &conn_param, &dp,
conn->c_proposed_version, UINT_MAX, UINT_MAX);
ret = rdma_connect(cm_id, &conn_param);
- if (ret)
+ if (ret) {
+ conn->c_drop_source = 29;
rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
+ }
out:
/* Beware - returning non-zero tells the rdma_cm to destroy
recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
(long) sg_dma_address(&recv->r_frag->f_sg), ret);
if (ret) {
+ conn->c_drop_source = 60;
rds_ib_conn_error(conn, "recv post on "
"%pI4 returned %d, disconnecting and "
"reconnecting\n", &conn->c_faddr,
rds_ib_stats_inc(s_ib_ack_send_failure);
+ ic->conn->c_drop_source = 61;
rds_ib_conn_error(ic->conn, "sending ack failed\n");
} else
rds_ib_stats_inc(s_ib_ack_sent);
data_len);
if (data_len < sizeof(struct rds_header)) {
+ conn->c_drop_source = 62;
rds_ib_conn_error(conn, "incoming message "
"from %pI4 didn't inclue a "
"header, disconnecting and "
/* Validate the checksum. */
if (!rds_message_verify_checksum(ihdr)) {
+ conn->c_drop_source = 63;
rds_ib_conn_error(conn, "incoming message "
"from %pI4 has corrupted header - "
"forcing a reconnect\n",
|| hdr->h_len != ihdr->h_len
|| hdr->h_sport != ihdr->h_sport
|| hdr->h_dport != ihdr->h_dport) {
+ conn->c_drop_source = 64;
rds_ib_conn_error(conn,
"fragment header mismatch; forcing reconnect\n");
return;
} else {
/* We expect errors as the qp is drained during shutdown */
if (rds_conn_up(conn) || rds_conn_connecting(conn)) {
+ conn->c_drop_source = 65;
rds_ib_conn_error(conn, "recv completion "
"<%pI4,%pI4,%d> had "
"status %u, disconnecting and "
/* We expect errors as the qp is drained during shutdown */
if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
+ conn->c_drop_source = 66;
rds_ib_conn_error(conn,
"send completion <%u.%u.%u.%u,%u.%u.%u.%u,%d> status "
"%u vendor_err %u, disconnecting and reconnecting\n",
prev->s_op = NULL;
}
+ ic->conn->c_drop_source = 67;
rds_ib_conn_error(ic->conn, "ib_post_send failed\n");
goto out;
}
"- connection %pI4->%pI4, reconnecting\n",
event->event, &conn->c_laddr,
&conn->c_faddr);
+ conn->c_drop_source = 120;
rds_conn_drop(conn);
break;
}
if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
if (rds_conn_state(conn) == RDS_CONN_UP) {
rdsdebug("incoming connect while connecting\n");
+ conn->c_drop_source = 121;
rds_conn_drop(conn);
rds_iw_stats_inc(s_iw_listen_closed_stale);
} else
err = rds_iw_setup_qp(conn);
if (err) {
+ conn->c_drop_source = 122;
rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err);
goto out;
}
err = rdma_accept(cm_id, &conn_param);
mutex_unlock(&conn->c_cm_lock);
if (err) {
+ conn->c_drop_source = 123;
rds_iw_conn_error(conn, "rdma_accept failed (%d)\n", err);
goto out;
}
ret = rds_iw_setup_qp(conn);
if (ret) {
+ conn->c_drop_source = 124;
rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", ret);
goto out;
}
rds_iw_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION);
ret = rdma_connect(cm_id, &conn_param);
- if (ret)
+ if (ret) {
+ conn->c_drop_source = 125;
rds_iw_conn_error(conn, "rdma_connect failed (%d)\n", ret);
-
+ }
out:
/* Beware - returning non-zero tells the rdma_cm to destroy
* the cm_id. We should certainly not do it as long as we still
recv->r_iwinc, recv->r_frag->f_page,
(long) recv->r_frag->f_mapped, ret);
if (ret) {
+ conn->c_drop_source = 130;
rds_iw_conn_error(conn, "recv post on "
"%pI4 returned %d, disconnecting and "
"reconnecting\n", &conn->c_faddr,
rds_iw_stats_inc(s_iw_ack_send_failure);
+ ic->conn->c_drop_source = 131;
rds_iw_conn_error(ic->conn, "sending ack failed\n");
} else
rds_iw_stats_inc(s_iw_ack_sent);
byte_len);
if (byte_len < sizeof(struct rds_header)) {
+ conn->c_drop_source = 132;
rds_iw_conn_error(conn, "incoming message "
"from %pI4 didn't inclue a "
"header, disconnecting and "
/* Validate the checksum. */
if (!rds_message_verify_checksum(ihdr)) {
+ conn->c_drop_source = 133;
rds_iw_conn_error(conn, "incoming message "
"from %pI4 has corrupted header - "
"forcing a reconnect\n",
|| hdr->h_len != ihdr->h_len
|| hdr->h_sport != ihdr->h_sport
|| hdr->h_dport != ihdr->h_dport) {
+ conn->c_drop_source = 134;
rds_iw_conn_error(conn,
"fragment header mismatch; forcing reconnect\n");
return;
if (wc.status == IB_WC_SUCCESS) {
rds_iw_process_recv(conn, recv, wc.byte_len, state);
} else {
+ conn->c_drop_source = 135;
rds_iw_conn_error(conn, "recv completion on "
"%pI4 had status %u, disconnecting and "
"reconnecting\n", &conn->c_faddr,
/* We expect errors as the qp is drained during shutdown */
if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
+ conn->c_drop_source = 136;
rds_iw_conn_error(conn,
"send completion on %pI4 "
"had status %u, disconnecting and reconnecting\n",
"ADDR_RESOLVED: ret %d, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
ret, NIPQUAD(conn->c_laddr),
NIPQUAD(conn->c_faddr), conn->c_tos);
+ conn->c_drop_source = 40;
rds_conn_drop(conn);
ret = 0;
}
ibic = conn->c_transport_data;
if (ibic && ibic->i_cm_id == cm_id)
ibic->i_cm_id = NULL;
+ conn->c_drop_source = 41;
rds_conn_drop(conn);
}
} else if (conn->c_to_index < (RDS_RDMA_RESOLVE_TO_MAX_INDEX-1))
"ROUTE_RESOLVED: calling rds_conn_drop, conn %p <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
conn, NIPQUAD(conn->c_laddr),
NIPQUAD(conn->c_faddr), conn->c_tos);
+ conn->c_drop_source = 42;
rds_conn_drop(conn);
}
}
"ROUTE_ERROR: conn %p, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
conn, NIPQUAD(conn->c_laddr),
NIPQUAD(conn->c_faddr), conn->c_tos);
+ conn->c_drop_source = 43;
rds_conn_drop(conn);
}
break;
"ADDR_ERROR: conn %p, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
conn, NIPQUAD(conn->c_laddr),
NIPQUAD(conn->c_faddr), conn->c_tos);
+ conn->c_drop_source = 44;
rds_conn_drop(conn);
}
break;
"CONN/UNREACHABLE/RMVAL ERR: conn %p, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
conn, NIPQUAD(conn->c_laddr),
NIPQUAD(conn->c_faddr), conn->c_tos);
+ conn->c_drop_source = 45;
rds_conn_drop(conn);
}
break;
if (!conn->c_tos) {
conn->c_proposed_version =
RDS_PROTOCOL_COMPAT_VERSION;
+ conn->c_drop_source = 46;
rds_conn_drop(conn);
} else {
if (conn->c_loopback)
NIPQUAD(conn->c_laddr),
NIPQUAD(conn->c_faddr),
conn->c_tos);
+ conn->c_drop_source = 47;
rds_conn_drop(conn);
}
}
"ADDR_CHANGE: calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
conn->c_tos);
+ conn->c_drop_source = 48;
rds_conn_drop(conn);
}
#else
"ADDR_CHANGE: calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
conn->c_tos);
+ conn->c_drop_source = 48;
rds_conn_drop(conn);
}
#endif
rds_rtd(RDS_RTD_CM,
"DISCONNECT event - dropping connection %pI4->%pI4 tos %d\n",
&conn->c_laddr, &conn->c_faddr, conn->c_tos);
+ conn->c_drop_source = 49;
rds_conn_drop(conn);
break;
"dropping connection "
"%pI4->%pI4\n", &conn->c_laddr,
&conn->c_faddr);
+ conn->c_drop_source = 50;
rds_conn_drop(conn);
} else
printk(KERN_INFO "TIMEWAIT_EXIT event - conn=NULL\n");
unsigned int c_reconnect_racing;
unsigned int c_route_resolved;
+ u8 c_drop_source;
};
static inline
struct rds_info_lengths *lens,
int (*visitor)(struct rds_connection *, void *),
size_t item_len);
+char *conn_drop_reason_str(u8 reason);
void __rds_conn_error(struct rds_connection *conn, const char *, ...)
__attribute__ ((format (printf, 2, 3)));
#define rds_conn_error(conn, fmt...) \
break;
case TCP_CLOSE_WAIT:
case TCP_CLOSE:
+ conn->c_drop_source = 140;
rds_conn_drop(conn);
default:
break;
printk(KERN_WARNING "RDS/tcp: send to %u.%u.%u.%u "
"returned %d, disconnecting and reconnecting\n",
NIPQUAD(conn->c_faddr), ret);
+ conn->c_drop_source = 141;
rds_conn_drop(conn);
}
}
* drop the connection if it doesn't work out after a while
*/
conn->c_connection_start = get_seconds();
+ conn->c_drop_source = 0;
ret = conn->c_trans->conn_connect(conn);
rds_rtd(RDS_RTD_CM_EXT,
rds_rtd(RDS_RTD_CM_EXT,
"reconnecting..., conn %p\n", conn);
rds_queue_reconnect(conn);
- } else
+ } else {
+ conn->c_drop_source = 6;
rds_conn_error(conn, "RDS: connect failed\n");
+ }
}
} else {
rds_rtd(RDS_RTD_CM,
"RDS/IB: connection <%u.%u.%u.%u,%u.%u.%u.%u,%d> timed out (0x%lx,0x%lx)..discon and recon\n",
NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
conn->c_tos, conn->c_hb_start, now);
- rds_conn_drop(conn);
+ conn->c_drop_source = 7;
+ rds_conn_drop(conn);
return;
}
queue_delayed_work(rds_wq, &conn->c_hb_w, HZ);
"conn not up, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
conn->c_tos);
+ conn->c_drop_source = 8;
rds_conn_drop(conn);
conn->c_reconnect_racing = 0;
}