[DR_INV_CONN_STATE] = "invalid connection state",
[DR_DOWN_TRANSITION_FAIL] = "failure to move to DOWN state",
[DR_CONN_DESTROY] = "connection destroy",
- [DR_ZERO_LANE_DOWN] = "zero lane went down",
[DR_CONN_CONNECT_FAIL] = "conn_connect failure",
[DR_HB_TIMEOUT] = "hb timeout",
[DR_RECONNECT_TIMEOUT] = "reconnect timeout",
[DR_IB_RDMA_ACCEPT_FAIL] = "rdma_accept failure",
[DR_IB_ACT_SETUP_QP_FAIL] = "active setup_qp failure",
[DR_IB_RDMA_CONNECT_FAIL] = "rdma_connect failure",
- [DR_IB_SET_IB_PATH_FAIL] = "rdma_set_ib_paths failure",
[DR_IB_RESOLVE_ROUTE_FAIL] = "resolve_route failure",
[DR_IB_RDMA_CM_ID_MISMATCH] = "detected rdma_cm_id mismatch",
[DR_IB_ROUTE_ERR] = "ROUTE_ERROR event",
ARRAY_SIZE(conn_drop_reasons), reason);
}
-static void rds_conn_probe_lanes(struct rds_connection *conn)
-{
- struct hlist_head *head =
- rds_conn_bucket(conn->c_laddr, conn->c_faddr);
- struct rds_connection *tmp;
-
- /* XXX only do this for IB transport? */
- rcu_read_lock();
- hlist_for_each_entry_rcu(tmp, head, c_hash_node) {
- if (tmp->c_faddr == conn->c_faddr &&
- tmp->c_laddr == conn->c_laddr &&
- tmp->c_tos != 0 &&
- tmp->c_trans == conn->c_trans) {
- if (rds_conn_up(tmp))
- rds_send_hb(tmp, 0);
- else if (rds_conn_connecting(tmp) &&
- (tmp->c_path[0].cp_route_resolved == 0)) {
- printk(KERN_INFO "RDS/IB: connection "
- "<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
- "connecting, force reset ",
- NIPQUAD(tmp->c_laddr),
- NIPQUAD(tmp->c_faddr),
- tmp->c_tos);
-
- rds_conn_drop(tmp, DR_ZERO_LANE_DOWN);
- }
- }
- }
- rcu_read_unlock();
-}
-
/*
* Force a disconnect
*/
conn->c_tos,
conn_drop_reason_str(cp->cp_drop_source));
- if (conn->c_tos == 0)
- rds_conn_probe_lanes(conn);
-
} else if ((cp->cp_reconnect_warn) &&
(now - cp->cp_reconnect_start > 60)) {
printk(KERN_INFO "RDS/%s: re-connect "
cp->cp_reconnect_drops,
cp->cp_reconnect_err);
cp->cp_reconnect_warn = 0;
-
- if (conn->c_tos == 0)
- rds_conn_probe_lanes(conn);
}
cp->cp_reconnect_drops++;
* while we're executing. */
if (conn) {
mutex_lock(&conn->c_cm_lock);
-
/* If the connection is being shut down, bail out
* right away. We return 0 so cm_id doesn't get
* destroyed prematurely */
case RDMA_CM_EVENT_ADDR_RESOLVED:
rdma_set_service_type(cm_id, conn->c_tos);
- if (conn->c_tos && conn->c_reconnect) {
- struct rds_ib_connection *base_ic =
- conn->c_base_conn->c_transport_data;
-
- mutex_lock(&conn->c_base_conn->c_cm_lock);
- if (rds_conn_transition(conn->c_base_conn, RDS_CONN_UP,
- RDS_CONN_UP)) {
- ret = rdma_set_ib_paths(cm_id,
- base_ic->i_cm_id->route.path_rec,
- base_ic->i_cm_id->route.num_paths);
- if (!ret) {
- struct rds_ib_connection *ic =
- conn->c_transport_data;
-
- cm_id->route.path_rec[0].sl =
- ic->i_sl;
- cm_id->route.path_rec[0].qos_class =
- conn->c_tos;
- ret = trans->cm_initiate_connect(cm_id);
- }
- } else {
- ret = 1;
- }
- mutex_unlock(&conn->c_base_conn->c_cm_lock);
-
- if (ret) {
- rds_rtd(RDS_RTD_CM,
- "ADDR_RESOLVED: ret %d, calling rds_conn_drop <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
- ret, NIPQUAD(conn->c_laddr),
- NIPQUAD(conn->c_faddr), conn->c_tos);
- rds_conn_drop(conn, DR_IB_SET_IB_PATH_FAIL);
- ret = 0;
- }
-
- break;
- }
-
-
/* XXX do we need to clean up if this fails? */
ret = rdma_resolve_route(cm_id,
rds_rdma_resolve_to_ms[conn->c_to_index]);
struct rds_ib_connection *ibic;
ibic = conn->c_transport_data;
- if (ibic && ibic->i_cm_id == cm_id)
+ if (ibic && ibic->i_cm_id == cm_id) {
+ /* ibacm caches the path record without considering the tos/sl;
+ * a lookup is treated as a match whenever <src,dest> matches the
+ * cache. To create the qp with the correct sl/vl, RDS therefore
+ * has to update the sl manually. For now, RDS assumes a 1:1
+ * tos-to-sl mapping.
+ */
+ cm_id->route.path_rec[0].sl = conn->c_tos;
+ cm_id->route.path_rec[0].qos_class = conn->c_tos;
ret = trans->cm_initiate_connect(cm_id);
- else {
+ } else {
rds_rtd(RDS_RTD_CM,
"ROUTE_RESOLVED: calling rds_conn_drop, conn %p <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
conn, NIPQUAD(conn->c_laddr),