module_param(rds_ib_retry_count, int, 0444);
MODULE_PARM_DESC(rds_ib_retry_count, "UNUSED, set param in rds_rdma instead");
-static int rds_qos_enabled = 1;
-module_param(rds_qos_enabled, int, 0444);
-MODULE_PARM_DESC(rds_qos_enabled, "Set to enable QoS");
-
static char *rds_qos_threshold = NULL;
module_param(rds_qos_threshold, charp, 0444);
MODULE_PARM_DESC(rds_qos_threshold, "<tos>:<max_msg_size>[,<tos>:<max_msg_size>]*");
switch (cmd) {
case SIOCRDSSETTOS:
- if (!rds_qos_enabled)
- return -EOPNOTSUPP;
-
if (get_user(tos, (rds_tos_t __user *)arg))
return -EFAULT;
conn->c_reconnect_warn = 1;
conn->c_reconnect_drops = 0;
conn->c_reconnect_err = 0;
+ conn->c_proposed_version = RDS_PROTOCOL_VERSION;
INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
#define RDS_IB_DEFAULT_TIMEOUT 16 /* 4.096 * 2 ^ 16 = 260 msec */
-#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000007 /* minor versions supported */
+#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */
#define RDS_IB_RECYCLE_BATCH_COUNT 32
u8 dp_protocol_major;
u8 dp_protocol_minor;
__be16 dp_protocol_minor_mask; /* bitmask */
- __be32 dp_reserved1;
+ __be32 dp_tos;
__be64 dp_ack_seq;
__be32 dp_credit; /* non-zero enables flow ctl */
- u8 dp_tos;
};
struct rds_ib_send_work {
}
}
- if (conn->c_version < RDS_PROTOCOL(3, 2)) {
- if (conn->c_version == RDS_PROTOCOL(3, 1)) {
- if (conn->c_tos) {
- printk(KERN_NOTICE "RDS: Connection to"
- " %u.%u.%u.%u version %u.%u Tos %d"
- " failed, not supporting QoS\n",
- NIPQUAD(conn->c_faddr),
- RDS_PROTOCOL_MAJOR(conn->c_version),
- RDS_PROTOCOL_MINOR(conn->c_version),
- conn->c_tos);
- rds_conn_drop(conn);
- return;
- }
- } else {
+ if (conn->c_version < RDS_PROTOCOL_VERSION) {
+ if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) {
/*
* BUG: destroying connection here can deadlock with
* the CM event handler on the c_cm_lock.
RDS_PROTOCOL_MINOR(conn->c_version));
rds_conn_destroy(conn);
return;
+ } else {
+ conn->c_proposed_version = RDS_PROTOCOL_VERSION;
}
}
dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
dp->dp_ack_seq = rds_ib_piggyb_ack(ic);
- dp->dp_tos = conn->c_tos;
+ dp->dp_tos = cpu_to_be32(conn->c_tos);
/* Advertise flow control */
if (ic->i_flowctl) {
/* Even if len is crap *now* I still want to check it. -ASG */
if (event->param.conn.private_data_len < sizeof(*dp)
|| dp->dp_protocol_major == 0)
- return RDS_PROTOCOL_3_0;
+ return RDS_PROTOCOL_4_0;
common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IB_SUPPORTED_PROTOCOLS;
- if (dp->dp_protocol_major == 3 && common) {
- version = RDS_PROTOCOL_3_0;
+ if (dp->dp_protocol_major == 4 && common) {
+ version = RDS_PROTOCOL_4_0;
while ((common >>= 1) != 0)
version++;
+ } else if (RDS_PROTOCOL_COMPAT_VERSION ==
+ RDS_PROTOCOL(dp->dp_protocol_major, dp->dp_protocol_minor)) {
+ version = RDS_PROTOCOL_COMPAT_VERSION;
} else if (printk_ratelimit()) {
printk(KERN_NOTICE "RDS: Connection from %pI4 using "
"incompatible protocol version %u.%u\n",
(unsigned long long)be64_to_cpu(fguid));
conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_ib_transport,
- dp->dp_tos, GFP_KERNEL);
+ be32_to_cpu(dp->dp_tos), GFP_KERNEL);
if (IS_ERR(conn)) {
rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
conn = NULL;
if (conn)
mutex_unlock(&conn->c_cm_lock);
if (err)
- rdma_reject(cm_id, NULL, 0);
+ rdma_reject(cm_id, &err, sizeof(int));
return destroy;
}
/* If the peer doesn't do protocol negotiation, we must
-	 * default to RDSv3.0 */
+	 * default to RDSv4.0 */
- rds_ib_set_protocol(conn, RDS_PROTOCOL_3_0);
+ rds_ib_set_protocol(conn, RDS_PROTOCOL_4_0);
ic->i_flowctl = rds_ib_sysctl_flow_control; /* advertise flow control */
ret = rds_ib_setup_qp(conn);
goto out;
}
- rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION,
- UINT_MAX, UINT_MAX);
+ rds_ib_cm_fill_conn_param(conn, &conn_param, &dp,
+ conn->c_proposed_version, UINT_MAX, UINT_MAX);
ret = rdma_connect(cm_id, &conn_param);
if (ret)
rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
#include <net/sock.h>
#include <net/inet_common.h>
+#define RDS_IB_REJ_CONSUMER_DEFINED 28
+
static struct rdma_cm_id *rds_iw_listen_id;
int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
struct arpreq *r;
struct sockaddr_in *sin;
int ret = 0;
+ int *err;
rdsdebug("conn %p id %p handling event %u\n", conn, cm_id,
event->event);
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
- case RDMA_CM_EVENT_REJECTED:
case RDMA_CM_EVENT_DEVICE_REMOVAL:
if (conn)
rds_conn_drop(conn);
break;
+	case RDMA_CM_EVENT_REJECTED:
+		/*
+		 * A reject can arrive without any private data. Only read
+		 * the peer's reason code when a full int is present; a
+		 * missing or short payload is treated as reason 0.
+		 */
+		err = (int *)event->param.conn.private_data;
+		if (err && event->param.conn.private_data_len < sizeof(*err))
+			err = NULL;
+		if (!conn)
+			break;
+		/* Test the status first so *err is only read when it matters. */
+		if (event->status == RDS_IB_REJ_CONSUMER_DEFINED &&
+		    (!err || *err == 0)) {
+			/* rejection from 3.x protocol */
+			if (!conn->c_tos) {
+				/* retry the connect with a
+				 * lower compatible protocol */
+				conn->c_proposed_version =
+					RDS_PROTOCOL_COMPAT_VERSION;
+				rds_conn_drop(conn);
+			}
+		} else {
+			rds_conn_drop(conn);
+		}
+		break;
+
case RDMA_CM_EVENT_ADDR_CHANGE:
#if RDMA_RDS_APM_SUPPORTED
if (conn && !rds_ib_apm_enabled)
*/
#define RDS_PROTOCOL_3_0 0x0300
#define RDS_PROTOCOL_3_1 0x0301
-#define RDS_PROTOCOL_3_2 0x0302
-#define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_2
+#define RDS_PROTOCOL_4_0 0x0400
+#define RDS_PROTOCOL_4_1 0x0401
+#define RDS_PROTOCOL_COMPAT_VERSION RDS_PROTOCOL_3_1
+#define RDS_PROTOCOL_VERSION RDS_PROTOCOL_4_1
#define RDS_PROTOCOL_MAJOR(v) ((v) >> 8)
#define RDS_PROTOCOL_MINOR(v) ((v) & 255)
#define RDS_PROTOCOL(maj, min) (((maj) << 8) | min)
unsigned int c_unacked_bytes;
/* Protocol version */
+ unsigned int c_proposed_version;
unsigned int c_version;
/* Re-connect stall diagnostics */
unsigned long c_hb_start;
- unsigned int c_active_side;
+ struct rds_connection *c_base_conn;
};
#define RDS_FLAG_CONG_BITMAP 0x01
rds_message_addref(rm);
spin_lock(&conn->c_lock);
+ if (conn->c_pending_flush) {
+ spin_unlock(&conn->c_lock);
+ spin_unlock_irqrestore(&rs->rs_lock, flags);
+ goto out;
+ }
rm->m_inc.i_hdr.h_sequence = cpu_to_be64(conn->c_next_tx_seq++);
list_add_tail(&rm->m_conn_item, &conn->c_send_queue);
set_bit(RDS_MSG_ON_CONN, &rm->m_flags);
- /* This can race with rds_send_reset. If an async op sneaked
- * in after resetting the send state, flush it too.
- */
- if (conn->c_pending_flush) {
- if (rm->rdma.op_active) {
- if (rm->rdma.op_notifier) {
- rm->rdma.op_notifier->n_conn = conn;
- conn->c_pending_flush++;
- }
- set_bit(RDS_MSG_FLUSH, &rm->m_flags);
- }
- if (rm->data.op_active && rm->data.op_async) {
- if (rm->data.op_notifier) {
- rm->data.op_notifier->n_conn = conn;
- conn->c_pending_flush++;
- }
- set_bit(RDS_MSG_FLUSH, &rm->m_flags);
- }
- }
-
spin_unlock(&conn->c_lock);
rdsdebug("queued msg %p len %d, rs %p bytes %d seq %llu\n",
return ret;
}
-struct user_hdr {
- u32 seq;
- u8 op;
-};
-
int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
{
struct sock *sk = sock->sk;
ret = PTR_ERR(conn);
goto out;
}
+
+		if (rs->rs_tos && !conn->c_base_conn) {
+			struct rds_connection *base_conn;
+
+			/*
+			 * Create the tos-0 base connection into a local first.
+			 * Storing a failed result straight into c_base_conn
+			 * would leave a non-NULL ERR_PTR there: the check above
+			 * would skip re-creation on the next send, and the
+			 * pointer would later be passed to rds_conn_up() as if
+			 * it were a real connection.
+			 */
+			base_conn = rds_conn_create_outgoing(
+					rs->rs_bound_addr, daddr,
+					rs->rs_transport, 0,
+					sock->sk->sk_allocation);
+			if (IS_ERR(base_conn)) {
+				ret = PTR_ERR(base_conn);
+				goto out;
+			}
+			conn->c_base_conn = base_conn;
+			rds_conn_connect_if_down(base_conn);
+		}
rs->rs_conn = conn;
}
- /*
- if (allocated_mr && conn->c_cleanup_stale_mrs) {
- rds_rdma_cleanup_stale_mrs(rs, conn);
- conn->c_cleanup_stale_mrs = 0;
+ if (conn->c_tos && !rds_conn_up(conn)) {
+ if (!rds_conn_up(conn->c_base_conn)) {
+ ret = -EAGAIN;
+ goto out;
+ } else if (conn->c_base_conn->c_version ==
+ RDS_PROTOCOL_COMPAT_VERSION) {
+ conn = conn->c_base_conn;
+ }
}
- */
/* Not accepting new sends until all the failed ops have been reaped */
if (rds_async_send_enabled && conn->c_pending_flush) {
ret = -EAGAIN;
goto out;
}
+ if (conn->c_pending_flush) {
+ ret = -EAGAIN;
+ goto out;
+ }
timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
rds_send_queue_rm(rs, conn, rm,
if (ret) {
if (rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_DOWN)) {
- if (conn->c_reconnect && conn->c_active_side)
- rds_queue_reconnect(conn);
+ rds_queue_reconnect(conn);
} else
rds_conn_error(conn, "RDS: connect failed\n");
}
-
- if (!conn->c_reconnect)
- conn->c_active_side = 1;
}
}
struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);
rds_conn_shutdown(conn);
-
- if (!conn->c_reconnect)
- conn->c_active_side = 0;
}
void rds_threads_exit(void)