From: Saeed Mahameed Date: Thu, 31 Jan 2013 08:37:19 +0000 (+0200) Subject: RDS: RDS rolling upgrade X-Git-Tag: v4.1.12-92~293^2^2~79 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=aba1da6784a54887bf5de9fed19f1e1ae818d41c;p=users%2Fjedix%2Flinux-maple.git RDS: RDS rolling upgrade Changes to support rolling upgrade from RDS protocol version 3.1 to 4.1 Signed-off-by: Bang Nguyen (cherry picked from commit 6788b32aeb00a1ac4b3815680c029911c431031a) --- diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index aa7a6f687dd6..48406609889c 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -46,10 +46,6 @@ static unsigned int rds_ib_retry_count = 0xdead; module_param(rds_ib_retry_count, int, 0444); MODULE_PARM_DESC(rds_ib_retry_count, "UNUSED, set param in rds_rdma instead"); -static int rds_qos_enabled = 1; -module_param(rds_qos_enabled, int, 0444); -MODULE_PARM_DESC(rds_qos_enabled, "Set to enable QoS"); - static char *rds_qos_threshold = NULL; module_param(rds_qos_threshold, charp, 0444); MODULE_PARM_DESC(rds_qos_threshold, ":[,:]*"); @@ -220,9 +216,6 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) switch (cmd) { case SIOCRDSSETTOS: - if (!rds_qos_enabled) - return -EOPNOTSUPP; - if (get_user(tos, (rds_tos_t __user *)arg)) return -EFAULT; diff --git a/net/rds/connection.c b/net/rds/connection.c index b0ed223b97cb..e21fc5f0a9dc 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -208,6 +208,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, conn->c_reconnect_warn = 1; conn->c_reconnect_drops = 0; conn->c_reconnect_err = 0; + conn->c_proposed_version = RDS_PROTOCOL_VERSION; INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker); INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker); diff --git a/net/rds/ib.h b/net/rds/ib.h index 44092f905beb..886ac0699c05 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -28,7 +28,7 @@ #define RDS_IB_DEFAULT_TIMEOUT 16 /* 4.096 * 2 ^ 16 = 260 msec */ -#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000007 /* minor versions supported */ +#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */ #define RDS_IB_RECYCLE_BATCH_COUNT 32 @@ -83,10 +83,9 @@ struct rds_ib_connect_private { u8 dp_protocol_major; u8 dp_protocol_minor; __be16 dp_protocol_minor_mask; /* bitmask */ - __be32 dp_reserved1; + __be32 dp_tos; __be64 dp_ack_seq; __be32 dp_credit; /* non-zero enables flow ctl */ - u8 dp_tos; }; struct rds_ib_send_work { diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index b140d671e1b8..bce6a638cccf 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -144,20 +144,8 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even } } - if (conn->c_version < RDS_PROTOCOL(3, 2)) { - if (conn->c_version == RDS_PROTOCOL(3, 1)) { - if (conn->c_tos) { - printk(KERN_NOTICE "RDS: Connection to" - " %u.%u.%u.%u version %u.%u Tos %d" - " failed, not supporting QoS\n", - NIPQUAD(conn->c_faddr), - RDS_PROTOCOL_MAJOR(conn->c_version), - RDS_PROTOCOL_MINOR(conn->c_version), - conn->c_tos); - rds_conn_drop(conn); - return; - } - } else { + if (conn->c_version < RDS_PROTOCOL_VERSION) { + if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) { /* * BUG: destroying connection here can deadlock with * the CM event handler on the c_cm_lock. @@ -170,6 +158,8 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even RDS_PROTOCOL_MINOR(conn->c_version)); rds_conn_destroy(conn); return; + } else { + conn->c_proposed_version = RDS_PROTOCOL_VERSION; } } @@ -276,7 +266,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version); dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); dp->dp_ack_seq = rds_ib_piggyb_ack(ic); - dp->dp_tos = conn->c_tos; + dp->dp_tos = cpu_to_be32(conn->c_tos); /* Advertise flow control */ if (ic->i_flowctl) { @@ -679,13 +669,16 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event) /* Even if len is crap *now* I still want to check it. -ASG */ if (event->param.conn.private_data_len < sizeof(*dp) || dp->dp_protocol_major == 0) - return RDS_PROTOCOL_3_0; + return RDS_PROTOCOL_4_0; common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IB_SUPPORTED_PROTOCOLS; - if (dp->dp_protocol_major == 3 && common) { - version = RDS_PROTOCOL_3_0; + if (dp->dp_protocol_major == 4 && common) { + version = RDS_PROTOCOL_4_0; while ((common >>= 1) != 0) version++; + } else if (RDS_PROTOCOL_COMPAT_VERSION == + RDS_PROTOCOL(dp->dp_protocol_major, dp->dp_protocol_minor)) { + version = RDS_PROTOCOL_COMPAT_VERSION; } else if (printk_ratelimit()) { printk(KERN_NOTICE "RDS: Connection from %pI4 using " "incompatible protocol version %u.%u\n", @@ -721,7 +714,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, (unsigned long long)be64_to_cpu(fguid)); conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_ib_transport, - dp->dp_tos, GFP_KERNEL); + be32_to_cpu(dp->dp_tos), GFP_KERNEL); if (IS_ERR(conn)) { rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn)); conn = NULL; @@ -828,7 +821,7 @@ out: if (conn) mutex_unlock(&conn->c_cm_lock); if (err) - rdma_reject(cm_id, NULL, 0); + rdma_reject(cm_id, &err, sizeof(int)); return destroy; } @@ -851,7 +844,7 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id) /* If the peer doesn't do protocol negotiation, we must * default to RDSv3.0 */ - rds_ib_set_protocol(conn, RDS_PROTOCOL_3_0); + rds_ib_set_protocol(conn, RDS_PROTOCOL_4_0); ic->i_flowctl = rds_ib_sysctl_flow_control; /* advertise flow control */ ret = rds_ib_setup_qp(conn); @@ -860,8 +853,8 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id) goto out; } - rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION, - UINT_MAX, UINT_MAX); + rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, + conn->c_proposed_version, UINT_MAX, UINT_MAX); ret = rdma_connect(cm_id, &conn_param); if (ret) rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret); diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index c24b640801f9..82ca9ee34aee 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -40,6 +40,8 @@ #include #include +#define RDS_IB_REJ_CONSUMER_DEFINED 28 + static struct rdma_cm_id *rds_iw_listen_id; int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, @@ -52,6 +54,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, struct arpreq *r; struct sockaddr_in *sin; int ret = 0; + int *err; rdsdebug("conn %p id %p handling event %u\n", conn, cm_id, event->event); @@ -160,12 +163,29 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: - case RDMA_CM_EVENT_REJECTED: case RDMA_CM_EVENT_DEVICE_REMOVAL: if (conn) rds_conn_drop(conn); break; + case RDMA_CM_EVENT_REJECTED: + err = (int *)event->param.conn.private_data; + if (conn) { + if ((*err) == 0 && + event->status == RDS_IB_REJ_CONSUMER_DEFINED) { + /* rejection from 3.x protocol */ + if (!conn->c_tos) { + /* retry the connect with a + * lower compatible protocol */ + conn->c_proposed_version = + RDS_PROTOCOL_COMPAT_VERSION; + rds_conn_drop(conn); + } + } else + rds_conn_drop(conn); + } + break; + case RDMA_CM_EVENT_ADDR_CHANGE: #if RDMA_RDS_APM_SUPPORTED if (conn && !rds_ib_apm_enabled) diff --git a/net/rds/rds.h b/net/rds/rds.h index 4716e239d5ec..988aa458660e 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -17,8 +17,10 @@ */ #define RDS_PROTOCOL_3_0 0x0300 #define RDS_PROTOCOL_3_1 0x0301 -#define RDS_PROTOCOL_3_2 0x0302 -#define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_2 +#define RDS_PROTOCOL_4_0 0x0400 +#define RDS_PROTOCOL_4_1 0x0401 +#define RDS_PROTOCOL_COMPAT_VERSION RDS_PROTOCOL_3_1 +#define RDS_PROTOCOL_VERSION RDS_PROTOCOL_4_1 #define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) #define RDS_PROTOCOL_MINOR(v) ((v) & 255) #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) @@ -137,6 +139,7 @@ struct rds_connection { unsigned int c_unacked_bytes; /* Protocol version */ + unsigned int c_proposed_version; unsigned int c_version; /* Re-connect stall diagnostics */ @@ -154,7 +157,7 @@ struct rds_connection { unsigned long c_hb_start; - unsigned int c_active_side; + struct rds_connection *c_base_conn; }; #define RDS_FLAG_CONG_BITMAP 0x01 diff --git a/net/rds/send.c b/net/rds/send.c index 0ac3ce9bbddd..cf66b6797cdc 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1000,30 +1000,15 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn, rds_message_addref(rm); spin_lock(&conn->c_lock); + if (conn->c_pending_flush) { + spin_unlock(&conn->c_lock); + spin_unlock_irqrestore(&rs->rs_lock, flags); + goto out; + } rm->m_inc.i_hdr.h_sequence = cpu_to_be64(conn->c_next_tx_seq++); list_add_tail(&rm->m_conn_item, &conn->c_send_queue); set_bit(RDS_MSG_ON_CONN, &rm->m_flags); - /* This can race with rds_send_reset. If an async op sneaked - * in after resetting the send state, flush it too. - */ - if (conn->c_pending_flush) { - if (rm->rdma.op_active) { - if (rm->rdma.op_notifier) { - rm->rdma.op_notifier->n_conn = conn; - conn->c_pending_flush++; - } - set_bit(RDS_MSG_FLUSH, &rm->m_flags); - } - if (rm->data.op_active && rm->data.op_async) { - if (rm->data.op_notifier) { - rm->data.op_notifier->n_conn = conn; - conn->c_pending_flush++; - } - set_bit(RDS_MSG_FLUSH, &rm->m_flags); - } - } - spin_unlock(&conn->c_lock); rdsdebug("queued msg %p len %d, rs %p bytes %d seq %llu\n", @@ -1168,11 +1153,6 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, return ret; } -struct user_hdr { - u32 seq; - u8 op; -}; - int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) { struct sock *sk = sock->sk; @@ -1266,15 +1246,30 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) ret = PTR_ERR(conn); goto out; } + + if (rs->rs_tos && !conn->c_base_conn) { + conn->c_base_conn = rds_conn_create_outgoing( + rs->rs_bound_addr, daddr, + rs->rs_transport, 0, + sock->sk->sk_allocation); + if (IS_ERR(conn->c_base_conn)) { + ret = PTR_ERR(conn->c_base_conn); + goto out; + } + rds_conn_connect_if_down(conn->c_base_conn); + } rs->rs_conn = conn; } - /* - if (allocated_mr && conn->c_cleanup_stale_mrs) { - rds_rdma_cleanup_stale_mrs(rs, conn); - conn->c_cleanup_stale_mrs = 0; + if (conn->c_tos && !rds_conn_up(conn)) { + if (!rds_conn_up(conn->c_base_conn)) { + ret = -EAGAIN; + goto out; + } else if (conn->c_base_conn->c_version == + RDS_PROTOCOL_COMPAT_VERSION) { + conn = conn->c_base_conn; + } } - */ /* Not accepting new sends until all the failed ops have been reaped */ if (rds_async_send_enabled && conn->c_pending_flush) { @@ -1318,6 +1313,10 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) ret = -EAGAIN; goto out; } + if (conn->c_pending_flush) { + ret = -EAGAIN; + goto out; + } timeo = wait_event_interruptible_timeout(*sk_sleep(sk), rds_send_queue_rm(rs, conn, rm, diff --git a/net/rds/threads.c b/net/rds/threads.c index 96b9c99b7aa2..1c0ac67bbc69 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -165,14 +165,10 @@ void rds_connect_worker(struct work_struct *work) if (ret) { if (rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_DOWN)) { - if (conn->c_reconnect && conn->c_active_side) - rds_queue_reconnect(conn); + rds_queue_reconnect(conn); } else rds_conn_error(conn, "RDS: connect failed\n"); } - - if (!conn->c_reconnect) - conn->c_active_side = 1; } } @@ -258,9 +254,6 @@ void rds_shutdown_worker(struct work_struct *work) struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w); rds_conn_shutdown(conn); - - if (!conn->c_reconnect) - conn->c_active_side = 0; } void rds_threads_exit(void)