www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: RDS rolling upgrade
author Saeed Mahameed <saeedm@mellanox.com>
Thu, 31 Jan 2013 08:37:19 +0000 (10:37 +0200)
committer Mukesh Kacker <mukesh.kacker@oracle.com>
Wed, 8 Jul 2015 20:37:55 +0000 (13:37 -0700)
Changes to support rolling upgrade from RDS protocol version 3.1 to 4.1

Signed-off-by: Bang Nguyen <bang.nguyen@oracle.com>
(cherry picked from commit 6788b32aeb00a1ac4b3815680c029911c431031a)

net/rds/af_rds.c
net/rds/connection.c
net/rds/ib.h
net/rds/ib_cm.c
net/rds/rdma_transport.c
net/rds/rds.h
net/rds/send.c
net/rds/threads.c

index aa7a6f687dd6767925c8dcb1210a9434e4fbcece..48406609889c212f7111ec53fa0dcdc35232b71a 100644 (file)
@@ -46,10 +46,6 @@ static unsigned int rds_ib_retry_count = 0xdead;
 module_param(rds_ib_retry_count, int, 0444);
 MODULE_PARM_DESC(rds_ib_retry_count, "UNUSED, set param in rds_rdma instead");
 
-static int rds_qos_enabled = 1;
-module_param(rds_qos_enabled, int, 0444);
-MODULE_PARM_DESC(rds_qos_enabled, "Set to enable QoS");
-
 static char *rds_qos_threshold = NULL;
 module_param(rds_qos_threshold, charp, 0444);
 MODULE_PARM_DESC(rds_qos_threshold, "<tos>:<max_msg_size>[,<tos>:<max_msg_size>]*");
@@ -220,9 +216,6 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 
        switch (cmd) {
        case SIOCRDSSETTOS:
-               if (!rds_qos_enabled)
-                       return -EOPNOTSUPP;
-
                if (get_user(tos, (rds_tos_t __user *)arg))
                        return -EFAULT;
 
index b0ed223b97cbd6dec0493e5376895e1eef5d1418..e21fc5f0a9dcbd9f08811b1a8f8f17151d18fc3a 100644 (file)
@@ -208,6 +208,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
        conn->c_reconnect_warn = 1;
        conn->c_reconnect_drops = 0;
        conn->c_reconnect_err = 0;
+       conn->c_proposed_version = RDS_PROTOCOL_VERSION;
 
        INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
        INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
index 44092f905beba5ebf71146552b2b8786a5c3da92..886ac0699c05cdd92b5e5d5812e087b5fff00d79 100644 (file)
@@ -28,7 +28,7 @@
 
 #define RDS_IB_DEFAULT_TIMEOUT          16 /* 4.096 * 2 ^ 16 = 260 msec */
 
-#define RDS_IB_SUPPORTED_PROTOCOLS     0x00000007      /* minor versions supported */
+#define RDS_IB_SUPPORTED_PROTOCOLS     0x00000003      /* minor versions supported */
 
 #define RDS_IB_RECYCLE_BATCH_COUNT     32
 
@@ -83,10 +83,9 @@ struct rds_ib_connect_private {
        u8                      dp_protocol_major;
        u8                      dp_protocol_minor;
        __be16                  dp_protocol_minor_mask; /* bitmask */
-       __be32                  dp_reserved1;
+       __be32                  dp_tos;
        __be64                  dp_ack_seq;
        __be32                  dp_credit;              /* non-zero enables flow ctl */
-       u8                      dp_tos;
 };
 
 struct rds_ib_send_work {
index b140d671e1b87064b45842e3c3eab0aadd31d61b..bce6a638cccf1acf42301959421dd2020dd2bb3e 100644 (file)
@@ -144,20 +144,8 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
                }
        }
 
-       if (conn->c_version < RDS_PROTOCOL(3, 2)) {
-               if (conn->c_version == RDS_PROTOCOL(3, 1)) {
-                       if (conn->c_tos) {
-                               printk(KERN_NOTICE "RDS: Connection to"
-                                       " %u.%u.%u.%u version %u.%u Tos %d"
-                                       " failed, not supporting QoS\n",
-                                       NIPQUAD(conn->c_faddr),
-                                       RDS_PROTOCOL_MAJOR(conn->c_version),
-                                       RDS_PROTOCOL_MINOR(conn->c_version),
-                                       conn->c_tos);
-                               rds_conn_drop(conn);
-                               return;
-                       }
-               } else {
+       if (conn->c_version < RDS_PROTOCOL_VERSION) {
+               if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) {
                        /*
                           * BUG: destroying connection here can deadlock with
                           * the CM event handler on the c_cm_lock.
@@ -170,6 +158,8 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
                                RDS_PROTOCOL_MINOR(conn->c_version));
                        rds_conn_destroy(conn);
                        return;
+               } else {
+                       conn->c_proposed_version = RDS_PROTOCOL_VERSION;
                }
        }
 
@@ -276,7 +266,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
                dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
                dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
                dp->dp_ack_seq = rds_ib_piggyb_ack(ic);
-               dp->dp_tos = conn->c_tos;
+               dp->dp_tos = cpu_to_be32(conn->c_tos);
 
                /* Advertise flow control */
                if (ic->i_flowctl) {
@@ -679,13 +669,16 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event)
        /* Even if len is crap *now* I still want to check it. -ASG */
        if (event->param.conn.private_data_len < sizeof(*dp)
            || dp->dp_protocol_major == 0)
-               return RDS_PROTOCOL_3_0;
+               return RDS_PROTOCOL_4_0;
 
        common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IB_SUPPORTED_PROTOCOLS;
-       if (dp->dp_protocol_major == 3 && common) {
-               version = RDS_PROTOCOL_3_0;
+       if (dp->dp_protocol_major == 4 && common) {
+               version = RDS_PROTOCOL_4_0;
                while ((common >>= 1) != 0)
                        version++;
+       } else if (RDS_PROTOCOL_COMPAT_VERSION ==
+               RDS_PROTOCOL(dp->dp_protocol_major, dp->dp_protocol_minor)) {
+               version = RDS_PROTOCOL_COMPAT_VERSION;
        } else if (printk_ratelimit()) {
                printk(KERN_NOTICE "RDS: Connection from %pI4 using "
                        "incompatible protocol version %u.%u\n",
@@ -721,7 +714,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                 (unsigned long long)be64_to_cpu(fguid));
 
        conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_ib_transport,
-                              dp->dp_tos, GFP_KERNEL);
+                              be32_to_cpu(dp->dp_tos), GFP_KERNEL);
        if (IS_ERR(conn)) {
                rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
                conn = NULL;
@@ -828,7 +821,7 @@ out:
        if (conn)
                mutex_unlock(&conn->c_cm_lock);
        if (err)
-               rdma_reject(cm_id, NULL, 0);
+               rdma_reject(cm_id, &err, sizeof(int));
        return destroy;
 }
 
@@ -851,7 +844,7 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
 
        /* If the peer doesn't do protocol negotiation, we must
         * default to RDSv3.0 */
-       rds_ib_set_protocol(conn, RDS_PROTOCOL_3_0);
+       rds_ib_set_protocol(conn, RDS_PROTOCOL_4_0);
        ic->i_flowctl = rds_ib_sysctl_flow_control;     /* advertise flow control */
 
        ret = rds_ib_setup_qp(conn);
@@ -860,8 +853,8 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
                goto out;
        }
 
-       rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION,
-               UINT_MAX, UINT_MAX);
+       rds_ib_cm_fill_conn_param(conn, &conn_param, &dp,
+                               conn->c_proposed_version, UINT_MAX, UINT_MAX);
        ret = rdma_connect(cm_id, &conn_param);
        if (ret)
                rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
index c24b640801f94e91f819be16183b2b2381bbd33f..82ca9ee34aee667921034b8b71739c1348ea3e30 100644 (file)
@@ -40,6 +40,8 @@
 #include <net/sock.h>
 #include <net/inet_common.h>
 
+#define RDS_IB_REJ_CONSUMER_DEFINED 28
+
 static struct rdma_cm_id *rds_iw_listen_id;
 
 int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
@@ -52,6 +54,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
        struct arpreq *r;
        struct sockaddr_in *sin;
        int ret = 0;
+       int *err;
 
        rdsdebug("conn %p id %p handling event %u\n", conn, cm_id,
                 event->event);
@@ -160,12 +163,29 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
        case RDMA_CM_EVENT_ADDR_ERROR:
        case RDMA_CM_EVENT_CONNECT_ERROR:
        case RDMA_CM_EVENT_UNREACHABLE:
-       case RDMA_CM_EVENT_REJECTED:
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
                if (conn)
                        rds_conn_drop(conn);
                break;
 
+       case RDMA_CM_EVENT_REJECTED:
+               err = (int *)event->param.conn.private_data;
+               if (conn) {
+                       if ((*err) == 0 &&
+                               event->status == RDS_IB_REJ_CONSUMER_DEFINED) {
+                               /* rejection from 3.x protocol */
+                               if (!conn->c_tos) {
+                                       /* retry the connect with a
+                                        * lower compatible protocol */
+                                       conn->c_proposed_version =
+                                               RDS_PROTOCOL_COMPAT_VERSION;
+                                       rds_conn_drop(conn);
+                               }
+                       } else
+                               rds_conn_drop(conn);
+               }
+               break;
+
        case RDMA_CM_EVENT_ADDR_CHANGE:
 #if RDMA_RDS_APM_SUPPORTED
                if (conn && !rds_ib_apm_enabled)
index 4716e239d5ecb18c411370d5651741be9446091a..988aa458660e83e2a53f9d0eab7c6ea3e5fbdfd4 100644 (file)
  */
 #define RDS_PROTOCOL_3_0       0x0300
 #define RDS_PROTOCOL_3_1       0x0301
-#define RDS_PROTOCOL_3_2        0x0302
-#define RDS_PROTOCOL_VERSION    RDS_PROTOCOL_3_2
+#define RDS_PROTOCOL_4_0       0x0400
+#define RDS_PROTOCOL_4_1       0x0401
+#define RDS_PROTOCOL_COMPAT_VERSION    RDS_PROTOCOL_3_1
+#define RDS_PROTOCOL_VERSION    RDS_PROTOCOL_4_1
 #define RDS_PROTOCOL_MAJOR(v)  ((v) >> 8)
 #define RDS_PROTOCOL_MINOR(v)  ((v) & 255)
 #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min)
@@ -137,6 +139,7 @@ struct rds_connection {
        unsigned int            c_unacked_bytes;
 
        /* Protocol version */
+       unsigned int            c_proposed_version;
        unsigned int            c_version;
 
        /* Re-connect stall diagnostics */
@@ -154,7 +157,7 @@ struct rds_connection {
 
        unsigned long           c_hb_start;
 
-       unsigned int            c_active_side;
+       struct rds_connection   *c_base_conn;
 };
 
 #define RDS_FLAG_CONG_BITMAP   0x01
index 0ac3ce9bbddd226a0246e10551c12d8087c54872..cf66b6797cdcc0a27f3f206e2b54fd0281165499 100644 (file)
@@ -1000,30 +1000,15 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
                rds_message_addref(rm);
 
                spin_lock(&conn->c_lock);
+               if (conn->c_pending_flush) {
+                       spin_unlock(&conn->c_lock);
+                       spin_unlock_irqrestore(&rs->rs_lock, flags);
+                       goto out;
+               }
                rm->m_inc.i_hdr.h_sequence = cpu_to_be64(conn->c_next_tx_seq++);
                list_add_tail(&rm->m_conn_item, &conn->c_send_queue);
                set_bit(RDS_MSG_ON_CONN, &rm->m_flags);
 
-               /* This can race with rds_send_reset. If an async op sneaked
-                * in after resetting the send state, flush it too.
-                */
-               if (conn->c_pending_flush) {
-                       if (rm->rdma.op_active) {
-                               if (rm->rdma.op_notifier) {
-                                       rm->rdma.op_notifier->n_conn = conn;
-                                       conn->c_pending_flush++;
-                               }
-                               set_bit(RDS_MSG_FLUSH, &rm->m_flags);
-                       }
-                       if (rm->data.op_active && rm->data.op_async) {
-                               if (rm->data.op_notifier) {
-                                       rm->data.op_notifier->n_conn = conn;
-                                       conn->c_pending_flush++;
-                               }
-                               set_bit(RDS_MSG_FLUSH, &rm->m_flags);
-                       }
-               }
-
                spin_unlock(&conn->c_lock);
 
                rdsdebug("queued msg %p len %d, rs %p bytes %d seq %llu\n",
@@ -1168,11 +1153,6 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
        return ret;
 }
 
-struct user_hdr {
-       u32     seq;
-       u8      op;
-};
-
 int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 {
        struct sock *sk = sock->sk;
@@ -1266,15 +1246,30 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
                        ret = PTR_ERR(conn);
                        goto out;
                }
+
+               if (rs->rs_tos && !conn->c_base_conn) {
+                       conn->c_base_conn = rds_conn_create_outgoing(
+                                       rs->rs_bound_addr, daddr,
+                                       rs->rs_transport, 0,
+                                       sock->sk->sk_allocation);
+                       if (IS_ERR(conn->c_base_conn)) {
+                               ret = PTR_ERR(conn->c_base_conn);
+                               goto out;
+                       }
+                       rds_conn_connect_if_down(conn->c_base_conn);
+               }
                rs->rs_conn = conn;
        }
 
-       /*
-       if (allocated_mr && conn->c_cleanup_stale_mrs) {
-               rds_rdma_cleanup_stale_mrs(rs, conn);
-               conn->c_cleanup_stale_mrs = 0;
+       if (conn->c_tos && !rds_conn_up(conn)) {
+               if (!rds_conn_up(conn->c_base_conn)) {
+                       ret = -EAGAIN;
+                       goto out;
+               } else if (conn->c_base_conn->c_version ==
+                               RDS_PROTOCOL_COMPAT_VERSION) {
+                       conn = conn->c_base_conn;
+               }
        }
-       */
 
        /* Not accepting new sends until all the failed ops have been reaped */
        if (rds_async_send_enabled && conn->c_pending_flush) {
@@ -1318,6 +1313,10 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
                        ret = -EAGAIN;
                        goto out;
                }
+               if (conn->c_pending_flush) {
+                       ret = -EAGAIN;
+                       goto out;
+               }
 
                timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
                                        rds_send_queue_rm(rs, conn, rm,
index 96b9c99b7aa2d01302be75c803026b29a6e111a0..1c0ac67bbc693e67b71f45639f8f35d1658a7198 100644 (file)
@@ -165,14 +165,10 @@ void rds_connect_worker(struct work_struct *work)
 
                if (ret) {
                        if (rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_DOWN)) {
-                               if (conn->c_reconnect && conn->c_active_side)
-                                       rds_queue_reconnect(conn);
+                               rds_queue_reconnect(conn);
                        } else
                                rds_conn_error(conn, "RDS: connect failed\n");
                }
-
-               if (!conn->c_reconnect)
-                       conn->c_active_side = 1;
        }
 }
 
@@ -258,9 +254,6 @@ void rds_shutdown_worker(struct work_struct *work)
        struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);
 
        rds_conn_shutdown(conn);
-
-       if (!conn->c_reconnect)
-               conn->c_active_side = 0;
 }
 
 void rds_threads_exit(void)