www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS merge for UEK2
author: Bang Nguyen <bang.nguyen@oracle.com>
Mon, 14 Jan 2013 05:54:09 +0000 (21:54 -0800)
committer: Mukesh Kacker <mukesh.kacker@oracle.com>
Wed, 8 Jul 2015 20:12:36 +0000 (13:12 -0700)
Orabug: 15997083

This is merged code of Mellanox OFED R2, 0080 release; and ofa 4.1

Signed-off-by: Bang Nguyen <bang.nguyen@oracle.com>
(cherry picked from commit 26add53cf20e08dfa331ec22d307dab40f0c4d74)

net/rds/af_rds.c
net/rds/ib.c
net/rds/ib.h
net/rds/rds.h
net/rds/send.c
net/rds/stats.c

index ddbf568da6d4a6738e272904f9b4b90319f871d4..01af7ddfe9171dc441cc5398b1090716160247a1 100644 (file)
@@ -30,6 +30,7 @@
  * SOFTWARE.
  *
  */
+#include <linux/string.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
@@ -45,6 +46,21 @@ static unsigned int rds_ib_retry_count = 0xdead;
 module_param(rds_ib_retry_count, int, 0444);
 MODULE_PARM_DESC(rds_ib_retry_count, "UNUSED, set param in rds_rdma instead");
 
+/* QoS master switch: when 0, the SIOCRDSSETTOS ioctl fails with -EOPNOTSUPP. */
+static int rds_qos_enabled = 1;
+module_param(rds_qos_enabled, int, 0444);
+MODULE_PARM_DESC(rds_qos_enabled, "Set to enable QoS");
+
+/*
+ * Raw per-TOS threshold list as given on the module command line; it is
+ * parsed into rds_qos_threshold_tbl[] by rds_qos_threshold_init().
+ * Statics are zero/NULL by default, so no explicit initialiser is needed.
+ */
+static char *rds_qos_threshold;
+module_param(rds_qos_threshold, charp, 0444);
+MODULE_PARM_DESC(rds_qos_threshold, "<tos>:<max_msg_size>[,<tos>:<max_msg_size>]*");
+
+/* Selects what rds_check_qos_threshold() does when a threshold is exceeded. */
+static int rds_qos_threshold_action;
+module_param(rds_qos_threshold_action, int, 0444);
+MODULE_PARM_DESC(rds_qos_threshold_action,
+       "0=Ignore,1=Error,2=Statistic,3=Error_Statistic");
+
+/* Maximum payload length per TOS value; an entry of 0 means "no limit". */
+static unsigned long rds_qos_threshold_tbl[256];
+
 /* this is just used for stats gathering :/ */
 static DEFINE_SPINLOCK(rds_sock_lock);
 static unsigned long rds_sock_count;
@@ -204,11 +220,14 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
        rds_tos_t tos;
        unsigned long flags;
 
-       if (get_user(tos, (rds_tos_t __user *)arg))
-               return -EFAULT;
-
        switch (cmd) {
        case SIOCRDSSETTOS:
+               if (!rds_qos_enabled)
+                       return -EOPNOTSUPP;
+
+               if (get_user(tos, (rds_tos_t __user *)arg))
+                       return -EFAULT;
+
                spin_lock_irqsave(&rds_sock_lock, flags);
                if (rs->rs_tos || rs->rs_conn) {
                        spin_unlock_irqrestore(&rds_sock_lock, flags);
@@ -621,6 +640,105 @@ out:
        spin_unlock_irqrestore(&rds_sock_lock, flags);
 }
 
+/*
+ * Parse an unsigned number with an optional 'k'/'K' (<< 10) or 'm'/'M'
+ * (<< 20) suffix.
+ *
+ * @ptr: NUL-terminated string to parse (base auto-detected by
+ *       simple_strtoul()).
+ * @max: largest value that may be returned.
+ *
+ * Returns the scaled value, or 0 after logging a warning when the string
+ * is empty, has trailing characters, or the scaled value would exceed @max.
+ */
+static unsigned long parse_ul(char *ptr, unsigned long max)
+{
+       unsigned long val;
+       unsigned int shift = 0;
+       char *endptr;
+
+       val = simple_strtoul(ptr, &endptr, 0);
+       switch (*endptr) {
+       case 'k': case 'K':
+               shift = 10;
+               endptr++;
+               break;
+       case 'm': case 'M':
+               shift = 20;
+               endptr++;
+               break;
+       }
+
+       /*
+        * Range-check before shifting: comparing against (max >> shift)
+        * rejects values whose scaled form would wrap around unsigned long
+        * and then slip past a plain "val <= max" test.
+        */
+       if (*ptr && !*endptr && val <= (max >> shift))
+               return val << shift;
+
+       printk(KERN_WARNING "RDS: Invalid threshold number\n");
+       return 0;
+}
+
+/*
+ * Check a message's payload length against the threshold configured for
+ * its TOS.
+ *
+ * Returns 1 when the threshold is exceeded and rds_qos_threshold_action
+ * asks for an error (actions 1 and 3); returns 0 otherwise.  Actions 2 and
+ * 3 also bump the s_qos_threshold_exceeded statistic.
+ */
+int rds_check_qos_threshold(u8 tos, size_t payload_len)
+{
+       unsigned long limit = rds_qos_threshold_tbl[tos];
+
+       /* Action 0: thresholds are ignored entirely. */
+       if (!rds_qos_threshold_action)
+               return 0;
+
+       /* No limit configured, empty payload, or payload within the limit. */
+       if (!limit || !payload_len || !(limit < payload_len))
+               return 0;
+
+       switch (rds_qos_threshold_action) {
+       case 1:
+               return 1;
+       case 2:
+               rds_stats_inc(s_qos_threshold_exceeded);
+               return 0;
+       case 3:
+               rds_stats_inc(s_qos_threshold_exceeded);
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/*
+ * Fill rds_qos_threshold_tbl[] from the rds_qos_threshold module parameter,
+ * a comma-separated list of "<tos>:<max_msg_size>" entries.
+ *
+ * The table is cleared first.  Malformed entries and TOS values outside the
+ * table are reported with a warning and skipped, so a bad entry never
+ * indexes outside the table.  The parameter string itself is left intact;
+ * parsing works on a bounded local copy.
+ */
+static void rds_qos_threshold_init(void)
+{
+       char str[1024];
+       char *cur, *tok;
+
+       memset(rds_qos_threshold_tbl, 0, sizeof(rds_qos_threshold_tbl));
+
+       if (rds_qos_threshold == NULL)
+               return;
+
+       /* Bounded copy: the parameter length is controlled by the admin. */
+       if (strlcpy(str, rds_qos_threshold, sizeof(str)) >= sizeof(str))
+               printk(KERN_WARNING "RDS: Warning: QoS threshold list "
+                       "truncated to %zu bytes\n", sizeof(str) - 1);
+
+       /* strsep() resumes after the previous separator on every call. */
+       cur = str;
+       while ((tok = strsep(&cur, ",")) != NULL) {
+               char *threshold_str, *end;
+               unsigned long qos;
+
+               threshold_str = strchr(tok, ':');
+               if (!threshold_str) {
+                       printk(KERN_WARNING "RDS: Warning: QoS:Threshold "
+                               "%s is improperly formatted\n", tok);
+                       continue;
+               }
+               /* Split "<tos>:<size>" into two NUL-terminated strings. */
+               *threshold_str++ = '\0';
+
+               qos = simple_strtoul(tok, &end, 0);
+               if (end == tok || *end) {
+                       printk(KERN_WARNING "RDS: Warning: QoS "
+                               "%s is improperly formatted\n", tok);
+                       continue;
+               }
+               if (qos >= ARRAY_SIZE(rds_qos_threshold_tbl)) {
+                       printk(KERN_WARNING "RDS: Warning: QoS "
+                               "%lu out of range\n", qos);
+                       continue;
+               }
+
+               rds_qos_threshold_tbl[qos] = parse_ul(threshold_str, (u32)~0);
+       }
+}
+
 static void rds_exit(void)
 {
        sock_unregister(rds_family_ops.family);
@@ -662,6 +780,8 @@ static int rds_init(void)
        rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info);
        rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
 
+       rds_qos_threshold_init();
+
        goto out;
 
 out_proto:
@@ -681,8 +801,8 @@ out:
 }
 module_init(rds_init);
 
-#define DRV_VERSION     "4.0"
-#define DRV_RELDATE     "Feb 12, 2009"
+#define DRV_VERSION     "4.1"
+#define DRV_RELDATE     "Jan 04, 2013"
 
 MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>");
 MODULE_DESCRIPTION("RDS: Reliable Datagram Sockets"
index 903e9fa847a4300fb76b5800a9512a7ea6a47bda..a768633ac6270a43b03844ea99f4b91652a8d26a 100644 (file)
@@ -55,10 +55,10 @@ unsigned int rds_ib_apm_enabled = 0;
 unsigned int rds_ib_apm_fallback = 1;
 unsigned int rds_ib_haip_enabled = 0;
 unsigned int rds_ib_haip_fallback = 1;
-unsigned int rds_ib_haip_hca_failover_enabled = 1;
 unsigned int rds_ib_apm_timeout = RDS_IB_DEFAULT_TIMEOUT;
 unsigned int rds_ib_rnr_retry_count = RDS_IB_DEFAULT_RNR_RETRY_COUNT;
 unsigned int rds_ib_cq_balance_enabled = 1;
+static char *rds_ib_haip_failover_groups = NULL;
 
 module_param(rds_ib_fmr_1m_pool_size, int, 0444);
 MODULE_PARM_DESC(rds_ib_fmr_1m_pool_size, " Max number of 1m fmr per HCA");
@@ -78,8 +78,9 @@ module_param(rds_ib_apm_fallback, int, 0444);
 MODULE_PARM_DESC(rds_ib_apm_fallback, " APM failback enabled");
 module_param(rds_ib_haip_fallback, int, 0444);
 MODULE_PARM_DESC(rds_ib_haip_fallback, " HAIP failback Enabled");
-module_param(rds_ib_haip_hca_failover_enabled, int, 0444);
-MODULE_PARM_DESC(rds_ib_haip_hca_failover_enabled, " HAIP HCA failover Enabled");
+module_param(rds_ib_haip_failover_groups, charp, 0444);
+MODULE_PARM_DESC(rds_ib_haip_failover_groups,
+       "<ifname>[,<ifname>]*[;<ifname>[,<ifname>]*]*");
 module_param(rds_ib_cq_balance_enabled, int, 0444);
 MODULE_PARM_DESC(rds_ib_cq_balance_enabled, " CQ load balance Enabled");
 
@@ -338,19 +339,18 @@ static u8 rds_ib_get_failover_port(u8 port)
 
        for (i = 1; i <= ip_port_cnt; i++) {
                if (i != port &&
-                       ip_config[i].rds_ibdev == ip_config[port].rds_ibdev &&
+                       ip_config[i].failover_group ==
+                               ip_config[port].failover_group &&
                        ip_config[i].port_state == RDS_IB_PORT_UP) {
                        return i;
                }
        }
 
-       if (rds_ib_haip_hca_failover_enabled) {
-               for (i = 1; i <= ip_port_cnt; i++) {
-                       if (i != port &&
-                               ip_config[i].port_state == RDS_IB_PORT_UP) {
-                                       return i;
-                               }
-               }
+       for (i = 1; i <= ip_port_cnt; i++) {
+               if (i != port &&
+                       ip_config[i].port_state == RDS_IB_PORT_UP) {
+                               return i;
+                       }
        }
 
        return 0;
@@ -771,8 +771,8 @@ static void rds_ib_failback(struct work_struct *_work)
                } else if (ip_config[ip_config[i].ip_active_port].port_state ==
                                RDS_IB_PORT_DOWN) {
                        rds_ib_do_failover(i, 0, ip_active_port);
-               } else if (ip_config[port].rds_ibdev ==
-                               ip_config[i].rds_ibdev) {
+               } else if (ip_config[port].failover_group ==
+                               ip_config[i].failover_group) {
                        rds_ib_do_failover(i, port, ip_active_port);
                }
        }
@@ -980,6 +980,70 @@ out:
        return ret;
 }
 
+/*
+ * Assign a failover group id to every entry of ip_config[].
+ *
+ * Without the rds_ib_haip_failover_groups module parameter, all ports that
+ * share an rds_ib_device get the same group id (one group per device).
+ *
+ * With the parameter, groups are separated by ';' and interface names
+ * within a group by ',':  <ifname>[,<ifname>]*[;<ifname>[,<ifname>]*]*
+ * Group ids start at 1 and increase by one per group.  An interface that
+ * is named more than once keeps its first group and a warning is logged.
+ */
+void rds_ib_ip_failover_groups_init(void)
+{
+       char str[1024];
+       char *grp_cur, *grp, *tok;
+       unsigned int    grp_id = 1;
+       int i;
+       struct rds_ib_device *rds_ibdev;
+
+       if (rds_ib_haip_failover_groups == NULL) {
+               rcu_read_lock();
+               list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
+                       for (i = 1; i <= ip_port_cnt; i++) {
+                               if (ip_config[i].rds_ibdev == rds_ibdev)
+                                       ip_config[i].failover_group = grp_id;
+                       }
+                       grp_id++;
+               }
+               rcu_read_unlock();
+               return;
+       }
+
+       /* Parse a bounded local copy so the parameter string stays intact. */
+       if (strlcpy(str, rds_ib_haip_failover_groups, sizeof(str)) >=
+                       sizeof(str))
+               printk(KERN_WARNING "RDS/IB: failover group list truncated to %zu bytes\n",
+                       sizeof(str) - 1);
+
+       /*
+        * strsep() continues from just past the previous separator, so every
+        * group and every name in it is visited, not only the first ones.
+        */
+       grp_cur = str;
+       while ((grp = strsep(&grp_cur, ";")) != NULL) {
+               while ((tok = strsep(&grp, ",")) != NULL) {
+                       for (i = 1; i <= ip_port_cnt; i++) {
+                               if (strcmp(tok, ip_config[i].if_name))
+                                       continue;
+
+                               if (!ip_config[i].failover_group)
+                                       ip_config[i].failover_group = grp_id;
+                               else
+                                       printk(KERN_WARNING "RDS/IB: %s is already part of another failover group\n", tok);
+                               break;
+                       }
+               }
+               grp_id++;
+       }
+}
+
 void rds_ib_add_one(struct ib_device *device)
 {
        struct rds_ib_device *rds_ibdev;
@@ -1224,6 +1288,8 @@ int rds_ib_init(void)
                goto out_srq;
        }
 
+       rds_ib_ip_failover_groups_init();
+
        register_netdevice_notifier(&rds_ib_nb);
 
        goto out;
index 3c6675c6663c02407f13f72e00269ce371b1936d..944b6e6927436a0dd2a7327b26335b0bb0765c54 100644 (file)
@@ -272,6 +272,7 @@ enum {
 #define RDS_IB_MAX_ALIASES     100
 struct rds_ib_port {
        struct rds_ib_device    *rds_ibdev;
+       unsigned int            failover_group;
        struct net_device       *dev;
        unsigned int            port_state;
        u8                      port_num;
index 038e809c19638e5b8044445e22721e93d72bc221..dfe88f1a99b66a37858724185f61f686554a879f 100644 (file)
@@ -598,6 +598,7 @@ struct rds_statistics {
        uint64_t        s_cong_update_received;
        uint64_t        s_cong_send_error;
        uint64_t        s_cong_send_blocked;
+       uint64_t        s_qos_threshold_exceeded;
 };
 
 /* af_rds.c */
@@ -611,6 +612,7 @@ static inline void __rds_wake_sk_sleep(struct sock *sk)
        if (!sock_flag(sk, SOCK_DEAD) && waitq)
                wake_up(waitq);
 }
+/* Returns 1 if a send of payload_len bytes with this tos must be rejected. */
+int rds_check_qos_threshold(u8 tos, size_t payload_len);
 extern wait_queue_head_t rds_poll_waitq;
 
 void debug_sock_hold(struct sock *sock);
index a3b5ecc617fb4f5da61355b144ca8487d7118efe..8d454bd0576e2267ba5899ae78553a3a0a575e65 100644 (file)
@@ -581,7 +581,8 @@ void rds_rdma_send_complete(struct rds_message *rm, int status)
                notifier->n_status = status;
 
                if (!ro->op_remote_complete) {
-                       if (rds_async_send_enabled && !status) {
+                       if (!rds_async_send_enabled ||
+                               (rds_async_send_enabled && !status)) {
                                spin_lock(&rs->rs_lock);
                                list_add_tail(&notifier->n_list,
                                        &rs->rs_notify_queue);
@@ -620,7 +621,8 @@ void rds_atomic_send_complete(struct rds_message *rm, int status)
                debug_sock_hold(rds_rs_to_sk(rs));
 
                notifier->n_status = status;
-               if (rds_async_send_enabled && !status) {
+               if (!rds_async_send_enabled ||
+                       (rds_async_send_enabled && !status)) {
                        spin_lock(&rs->rs_lock);
                        list_add_tail(&notifier->n_list,
                                &rs->rs_notify_queue);
@@ -1185,6 +1187,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
        int queued = 0, allocated_mr = 0;
        int nonblock = msg->msg_flags & MSG_DONTWAIT;
        long timeo = sock_sndtimeo(sk, nonblock);
+       size_t total_payload_len = payload_len;
 
        /* Mirror Linux UDP mirror of BSD error message compatibility */
        /* XXX: Perhaps MSG_MORE someday */
@@ -1245,6 +1248,14 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
        if (ret)
                goto out;
 
+       if (rm->rdma.op_active)
+               total_payload_len += rm->rdma.op_bytes;
+
+       if (rds_check_qos_threshold(rs->rs_tos, total_payload_len)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
        /* rds_conn_create has a spinlock that runs with IRQ off.
         * Caching the conn in the socket helps a lot. */
        if (rs->rs_conn && rs->rs_conn->c_faddr == daddr &&
index 01acc9faac24aa61c59112e8fd8a44b8669e80b5..e341b37c4f78fd2f12bd1acfd5ee5c7546eaed9b 100644 (file)
@@ -75,6 +75,7 @@ static char *rds_stat_names[] = {
        "cong_update_received",
        "cong_send_error",
        "cong_send_blocked",
+       "qos_threshold_exceeded",
 };
 
 void rds_stats_info_copy(struct rds_info_iterator *iter,