]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
rds: CQ balance
author: Bang Nguyen <bang.nguyen@oracle.com>
Fri, 30 Nov 2012 22:07:31 +0000 (14:07 -0800)
committer: Mukesh Kacker <mukesh.kacker@oracle.com>
Wed, 8 Jul 2015 20:12:30 +0000 (13:12 -0700)
This patch provides load-balancing for RDS CQs across available interrupt vectors.

Signed-off-by: Bang Nguyen <bang.nguyen@oracle.com>
net/rds/ib.c
net/rds/ib.h
net/rds/ib_cm.c

index 205ea0f2e5aa94f1322d90988d362b05b6445767..72a1ad75ec128b53cb00c76fd8eda3f9c4aebc14 100644 (file)
@@ -57,6 +57,7 @@ unsigned int rds_ib_haip_fallback = 1;
 unsigned int rds_ib_haip_hca_failover_enabled = 1;
 unsigned int rds_ib_apm_timeout = RDS_IB_DEFAULT_TIMEOUT;
 unsigned int rds_ib_rnr_retry_count = RDS_IB_DEFAULT_RNR_RETRY_COUNT;
+unsigned int rds_ib_cq_balance_enabled = 1;
 
 module_param(rds_ib_fmr_1m_pool_size, int, 0444);
 MODULE_PARM_DESC(rds_ib_fmr_1m_pool_size, " Max number of 1m fmr per HCA");
@@ -78,7 +79,8 @@ module_param(rds_ib_haip_fallback, int, 0444);
 MODULE_PARM_DESC(rds_ib_haip_fallback, " HAIP failback Enabled");
 module_param(rds_ib_haip_hca_failover_enabled, int, 0444);
 MODULE_PARM_DESC(rds_ib_haip_hca_failover_enabled, " HAIP HCA failover Enabled");
-
+module_param(rds_ib_cq_balance_enabled, int, 0444);
+MODULE_PARM_DESC(rds_ib_cq_balance_enabled, " CQ load balance Enabled");
 
 /*
  * we have a clumsy combination of RCU and a rwsem protecting this list
@@ -99,6 +101,7 @@ struct socket        *rds_ib_inet_socket;
 
 static struct rds_ib_port *ip_config;
 static u8      ip_port_cnt = 0;
+static u8      ip_port_max;
 
 void rds_ib_nodev_connect(void)
 {
@@ -146,6 +149,9 @@ static void rds_ib_dev_free(struct work_struct *work)
                kfree(i_ipaddr);
        }
 
+       if (rds_ibdev->vector_load)
+               kfree(rds_ibdev->vector_load);
+
        kfree(rds_ibdev);
 }
 
@@ -467,7 +473,7 @@ static int rds_ib_move_ip(char                      *from_dev,
        struct page             *page;
        char                    from_dev2[2*IFNAMSIZ + 1];
        char                    to_dev2[2*IFNAMSIZ + 1];
-       int                     i, ret = 0;
+       int                     ret = 0;
        u8                      active_port;
        struct in_device        *in_dev;
 
@@ -577,7 +583,12 @@ static void rds_ib_init_port(struct rds_ib_device  *rds_ibdev,
                                struct net_device       *net_dev,
                                u8                      port_num)
 {
-       ip_port_cnt++;
+       if (ip_port_cnt++ > ip_port_max) {
+               printk(KERN_ERR "RDS/IB: Exceeded max ports (%d)\n",
+                       ip_port_max);
+               return;
+       }
+
        ip_config[ip_port_cnt].port_num = port_num;
        ip_config[ip_port_cnt].dev = net_dev;
        ip_config[ip_port_cnt].rds_ibdev = rds_ibdev;
@@ -893,7 +904,7 @@ static void rds_ib_dump_ip_config(void)
        }
 }
 
-static int rds_ib_setup_ports(void)
+static int rds_ib_ip_config_init(void)
 {
        struct net_device       *dev;
        struct in_ifaddr        *ifa;
@@ -904,7 +915,21 @@ static int rds_ib_setup_ports(void)
        int                     ret = 0;
 
        if (!rds_ib_haip_enabled)
-               return ret;
+               return 0;
+
+       ip_port_max = 0;
+       rcu_read_lock();
+       list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
+               ip_port_max += rds_ibdev->dev->phys_port_cnt;
+       }
+       rcu_read_unlock();
+
+       ip_config = kzalloc(sizeof(struct rds_ib_port) *
+                               (ip_port_max + 1), GFP_KERNEL);
+       if (!ip_config) {
+               printk(KERN_ERR "RDS/IB: failed to allocate IP config\n");
+               return 1;
+       }
 
        read_lock(&dev_base_lock);
        for_each_netdev(&init_net, dev) {
@@ -1008,15 +1033,6 @@ void rds_ib_add_one(struct ib_device *device)
        }
 
        if (rds_ib_haip_enabled) {
-               ip_config = kzalloc(sizeof(struct rds_ib_port) *
-                                       RDS_IB_MAX_PORTS + 1, GFP_KERNEL);
-
-               if (!ip_config) {
-                       printk(KERN_ERR
-                               "RDS/IB: failed to allocate IP config\n");
-                       goto put_dev;
-               }
-
                INIT_IB_EVENT_HANDLER(&rds_ibdev->event_handler,
                                rds_ibdev->dev, rds_ib_event_handler);
                if (ib_register_event_handler(&rds_ibdev->event_handler)) {
@@ -1026,6 +1042,13 @@ void rds_ib_add_one(struct ib_device *device)
                }
        }
 
+       rds_ibdev->vector_load = kzalloc(sizeof(int) *
+                                       device->num_comp_vectors, GFP_KERNEL);
+       if (!rds_ibdev->vector_load) {
+               printk(KERN_ERR "RDS/IB: failed to allocate vector memoru\n");
+               goto put_dev;
+       }
+
        rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, IB_ACCESS_LOCAL_WRITE);
        if (IS_ERR(rds_ibdev->mr)) {
                rds_ibdev->mr = NULL;
@@ -1085,6 +1108,9 @@ void rds_ib_exit(void)
        rds_ib_recv_exit();
        rds_trans_unregister(&rds_ib_transport);
        rds_ib_fmr_exit();
+
+       if (ip_config)
+               kfree(ip_config);
 }
 
 struct rds_transport rds_ib_transport = {
@@ -1229,7 +1255,7 @@ int rds_ib_init(void)
 
        rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
 
-       ret = rds_ib_setup_ports();
+       ret = rds_ib_ip_config_init();
        if (ret) {
                printk(KERN_ERR "RDS/IB: failed to init port\n");
                goto out_srq;
index 58e9a285c3a53d08b0860380945d48ecced6367a..3c6675c6663c02407f13f72e00269ce371b1936d 100644 (file)
@@ -225,6 +225,9 @@ struct rds_ib_connection {
        struct rds_ib_path      i_cur_path;
        unsigned int            i_alt_path_index;
        unsigned int            i_active_side;
+
+       int                     i_scq_vector;
+       int                     i_rcq_vector;
 };
 
 /* This assumes that atomic_t is at least 32 bits */
@@ -267,7 +270,6 @@ enum {
 };
 
 #define RDS_IB_MAX_ALIASES     100
-#define RDS_IB_MAX_PORTS       10
 struct rds_ib_port {
        struct rds_ib_device    *rds_ibdev;
        struct net_device       *dev;
@@ -316,6 +318,7 @@ struct rds_ib_device {
        struct rds_ib_srq       *srq;
        struct rds_ib_port      *ports;
        struct ib_event_handler event_handler;
+       int                     *vector_load;
 };
 
 #define pcidev_to_node(pcidev) pcibus_to_node(pcidev->bus)
@@ -427,6 +430,7 @@ extern unsigned int rds_ib_haip_enabled;
 extern unsigned int rds_ib_haip_fallback;
 extern unsigned int rds_ib_haip_failover_enabled;
 extern unsigned int rds_ib_apm_timeout;
+extern unsigned int rds_ib_cq_balance_enabled;
 
 extern spinlock_t ib_nodev_conns_lock;
 extern struct list_head ib_nodev_conns;
index 82abce6a81c519831e960589b967cd13e2e86f9a..69773f894ce8af909365fcaed2534a43bdc60143 100644 (file)
@@ -457,6 +457,26 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
        }
 }
 
+static int rds_ib_find_least_loaded_vector(struct rds_ib_device *rds_ibdev)
+{      /* Picks a completion vector for a new CQ on rds_ibdev and charges it in vector_load[]. */
+       int i;
+       int index = 0;  /* vector 0 is the initial candidate */
+       int min = rds_ibdev->vector_load[0];    /* read happens before the enabled check below */
+
+       if (!rds_ib_cq_balance_enabled) /* module parameter; zero turns balancing off */
+               return IB_CQ_VECTOR_LEAST_ATTACHED;     /* no vector_load[] entry is charged on this path */
+
+       for (i = 1; i < rds_ibdev->dev->num_comp_vectors; i++) {        /* scan the remaining vectors */
+               if (rds_ibdev->vector_load[i] < min) {  /* strict '<' keeps the lowest index on ties */
+                       index = i;
+                       min = rds_ibdev->vector_load[i];
+               }
+       }
+
+       rds_ibdev->vector_load[index]++;        /* NOTE(review): no locking visible here; confirm callers serialize */
+       return index;   /* index into vector_load[] of the vector just charged */
+}
+
 /*
  * This needs to be very careful to not leave IS_ERR pointers around for
  * cleanup to trip over.
@@ -489,31 +509,37 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
        ic->i_pd = rds_ibdev->pd;
        ic->i_mr = rds_ibdev->mr;
 
+       ic->i_scq_vector = rds_ib_find_least_loaded_vector(rds_ibdev);
        ic->i_scq = ib_create_cq(dev, rds_ib_cq_comp_handler_send,
                                rds_ib_cq_event_handler, conn,
                                ic->i_send_ring.w_nr + 1,
-                               IB_CQ_VECTOR_LEAST_ATTACHED);
+                               ic->i_scq_vector);
        if (IS_ERR(ic->i_scq)) {
                ret = PTR_ERR(ic->i_scq);
                ic->i_scq = NULL;
                rdsdebug("ib_create_cq send failed: %d\n", ret);
+               if (ic->i_scq_vector != IB_CQ_VECTOR_LEAST_ATTACHED)
+                       rds_ibdev->vector_load[ic->i_scq_vector]--;
                goto out;
        }
 
+       ic->i_rcq_vector = rds_ib_find_least_loaded_vector(rds_ibdev);
        if (rds_ib_srq_enabled)
                ic->i_rcq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv,
                                        rds_ib_cq_event_handler, conn,
                                        rds_ib_srq_max_wr - 1,
-                                       IB_CQ_VECTOR_LEAST_ATTACHED);
+                                       ic->i_rcq_vector);
        else
                ic->i_rcq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv,
                                        rds_ib_cq_event_handler, conn,
                                        ic->i_recv_ring.w_nr,
-                                       IB_CQ_VECTOR_LEAST_ATTACHED);
+                                       ic->i_rcq_vector);
        if (IS_ERR(ic->i_rcq)) {
                ret = PTR_ERR(ic->i_rcq);
                ic->i_rcq = NULL;
                rdsdebug("ib_create_cq recv failed: %d\n", ret);
+               if (ic->i_scq_vector != IB_CQ_VECTOR_LEAST_ATTACHED)
+                       rds_ibdev->vector_load[ic->i_rcq_vector]--;
                goto out;
        }
 
@@ -1039,10 +1065,20 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
                /* first destroy the ib state that generates callbacks */
                if (ic->i_cm_id->qp)
                        rdma_destroy_qp(ic->i_cm_id);
-               if (ic->i_rcq)
+
+               if (ic->i_rcq) {
+                       if (ic->rds_ibdev &&
+                               ic->i_rcq_vector != IB_CQ_VECTOR_LEAST_ATTACHED)
+                               ic->rds_ibdev->vector_load[ic->i_rcq_vector]--;
                        ib_destroy_cq(ic->i_rcq);
-               if (ic->i_scq)
+               }
+
+               if (ic->i_scq) {
+                       if (ic->rds_ibdev &&
+                               ic->i_scq_vector != IB_CQ_VECTOR_LEAST_ATTACHED)
+                               ic->rds_ibdev->vector_load[ic->i_scq_vector]--;
                        ib_destroy_cq(ic->i_scq);
+               }
 
                /* then free the resources that ib callbacks use */
                if (ic->i_send_hdrs)