unsigned int rds_ib_haip_hca_failover_enabled = 1;
unsigned int rds_ib_apm_timeout = RDS_IB_DEFAULT_TIMEOUT;
unsigned int rds_ib_rnr_retry_count = RDS_IB_DEFAULT_RNR_RETRY_COUNT;
+unsigned int rds_ib_cq_balance_enabled = 1;
module_param(rds_ib_fmr_1m_pool_size, int, 0444);
MODULE_PARM_DESC(rds_ib_fmr_1m_pool_size, " Max number of 1m fmr per HCA");
MODULE_PARM_DESC(rds_ib_haip_fallback, " HAIP failback Enabled");
module_param(rds_ib_haip_hca_failover_enabled, int, 0444);
MODULE_PARM_DESC(rds_ib_haip_hca_failover_enabled, " HAIP HCA failover Enabled");
-
+module_param(rds_ib_cq_balance_enabled, int, 0444);
+MODULE_PARM_DESC(rds_ib_cq_balance_enabled, " CQ load balance Enabled");
/*
* we have a clumsy combination of RCU and a rwsem protecting this list
static struct rds_ib_port *ip_config;
static u8 ip_port_cnt = 0;
+static u8 ip_port_max;
void rds_ib_nodev_connect(void)
{
kfree(i_ipaddr);
}
+	/* kfree() is a no-op on NULL, so no guard is needed here. */
+	kfree(rds_ibdev->vector_load);
+
kfree(rds_ibdev);
}
struct page *page;
char from_dev2[2*IFNAMSIZ + 1];
char to_dev2[2*IFNAMSIZ + 1];
- int i, ret = 0;
+ int ret = 0;
u8 active_port;
struct in_device *in_dev;
struct net_device *net_dev,
u8 port_num)
{
- ip_port_cnt++;
+	/*
+	 * ip_config[] is allocated with ip_port_max + 1 entries and this
+	 * function stores into ip_config[ip_port_cnt] after the increment,
+	 * so ip_port_max is the highest index that may be written. Reject
+	 * the port before bumping the counter: the previous post-increment
+	 * test let the count reach ip_port_max + 1 (one past the end of the
+	 * array) and also left the counter incremented on the error path.
+	 */
+	if (ip_port_cnt >= ip_port_max) {
+		printk(KERN_ERR "RDS/IB: Exceeded max ports (%d)\n",
+				ip_port_max);
+		return;
+	}
+	ip_port_cnt++;
+
ip_config[ip_port_cnt].port_num = port_num;
ip_config[ip_port_cnt].dev = net_dev;
ip_config[ip_port_cnt].rds_ibdev = rds_ibdev;
}
}
-static int rds_ib_setup_ports(void)
+static int rds_ib_ip_config_init(void)
{
struct net_device *dev;
struct in_ifaddr *ifa;
int ret = 0;
if (!rds_ib_haip_enabled)
- return ret;
+ return 0;
+
+ ip_port_max = 0;
+ rcu_read_lock();
+ list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
+ ip_port_max += rds_ibdev->dev->phys_port_cnt;
+ }
+ rcu_read_unlock();
+
+ ip_config = kzalloc(sizeof(struct rds_ib_port) *
+ (ip_port_max + 1), GFP_KERNEL);
+ if (!ip_config) {
+ printk(KERN_ERR "RDS/IB: failed to allocate IP config\n");
+ return 1;
+ }
read_lock(&dev_base_lock);
for_each_netdev(&init_net, dev) {
}
if (rds_ib_haip_enabled) {
- ip_config = kzalloc(sizeof(struct rds_ib_port) *
- RDS_IB_MAX_PORTS + 1, GFP_KERNEL);
-
- if (!ip_config) {
- printk(KERN_ERR
- "RDS/IB: failed to allocate IP config\n");
- goto put_dev;
- }
-
INIT_IB_EVENT_HANDLER(&rds_ibdev->event_handler,
rds_ibdev->dev, rds_ib_event_handler);
if (ib_register_event_handler(&rds_ibdev->event_handler)) {
}
}
+	/*
+	 * One load counter per completion vector, zero-initialised.
+	 * kcalloc() checks the count * size multiplication for overflow.
+	 */
+	rds_ibdev->vector_load = kcalloc(device->num_comp_vectors,
+					 sizeof(*rds_ibdev->vector_load),
+					 GFP_KERNEL);
+	if (!rds_ibdev->vector_load) {
+		printk(KERN_ERR "RDS/IB: failed to allocate vector memory\n");
+		goto put_dev;
+	}
+
rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(rds_ibdev->mr)) {
rds_ibdev->mr = NULL;
rds_ib_recv_exit();
rds_trans_unregister(&rds_ib_transport);
rds_ib_fmr_exit();
+
+	/* kfree() is a no-op on NULL, so no guard is needed here. */
+	kfree(ip_config);
}
struct rds_transport rds_ib_transport = {
rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
- ret = rds_ib_setup_ports();
+ ret = rds_ib_ip_config_init();
if (ret) {
printk(KERN_ERR "RDS/IB: failed to init port\n");
goto out_srq;
struct rds_ib_path i_cur_path;
unsigned int i_alt_path_index;
unsigned int i_active_side;
+
+ int i_scq_vector;
+ int i_rcq_vector;
};
/* This assumes that atomic_t is at least 32 bits */
};
#define RDS_IB_MAX_ALIASES 100
-#define RDS_IB_MAX_PORTS 10
struct rds_ib_port {
struct rds_ib_device *rds_ibdev;
struct net_device *dev;
struct rds_ib_srq *srq;
struct rds_ib_port *ports;
struct ib_event_handler event_handler;
+ int *vector_load;
};
#define pcidev_to_node(pcidev) pcibus_to_node(pcidev->bus)
extern unsigned int rds_ib_haip_fallback;
extern unsigned int rds_ib_haip_failover_enabled;
extern unsigned int rds_ib_apm_timeout;
+extern unsigned int rds_ib_cq_balance_enabled;
extern spinlock_t ib_nodev_conns_lock;
extern struct list_head ib_nodev_conns;
}
}
+/*
+ * Pick the completion vector of @rds_ibdev that currently has the lowest
+ * vector_load[] count and charge one more CQ to it.
+ *
+ * Returns the chosen vector index. Returns IB_CQ_VECTOR_LEAST_ATTACHED
+ * without touching vector_load[] when rds_ib_cq_balance_enabled is clear
+ * or the device reports no completion vectors; callers compare against
+ * that value to decide whether a matching vector_load[] decrement is owed.
+ * Ties are resolved in favour of the lowest index.
+ *
+ * NOTE(review): vector_load[] is read and updated here without any
+ * locking; confirm that callers serialize CQ setup/teardown per device.
+ */
+static int rds_ib_find_least_loaded_vector(struct rds_ib_device *rds_ibdev)
+{
+	int i;
+	int index = 0;
+	int min;
+
+	/*
+	 * Decide whether balancing applies before looking at vector_load[]:
+	 * the table is only meaningful when balancing is on, and it has no
+	 * element 0 to read when the device exposes no completion vectors.
+	 */
+	if (!rds_ib_cq_balance_enabled ||
+	    rds_ibdev->dev->num_comp_vectors < 1)
+		return IB_CQ_VECTOR_LEAST_ATTACHED;
+
+	min = rds_ibdev->vector_load[0];
+	for (i = 1; i < rds_ibdev->dev->num_comp_vectors; i++) {
+		if (rds_ibdev->vector_load[i] < min) {
+			index = i;
+			min = rds_ibdev->vector_load[i];
+		}
+	}
+
+	/* Account for the CQ the caller is about to create on this vector. */
+	rds_ibdev->vector_load[index]++;
+	return index;
+}
+
/*
* This needs to be very careful to not leave IS_ERR pointers around for
* cleanup to trip over.
ic->i_pd = rds_ibdev->pd;
ic->i_mr = rds_ibdev->mr;
+ ic->i_scq_vector = rds_ib_find_least_loaded_vector(rds_ibdev);
ic->i_scq = ib_create_cq(dev, rds_ib_cq_comp_handler_send,
rds_ib_cq_event_handler, conn,
ic->i_send_ring.w_nr + 1,
- IB_CQ_VECTOR_LEAST_ATTACHED);
+ ic->i_scq_vector);
if (IS_ERR(ic->i_scq)) {
ret = PTR_ERR(ic->i_scq);
ic->i_scq = NULL;
rdsdebug("ib_create_cq send failed: %d\n", ret);
+ if (ic->i_scq_vector != IB_CQ_VECTOR_LEAST_ATTACHED)
+ rds_ibdev->vector_load[ic->i_scq_vector]--;
goto out;
}
+ ic->i_rcq_vector = rds_ib_find_least_loaded_vector(rds_ibdev);
if (rds_ib_srq_enabled)
ic->i_rcq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv,
rds_ib_cq_event_handler, conn,
rds_ib_srq_max_wr - 1,
- IB_CQ_VECTOR_LEAST_ATTACHED);
+ ic->i_rcq_vector);
else
ic->i_rcq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv,
rds_ib_cq_event_handler, conn,
ic->i_recv_ring.w_nr,
- IB_CQ_VECTOR_LEAST_ATTACHED);
+ ic->i_rcq_vector);
if (IS_ERR(ic->i_rcq)) {
ret = PTR_ERR(ic->i_rcq);
ic->i_rcq = NULL;
rdsdebug("ib_create_cq recv failed: %d\n", ret);
+		/*
+		 * Undo the load accounting taken for the recv CQ. Test the
+		 * recv vector itself (not the send vector) so the index used
+		 * below is known to be a real vector_load[] slot.
+		 */
+		if (ic->i_rcq_vector != IB_CQ_VECTOR_LEAST_ATTACHED)
+			rds_ibdev->vector_load[ic->i_rcq_vector]--;
goto out;
}
/* first destroy the ib state that generates callbacks */
if (ic->i_cm_id->qp)
rdma_destroy_qp(ic->i_cm_id);
- if (ic->i_rcq)
+
+ if (ic->i_rcq) {
+ if (ic->rds_ibdev &&
+ ic->i_rcq_vector != IB_CQ_VECTOR_LEAST_ATTACHED)
+ ic->rds_ibdev->vector_load[ic->i_rcq_vector]--;
ib_destroy_cq(ic->i_rcq);
- if (ic->i_scq)
+ }
+
+ if (ic->i_scq) {
+ if (ic->rds_ibdev &&
+ ic->i_scq_vector != IB_CQ_VECTOR_LEAST_ATTACHED)
+ ic->rds_ibdev->vector_load[ic->i_scq_vector]--;
ib_destroy_cq(ic->i_scq);
+ }
/* then free the resources that ib callbacks use */
if (ic->i_send_hdrs)