From 42d2aef6520183937bdd9bebc9b725742b315bf8 Mon Sep 17 00:00:00 2001
From: Bang Nguyen
Date: Fri, 30 Nov 2012 14:07:31 -0800
Subject: [PATCH] rds: CQ balance

This patch provides load-balancing for RDS CQs across available interrupt
vectors.

Each rds_ib_device now carries a vector_load[] array with one counter per
completion vector (device->num_comp_vectors).  rds_ib_setup_qp() asks
rds_ib_find_least_loaded_vector() for the vector with the smallest counter
when it creates the send and the receive CQ, and records the choice in
ic->i_scq_vector / ic->i_rcq_vector.  The counter is dropped again when
ib_create_cq() fails and when the CQ is destroyed in
rds_ib_conn_shutdown().  Setting the rds_ib_cq_balance_enabled module
parameter to 0 keeps the previous IB_CQ_VECTOR_LEAST_ATTACHED behaviour.

The HAIP ip_config table is now allocated once in rds_ib_ip_config_init()
(formerly rds_ib_setup_ports()), sized from the sum of phys_port_cnt over
the devices on rds_ib_devices instead of the fixed RDS_IB_MAX_PORTS, and
is freed in rds_ib_exit().

[review: reject a port only when the table is full and leave ip_port_cnt
 untouched on rejection, so ip_config[] is never indexed past
 ip_port_max; test i_rcq_vector (not i_scq_vector) before undoing the
 receive-CQ accounting; read vector_load[0] only when balancing is
 enabled; return -ENOMEM instead of 1; drop NULL checks before kfree();
 fix the "memoru" typo.  NOTE: vector_load[] is updated without a lock in
 these hunks -- confirm that callers serialize CQ setup and shutdown.]

Signed-off-by: Bang Nguyen
---
 net/rds/ib.c    | 56 ++++++++++++++++++++++++++++++++++++-------------
 net/rds/ib.h    |  6 +++++-
 net/rds/ib_cm.c | 46 +++++++++++++++++++++++++++++++++++-----
 3 files changed, 87 insertions(+), 21 deletions(-)

diff --git a/net/rds/ib.c b/net/rds/ib.c
index 205ea0f2e5aa..72a1ad75ec12 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -57,6 +57,7 @@ unsigned int rds_ib_haip_fallback = 1;
 unsigned int rds_ib_haip_hca_failover_enabled = 1;
 unsigned int rds_ib_apm_timeout = RDS_IB_DEFAULT_TIMEOUT;
 unsigned int rds_ib_rnr_retry_count = RDS_IB_DEFAULT_RNR_RETRY_COUNT;
+unsigned int rds_ib_cq_balance_enabled = 1;
 
 module_param(rds_ib_fmr_1m_pool_size, int, 0444);
 MODULE_PARM_DESC(rds_ib_fmr_1m_pool_size, " Max number of 1m fmr per HCA");
@@ -78,7 +79,8 @@ module_param(rds_ib_haip_fallback, int, 0444);
 MODULE_PARM_DESC(rds_ib_haip_fallback, " HAIP failback Enabled");
 module_param(rds_ib_haip_hca_failover_enabled, int, 0444);
 MODULE_PARM_DESC(rds_ib_haip_hca_failover_enabled, " HAIP HCA failover Enabled");
-
+module_param(rds_ib_cq_balance_enabled, int, 0444);
+MODULE_PARM_DESC(rds_ib_cq_balance_enabled, " CQ load balance Enabled");
 
 /*
  * we have a clumsy combination of RCU and a rwsem protecting this list
@@ -99,6 +101,7 @@ struct socket *rds_ib_inet_socket;
 
 static struct rds_ib_port *ip_config;
 static u8 ip_port_cnt = 0;
+static u8 ip_port_max;
 
 void rds_ib_nodev_connect(void)
 {
@@ -146,6 +149,9 @@ static void rds_ib_dev_free(struct work_struct *work)
 		kfree(i_ipaddr);
 	}
 
+	/* kfree(NULL) is a no-op, so no NULL check is needed */
+	kfree(rds_ibdev->vector_load);
+
 	kfree(rds_ibdev);
 }
 
@@ -467,7 +473,7 @@ static int rds_ib_move_ip(char *from_dev,
 	struct page *page;
 	char from_dev2[2*IFNAMSIZ + 1];
 	char to_dev2[2*IFNAMSIZ + 1];
-	int i, ret = 0;
+	int ret = 0;
 	u8 active_port;
 	struct in_device *in_dev;
 
@@ -577,7 +583,12 @@ static void rds_ib_init_port(struct rds_ib_device *rds_ibdev,
 				struct net_device *net_dev,
 				u8 port_num)
 {
-	ip_port_cnt++;
+	if (ip_port_cnt >= ip_port_max) {	/* entries 1..ip_port_max taken */
+		printk(KERN_ERR "RDS/IB: Exceeded max ports (%d)\n",
+				ip_port_max);
+		return;
+	}
+	ip_port_cnt++;
 	ip_config[ip_port_cnt].port_num = port_num;
 	ip_config[ip_port_cnt].dev = net_dev;
 	ip_config[ip_port_cnt].rds_ibdev = rds_ibdev;
@@ -893,7 +904,7 @@ static void rds_ib_dump_ip_config(void)
 	}
 }
 
-static int rds_ib_setup_ports(void)
+static int rds_ib_ip_config_init(void)
 {
 	struct net_device *dev;
 	struct in_ifaddr *ifa;
@@ -904,7 +915,21 @@ static int rds_ib_setup_ports(void)
 	int ret = 0;
 
 	if (!rds_ib_haip_enabled)
-		return ret;
+		return 0;
+
+	ip_port_max = 0;
+	rcu_read_lock();
+	list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
+		ip_port_max += rds_ibdev->dev->phys_port_cnt;
+	}
+	rcu_read_unlock();
+
+	ip_config = kzalloc(sizeof(struct rds_ib_port) *
+				(ip_port_max + 1), GFP_KERNEL);
+	if (!ip_config) {
+		printk(KERN_ERR "RDS/IB: failed to allocate IP config\n");
+		return -ENOMEM;
+	}
 
 	read_lock(&dev_base_lock);
 	for_each_netdev(&init_net, dev) {
@@ -1008,15 +1033,6 @@ void rds_ib_add_one(struct ib_device *device)
 	}
 
 	if (rds_ib_haip_enabled) {
-		ip_config = kzalloc(sizeof(struct rds_ib_port) *
-				RDS_IB_MAX_PORTS + 1, GFP_KERNEL);
-
-		if (!ip_config) {
-			printk(KERN_ERR
-				"RDS/IB: failed to allocate IP config\n");
-			goto put_dev;
-		}
-
 		INIT_IB_EVENT_HANDLER(&rds_ibdev->event_handler,
 				rds_ibdev->dev, rds_ib_event_handler);
 		if (ib_register_event_handler(&rds_ibdev->event_handler)) {
@@ -1026,6 +1042,13 @@ void rds_ib_add_one(struct ib_device *device)
 		}
 	}
 
+	rds_ibdev->vector_load = kzalloc(sizeof(int) *
+					device->num_comp_vectors, GFP_KERNEL);
+	if (!rds_ibdev->vector_load) {
+		printk(KERN_ERR "RDS/IB: failed to allocate vector memory\n");
+		goto put_dev;
+	}
+
 	rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(rds_ibdev->mr)) {
 		rds_ibdev->mr = NULL;
@@ -1085,6 +1108,9 @@ void rds_ib_exit(void)
 	rds_ib_recv_exit();
 	rds_trans_unregister(&rds_ib_transport);
 	rds_ib_fmr_exit();
+
+	/* kfree(NULL) is a no-op, so no NULL check is needed */
+	kfree(ip_config);
 }
 
 struct rds_transport rds_ib_transport = {
@@ -1229,7 +1255,7 @@ int rds_ib_init(void)
 
 	rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
 
-	ret = rds_ib_setup_ports();
+	ret = rds_ib_ip_config_init();
 	if (ret) {
 		printk(KERN_ERR "RDS/IB: failed to init port\n");
 		goto out_srq;
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 58e9a285c3a5..3c6675c6663c 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -225,6 +225,9 @@ struct rds_ib_connection {
 	struct rds_ib_path	i_cur_path;
 	unsigned int		i_alt_path_index;
 	unsigned int		i_active_side;
+	/* Completion vectors picked for the send/recv CQs in rds_ib_setup_qp() */
+	int			i_scq_vector;
+	int			i_rcq_vector;
 };
 
 /* This assumes that atomic_t is at least 32 bits */
@@ -267,7 +270,6 @@ enum {
 };
 
 #define RDS_IB_MAX_ALIASES	100
-#define RDS_IB_MAX_PORTS	10
 struct rds_ib_port {
 	struct rds_ib_device	*rds_ibdev;
 	struct net_device	*dev;
@@ -316,6 +318,7 @@ struct rds_ib_device {
 	struct rds_ib_srq	*srq;
 	struct rds_ib_port	*ports;
 	struct ib_event_handler	event_handler;
+	int			*vector_load;	/* per completion vector CQ count */
 };
 
 #define pcidev_to_node(pcidev) pcibus_to_node(pcidev->bus)
@@ -427,6 +430,7 @@ extern unsigned int rds_ib_haip_enabled;
 extern unsigned int rds_ib_haip_fallback;
 extern unsigned int rds_ib_haip_failover_enabled;
 extern unsigned int rds_ib_apm_timeout;
+extern unsigned int rds_ib_cq_balance_enabled;
 
 extern spinlock_t ib_nodev_conns_lock;
 extern struct list_head ib_nodev_conns;
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 82abce6a81c5..69773f894ce8 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -457,6 +457,26 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
 	}
 }
 
+/* Pick the completion vector with the lowest vector_load and charge it. */
+static int rds_ib_find_least_loaded_vector(struct rds_ib_device *rds_ibdev)
+{
+	int i, min, index = 0;
+
+	if (!rds_ib_cq_balance_enabled)
+		return IB_CQ_VECTOR_LEAST_ATTACHED;
+
+	min = rds_ibdev->vector_load[0];
+	for (i = 1; i < rds_ibdev->dev->num_comp_vectors; i++) {
+		if (rds_ibdev->vector_load[i] < min) {
+			index = i;
+			min = rds_ibdev->vector_load[i];
+		}
+	}
+
+	rds_ibdev->vector_load[index]++;
+	return index;
+}
+
 /*
  * This needs to be very careful to not leave IS_ERR pointers around for
  * cleanup to trip over.
@@ -489,31 +509,37 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 	ic->i_pd = rds_ibdev->pd;
 	ic->i_mr = rds_ibdev->mr;
 
+	ic->i_scq_vector = rds_ib_find_least_loaded_vector(rds_ibdev);
 	ic->i_scq = ib_create_cq(dev, rds_ib_cq_comp_handler_send,
 				 rds_ib_cq_event_handler, conn,
 				 ic->i_send_ring.w_nr + 1,
-				 IB_CQ_VECTOR_LEAST_ATTACHED);
+				 ic->i_scq_vector);
 	if (IS_ERR(ic->i_scq)) {
 		ret = PTR_ERR(ic->i_scq);
 		ic->i_scq = NULL;
 		rdsdebug("ib_create_cq send failed: %d\n", ret);
+		if (ic->i_scq_vector != IB_CQ_VECTOR_LEAST_ATTACHED)
+			rds_ibdev->vector_load[ic->i_scq_vector]--;
 		goto out;
 	}
 
+	ic->i_rcq_vector = rds_ib_find_least_loaded_vector(rds_ibdev);
 	if (rds_ib_srq_enabled)
 		ic->i_rcq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv,
 				rds_ib_cq_event_handler, conn,
 				rds_ib_srq_max_wr - 1,
-				IB_CQ_VECTOR_LEAST_ATTACHED);
+				ic->i_rcq_vector);
 	else
 		ic->i_rcq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv,
 				rds_ib_cq_event_handler, conn,
 				ic->i_recv_ring.w_nr,
-				IB_CQ_VECTOR_LEAST_ATTACHED);
+				ic->i_rcq_vector);
 	if (IS_ERR(ic->i_rcq)) {
 		ret = PTR_ERR(ic->i_rcq);
 		ic->i_rcq = NULL;
 		rdsdebug("ib_create_cq recv failed: %d\n", ret);
+		if (ic->i_rcq_vector != IB_CQ_VECTOR_LEAST_ATTACHED)
+			rds_ibdev->vector_load[ic->i_rcq_vector]--;
 		goto out;
 	}
 
@@ -1039,10 +1065,20 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
 		/* first destroy the ib state that generates callbacks */
 		if (ic->i_cm_id->qp)
 			rdma_destroy_qp(ic->i_cm_id);
-		if (ic->i_rcq)
+
+		if (ic->i_rcq) {
+			if (ic->rds_ibdev &&
+				ic->i_rcq_vector != IB_CQ_VECTOR_LEAST_ATTACHED)
+				ic->rds_ibdev->vector_load[ic->i_rcq_vector]--;
 			ib_destroy_cq(ic->i_rcq);
-		if (ic->i_scq)
+		}
+
+		if (ic->i_scq) {
+			if (ic->rds_ibdev &&
+				ic->i_scq_vector != IB_CQ_VECTOR_LEAST_ATTACHED)
+				ic->rds_ibdev->vector_load[ic->i_scq_vector]--;
 			ib_destroy_cq(ic->i_scq);
+		}
 
 		/* then free the resources that ib callbacks use */
 		if (ic->i_send_hdrs)
-- 
2.50.1