From 492ae2eaf3e7e073a0845aa4ee929a65d4cfb345 Mon Sep 17 00:00:00 2001 From: Knut Omang Date: Tue, 9 Aug 2016 16:10:39 +0200 Subject: [PATCH] sif: base: Scale default desc.array size values based on #of available CBs MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit With the default values for the number of QPs and MRs set high, 33 instances of the driver would consume a lot of memory just to initialize basic tables, since each of these instances has its own 1M QP space and in effect allocates the same amount of resources that a bare-metal, single-instance driver would. The number of collect buffers assigned to the PCIe function tells us what fraction of the hardware resources we got, and a small fraction of the 16K CB space indicates that the function competes with other functions for resources, and that it is unlikely that the same huge number of QPs etc. can be deployed with high performance anyway. This commit introduces tracking of module parameter settings compared to default values, and if the compiled-in defaults are used, we scale down the number of QPs etc. by a factor corresponding to the fraction of CBs we got. This yields e.g. 32K QPs per function in a 32 VF enabled system and significantly reduces system-wide memory usage in a virtualized environment (whether Xen based or not). Users can still override settings using the module parameters, which will not be subject to scaling if they deviate from the compiled-in defaults. 
Orabug: 24424521 Signed-off-by: Knut Omang Reviewed-by: Håkon Bugge --- drivers/infiniband/hw/sif/sif_base.c | 36 +++++++++++++++++++------- drivers/infiniband/hw/sif/sif_dev.h | 5 ++-- drivers/infiniband/hw/sif/sif_epsc.c | 2 ++ drivers/infiniband/hw/sif/sif_pd.c | 38 ++++++++++++++++------------ drivers/infiniband/hw/sif/sif_pd.h | 3 +++ 5 files changed, 57 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/hw/sif/sif_base.c b/drivers/infiniband/hw/sif/sif_base.c index 5b8cf92d431a..bb25e5472e20 100644 --- a/drivers/infiniband/hw/sif/sif_base.c +++ b/drivers/infiniband/hw/sif/sif_base.c @@ -47,7 +47,8 @@ static uint dummy_lat_cb_size = 1; * They are all read only after driver load */ -#define add_qsz_parameter(type, hwtype, initsize) \ +#define add_qsz_parameter(type, initsize) \ +static const uint sif_##type##_default_size = initsize;\ uint sif_##type##_size = initsize;\ module_param_named(type##_size, sif_##type##_size, uint, S_IRUGO);\ MODULE_PARM_DESC(type##_size, "Size of the " #type " descriptor table") @@ -57,25 +58,32 @@ MODULE_PARM_DESC(type##_size, "Size of the " #type " descriptor table") * e.g. 
for instance qp_size=2048 or ah_size=100 * (all sizes will be rounded up to a power of two value) */ -add_qsz_parameter(mr, key, 4194304); -add_qsz_parameter(epsc, epsc_csr_req, 2048); -add_qsz_parameter(qp, qp, 1048576); -add_qsz_parameter(rq, rq_hw, 1048576); -add_qsz_parameter(cq, cq_hw, 524288); -add_qsz_parameter(ah, ah, 262144); -add_qsz_parameter(sq_ring, sq_ring, 262144); -add_qsz_parameter(sq_tvl, sq_tvl, 128); +add_qsz_parameter(mr, 4194304); +add_qsz_parameter(epsc, 2048); +add_qsz_parameter(qp, 1048576); +add_qsz_parameter(rq, 1048576); +add_qsz_parameter(cq, 524288); +add_qsz_parameter(ah, 262144); +add_qsz_parameter(sq_ring, 262144); +add_qsz_parameter(sq_tvl, 128); /* These sizes must be equal to QP size */ #define sif_sq_rspq_size sif_qp_size +#define sif_sq_rspq_default_size sif_qp_default_size #define sif_rqsp_size sif_qp_size +#define sif_rqsp_default_size sif_qp_default_size #define sif_atsp_size sif_qp_size +#define sif_atsp_default_size sif_qp_default_size /* These can be set from the command line - no parameter needed */ static uint sif_epsa0_size = 64; +static const uint sif_epsa0_default_size = 64; static uint sif_epsa1_size = 64; +static const uint sif_epsa1_default_size = 64; static uint sif_epsa2_size = 64; +static const uint sif_epsa2_default_size = 64; static uint sif_epsa3_size = 64; +static const uint sif_epsa3_default_size = 64; /* This defines how small the smallest (sw) pointers can get. * If set to <= 8, 512 sw descriptors will fit in one page. 
@@ -115,6 +123,7 @@ struct sif_table_layout { const char *name; /* Corresponding to enum name */ const char *desc; /* Textual table desc (for logging) */ uint *e_cnt_ref; /* Driver parameter ref for no.of entries to allocate */ + const uint *e_def_cnt; /* Driver parameter ref for default no.of entries to allocate */ u32 entry_sz; /* Real size of entries in this table */ u32 ext; /* Actual extent of (stride between) entries in this table */ sif_dfs_printer dfs_printer; /* entry printing in debugfs */ @@ -141,6 +150,7 @@ struct sif_table_layout { .name = #type,\ .desc = _desc,\ .e_cnt_ref = &sif_##ec##_size,\ + .e_def_cnt = &sif_##ec##_default_size, \ .entry_sz = sizeof(struct _e_type),\ .ext = roundup_pow_of_two(sizeof(struct _e_type)),\ .dfs_printer = _dfs_printer,\ @@ -179,6 +189,7 @@ struct sif_table_layout { .name = #type "_csr_req", \ .desc = "EPS" #_suff " Request queue", \ .e_cnt_ref = &sif_##type##_size, \ + .e_def_cnt = &sif_##type##_default_size, \ .entry_sz = sizeof(struct psif_epsc_csr_req),\ .ext = roundup_pow_of_two(sizeof(struct psif_epsc_csr_req)), \ .dfs_printer = sif_dfs_print_##type, \ @@ -193,6 +204,7 @@ struct sif_table_layout { .name = #type "_csr_rsp", \ .desc = "EPS" #_suff " Response queue", \ .e_cnt_ref = &sif_##type##_size, \ + .e_def_cnt = &sif_##type##_default_size, \ .entry_sz = sizeof(struct psif_epsc_csr_rsp),\ .ext = roundup_pow_of_two(sizeof(struct psif_epsc_csr_rsp)), \ .dfs_printer = NULL, \ @@ -231,6 +243,7 @@ static struct sif_table_layout base_layout[] = { .name = "sq_cmpl", .desc = "cq: SQ addr.map", .e_cnt_ref = &sif_qp_size, + .e_def_cnt = &sif_qp_default_size, \ .entry_sz = 0, /* Calculated later */ .ext = 0, /* Calculated later */ .dfs_printer = sif_dfs_print_sq_cmpl, @@ -247,6 +260,7 @@ static struct sif_table_layout base_layout[] = { .name = "bw_cb", .desc = "High bandwith collect buffer", .e_cnt_ref = &dummy_bw_cb_size, + .e_def_cnt = &dummy_bw_cb_size, .entry_sz = sizeof(struct psif_cb), .ext = 4096, .dfs_printer = 
NULL, @@ -260,6 +274,7 @@ static struct sif_table_layout base_layout[] = { .name = "lat_cb", .desc = "Low latency collect buffer", .e_cnt_ref = &dummy_lat_cb_size, + .e_def_cnt = &dummy_lat_cb_size, .entry_sz = sizeof(struct psif_cb), .ext = 4096, .dfs_printer = NULL, @@ -494,6 +509,9 @@ int sif_table_init(struct sif_dev *sdev, enum sif_tab_type type) tp->type = type; tp->sdev = sdev; cfg_sz = (u32)(*base_layout[type].e_cnt_ref); + if (type < bw_cb && sdev->res_frac > 1 && cfg_sz == (u32)(*base_layout[type].e_def_cnt)) + cfg_sz = cfg_sz / sdev->res_frac; + if (cfg_sz & 0x80000000 || cfg_sz == 0) { sif_log(sdev, SIF_INFO, "%s(%u): table size %#x out of bounds", base_layout[type].desc, type, cfg_sz); diff --git a/drivers/infiniband/hw/sif/sif_dev.h b/drivers/infiniband/hw/sif/sif_dev.h index 1d349599cf88..fb05b0222eff 100644 --- a/drivers/infiniband/hw/sif/sif_dev.h +++ b/drivers/infiniband/hw/sif/sif_dev.h @@ -264,8 +264,9 @@ struct sif_dev { /* Interrupt allocation */ size_t intr_req; /* Number of irqs requested */ size_t intr_cnt; /* Number of irqs allocated */ - size_t bw_cb_cnt; /* No.of virtual collect buffers available */ - size_t lat_cb_cnt; /* No.of virtual collect buffers available */ + size_t bw_cb_cnt; /* No.of bandwidth optimized virtual collect buffers available */ + size_t lat_cb_cnt; /* No.of latency optimized virtual collect buffers available */ + size_t res_frac; /* Fraction of the available hardware resources allocated to this UF */ size_t msix_entries_sz; /* Size of the allocated msix_entries array */ spinlock_t msix_lock; /* Protects intr_used */ struct msix_entry *msix_entries; /* MSI-X vector info */ diff --git a/drivers/infiniband/hw/sif/sif_epsc.c b/drivers/infiniband/hw/sif/sif_epsc.c index c7c95f29b39d..541eb1faf9a5 100644 --- a/drivers/infiniband/hw/sif/sif_epsc.c +++ b/drivers/infiniband/hw/sif/sif_epsc.c @@ -972,6 +972,8 @@ proto_probing_done: if (ret) goto epsc_failed; + sif_cb_init(sdev); + #if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && 
defined(__sparc__) /* The kernel is currently using iommu bypass mode in the sparc iommu, and * the PSIF MMU requires a fixed configuration of the upper 12 bits of the diff --git a/drivers/infiniband/hw/sif/sif_pd.c b/drivers/infiniband/hw/sif/sif_pd.c index b947efec5837..7fa0db1078f0 100644 --- a/drivers/infiniband/hw/sif/sif_pd.c +++ b/drivers/infiniband/hw/sif/sif_pd.c @@ -184,12 +184,16 @@ int sif_remove_shpd(struct ib_device *ibdev, /* Obtain information about lat_cb and bw_cb resources * We cannot use the ba structs yet as they are not initialized at this point: */ -static void sif_cb_init(struct sif_dev *sdev) +void sif_cb_init(struct sif_dev *sdev) { struct psif_epsc_csr_req req; struct psif_epsc_csr_rsp rsp; struct sif_eps *es = &sdev->es[sdev->mbox_epsc]; + /* To work with very old fw assume at least 4 lat_cb and some bw_cbs */ + sdev->lat_cb_cnt = 4; + sdev->bw_cb_cnt = 128; + /* EPSC supports the new requests starting from v.0.36 */ if (eps_version_ge(es, 0, 37)) { int ret = 0; @@ -198,16 +202,25 @@ static void sif_cb_init(struct sif_dev *sdev) req.opcode = EPSC_QUERY; req.u.query.data.op = EPSC_QUERY_CAP_VCB_LO; req.u.query.info.op = EPSC_QUERY_CAP_VCB_HI; - ret = sif_epsc_wr(sdev, &req, &rsp); - if (ret) + ret = sif_epsc_wr_poll(sdev, &req, &rsp); + if (ret) { sif_log(sdev, SIF_INFO, "Request for VCB info failed with %d", ret); - else { + } else { sdev->bw_cb_cnt = rsp.data; sdev->lat_cb_cnt = rsp.info; sif_log(sdev, SIF_INIT, "Got %ld bw_cbs and %ld lat_cbs", sdev->bw_cb_cnt, sdev->lat_cb_cnt); } } + + /* estimate what fraction of the hardware resources we got based on the + * number of collect buffers reserved and use this to scale down + * configured base address ranges: + */ + sdev->res_frac = max(1UL, CBU_NUM_VCB / (sdev->bw_cb_cnt + sdev->lat_cb_cnt)); + if (sdev->res_frac > 1) + sif_log(sdev, SIF_INFO, "Scaling down default queue sizes by a factor %ld", + sdev->res_frac); } @@ -221,20 +234,13 @@ void sif_cb_table_init(struct sif_dev *sdev, enum 
sif_tab_type type) /* Update table values with EPSC data: */ if (type == bw_cb) { - sif_cb_init(sdev); - if (sdev->bw_cb_cnt) { - tp->entry_cnt = sdev->bw_cb_cnt; - tp->table_sz = tp->ext_sz * tp->entry_cnt; - } + tp->entry_cnt = sdev->bw_cb_cnt; + tp->table_sz = tp->ext_sz * tp->entry_cnt; tp->sif_off = sdev->cb_base; } else { - /* lat_cb */ - if (sdev->lat_cb_cnt) { - tp->entry_cnt = sdev->lat_cb_cnt; - tp->table_sz = tp->ext_sz * tp->entry_cnt; - tp->sif_off = sdev->cb_base + sdev->ba[bw_cb].table_sz; - } else - tp->entry_cnt = 0; + tp->entry_cnt = sdev->lat_cb_cnt; + tp->table_sz = tp->ext_sz * tp->entry_cnt; + tp->sif_off = sdev->cb_base + sdev->ba[bw_cb].table_sz; } tp->mem = sif_mem_create_ref(sdev, SIFMT_NOMEM, tp->sif_base, diff --git a/drivers/infiniband/hw/sif/sif_pd.h b/drivers/infiniband/hw/sif/sif_pd.h index aa0277a80b12..222d969e8efe 100644 --- a/drivers/infiniband/hw/sif/sif_pd.h +++ b/drivers/infiniband/hw/sif/sif_pd.h @@ -42,6 +42,9 @@ struct sif_shpd { struct sif_pd *pd; }; +/* Initialize driver information about the number of CBs of each type available */ +void sif_cb_init(struct sif_dev *sdev); + /* Initialize/deinitialize the pd subsystem */ int sif_init_pd(struct sif_dev *sdev); void sif_deinit_pd(struct sif_dev *sdev); -- 2.50.1