From 3b203bd4ede677a52318b8c68dc55f476a5d742e Mon Sep 17 00:00:00 2001
From: Shamir Rabinovitch
Date: Tue, 14 Mar 2017 09:35:27 -0700
Subject: [PATCH] sparc64/mlx4_core: relaxed order for mlx4_core dma mappings
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

SPARC PCI requires relaxed-order PCI transactions from the HCA to host
memory to achieve maximum performance. CX3 is not capable of setting
relaxed order for its PCI transactions; only the mlx4_core driver can set
relaxed order on the right DMA mappings. Fix it!

This is a temporary fix for UEK4 only. Upstream changed the way the IB
subsystem works with DMA; upstreaming this fix requires first fixing the
net-next tree panic seen when the rdma service is started (Orabug
25760394), and only then upstreaming the fix for the SPARC/CX3
performance issue. This cannot be done in a short time and requires a
back-port of a lot of upstream code introduced with upstream commit
"Merge tag 'for-next-dma_ops' of
git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma". The current
decision is to limit the change to the mlx4 drivers and the ULPs that use
them. This patch therefore only overrides the mlx4 DMA functions, as
opposed to doing this from the ib_verbs DMA functions.

This patch is already used in UEK2/SPARC and has proven to work under all
product testing, so it is ported as-is.

Orabug: 25723815

Cc: Yuval Shaia
Cc: Govinda Tatti
Cc: chris hyser
Reviewed-by: Håkon Bugge
Signed-off-by: tndave
Signed-off-by: Shamir Rabinovitch
Signed-off-by: Allen Pais
---
 drivers/infiniband/core/umem.c      |  11 ++
 drivers/infiniband/hw/mlx4/Makefile |   1 +
 drivers/infiniband/hw/mlx4/dma.c    | 206 ++++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx4/dma.h    |   6 +
 drivers/infiniband/hw/mlx4/main.c   |   3 +
 include/rdma/ib_verbs.h             |  65 ++++++---
 6 files changed, 273 insertions(+), 19 deletions(-)
 create mode 100644 drivers/infiniband/hw/mlx4/dma.c
 create mode 100644 drivers/infiniband/hw/mlx4/dma.h

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 062a51e5094f..522b2e99a021 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -39,10 +39,19 @@
 #include
 #include
 #include
+#include
 #include

 #include "uverbs.h"

+#if defined(__sparc__) && defined(__arch64__)
+/* weak order is enabled by default for sparc 64 platforms */
+static bool allow_weak_ordering = true;
+#else
+static bool allow_weak_ordering;
+#endif
+module_param(allow_weak_ordering, bool, 0444);
+MODULE_PARM_DESC(allow_weak_ordering, "Allow weak ordering for data registered memory");
 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
 {
@@ -86,6 +95,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
         DEFINE_DMA_ATTRS(attrs);
         if (dmasync)
                 dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
+        else if (allow_weak_ordering)
+                dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs);
         return ib_umem_get_attrs(context, addr, size, access, DMA_BIDIRECTIONAL, &attrs);
 }
 EXPORT_SYMBOL(ib_umem_get);
diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile
index 08def0eb3b59..2843746f6bc9 100644
--- a/drivers/infiniband/hw/mlx4/Makefile
+++ b/drivers/infiniband/hw/mlx4/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
 mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o mcg.o cm.o alias_GUID.o sysfs.o
 mlx4_ib-y += wc.o
+mlx4_ib-y += dma.o
diff --git a/drivers/infiniband/hw/mlx4/dma.c b/drivers/infiniband/hw/mlx4/dma.c
new file mode 100644
index 000000000000..2c7336753274
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/dma.c
@@ -0,0 +1,206 @@
+#include
+#include
+
+#if defined(__sparc__) && defined(__arch64__)
+/*
+ * sparc platform dma mapping for mlx4
+ *
+ * sparc platform require weak order dma mapping as default mapping type.
+ * only cq must have strict memory dma mapping. most of the ulps just call
+ * ib_dma_map_single/sg w/o the needed DMA_ATTR_WEAK_ORDERING attribute.
+ * as result the ib performance on sparc platforms is very poor. using the
+ * dma mapping callbacks in ib_dma_xxx functions can solve this issue w/o
+ * the need to modify all the ulps.
+ *
+ * we pick the right dma api by the below order:
+ * 1. include/asm-generic/dma-mapping-common.h
+ * 2. include/linux/dma-mapping.h
+ *
+ * NOTE! - call to ib_dma_xxx api will cause endless recursion!
+ */
+
+static int
+sparc_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+        return dma_mapping_error(dev->dma_device, dma_addr);
+}
+
+static u64
+sparc_dma_map_single(struct ib_device *dev, void *ptr, size_t size,
+                     enum dma_data_direction direction)
+{
+        DEFINE_DMA_ATTRS(attrs);
+
+        dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs);
+        return dma_map_single_attrs(dev->dma_device, ptr, size, direction,
+                                    &attrs);
+}
+
+static void
+sparc_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
+                       enum dma_data_direction direction)
+{
+        DEFINE_DMA_ATTRS(attrs);
+
+        dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs);
+        return dma_unmap_single_attrs(dev->dma_device, addr, size, direction,
+                                      &attrs);
+}
+
+static u64
+sparc_dma_map_page(struct ib_device *dev, struct page *page,
+                   unsigned long offset, size_t size,
+                   enum dma_data_direction direction)
+{
+        const struct dma_map_ops *ops = get_dma_ops(dev->dma_device);
+        dma_addr_t addr;
+        DEFINE_DMA_ATTRS(attrs);
+
+        kmemcheck_mark_initialized(page_address(page) + offset, size);
+        BUG_ON(!valid_dma_direction(direction));
+        dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs);
+        addr = ops->map_page(dev->dma_device, page, offset, size, direction,
+                             &attrs);
+
+        debug_dma_map_page(dev->dma_device, page, offset, size, direction,
+                           addr, false);
+
+        return addr;
+}
+
+static void
+sparc_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
+                     enum dma_data_direction direction)
+{
+        const struct dma_map_ops *ops = get_dma_ops(dev->dma_device);
+        DEFINE_DMA_ATTRS(attrs);
+
+        BUG_ON(!valid_dma_direction(direction));
+        dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs);
+        if (ops->unmap_page)
+                ops->unmap_page(dev->dma_device, addr, size, direction,
+                                &attrs);
+
+        debug_dma_unmap_page(dev->dma_device, addr, size, direction, false);
+}
+
+static int
+sparc_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents,
+                 enum dma_data_direction direction)
+{
+        DEFINE_DMA_ATTRS(attrs);
+
+        dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs);
+        return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, &attrs);
+}
+
+static void
+sparc_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents,
+                   enum dma_data_direction direction)
+{
+        DEFINE_DMA_ATTRS(attrs);
+
+        dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs);
+        dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, &attrs);
+}
+
+static u64
+sparc_dma_map_single_attrs(struct ib_device *dev, void *ptr, size_t size,
+                           enum dma_data_direction direction,
+                           struct dma_attrs *attrs)
+{
+        dma_set_attr(DMA_ATTR_WEAK_ORDERING, attrs);
+        return dma_map_single_attrs(dev->dma_device, ptr, size, direction,
+                                    attrs);
+}
+
+static void
+sparc_dma_unmap_single_attrs(struct ib_device *dev, u64 addr, size_t size,
+                             enum dma_data_direction direction,
+                             struct dma_attrs *attrs)
+{
+        dma_set_attr(DMA_ATTR_WEAK_ORDERING, attrs);
+        dma_unmap_single_attrs(dev->dma_device, addr, size, direction, attrs);
+}
+
+static int
+sparc_dma_map_sg_attrs(struct ib_device *dev, struct scatterlist *sg, int nents,
+                       enum dma_data_direction direction, struct dma_attrs *attrs)
+{
+        dma_set_attr(DMA_ATTR_WEAK_ORDERING, attrs);
+        return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, attrs);
+}
+
+static void
+sparc_dma_unmap_sg_attrs(struct ib_device *dev, struct scatterlist *sg,
+                         int nents, enum dma_data_direction direction,
+                         struct dma_attrs *attrs)
+{
+        dma_set_attr(DMA_ATTR_WEAK_ORDERING, attrs);
+        dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, attrs);
+}
+
+static void
+sparc_dma_sync_single_for_cpu(struct ib_device *dev, u64 dma_handle,
+                              size_t size, enum dma_data_direction dir)
+{
+        dma_sync_single_for_cpu(dev->dma_device, dma_handle, size, dir);
+}
+
+static void
+sparc_dma_sync_single_for_device(struct ib_device *dev, u64 dma_handle,
+                                 size_t size, enum dma_data_direction dir)
+{
+        dma_sync_single_for_device(dev->dma_device, dma_handle, size, dir);
+}
+
+static void *
+sparc_dma_alloc_coherent(struct ib_device *dev, size_t size,
+                         u64 *dma_handle, gfp_t flag)
+{
+        dma_addr_t handle;
+        void *ret;
+
+        ret = dma_alloc_coherent(dev->dma_device, size, &handle, flag);
+        *dma_handle = handle;
+
+        return ret;
+}
+
+static void
+sparc_dma_free_coherent(struct ib_device *dev, size_t size, void *cpu_addr,
+                        u64 dma_handle)
+{
+        dma_free_coherent(dev->dma_device, size,
+                          cpu_addr, (dma_addr_t) dma_handle);
+}
+
+static struct ib_dma_mapping_ops sparc_dma_mapping_ops = {
+        .mapping_error = sparc_dma_mapping_error,
+        .map_single = sparc_dma_map_single,
+        .unmap_single = sparc_dma_unmap_single,
+        .map_page = sparc_dma_map_page,
+        .unmap_page = sparc_dma_unmap_page,
+        .map_sg = sparc_dma_map_sg,
+        .unmap_sg = sparc_dma_unmap_sg,
+        .map_single_attrs = sparc_dma_map_single_attrs,
+        .unmap_single_attrs = sparc_dma_unmap_single_attrs,
+        .map_sg_attrs = sparc_dma_map_sg_attrs,
+        .unmap_sg_attrs = sparc_dma_unmap_sg_attrs,
+        .sync_single_for_cpu = sparc_dma_sync_single_for_cpu,
+        .sync_single_for_device = sparc_dma_sync_single_for_device,
+        .alloc_coherent = sparc_dma_alloc_coherent,
+        .free_coherent = sparc_dma_free_coherent,
+
+};
+#endif /* if defined(__sparc__) && defined(__arch64__) */
+
+void
+mlx4_register_dma_ops(struct ib_device *ib_dev)
+{
+#if defined(__sparc__) && defined(__arch64__)
+        ib_dev->dma_ops = &sparc_dma_mapping_ops;
+#else
+        ib_dev->dma_ops = NULL;
+#endif
+}
diff --git a/drivers/infiniband/hw/mlx4/dma.h b/drivers/infiniband/hw/mlx4/dma.h
new file mode 100644
index 000000000000..b534375fe5d8
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/dma.h
@@ -0,0 +1,6 @@
+#ifndef __MLX4_DMA_H__
+#define __MLX4_DMA_H__
+
+void mlx4_register_dma_ops(struct ib_device *ib_dev);
+
+#endif
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 4e1c1522aeb2..0aefa10afd30 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -53,6 +53,7 @@
 #include "mlx4_ib.h"
 #include "user.h"
 #include "wc.h"
+#include "dma.h"

 #define DRV_NAME MLX4_IB_DRV_NAME
 #define DRV_VERSION "2.2-1"
@@ -2368,6 +2369,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                 return NULL;
         }

+        mlx4_register_dma_ops(&ibdev->ib_dev);
+
         iboe = &ibdev->iboe;

         if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index f526b363a32e..d5e998e10f9d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1494,6 +1494,22 @@ struct ib_dma_mapping_ops {
         void    (*free_coherent)(struct ib_device *dev,
                                  size_t size, void *cpu_addr,
                                  u64 dma_handle);
+        u64     (*map_single_attrs)(struct ib_device *dev,
+                                    void *ptr, size_t size,
+                                    enum dma_data_direction direction,
+                                    struct dma_attrs *attrs);
+        void    (*unmap_single_attrs)(struct ib_device *dev,
+                                      u64 addr, size_t size,
+                                      enum dma_data_direction direction,
+                                      struct dma_attrs *attrs);
+        int     (*map_sg_attrs)(struct ib_device *dev,
+                                struct scatterlist *sg, int nents,
+                                enum dma_data_direction direction,
+                                struct dma_attrs *attrs);
+        void    (*unmap_sg_attrs)(struct ib_device *dev,
+                                  struct scatterlist *sg, int nents,
+                                  enum dma_data_direction direction,
+                                  struct dma_attrs *attrs);
 };

 struct iw_cm_verbs;
@@ -2225,9 +2241,8 @@ static inline void ib_dma_unmap_single(struct ib_device *dev,
                                        enum dma_data_direction direction)
 {
         if (dev->dma_ops)
-                dev->dma_ops->unmap_single(dev, addr, size, direction);
-        else
-                dma_unmap_single(dev->dma_device, addr, size, direction);
+                return dev->dma_ops->unmap_single(dev, addr, size, direction);
+        dma_unmap_single(dev->dma_device, addr, size, direction);
 }

 static inline u64 ib_dma_map_single_attrs(struct ib_device *dev,
@@ -2235,6 +2250,9 @@ static inline u64 ib_dma_map_single_attrs(struct ib_device *dev,
                                           enum dma_data_direction direction,
                                           struct dma_attrs *attrs)
 {
+        if (dev->dma_ops)
+                return dev->dma_ops->map_single_attrs(dev, cpu_addr, size,
+                                                      direction, attrs);
         return dma_map_single_attrs(dev->dma_device, cpu_addr, size,
                                     direction, attrs);
 }
@@ -2244,8 +2262,11 @@ static inline void ib_dma_unmap_single_attrs(struct ib_device *dev,
                                              enum dma_data_direction direction,
                                              struct dma_attrs *attrs)
 {
-        return dma_unmap_single_attrs(dev->dma_device, addr, size,
-                                      direction, attrs);
+        if (dev->dma_ops)
+                return dev->dma_ops->unmap_single_attrs(dev, addr, size,
+                                                        direction, attrs);
+        dma_unmap_single_attrs(dev->dma_device, addr, size,
+                               direction, attrs);
 }

 /**
@@ -2279,9 +2300,8 @@ static inline void ib_dma_unmap_page(struct ib_device *dev,
                                      enum dma_data_direction direction)
 {
         if (dev->dma_ops)
-                dev->dma_ops->unmap_page(dev, addr, size, direction);
-        else
-                dma_unmap_page(dev->dma_device, addr, size, direction);
+                return dev->dma_ops->unmap_page(dev, addr, size, direction);
+        dma_unmap_page(dev->dma_device, addr, size, direction);
 }

 /**
@@ -2322,7 +2342,11 @@ static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
                                       enum dma_data_direction direction,
                                       struct dma_attrs *attrs)
 {
-        return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, attrs);
+        if (dev->dma_ops)
+                return dev->dma_ops->map_sg_attrs(dev, sg, nents,
+                                                  direction, attrs);
+        return dma_map_sg_attrs(dev->dma_device, sg, nents,
+                                direction, attrs);
 }

 static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
@@ -2330,7 +2354,11 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
                                          enum dma_data_direction direction,
                                          struct dma_attrs *attrs)
 {
-        dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, attrs);
+        if (dev->dma_ops)
+                return dev->dma_ops->unmap_sg_attrs(dev, sg, nents, direction,
+                                                    attrs);
+        dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction,
+                           attrs);
 }

 /**
  * ib_sg_dma_address - Return the DMA address from a scatter/gather entry
@@ -2373,9 +2401,8 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
                                               enum dma_data_direction dir)
 {
         if (dev->dma_ops)
-                dev->dma_ops->sync_single_for_cpu(dev, addr, size, dir);
-        else
-                dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
+                return dev->dma_ops->sync_single_for_cpu(dev, addr, size, dir);
+        dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
 }

 /**
@@ -2391,9 +2418,9 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
                                                  enum dma_data_direction dir)
 {
         if (dev->dma_ops)
-                dev->dma_ops->sync_single_for_device(dev, addr, size, dir);
-        else
-                dma_sync_single_for_device(dev->dma_device, addr, size, dir);
+                return dev->dma_ops->sync_single_for_device(dev, addr, size,
+                                                             dir);
+        dma_sync_single_for_device(dev->dma_device, addr, size, dir);
 }

 /**
@@ -2432,9 +2459,9 @@ static inline void ib_dma_free_coherent(struct ib_device *dev,
                                         u64 dma_handle)
 {
         if (dev->dma_ops)
-                dev->dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
-        else
-                dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
+                return dev->dma_ops->free_coherent(dev, size, cpu_addr,
+                                                   dma_handle);
+        dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
 }

 /**
-- 
2.50.1
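
Reviewer note (illustrative only, not part of the patch): the sketch below shows how a ULP mapping call reaches the override once mlx4_register_dma_ops() has installed dma_ops on the ib_device. The helper ulp_map_buf() and its buffer/length arguments are hypothetical; the dispatch follows the ib_dma_map_single()/ib_dma_mapping_error() inlines shown above. On sparc64, dev->dma_ops is non-NULL, so the call goes through sparc_dma_map_single(), which adds DMA_ATTR_WEAK_ORDERING; on other architectures dma_ops stays NULL and the call falls through to dma_map_single() unchanged.

#include <rdma/ib_verbs.h>

/* Hypothetical ULP helper, for illustrating the dispatch path only. */
static int ulp_map_buf(struct ib_device *dev, void *buf, size_t len,
                       u64 *dma_addr)
{
        /* Dispatches to dev->dma_ops->map_single() when dma_ops is set. */
        *dma_addr = ib_dma_map_single(dev, buf, len, DMA_TO_DEVICE);
        if (ib_dma_mapping_error(dev, *dma_addr))
                return -ENOMEM; /* mapping failed */
        return 0;
}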