From b0bac387097ea42fd1cc06d96836a07d3499a018 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Thu, 11 May 2017 14:41:21 -0700 Subject: [PATCH] net/mlx4_core: Use round robin scheme to avoid stale caches MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The mlx4 driver in uek4 has a bug where frequent re-use of CQs, MPTs, or SRQs leads to memory corruption and a subsequent crash of lwipc. The issue has not been root-caused, but by partly reverting the upstream commit 7c6d74d23a33 ("mlx4_core: Roll back round robin bitmap allocation commit for CQs, SRQs, and MPTs"), i.e. by re-introducing round-robin (RR) allocation of said structures, we have a mitigation, and the bug does not reproduce. The root cause of bug 25730857 is tracked by bug 26266051. The commit message of the upstream commit states a performance concern related to the use of RR. Simple testing using this commit reveals up to a 20% performance regression when running simple OF-UV tests in a loop, but these tests are not deemed close to any real use cases. The same RR scheme is in uek2, and no performance issues related to this concern have been reported. The plan is therefore to merge this commit, to buy some time to root-cause the issue. When the issue is root-caused, this commit should be reverted. 
Orabug: 25730857 Signed-off-by: Santosh Shilimkar Signed-off-by: Håkon Bugge Reviewed-by: Ajaykumar Hotchandani --- drivers/net/ethernet/mellanox/mlx4/cq.c | 4 ++-- drivers/net/ethernet/mellanox/mlx4/mr.c | 2 +- drivers/net/ethernet/mellanox/mlx4/srq.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index e71f31387ac6c..c86142e763ac0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -231,7 +231,7 @@ err_put: mlx4_table_put(dev, &cq_table->table, *cqn); err_out: - mlx4_bitmap_free(&cq_table->bitmap, *cqn, MLX4_NO_RR); + mlx4_bitmap_free(&cq_table->bitmap, *cqn, MLX4_USE_RR); return err; } @@ -261,7 +261,7 @@ void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn) mlx4_table_put(dev, &cq_table->cmpt_table, cqn); mlx4_table_put(dev, &cq_table->table, cqn); - mlx4_bitmap_free(&cq_table->bitmap, cqn, MLX4_NO_RR); + mlx4_bitmap_free(&cq_table->bitmap, cqn, MLX4_USE_RR); } static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn) diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 271c158b70203..f17f51e288690 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -479,7 +479,7 @@ void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index) { struct mlx4_priv *priv = mlx4_priv(dev); - mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index, MLX4_NO_RR); + mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index, MLX4_USE_RR); } static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index) diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index 67146624eb58b..1e94949460d88 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -116,7 +116,7 @@ err_put: mlx4_table_put(dev, &srq_table->table, *srqn); err_out: - 
mlx4_bitmap_free(&srq_table->bitmap, *srqn, MLX4_NO_RR); + mlx4_bitmap_free(&srq_table->bitmap, *srqn, MLX4_USE_RR); return err; } @@ -144,7 +144,7 @@ void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn) mlx4_table_put(dev, &srq_table->cmpt_table, srqn); mlx4_table_put(dev, &srq_table->table, srqn); - mlx4_bitmap_free(&srq_table->bitmap, srqn, MLX4_NO_RR); + mlx4_bitmap_free(&srq_table->bitmap, srqn, MLX4_USE_RR); } static void mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn) -- 2.50.1