In RQs of type multi-packet WQE (Striding RQ), each WQE is relatively
large (typically 256KB) but their number is relatively small (8 in
default).
Re-mapping the descriptors' buffers before re-posting them is done via
UMR (User-Mode Memory Registration) operations.
On the one hand, posting UMR WQEs in bulks reduces communication overhead
with the HW and better utilizes its processing units.
On the other hand, delaying the WQE repost operations for a small RQ
(say, of 4 WQEs) might drastically hit its performance, causing packet
drops due to no receive buffer, for high or bursty incoming packets rate.
Here we restrict the bulk size for too small RQs. Effectively, with the current
constants, RQ of size 4 (minimum allowed) would have no bulking, while larger
RQs will continue working with bulks of 2.
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
                        u8                     umr_in_progress;
                        u8                     umr_last_bulk;
                        u8                     umr_completed;
+                       u8                     min_wqe_bulk;
                        struct mlx5e_shampo_hd *shampo;
                } mpwqe;
        };
 
                mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
 }
 
+u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz)
+{
+#define UMR_WQE_BULK (2)
+       return min_t(unsigned int, UMR_WQE_BULK, wq_sz / 2 - 1);
+}
+
 u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
                          struct mlx5e_params *params,
                          struct mlx5e_xsk_param *xsk)
 
 u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
                                   struct mlx5e_params *params,
                                   struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz);
 u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
                          struct mlx5e_params *params,
                          struct mlx5e_xsk_param *xsk);
 
                rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
                rq->mpwqe.num_strides =
                        BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
+               rq->mpwqe.min_wqe_bulk = mlx5e_mpwqe_get_min_wqe_bulk(wq_sz);
 
                rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz);
 
 
        if (unlikely(rq->mpwqe.umr_in_progress > rq->mpwqe.umr_last_bulk))
                rq->stats->congst_umr++;
 
-#define UMR_WQE_BULK (2)
-       if (likely(missing < UMR_WQE_BULK))
+       if (likely(missing < rq->mpwqe.min_wqe_bulk))
                return false;
 
        if (rq->page_pool)