www.infradead.org Git - users/dwmw2/linux.git/commitdiff
net/mlx5e: SHAMPO, Use KSMs instead of KLMs
author Yoray Zack <yorayz@nvidia.com>
Mon, 3 Jun 2024 21:22:17 +0000 (00:22 +0300)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 6 Jun 2024 03:20:46 +0000 (20:20 -0700)
A KSM Mkey is a KLM Mkey with a fixed buffer size, which makes it
a faster mechanism than KLM.

The SHAMPO feature used KLM Mkeys for the memory mappings of its headers
buffer. Since those KLMs used the same buffer size for every entry,
KSMs can be used instead.

This commit changes the Mkeys that map the SHAMPO headers buffer
from KLMs to KSMs.

Signed-off-by: Yoray Zack <yorayz@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/r/20240603212219.1037656-13-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/params.c
drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
include/linux/mlx5/device.h

index ff326601d4a4b2f0bd488911f47a9f14218abc22..bec784d25d7bea0453b5a681f5ea712383d97bc8 100644 (file)
@@ -80,6 +80,7 @@ struct page_pool;
                                 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 
 #define MLX5E_RX_MAX_HEAD (256)
+#define MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE (8)
 #define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9)
 #define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE)
 #define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64)
@@ -146,25 +147,6 @@ struct page_pool;
 #define MLX5E_TX_XSK_POLL_BUDGET       64
 #define MLX5E_SQ_RECOVER_MIN_INTERVAL  500 /* msecs */
 
-#define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\
-       (sizeof(struct mlx5e_umr_wqe) +\
-       (sizeof(struct mlx5_klm) * (sgl_len)))
-
-#define MLX5E_KLM_UMR_WQEBBS(klm_entries) \
-       (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_BB))
-
-#define MLX5E_KLM_UMR_DS_CNT(klm_entries)\
-       (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_DS))
-
-#define MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size)\
-       (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_klm))
-
-#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\
-       ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT)
-
-#define MLX5E_MAX_KLM_PER_WQE(mdev) \
-       MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev))
-
 #define mlx5e_state_dereference(priv, p) \
        rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock))
 
index ec819dfc98be211b0feecaf04132c46c5604f92d..6c9ccccca81e27dc51829ed05d1ca3259a9baa92 100644 (file)
@@ -1071,18 +1071,18 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
                                 struct mlx5e_params *params,
                                 struct mlx5e_rq_param *rq_param)
 {
-       int max_num_of_umr_per_wqe, max_hd_per_wqe, max_klm_per_umr, rest;
+       int max_num_of_umr_per_wqe, max_hd_per_wqe, max_ksm_per_umr, rest;
        void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
        int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
        u32 wqebbs;
 
-       max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev);
+       max_ksm_per_umr = MLX5E_MAX_KSM_PER_WQE(mdev);
        max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
-       max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr;
-       rest = max_hd_per_wqe % max_klm_per_umr;
-       wqebbs = MLX5E_KLM_UMR_WQEBBS(max_klm_per_umr) * max_num_of_umr_per_wqe;
+       max_num_of_umr_per_wqe = max_hd_per_wqe / max_ksm_per_umr;
+       rest = max_hd_per_wqe % max_ksm_per_umr;
+       wqebbs = MLX5E_KSM_UMR_WQEBBS(max_ksm_per_umr) * max_num_of_umr_per_wqe;
        if (rest)
-               wqebbs += MLX5E_KLM_UMR_WQEBBS(rest);
+               wqebbs += MLX5E_KSM_UMR_WQEBBS(rest);
        wqebbs *= wq_size;
        return wqebbs;
 }
index 879d698b6119303836915d5dc19f6f910d506890..d1f0f868d494e7bbc201087febbf7760d77588b6 100644 (file)
 
 #define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
 
+#define MLX5E_KSM_UMR_WQE_SZ(sgl_len)\
+       (sizeof(struct mlx5e_umr_wqe) +\
+       (sizeof(struct mlx5_ksm) * (sgl_len)))
+
+#define MLX5E_KSM_UMR_WQEBBS(ksm_entries) \
+       (DIV_ROUND_UP(MLX5E_KSM_UMR_WQE_SZ(ksm_entries), MLX5_SEND_WQE_BB))
+
+#define MLX5E_KSM_UMR_DS_CNT(ksm_entries)\
+       (DIV_ROUND_UP(MLX5E_KSM_UMR_WQE_SZ(ksm_entries), MLX5_SEND_WQE_DS))
+
+#define MLX5E_KSM_MAX_ENTRIES_PER_WQE(wqe_size)\
+       (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_ksm))
+
+#define MLX5E_KSM_ENTRIES_PER_WQE(wqe_size)\
+       ALIGN_DOWN(MLX5E_KSM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT)
+
+#define MLX5E_MAX_KSM_PER_WQE(mdev) \
+       MLX5E_KSM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev))
+
 static inline
 ktime_t mlx5e_cqe_ts_to_ns(cqe_ts_to_ns func, struct mlx5_clock *clock, u64 cqe_ts)
 {
index d21a87ddc934b330772987e435fe8898e34fd5da..2a3e0de51f0e4e9aa31de779290616105e04025a 100644 (file)
@@ -504,8 +504,8 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
        return err;
 }
 
-static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev,
-                                    u64 nentries,
+static int mlx5e_create_umr_ksm_mkey(struct mlx5_core_dev *mdev,
+                                    u64 nentries, u8 log_entry_size,
                                     u32 *umr_mkey)
 {
        int inlen;
@@ -525,12 +525,13 @@ static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev,
        MLX5_SET(mkc, mkc, umr_en, 1);
        MLX5_SET(mkc, mkc, lw, 1);
        MLX5_SET(mkc, mkc, lr, 1);
-       MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
+       MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KSM);
        mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
        MLX5_SET(mkc, mkc, qpn, 0xffffff);
        MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
        MLX5_SET(mkc, mkc, translations_octword_size, nentries);
-       MLX5_SET(mkc, mkc, length64, 1);
+       MLX5_SET(mkc, mkc, log_page_size, log_entry_size);
+       MLX5_SET64(mkc, mkc, len, nentries << log_entry_size);
        err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
 
        kvfree(in);
@@ -565,14 +566,16 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq
 static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev,
                                       struct mlx5e_rq *rq)
 {
-       u32 max_klm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size));
+       u32 max_ksm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size));
 
-       if (max_klm_size < rq->mpwqe.shampo->hd_per_wq) {
-               mlx5_core_err(mdev, "max klm list size 0x%x is smaller than shampo header buffer list size 0x%x\n",
-                             max_klm_size, rq->mpwqe.shampo->hd_per_wq);
+       if (max_ksm_size < rq->mpwqe.shampo->hd_per_wq) {
+               mlx5_core_err(mdev, "max ksm list size 0x%x is smaller than shampo header buffer list size 0x%x\n",
+                             max_ksm_size, rq->mpwqe.shampo->hd_per_wq);
                return -EINVAL;
        }
-       return mlx5e_create_umr_klm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq,
+
+       return mlx5e_create_umr_ksm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq,
+                                        MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE,
                                         &rq->mpwqe.shampo->mkey);
 }
 
index 3af4f70de3348c95fdabb7a69b84597ad12a5c4a..f1fbf60d0356e666c05f3e96e9af6b6f923ac974 100644 (file)
@@ -619,25 +619,25 @@ static int bitmap_find_window(unsigned long *bitmap, int len,
        return min(len, count);
 }
 
-static void build_klm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe,
-                         __be32 key, u16 offset, u16 klm_len, u16 wqe_bbs)
+static void build_ksm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe,
+                         __be32 key, u16 offset, u16 ksm_len)
 {
-       memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_klms));
+       memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_ksms));
        umr_wqe->ctrl.opmod_idx_opcode =
                cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
                             MLX5_OPCODE_UMR);
        umr_wqe->ctrl.umr_mkey = key;
        umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT)
-                                           | MLX5E_KLM_UMR_DS_CNT(klm_len));
+                                           | MLX5E_KSM_UMR_DS_CNT(ksm_len));
        umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
        umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
-       umr_wqe->uctrl.xlt_octowords = cpu_to_be16(klm_len);
+       umr_wqe->uctrl.xlt_octowords = cpu_to_be16(ksm_len);
        umr_wqe->uctrl.mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
 }
 
 static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
                                     struct mlx5e_icosq *sq,
-                                    u16 klm_entries, u16 index)
+                                    u16 ksm_entries, u16 index)
 {
        struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
        u16 entries, pi, header_offset, err, wqe_bbs, new_entries;
@@ -650,20 +650,20 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
        int headroom, i;
 
        headroom = rq->buff.headroom;
-       new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT - 1));
-       entries = ALIGN(klm_entries, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT);
-       wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries);
+       new_entries = ksm_entries - (shampo->pi & (MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT - 1));
+       entries = ALIGN(ksm_entries, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT);
+       wqe_bbs = MLX5E_KSM_UMR_WQEBBS(entries);
        pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs);
        umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
-       build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs);
+       build_ksm_umr(sq, umr_wqe, shampo->key, index, entries);
 
        frag_page = &shampo->pages[page_index];
 
        for (i = 0; i < entries; i++, index++) {
                dma_info = &shampo->info[index];
-               if (i >= klm_entries || (index < shampo->pi && shampo->pi - index <
-                                        MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT))
-                       goto update_klm;
+               if (i >= ksm_entries || (index < shampo->pi && shampo->pi - index <
+                                        MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT))
+                       goto update_ksm;
                header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
                        MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
                if (!(header_offset & (PAGE_SIZE - 1))) {
@@ -683,12 +683,11 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
                        dma_info->frag_page = frag_page;
                }
 
-update_klm:
-               umr_wqe->inline_klms[i].bcount =
-                       cpu_to_be32(MLX5E_RX_MAX_HEAD);
-               umr_wqe->inline_klms[i].key    = cpu_to_be32(lkey);
-               umr_wqe->inline_klms[i].va     =
-                       cpu_to_be64(dma_info->addr + headroom);
+update_ksm:
+               umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
+                       .key = cpu_to_be32(lkey),
+                       .va  = cpu_to_be64(dma_info->addr + headroom),
+               };
        }
 
        sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
@@ -720,37 +719,37 @@ err_unmap:
 static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
 {
        struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
-       u16 klm_entries, num_wqe, index, entries_before;
+       u16 ksm_entries, num_wqe, index, entries_before;
        struct mlx5e_icosq *sq = rq->icosq;
-       int i, err, max_klm_entries, len;
+       int i, err, max_ksm_entries, len;
 
-       max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev);
-       klm_entries = bitmap_find_window(shampo->bitmap,
+       max_ksm_entries = MLX5E_MAX_KSM_PER_WQE(rq->mdev);
+       ksm_entries = bitmap_find_window(shampo->bitmap,
                                         shampo->hd_per_wqe,
                                         shampo->hd_per_wq, shampo->pi);
-       if (!klm_entries)
+       if (!ksm_entries)
                return 0;
 
-       klm_entries += (shampo->pi & (MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT - 1));
-       index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT);
+       ksm_entries += (shampo->pi & (MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT - 1));
+       index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT);
        entries_before = shampo->hd_per_wq - index;
 
-       if (unlikely(entries_before < klm_entries))
-               num_wqe = DIV_ROUND_UP(entries_before, max_klm_entries) +
-                         DIV_ROUND_UP(klm_entries - entries_before, max_klm_entries);
+       if (unlikely(entries_before < ksm_entries))
+               num_wqe = DIV_ROUND_UP(entries_before, max_ksm_entries) +
+                         DIV_ROUND_UP(ksm_entries - entries_before, max_ksm_entries);
        else
-               num_wqe = DIV_ROUND_UP(klm_entries, max_klm_entries);
+               num_wqe = DIV_ROUND_UP(ksm_entries, max_ksm_entries);
 
        for (i = 0; i < num_wqe; i++) {
-               len = (klm_entries > max_klm_entries) ? max_klm_entries :
-                                                       klm_entries;
+               len = (ksm_entries > max_ksm_entries) ? max_ksm_entries :
+                                                       ksm_entries;
                if (unlikely(index + len > shampo->hd_per_wq))
                        len = shampo->hd_per_wq - index;
                err = mlx5e_build_shampo_hd_umr(rq, sq, len, index);
                if (unlikely(err))
                        return err;
                index = (index + len) & (rq->mpwqe.shampo->hd_per_wq - 1);
-               klm_entries -= len;
+               ksm_entries -= len;
        }
 
        return 0;
index d7bb31d9a4463abf132df7b5d928d9a98c71bf76..da09bfaa7b813c1d1541b8493249d52c6973e741 100644 (file)
@@ -294,6 +294,7 @@ enum {
 #define MLX5_UMR_FLEX_ALIGNMENT 0x40
 #define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt))
 #define MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_klm))
+#define MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_ksm))
 
 #define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8)