From f33a508c23a48b8b94a6bb9e3ffd2432bbfa3d50 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 12 Feb 2025 17:12:32 -0800 Subject: [PATCH 01/16] bnxt_en: Refactor completion ring free routine Add a wrapper routine to free L2 completion rings. This will be useful later in the series. Reviewed-by: Kalesh AP Reviewed-by: Michal Swiatkowski Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250213011240.1640031-5-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 26 ++++++++++++++--------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8ab7345acb0a..52d4dc222759 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7405,6 +7405,20 @@ static void bnxt_hwrm_rx_agg_ring_free(struct bnxt *bp, bp->grp_info[grp_idx].agg_fw_ring_id = INVALID_HW_RING_ID; } +static void bnxt_hwrm_cp_ring_free(struct bnxt *bp, + struct bnxt_cp_ring_info *cpr) +{ + struct bnxt_ring_struct *ring; + + ring = &cpr->cp_ring_struct; + if (ring->fw_ring_id == INVALID_HW_RING_ID) + return; + + hwrm_ring_free_send_msg(bp, ring, RING_FREE_REQ_RING_TYPE_L2_CMPL, + INVALID_HW_RING_ID); + ring->fw_ring_id = INVALID_HW_RING_ID; +} + static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) { u32 type; @@ -7450,17 +7464,9 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) struct bnxt_ring_struct *ring; int j; - for (j = 0; j < cpr->cp_ring_count && cpr->cp_ring_arr; j++) { - struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[j]; + for (j = 0; j < cpr->cp_ring_count && cpr->cp_ring_arr; j++) + bnxt_hwrm_cp_ring_free(bp, &cpr->cp_ring_arr[j]); - ring = &cpr2->cp_ring_struct; - if (ring->fw_ring_id == INVALID_HW_RING_ID) - continue; - hwrm_ring_free_send_msg(bp, ring, - RING_FREE_REQ_RING_TYPE_L2_CMPL, - INVALID_HW_RING_ID); - ring->fw_ring_id = INVALID_HW_RING_ID; - } ring = &cpr->cp_ring_struct; if (ring->fw_ring_id != INVALID_HW_RING_ID) { hwrm_ring_free_send_msg(bp, ring, type, -- 2.51.0 From 09cc58d5944155d6e12a4e02e44c87dd667f43af Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 12 Feb 2025 17:12:33 -0800 Subject: [PATCH 02/16] bnxt_en: Refactor bnxt_free_tx_rings() to free per TX ring Modify bnxt_free_tx_rings() to free the skbs per TX ring. This will be useful later in the series. Reviewed-by: Michal Swiatkowski Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250213011240.1640031-6-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 115 ++++++++++++---------- 1 file changed, 61 insertions(+), 54 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 52d4dc222759..453f52648145 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3314,74 +3314,81 @@ poll_done: return work_done; } -static void bnxt_free_tx_skbs(struct bnxt *bp) +static void bnxt_free_one_tx_ring_skbs(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, int idx) { int i, max_idx; struct pci_dev *pdev = bp->pdev; - if (!bp->tx_ring) - return; - max_idx = bp->tx_nr_pages * TX_DESC_CNT; - for (i = 0; i < bp->tx_nr_rings; i++) { - struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; - int j; - if (!txr->tx_buf_ring) + for (i = 0; i < max_idx;) { + struct bnxt_sw_tx_bd *tx_buf = &txr->tx_buf_ring[i]; + struct sk_buff *skb; + int j, last; + + if (idx < bp->tx_nr_rings_xdp && + tx_buf->action == XDP_REDIRECT) { + dma_unmap_single(&pdev->dev, + dma_unmap_addr(tx_buf, mapping), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + xdp_return_frame(tx_buf->xdpf); + tx_buf->action = 0; + tx_buf->xdpf = NULL; + i++; continue; + } - for (j = 0; j < max_idx;) { - struct bnxt_sw_tx_bd *tx_buf = &txr->tx_buf_ring[j]; - struct sk_buff *skb; - int k, last; - - if (i < bp->tx_nr_rings_xdp && - tx_buf->action == XDP_REDIRECT) { - dma_unmap_single(&pdev->dev, - dma_unmap_addr(tx_buf, mapping), - dma_unmap_len(tx_buf, len), - DMA_TO_DEVICE); - xdp_return_frame(tx_buf->xdpf); - tx_buf->action = 0; - tx_buf->xdpf = NULL; - j++; - continue; - } + skb = tx_buf->skb; + if (!skb) { + i++; + continue; + } - skb = tx_buf->skb; - if (!skb) { - j++; - continue; - } + tx_buf->skb = NULL; - tx_buf->skb = NULL; + if (tx_buf->is_push) { + dev_kfree_skb(skb); + i += 2; + continue; + } - if (tx_buf->is_push) { - dev_kfree_skb(skb); - j += 2; - continue; - } + dma_unmap_single(&pdev->dev, + dma_unmap_addr(tx_buf, mapping), + skb_headlen(skb), + DMA_TO_DEVICE); - dma_unmap_single(&pdev->dev, - dma_unmap_addr(tx_buf, mapping), - skb_headlen(skb), - DMA_TO_DEVICE); + last = tx_buf->nr_frags; + i += 2; + for (j = 0; j < last; j++, i++) { + int ring_idx = i & bp->tx_ring_mask; + skb_frag_t *frag = &skb_shinfo(skb)->frags[j]; - last = tx_buf->nr_frags; - j += 2; - for (k = 0; k < last; k++, j++) { - int ring_idx = j & bp->tx_ring_mask; - skb_frag_t *frag = &skb_shinfo(skb)->frags[k]; - - tx_buf = &txr->tx_buf_ring[ring_idx]; - dma_unmap_page( - &pdev->dev, - dma_unmap_addr(tx_buf, mapping), - skb_frag_size(frag), DMA_TO_DEVICE); - } - dev_kfree_skb(skb); + tx_buf = &txr->tx_buf_ring[ring_idx]; + dma_unmap_page(&pdev->dev, + dma_unmap_addr(tx_buf, mapping), + skb_frag_size(frag), DMA_TO_DEVICE); } - netdev_tx_reset_queue(netdev_get_tx_queue(bp->dev, i)); + dev_kfree_skb(skb); + } + netdev_tx_reset_queue(netdev_get_tx_queue(bp->dev, idx)); +} + +static void bnxt_free_tx_skbs(struct bnxt *bp) +{ + int i; + + if (!bp->tx_ring) + return; + + for (i = 0; i < bp->tx_nr_rings; i++) { + struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; + + if (!txr->tx_buf_ring) + continue; + + bnxt_free_one_tx_ring_skbs(bp, txr, i); } } -- 2.51.0 From e1714de53218796087d7182b429b047392d6ba94 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 12 Feb 2025 17:12:34 -0800 Subject: [PATCH 03/16] bnxt_en: Refactor RX/RX AGG ring parameters setup for P5_PLUS There is some common code for setting up RX and RX AGG ring allocation parameters for P5_PLUS chips. Refactor the logic into a new function. Reviewed-by: Hongguang Gao Reviewed-by: Ajit Khaparde Reviewed-by: Michal Swiatkowski Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250213011240.1640031-7-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 58 +++++++++++------------ 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 453f52648145..ac63d3feaa1d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6944,6 +6944,28 @@ static void bnxt_hwrm_ring_grp_free(struct bnxt *bp) hwrm_req_drop(bp, req); } +static void bnxt_set_rx_ring_params_p5(struct bnxt *bp, u32 ring_type, + struct hwrm_ring_alloc_input *req, + struct bnxt_ring_struct *ring) +{ + struct bnxt_ring_grp_info *grp_info = &bp->grp_info[ring->grp_idx]; + u32 enables = RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID; + + if (ring_type == HWRM_RING_ALLOC_AGG) { + req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG; + req->rx_ring_id = cpu_to_le16(grp_info->rx_fw_ring_id); + req->rx_buf_size = cpu_to_le16(BNXT_RX_PAGE_SIZE); + enables |= RING_ALLOC_REQ_ENABLES_RX_RING_ID_VALID; + } else { + req->rx_buf_size = cpu_to_le16(bp->rx_buf_use_size); + if (NET_IP_ALIGN == 2) + req->flags = + cpu_to_le16(RING_ALLOC_REQ_FLAGS_RX_SOP_PAD); + } + req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx); + req->enables |= cpu_to_le32(enables); +} + static int hwrm_ring_alloc_send_msg(struct bnxt *bp, struct bnxt_ring_struct *ring, u32 ring_type, u32 map_index) @@ -6995,37 +7017,13 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp, break; } case HWRM_RING_ALLOC_RX: - req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX; - req->length = cpu_to_le32(bp->rx_ring_mask + 1); - if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { - u16 flags = 0; - - /* Association of rx ring with stats context */ - grp_info = &bp->grp_info[ring->grp_idx]; - req->rx_buf_size = cpu_to_le16(bp->rx_buf_use_size); - req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx); - req->enables |= cpu_to_le32( - RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID); - if (NET_IP_ALIGN == 2) - flags = RING_ALLOC_REQ_FLAGS_RX_SOP_PAD; - req->flags = cpu_to_le16(flags); - } - break; case HWRM_RING_ALLOC_AGG: - if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { - req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG; - /* Association of agg ring with rx ring */ - grp_info = &bp->grp_info[ring->grp_idx]; - req->rx_ring_id = cpu_to_le16(grp_info->rx_fw_ring_id); - req->rx_buf_size = cpu_to_le16(BNXT_RX_PAGE_SIZE); - req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx); - req->enables |= cpu_to_le32( - RING_ALLOC_REQ_ENABLES_RX_RING_ID_VALID | - RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID); - } else { - req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX; - } - req->length = cpu_to_le32(bp->rx_agg_ring_mask + 1); + req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX; + req->length = (ring_type == HWRM_RING_ALLOC_RX) ? + cpu_to_le32(bp->rx_ring_mask + 1) : + cpu_to_le32(bp->rx_agg_ring_mask + 1); + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) + bnxt_set_rx_ring_params_p5(bp, ring_type, req, ring); break; case HWRM_RING_ALLOC_CMPL: req->ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL; -- 2.51.0 From 4c8e612c9a36721e873d7ba56019706b294fa860 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 12 Feb 2025 17:12:35 -0800 Subject: [PATCH 04/16] bnxt_en: Pass NQ ID to the FW when allocating RX/RX AGG rings Newer firmware can use the NQ ring ID associated with each RX/RX AGG ring to enable PCIe Steering Tags on P5_PLUS chips. When allocating RX/RX AGG rings, pass along NQ ring ID for the firmware to use. This information helps optimize DMA writes by directing them to the cache closer to the CPU consuming the data, potentially improving the processing speed. This change is backward-compatible with older firmware, which will simply disregard the information. Reviewed-by: Hongguang Gao Reviewed-by: Ajit Khaparde Reviewed-by: Michal Swiatkowski Signed-off-by: Andy Gospodarek Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250213011240.1640031-8-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ac63d3feaa1d..c6cf575af53f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6949,7 +6949,8 @@ static void bnxt_set_rx_ring_params_p5(struct bnxt *bp, u32 ring_type, struct bnxt_ring_struct *ring) { struct bnxt_ring_grp_info *grp_info = &bp->grp_info[ring->grp_idx]; - u32 enables = RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID; + u32 enables = RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID | + RING_ALLOC_REQ_ENABLES_NQ_RING_ID_VALID; if (ring_type == HWRM_RING_ALLOC_AGG) { req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG; @@ -6963,6 +6964,7 @@ static void bnxt_set_rx_ring_params_p5(struct bnxt *bp, u32 ring_type, cpu_to_le16(RING_ALLOC_REQ_FLAGS_RX_SOP_PAD); } req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx); + req->nq_ring_id = cpu_to_le16(grp_info->cp_fw_ring_id); req->enables |= cpu_to_le32(enables); } -- 2.51.0 From 6b6bf60fc95f908b5a3b45137436eb495af414cc Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 12 Feb 2025 17:12:36 -0800 Subject: [PATCH 05/16] bnxt_en: Reallocate RX completion ring for TPH support In order to program the correct Steering Tag during an IRQ affinity change, we need to free/re-allocate the RX completion ring during queue_restart. If TPH is enabled, call FW to free the Rx completion ring and clear the ring entries in queue_stop(). Re-allocate it in queue_start() if TPH is enabled. Note that TPH mode is not enabled in this patch and will be enabled later in the patch series. While modifying bnxt_queue_start(), remove the unnecessary zeroing of rxr->rx_next_cons. It gets overwritten by the clone in bnxt_queue_start(). Remove the rx_reset counter increment since restart is not reset. Add comment to clarify that the ring allocations in queue_start should never fail. Reviewed-by: Michal Swiatkowski Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250213011240.1640031-9-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 38 +++++++++++++++++++---- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 ++ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c6cf575af53f..04980718c287 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7426,6 +7426,19 @@ static void bnxt_hwrm_cp_ring_free(struct bnxt *bp, ring->fw_ring_id = INVALID_HW_RING_ID; } +static void bnxt_clear_one_cp_ring(struct bnxt *bp, struct bnxt_cp_ring_info *cpr) +{ + struct bnxt_ring_struct *ring = &cpr->cp_ring_struct; + int i, size = ring->ring_mem.page_size; + + cpr->cp_raw_cons = 0; + cpr->toggle = 0; + + for (i = 0; i < bp->cp_nr_pages; i++) + if (cpr->cp_desc_ring[i]) + memset(cpr->cp_desc_ring[i], 0, size); +} + static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) { u32 type; @@ -15623,7 +15636,6 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) { struct bnxt *bp = netdev_priv(dev); struct bnxt_rx_ring_info *rxr, *clone; - struct bnxt_cp_ring_info *cpr; struct bnxt_vnic_info *vnic; int i, rc; @@ -15642,20 +15654,27 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) bnxt_copy_rx_ring(bp, rxr, clone); + /* All rings have been reserved and previously allocated. + * Reallocating with the same parameters should never fail. + */ rc = bnxt_hwrm_rx_ring_alloc(bp, rxr); if (rc) return rc; + + if (bp->tph_mode) { + rc = bnxt_hwrm_cp_ring_alloc_p5(bp, rxr->rx_cpr); + if (rc) + goto err_free_hwrm_rx_ring; + } + rc = bnxt_hwrm_rx_agg_ring_alloc(bp, rxr); if (rc) - goto err_free_hwrm_rx_ring; + goto err_free_hwrm_cp_ring; bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); if (bp->flags & BNXT_FLAG_AGG_RINGS) bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); - cpr = &rxr->bnapi->cp_ring; - cpr->sw_stats->rx.rx_resets++; - for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { vnic = &bp->vnic_info[i]; @@ -15672,6 +15691,9 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) return 0; +err_free_hwrm_cp_ring: + if (bp->tph_mode) + bnxt_hwrm_cp_ring_free(bp, rxr->rx_cpr); err_free_hwrm_rx_ring: bnxt_hwrm_rx_ring_free(bp, rxr, false); return rc; @@ -15696,11 +15718,15 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) cancel_work_sync(&rxr->bnapi->cp_ring.dim.work); bnxt_hwrm_rx_ring_free(bp, rxr, false); bnxt_hwrm_rx_agg_ring_free(bp, rxr, false); - rxr->rx_next_cons = 0; page_pool_disable_direct_recycling(rxr->page_pool); if (bnxt_separate_head_pool()) page_pool_disable_direct_recycling(rxr->head_pool); + if (bp->tph_mode) { + bnxt_hwrm_cp_ring_free(bp, rxr->rx_cpr); + bnxt_clear_one_cp_ring(bp, rxr->rx_cpr); + } + memcpy(qmem, rxr, sizeof(*rxr)); bnxt_init_rx_ring_struct(bp, qmem); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index db0be469a3db..4e20878e7714 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2410,6 +2410,8 @@ struct bnxt { u8 max_q; u8 num_tc; + u8 tph_mode; + unsigned int current_interval; #define BNXT_TIMER_INTERVAL HZ -- 2.51.0 From c8a0f7652d61dba8d7b0fee0539a0546ba78deda Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 12 Feb 2025 17:12:37 -0800 Subject: [PATCH 06/16] bnxt_en: Refactor TX ring free logic Add a new bnxt_hwrm_tx_ring_free() function to handle freeing a HW transmit ring. The new function will also be used in the next patch to free the TX ring in queue_stop. Reviewed-by: Ajit Khaparde Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250213011240.1640031-10-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 33 +++++++++++++---------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 04980718c287..2d0d9ac8e2c4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7368,6 +7368,23 @@ exit: return 0; } +static void bnxt_hwrm_tx_ring_free(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, + bool close_path) +{ + struct bnxt_ring_struct *ring = &txr->tx_ring_struct; + u32 cmpl_ring_id; + + if (ring->fw_ring_id == INVALID_HW_RING_ID) + return; + + cmpl_ring_id = close_path ? bnxt_cp_ring_for_tx(bp, txr) : + INVALID_HW_RING_ID; + hwrm_ring_free_send_msg(bp, ring, RING_FREE_REQ_RING_TYPE_TX, + cmpl_ring_id); + ring->fw_ring_id = INVALID_HW_RING_ID; +} + static void bnxt_hwrm_rx_ring_free(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, bool close_path) @@ -7447,20 +7464,8 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) if (!bp->bnapi) return; - for (i = 0; i < bp->tx_nr_rings; i++) { - struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; - struct bnxt_ring_struct *ring = &txr->tx_ring_struct; - - if (ring->fw_ring_id != INVALID_HW_RING_ID) { - u32 cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr); - - hwrm_ring_free_send_msg(bp, ring, - RING_FREE_REQ_RING_TYPE_TX, - close_path ? cmpl_ring_id : - INVALID_HW_RING_ID); - ring->fw_ring_id = INVALID_HW_RING_ID; - } - } + for (i = 0; i < bp->tx_nr_rings; i++) + bnxt_hwrm_tx_ring_free(bp, &bp->tx_ring[i], close_path); bnxt_cancel_dim(bp); for (i = 0; i < bp->rx_nr_rings; i++) { -- 2.51.0 From fe96d717d38ecd8e6e8d710ebf88e82ac35a0032 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 12 Feb 2025 17:12:38 -0800 Subject: [PATCH 07/16] bnxt_en: Extend queue stop/start for TX rings In order to use queue_stop/queue_start to support the new Steering Tags, we need to free the TX ring and TX completion ring if it is a combined channel with TX/RX sharing the same NAPI. Otherwise TX completions will not have the updated Steering Tag. If TPH is not enabled, we just stop the TX ring without freeing the TX/TX cmpl rings. With that we can now add napi_disable() and napi_enable() during queue_stop()/ queue_start(). This will guarantee that NAPI will stop processing the completion entries in case there are additional pending entries in the completion rings after queue_stop(). There could be some NQEs sitting unprocessed while NAPI is disabled thereby leaving the NQ unarmed. Explicitly re-arm the NQ after napi_enable() in queue start so that NAPI will resume properly. Error handling in bnxt_queue_start() requires a reset. If a TX ring cannot be allocated or initialized properly, it will cause TX timeout. The reset will also free any partially allocated rings. We don't expect to hit this error path because re-allocating previously reserved and allocated rings with the same parameters should never fail. Reviewed-by: Ajit Khaparde Reviewed-by: Michal Swiatkowski Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250213011240.1640031-11-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 119 ++++++++++++++++++++-- 1 file changed, 110 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2d0d9ac8e2c4..1997cdbd5801 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11279,6 +11279,78 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init) return 0; } +static void bnxt_tx_queue_stop(struct bnxt *bp, int idx) +{ + struct bnxt_tx_ring_info *txr; + struct netdev_queue *txq; + struct bnxt_napi *bnapi; + int i; + + bnapi = bp->bnapi[idx]; + bnxt_for_each_napi_tx(i, bnapi, txr) { + WRITE_ONCE(txr->dev_state, BNXT_DEV_STATE_CLOSING); + synchronize_net(); + + if (!(bnapi->flags & BNXT_NAPI_FLAG_XDP)) { + txq = netdev_get_tx_queue(bp->dev, txr->txq_index); + if (txq) { + __netif_tx_lock_bh(txq); + netif_tx_stop_queue(txq); + __netif_tx_unlock_bh(txq); + } + } + + if (!bp->tph_mode) + continue; + + bnxt_hwrm_tx_ring_free(bp, txr, true); + bnxt_hwrm_cp_ring_free(bp, txr->tx_cpr); + bnxt_free_one_tx_ring_skbs(bp, txr, txr->txq_index); + bnxt_clear_one_cp_ring(bp, txr->tx_cpr); + } +} + +static int bnxt_tx_queue_start(struct bnxt *bp, int idx) +{ + struct bnxt_tx_ring_info *txr; + struct netdev_queue *txq; + struct bnxt_napi *bnapi; + int rc, i; + + bnapi = bp->bnapi[idx]; + /* All rings have been reserved and previously allocated. + * Reallocating with the same parameters should never fail. + */ + bnxt_for_each_napi_tx(i, bnapi, txr) { + if (!bp->tph_mode) + goto start_tx; + + rc = bnxt_hwrm_cp_ring_alloc_p5(bp, txr->tx_cpr); + if (rc) + return rc; + + rc = bnxt_hwrm_tx_ring_alloc(bp, txr, false); + if (rc) + return rc; + + txr->tx_prod = 0; + txr->tx_cons = 0; + txr->tx_hw_cons = 0; +start_tx: + WRITE_ONCE(txr->dev_state, 0); + synchronize_net(); + + if (bnapi->flags & BNXT_NAPI_FLAG_XDP) + continue; + + txq = netdev_get_tx_queue(bp->dev, txr->txq_index); + if (txq) + netif_tx_start_queue(txq); + } + + return 0; +} + static void bnxt_free_irq(struct bnxt *bp) { struct bnxt_irq *irq; @@ -15641,7 +15713,9 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) { struct bnxt *bp = netdev_priv(dev); struct bnxt_rx_ring_info *rxr, *clone; + struct bnxt_cp_ring_info *cpr; struct bnxt_vnic_info *vnic; + struct bnxt_napi *bnapi; int i, rc; rxr = &bp->rx_ring[idx]; @@ -15659,27 +15733,39 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) bnxt_copy_rx_ring(bp, rxr, clone); + bnapi = rxr->bnapi; + cpr = &bnapi->cp_ring; + /* All rings have been reserved and previously allocated. * Reallocating with the same parameters should never fail. */ rc = bnxt_hwrm_rx_ring_alloc(bp, rxr); if (rc) - return rc; + goto err_reset; if (bp->tph_mode) { rc = bnxt_hwrm_cp_ring_alloc_p5(bp, rxr->rx_cpr); if (rc) - goto err_free_hwrm_rx_ring; + goto err_reset; } rc = bnxt_hwrm_rx_agg_ring_alloc(bp, rxr); if (rc) - goto err_free_hwrm_cp_ring; + goto err_reset; bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); if (bp->flags & BNXT_FLAG_AGG_RINGS) bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); + if (bp->flags & BNXT_FLAG_SHARED_RINGS) { + rc = bnxt_tx_queue_start(bp, idx); + if (rc) + goto err_reset; + } + + napi_enable(&bnapi->napi); + bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); + for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { vnic = &bp->vnic_info[i]; @@ -15696,11 +15782,12 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) return 0; -err_free_hwrm_cp_ring: - if (bp->tph_mode) - bnxt_hwrm_cp_ring_free(bp, rxr->rx_cpr); -err_free_hwrm_rx_ring: - bnxt_hwrm_rx_ring_free(bp, rxr, false); +err_reset: + netdev_err(bp->dev, "Unexpected HWRM error during queue start rc: %d\n", + rc); + napi_enable(&bnapi->napi); + bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); + bnxt_reset_task(bp, true); return rc; } @@ -15708,7 +15795,9 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) { struct bnxt *bp = netdev_priv(dev); struct bnxt_rx_ring_info *rxr; + struct bnxt_cp_ring_info *cpr; struct bnxt_vnic_info *vnic; + struct bnxt_napi *bnapi; int i; for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { @@ -15720,17 +15809,29 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) /* Make sure NAPI sees that the VNIC is disabled */ synchronize_net(); rxr = &bp->rx_ring[idx]; - cancel_work_sync(&rxr->bnapi->cp_ring.dim.work); + bnapi = rxr->bnapi; + cpr = &bnapi->cp_ring; + cancel_work_sync(&cpr->dim.work); bnxt_hwrm_rx_ring_free(bp, rxr, false); bnxt_hwrm_rx_agg_ring_free(bp, rxr, false); page_pool_disable_direct_recycling(rxr->page_pool); if (bnxt_separate_head_pool()) page_pool_disable_direct_recycling(rxr->head_pool); + if (bp->flags & BNXT_FLAG_SHARED_RINGS) + bnxt_tx_queue_stop(bp, idx); + + /* Disable NAPI now after freeing the rings because HWRM_RING_FREE + * completion is handled in NAPI to guarantee no more DMA on that ring + * after seeing the completion. + */ + napi_disable(&bnapi->napi); + if (bp->tph_mode) { bnxt_hwrm_cp_ring_free(bp, rxr->rx_cpr); bnxt_clear_one_cp_ring(bp, rxr->rx_cpr); } + bnxt_db_nq(bp, &cpr->cp_db, cpr->cp_raw_cons); memcpy(qmem, rxr, sizeof(*rxr)); bnxt_init_rx_ring_struct(bp, qmem); -- 2.51.0 From c214410c47d6ec3128143370747d9e388bab21d7 Mon Sep 17 00:00:00 2001 From: Manoj Panicker Date: Wed, 12 Feb 2025 17:12:39 -0800 Subject: [PATCH 08/16] bnxt_en: Add TPH support in BNXT driver Add TPH support to the Broadcom BNXT device driver. This allows the driver to utilize TPH functions for retrieving and configuring Steering Tags when changing interrupt affinity. With compatible NIC firmware, network traffic will be tagged correctly with Steering Tags, resulting in significant memory bandwidth savings and other advantages as demonstrated by real network benchmarks on TPH-capable platforms. Co-developed-by: Somnath Kotur Signed-off-by: Somnath Kotur Co-developed-by: Wei Huang Signed-off-by: Wei Huang Signed-off-by: Manoj Panicker Reviewed-by: Ajit Khaparde Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250213011240.1640031-12-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 106 ++++++++++++++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 5 + 2 files changed, 111 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1997cdbd5801..15c57a06ecaf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -55,6 +55,8 @@ #include #include #include +#include +#include #include "bnxt_hsi.h" #include "bnxt.h" @@ -76,6 +78,7 @@ #define BNXT_DEF_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_HW | \ NETIF_MSG_TX_ERR) +MODULE_IMPORT_NS("NETDEV_INTERNAL"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Broadcom NetXtreme network driver"); @@ -11351,6 +11354,83 @@ start_tx: return 0; } +static void bnxt_irq_affinity_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct bnxt_irq *irq; + u16 tag; + int err; + + irq = container_of(notify, struct bnxt_irq, affinity_notify); + + if (!irq->bp->tph_mode) + return; + + cpumask_copy(irq->cpu_mask, mask); + + if (irq->ring_nr >= irq->bp->rx_nr_rings) + return; + + if (pcie_tph_get_cpu_st(irq->bp->pdev, TPH_MEM_TYPE_VM, + cpumask_first(irq->cpu_mask), &tag)) + return; + + if (pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, tag)) + return; + + rtnl_lock(); + if (netif_running(irq->bp->dev)) { + err = netdev_rx_queue_restart(irq->bp->dev, irq->ring_nr); + if (err) + netdev_err(irq->bp->dev, + "RX queue restart failed: err=%d\n", err); + } + rtnl_unlock(); +} + +static void bnxt_irq_affinity_release(struct kref *ref) +{ + struct irq_affinity_notify *notify = + container_of(ref, struct irq_affinity_notify, kref); + struct bnxt_irq *irq; + + irq = container_of(notify, struct bnxt_irq, affinity_notify); + + if (!irq->bp->tph_mode) + return; + + if (pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, 0)) { + netdev_err(irq->bp->dev, + "Setting ST=0 for MSIX entry %d failed\n", + irq->msix_nr); + return; + } +} + +static void bnxt_release_irq_notifier(struct bnxt_irq *irq) +{ + irq_set_affinity_notifier(irq->vector, NULL); +} + +static void bnxt_register_irq_notifier(struct bnxt *bp, struct bnxt_irq *irq) +{ + struct irq_affinity_notify *notify; + + irq->bp = bp; + + /* Nothing to do if TPH is not enabled */ + if (!bp->tph_mode) + return; + + /* Register IRQ affinity notifier */ + notify = &irq->affinity_notify; + notify->irq = irq->vector; + notify->notify = bnxt_irq_affinity_notify; + notify->release = bnxt_irq_affinity_release; + + irq_set_affinity_notifier(irq->vector, notify); +} + static void bnxt_free_irq(struct bnxt *bp) { struct bnxt_irq *irq; @@ -11373,11 +11453,18 @@ static void bnxt_free_irq(struct bnxt *bp) free_cpumask_var(irq->cpu_mask); irq->have_cpumask = 0; } + + bnxt_release_irq_notifier(irq); + free_irq(irq->vector, bp->bnapi[i]); } irq->requested = 0; } + + /* Disable TPH support */ + pcie_disable_tph(bp->pdev); + bp->tph_mode = 0; } static int bnxt_request_irq(struct bnxt *bp) @@ -11397,6 +11484,12 @@ static int bnxt_request_irq(struct bnxt *bp) #ifdef CONFIG_RFS_ACCEL rmap = bp->dev->rx_cpu_rmap; #endif + + /* Enable TPH support as part of IRQ request */ + rc = pcie_enable_tph(bp->pdev, PCI_TPH_ST_IV_MODE); + if (!rc) + bp->tph_mode = PCI_TPH_ST_IV_MODE; + for (i = 0, j = 0; i < bp->cp_nr_rings; i++) { int map_idx = bnxt_cp_num_to_irq_num(bp, i); struct bnxt_irq *irq = &bp->irq_tbl[map_idx]; @@ -11420,8 +11513,11 @@ static int bnxt_request_irq(struct bnxt *bp) if (zalloc_cpumask_var(&irq->cpu_mask, GFP_KERNEL)) { int numa_node = dev_to_node(&bp->pdev->dev); + u16 tag; irq->have_cpumask = 1; + irq->msix_nr = map_idx; + irq->ring_nr = i; cpumask_set_cpu(cpumask_local_spread(i, numa_node), irq->cpu_mask); rc = irq_update_affinity_hint(irq->vector, irq->cpu_mask); @@ -11431,6 +11527,16 @@ static int bnxt_request_irq(struct bnxt *bp) irq->vector); break; } + + bnxt_register_irq_notifier(bp, irq); + + /* Init ST table entry */ + if (pcie_tph_get_cpu_st(irq->bp->pdev, TPH_MEM_TYPE_VM, + cpumask_first(irq->cpu_mask), + &tag)) + continue; + + pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, tag); } } return rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 4e20878e7714..e85b5ce94f58 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1234,6 +1234,11 @@ struct bnxt_irq { u8 have_cpumask:1; char name[IFNAMSIZ + BNXT_IRQ_NAME_EXTRA]; cpumask_var_t cpu_mask; + + struct bnxt *bp; + int msix_nr; + int ring_nr; + struct irq_affinity_notify affinity_notify; }; #define HWRM_RING_ALLOC_TX 0x1 -- 2.51.0 From 0784d83df3bfc977c13252a0599be924f0afa68d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Feb 2025 14:07:05 +0000 Subject: [PATCH 09/16] ndisc: ndisc_send_redirect() cleanup ndisc_send_redirect() is always called under rcu_read_lock(). It can use dev_net_rcu() and avoid one redundant rcu_read_lock()/rcu_read_unlock() pair. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://patch.msgid.link/20250214140705.2105890-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ndisc.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 8699d1a188dc..ecb5c4b8518f 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1680,7 +1680,7 @@ static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb, void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) { struct net_device *dev = skb->dev; - struct net *net = dev_net(dev); + struct net *net = dev_net_rcu(dev); struct sock *sk = net->ipv6.ndisc_sk; int optlen = 0; struct inet_peer *peer; @@ -1695,8 +1695,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL; bool ret; - if (netif_is_l3_master(skb->dev)) { - dev = dev_get_by_index_rcu(dev_net(skb->dev), IPCB(skb)->iif); + if (netif_is_l3_master(dev)) { + dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); if (!dev) return; } @@ -1734,10 +1734,8 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) goto release; } - rcu_read_lock(); peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr); ret = inet_peer_xrlim_allow(peer, 1*HZ); - rcu_read_unlock(); if (!ret) goto release; -- 2.51.0 From 1dd1bf505c0916ff3ad62152b0f4da71ffe5708f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 16 Feb 2025 10:20:42 +0000 Subject: [PATCH 10/16] net: xpcs: rearrange register definitions Place register number definitions immediately above their field definitions and order by register number. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1tjblS-00448F-8v@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-xpcs.h | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h index 39d3f517b557..929fa238445e 100644 --- a/drivers/net/pcs/pcs-xpcs.h +++ b/drivers/net/pcs/pcs-xpcs.h @@ -55,23 +55,11 @@ /* Clause 37 Defines */ /* VR MII MMD registers offsets */ #define DW_VR_MII_DIG_CTRL1 0x8000 -#define DW_VR_MII_AN_CTRL 0x8001 -#define DW_VR_MII_AN_INTR_STS 0x8002 -/* EEE Mode Control Register */ -#define DW_VR_MII_EEE_MCTRL0 0x8006 -#define DW_VR_MII_EEE_MCTRL1 0x800b -#define DW_VR_MII_DIG_CTRL2 0x80e1 - -/* VR_MII_DIG_CTRL1 */ #define DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW BIT(9) #define DW_VR_MII_DIG_CTRL1_2G5_EN BIT(2) #define DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL BIT(0) -/* VR_MII_DIG_CTRL2 */ -#define DW_VR_MII_DIG_CTRL2_TX_POL_INV BIT(4) -#define DW_VR_MII_DIG_CTRL2_RX_POL_INV BIT(0) - -/* VR_MII_AN_CTRL */ +#define DW_VR_MII_AN_CTRL 0x8001 #define DW_VR_MII_AN_CTRL_8BIT BIT(8) #define DW_VR_MII_TX_CONFIG_MASK BIT(3) #define DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII 0x1 @@ -81,7 +69,7 @@ #define DW_VR_MII_PCS_MODE_C37_SGMII 0x2 #define DW_VR_MII_AN_INTR_EN BIT(0) -/* VR_MII_AN_INTR_STS */ +#define DW_VR_MII_AN_INTR_STS 0x8002 #define DW_VR_MII_AN_STS_C37_ANCMPLT_INTR BIT(0) #define DW_VR_MII_AN_STS_C37_ANSGM_FD BIT(1) #define DW_VR_MII_AN_STS_C37_ANSGM_SP GENMASK(3, 2) @@ -90,19 +78,22 @@ #define DW_VR_MII_C37_ANSGM_SP_1000 0x2 #define DW_VR_MII_C37_ANSGM_SP_LNKSTS BIT(4) -/* VR MII EEE Control 0 defines */ +#define DW_VR_MII_EEE_MCTRL0 0x8006 #define DW_VR_MII_EEE_LTX_EN BIT(0) /* LPI Tx Enable */ #define DW_VR_MII_EEE_LRX_EN BIT(1) /* LPI Rx Enable */ #define DW_VR_MII_EEE_TX_QUIET_EN BIT(2) /* Tx Quiet Enable */ #define DW_VR_MII_EEE_RX_QUIET_EN BIT(3) /* Rx Quiet Enable */ #define DW_VR_MII_EEE_TX_EN_CTRL BIT(4) /* Tx Control Enable */ #define DW_VR_MII_EEE_RX_EN_CTRL BIT(7) /* Rx Control Enable */ - #define DW_VR_MII_EEE_MULT_FACT_100NS GENMASK(11, 8) -/* VR MII EEE Control 1 defines */ +#define DW_VR_MII_EEE_MCTRL1 0x800b #define DW_VR_MII_EEE_TRN_LPI BIT(0) /* Transparent Mode Enable */ +#define DW_VR_MII_DIG_CTRL2 0x80e1 +#define DW_VR_MII_DIG_CTRL2_TX_POL_INV BIT(4) +#define DW_VR_MII_DIG_CTRL2_RX_POL_INV BIT(0) + #define DW_XPCS_INFO_DECLARE(_name, _pcs, _pma) \ static const struct dw_xpcs_info _name = { .pcs = _pcs, .pma = _pma } -- 2.51.0 From 2f435137a0484f11b47554281091ef4908f8cb31 Mon Sep 17 00:00:00 2001 From: Sky Huang Date: Thu, 13 Feb 2025 16:05:49 +0800 Subject: [PATCH 11/16] net: phy: mediatek: Change to more meaningful macros Replace magic number with more meaningful macros in mtk-ge.c. Also, move some common macros into mtk-phy-lib.c. Signed-off-by: Sky Huang Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250213080553.921434-2-SkyLake.Huang@mediatek.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mediatek/mtk-ge-soc.c | 1 - drivers/net/phy/mediatek/mtk-ge.c | 71 +++++++++++++++++++++------ drivers/net/phy/mediatek/mtk.h | 2 + 3 files changed, 57 insertions(+), 17 deletions(-) diff --git a/drivers/net/phy/mediatek/mtk-ge-soc.c b/drivers/net/phy/mediatek/mtk-ge-soc.c index bdf99b327029..69cd6a1cbad4 100644 --- a/drivers/net/phy/mediatek/mtk-ge-soc.c +++ b/drivers/net/phy/mediatek/mtk-ge-soc.c @@ -24,7 +24,6 @@ #define MTK_PHY_SMI_DET_ON_THRESH_MASK GENMASK(13, 8) #define MTK_PHY_PAGE_EXTENDED_2A30 0x2a30 -#define MTK_PHY_PAGE_EXTENDED_52B5 0x52b5 #define ANALOG_INTERNAL_OPERATION_MAX_US 20 #define TXRESERVE_MIN 0 diff --git a/drivers/net/phy/mediatek/mtk-ge.c b/drivers/net/phy/mediatek/mtk-ge.c index b517ca8573e7..75e1b0387710 100644 --- a/drivers/net/phy/mediatek/mtk-ge.c +++ b/drivers/net/phy/mediatek/mtk-ge.c @@ -8,18 +8,38 @@ #define MTK_GPHY_ID_MT7530 0x03a29412 #define MTK_GPHY_ID_MT7531 0x03a29441 -#define MTK_EXT_PAGE_ACCESS 0x1f -#define MTK_PHY_PAGE_STANDARD 0x0000 -#define MTK_PHY_PAGE_EXTENDED 0x0001 -#define MTK_PHY_PAGE_EXTENDED_2 0x0002 -#define MTK_PHY_PAGE_EXTENDED_3 0x0003 -#define MTK_PHY_PAGE_EXTENDED_2A30 0x2a30 -#define MTK_PHY_PAGE_EXTENDED_52B5 0x52b5 +#define MTK_PHY_PAGE_EXTENDED_1 0x0001 +#define MTK_PHY_AUX_CTRL_AND_STATUS 0x14 +#define MTK_PHY_ENABLE_DOWNSHIFT BIT(4) + +#define MTK_PHY_PAGE_EXTENDED_2 0x0002 +#define MTK_PHY_PAGE_EXTENDED_3 0x0003 +#define MTK_PHY_RG_LPI_PCS_DSP_CTRL_REG11 0x11 + +#define MTK_PHY_PAGE_EXTENDED_2A30 0x2a30 + +/* Registers on MDIO_MMD_VEND1 */ +#define MTK_PHY_GBE_MODE_TX_DELAY_SEL 0x13 +#define MTK_PHY_TEST_MODE_TX_DELAY_SEL 0x14 +#define MTK_TX_DELAY_PAIR_B_MASK GENMASK(10, 8) +#define MTK_TX_DELAY_PAIR_D_MASK GENMASK(2, 0) + +#define MTK_PHY_MCC_CTRL_AND_TX_POWER_CTRL 0xa6 +#define MTK_MCC_NEARECHO_OFFSET_MASK GENMASK(15, 8) + +#define MTK_PHY_RXADC_CTRL_RG7 0xc6 +#define MTK_PHY_DA_AD_BUF_BIAS_LP_MASK GENMASK(9, 8) + +#define MTK_PHY_RG_LPI_PCS_DSP_CTRL_REG123 0x123 +#define MTK_PHY_LPI_NORM_MSE_LO_THRESH100_MASK GENMASK(15, 8) +#define MTK_PHY_LPI_NORM_MSE_HI_THRESH100_MASK GENMASK(7, 0) static void mtk_gephy_config_init(struct phy_device *phydev) { /* Enable HW auto downshift */ - phy_modify_paged(phydev, MTK_PHY_PAGE_EXTENDED, 0x14, 0, BIT(4)); + phy_modify_paged(phydev, MTK_PHY_PAGE_EXTENDED_1, + MTK_PHY_AUX_CTRL_AND_STATUS, + 0, MTK_PHY_ENABLE_DOWNSHIFT); /* Increase SlvDPSready time */ phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); @@ -29,10 +49,20 @@ static void mtk_gephy_config_init(struct phy_device *phydev) phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); /* Adjust 100_mse_threshold */ - phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x123, 0xffff); - - /* Disable mcc */ - phy_write_mmd(phydev, MDIO_MMD_VEND1, 0xa6, 0x300); + phy_modify_mmd(phydev, MDIO_MMD_VEND1, + MTK_PHY_RG_LPI_PCS_DSP_CTRL_REG123, + MTK_PHY_LPI_NORM_MSE_LO_THRESH100_MASK | + MTK_PHY_LPI_NORM_MSE_HI_THRESH100_MASK, + FIELD_PREP(MTK_PHY_LPI_NORM_MSE_LO_THRESH100_MASK, + 0xff) | + FIELD_PREP(MTK_PHY_LPI_NORM_MSE_HI_THRESH100_MASK, + 0xff)); + + /* If echo time is narrower than 0x3, it will be regarded as noise */ + phy_modify_mmd(phydev, MDIO_MMD_VEND1, + MTK_PHY_MCC_CTRL_AND_TX_POWER_CTRL, + MTK_MCC_NEARECHO_OFFSET_MASK, + FIELD_PREP(MTK_MCC_NEARECHO_OFFSET_MASK, 0x3)); } static int mt7530_phy_config_init(struct phy_device *phydev) @@ -40,7 +70,8 @@ static int mt7530_phy_config_init(struct phy_device *phydev) mtk_gephy_config_init(phydev); /* Increase post_update_timer */ - phy_write_paged(phydev, MTK_PHY_PAGE_EXTENDED_3, 0x11, 0x4b); + phy_write_paged(phydev, MTK_PHY_PAGE_EXTENDED_3, + MTK_PHY_RG_LPI_PCS_DSP_CTRL_REG11, 0x4b); return 0; } @@ -51,11 +82,19 @@ static int mt7531_phy_config_init(struct phy_device *phydev) /* PHY link down power saving enable */ phy_set_bits(phydev, 0x17, BIT(4)); - phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, 0xc6, 0x300); + phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RXADC_CTRL_RG7, + MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, + FIELD_PREP(MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, 0x3)); /* Set TX Pair delay selection */ - phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x13, 0x404); - phy_write_mmd(phydev, MDIO_MMD_VEND1, 0x14, 0x404); + phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_GBE_MODE_TX_DELAY_SEL, + MTK_TX_DELAY_PAIR_B_MASK | MTK_TX_DELAY_PAIR_D_MASK, + FIELD_PREP(MTK_TX_DELAY_PAIR_B_MASK, 0x4) | + FIELD_PREP(MTK_TX_DELAY_PAIR_D_MASK, 0x4)); + phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_TEST_MODE_TX_DELAY_SEL, + MTK_TX_DELAY_PAIR_B_MASK | MTK_TX_DELAY_PAIR_D_MASK, + FIELD_PREP(MTK_TX_DELAY_PAIR_B_MASK, 0x4) | + FIELD_PREP(MTK_TX_DELAY_PAIR_D_MASK, 0x4)); return 0; } diff --git a/drivers/net/phy/mediatek/mtk.h b/drivers/net/phy/mediatek/mtk.h index 63d9fe179b8f..a888e2e1dd6b 100644 --- a/drivers/net/phy/mediatek/mtk.h +++ b/drivers/net/phy/mediatek/mtk.h @@ -9,6 +9,8 @@ #define _MTK_EPHY_H_ #define MTK_EXT_PAGE_ACCESS 0x1f +#define MTK_PHY_PAGE_STANDARD 0x0000 +#define MTK_PHY_PAGE_EXTENDED_52B5 0x52b5 /* Registers on MDIO_MMD_VEND2 */ #define MTK_PHY_LED0_ON_CTRL 0x24 -- 2.51.0 From afa08fde7c4780ea5556d9d4df6c1daa38ba0d6b Mon Sep 17 00:00:00 2001 From: Sky Huang Date: Thu, 13 Feb 2025 16:05:50 +0800 Subject: [PATCH 12/16] net: phy: mediatek: Add token ring access helper functions in mtk-phy-lib This patch adds TR(token ring) manipulations and adds correct macro names for those magic numbers. TR is a way to access proprietary registers on page 52b5. Use these helper functions so we can see which fields we're going to modify/set/clear. TR functions with __* prefix mean that the operations inside aren't wrapped by page select/restore functions. This patch doesn't really change registers' settings but just enhances readability and maintainability. Signed-off-by: Sky Huang Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250213080553.921434-3-SkyLake.Huang@mediatek.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mediatek/mtk-ge-soc.c | 231 +++++++++++++++++-------- drivers/net/phy/mediatek/mtk-ge.c | 11 +- drivers/net/phy/mediatek/mtk-phy-lib.c | 63 +++++++ drivers/net/phy/mediatek/mtk.h | 5 + 4 files changed, 230 insertions(+), 80 deletions(-) diff --git a/drivers/net/phy/mediatek/mtk-ge-soc.c b/drivers/net/phy/mediatek/mtk-ge-soc.c index 69cd6a1cbad4..5d7373793659 100644 --- a/drivers/net/phy/mediatek/mtk-ge-soc.c +++ b/drivers/net/phy/mediatek/mtk-ge-soc.c @@ -25,6 +25,90 @@ #define MTK_PHY_PAGE_EXTENDED_2A30 0x2a30 +/* Registers on Token Ring debug nodes */ +/* ch_addr = 0x0, node_addr = 0x7, data_addr = 0x15 */ +/* NormMseLoThresh */ +#define NORMAL_MSE_LO_THRESH_MASK GENMASK(15, 8) + +/* ch_addr = 0x0, node_addr = 0xf, data_addr = 0x3c */ +/* RemAckCntLimitCtrl */ +#define REMOTE_ACK_COUNT_LIMIT_CTRL_MASK GENMASK(2, 1) + +/* ch_addr = 0x1, node_addr = 0xd, data_addr = 0x20 */ +/* VcoSlicerThreshBitsHigh */ +#define VCO_SLICER_THRESH_HIGH_MASK GENMASK(23, 0) + +/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x0 */ +/* DfeTailEnableVgaThresh1000 */ +#define DFE_TAIL_EANBLE_VGA_TRHESH_1000 GENMASK(5, 1) + +/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x1 */ +/* MrvlTrFix100Kp */ +#define MRVL_TR_FIX_100KP_MASK GENMASK(22, 20) +/* MrvlTrFix100Kf */ +#define MRVL_TR_FIX_100KF_MASK GENMASK(19, 17) +/* MrvlTrFix1000Kp */ +#define MRVL_TR_FIX_1000KP_MASK GENMASK(16, 14) +/* MrvlTrFix1000Kf */ +#define MRVL_TR_FIX_1000KF_MASK GENMASK(13, 11) + +/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x12 */ +/* VgaDecRate */ +#define VGA_DECIMATION_RATE_MASK GENMASK(8, 5) + +/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x17 */ +/* SlvDSPreadyTime */ +#define SLAVE_DSP_READY_TIME_MASK GENMASK(22, 15) +/* MasDSPreadyTime */ +#define MASTER_DSP_READY_TIME_MASK GENMASK(14, 7) + +/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x20 */ +/* ResetSyncOffset */ +#define RESET_SYNC_OFFSET_MASK GENMASK(11, 8) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x0 */ +/* FfeUpdGainForceVal */ +#define FFE_UPDATE_GAIN_FORCE_VAL_MASK GENMASK(9, 7) +/* FfeUpdGainForce */ +#define FFE_UPDATE_GAIN_FORCE BIT(6) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x6 */ +/* SS: Steady-state, KP: Proportional Gain */ +/* SSTrKp100 */ +#define SS_TR_KP100_MASK GENMASK(21, 19) +/* SSTrKf100 */ +#define SS_TR_KF100_MASK GENMASK(18, 16) +/* SSTrKp1000Mas */ +#define SS_TR_KP1000_MASTER_MASK GENMASK(15, 13) +/* SSTrKf1000Mas */ +#define SS_TR_KF1000_MASTER_MASK GENMASK(12, 10) +/* SSTrKp1000Slv */ +#define SS_TR_KP1000_SLAVE_MASK GENMASK(9, 7) +/* SSTrKf1000Slv */ +#define SS_TR_KF1000_SLAVE_MASK GENMASK(6, 4) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0xd */ +/* RegEEE_st2TrKf1000 */ +#define EEE1000_STAGE2_TR_KF_MASK GENMASK(13, 11) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0xf */ +/* RegEEE_slv_waketr_timer_tar */ +#define SLAVE_WAKETR_TIMER_MASK GENMASK(20, 11) +/* RegEEE_slv_remtx_timer_tar */ +#define SLAVE_REMTX_TIMER_MASK GENMASK(10, 1) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x10 */ +/* RegEEE_slv_wake_int_timer_tar */ +#define SLAVE_WAKEINT_TIMER_MASK GENMASK(10, 1) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x14 */ +/* RegEEE_trfreeze_timer2 */ +#define TR_FREEZE_TIMER2_MASK GENMASK(9, 0) + +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x1c */ +/* RegEEE100Stg1_tar */ +#define EEE100_LPSYNC_STAGE1_UPDATE_TIMER_MASK GENMASK(8, 0) + #define ANALOG_INTERNAL_OPERATION_MAX_US 20 #define TXRESERVE_MIN 0 #define TXRESERVE_MAX 7 @@ -700,40 +784,41 @@ restore: static void mt798x_phy_common_finetune(struct phy_device *phydev) { phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - /* SlvDSPreadyTime = 24, MasDSPreadyTime = 24 */ - __phy_write(phydev, 0x11, 0xc71); - __phy_write(phydev, 0x12, 0xc); - __phy_write(phydev, 0x10, 0x8fae); + __mtk_tr_modify(phydev, 0x1, 0xf, 0x17, + SLAVE_DSP_READY_TIME_MASK | MASTER_DSP_READY_TIME_MASK, + FIELD_PREP(SLAVE_DSP_READY_TIME_MASK, 0x18) | + FIELD_PREP(MASTER_DSP_READY_TIME_MASK, 0x18)); /* EnabRandUpdTrig = 1 */ __phy_write(phydev, 0x11, 0x2f00); __phy_write(phydev, 0x12, 0xe); __phy_write(phydev, 0x10, 0x8fb0); - /* NormMseLoThresh = 85 */ - __phy_write(phydev, 0x11, 0x55a0); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x83aa); + __mtk_tr_modify(phydev, 0x0, 0x7, 0x15, + NORMAL_MSE_LO_THRESH_MASK, + FIELD_PREP(NORMAL_MSE_LO_THRESH_MASK, 0x55)); - /* FfeUpdGainForce = 1(Enable), FfeUpdGainForceVal = 4 */ - __phy_write(phydev, 0x11, 0x240); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x9680); + __mtk_tr_modify(phydev, 0x2, 0xd, 0x0, + FFE_UPDATE_GAIN_FORCE_VAL_MASK, + FIELD_PREP(FFE_UPDATE_GAIN_FORCE_VAL_MASK, 0x4) | + FFE_UPDATE_GAIN_FORCE); /* TrFreeze = 0 (mt7988 default) */ __phy_write(phydev, 0x11, 0x0); __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x9686); - /* SSTrKp100 = 5 */ - /* SSTrKf100 = 6 */ - /* SSTrKp1000Mas = 5 */ - /* SSTrKf1000Mas = 6 */ - /* SSTrKp1000Slv = 5 */ - /* SSTrKf1000Slv = 6 */ - __phy_write(phydev, 0x11, 0xbaef); - __phy_write(phydev, 0x12, 0x2e); - __phy_write(phydev, 0x10, 0x968c); + __mtk_tr_modify(phydev, 0x2, 0xd, 0x6, + SS_TR_KP100_MASK | SS_TR_KF100_MASK | + SS_TR_KP1000_MASTER_MASK | SS_TR_KF1000_MASTER_MASK | + SS_TR_KP1000_SLAVE_MASK | SS_TR_KF1000_SLAVE_MASK, + FIELD_PREP(SS_TR_KP100_MASK, 0x5) | + FIELD_PREP(SS_TR_KF100_MASK, 0x6) | + FIELD_PREP(SS_TR_KP1000_MASTER_MASK, 0x5) | + FIELD_PREP(SS_TR_KF1000_MASTER_MASK, 0x6) | + FIELD_PREP(SS_TR_KP1000_SLAVE_MASK, 0x5) | + FIELD_PREP(SS_TR_KF1000_SLAVE_MASK, 0x6)); + phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); } @@ -756,27 +841,29 @@ static void mt7981_phy_finetune(struct phy_device *phydev) } phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - /* ResetSyncOffset = 6 */ - __phy_write(phydev, 0x11, 0x600); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x8fc0); + __mtk_tr_modify(phydev, 0x1, 0xf, 0x20, + RESET_SYNC_OFFSET_MASK, + FIELD_PREP(RESET_SYNC_OFFSET_MASK, 0x6)); - /* VgaDecRate = 1 */ - __phy_write(phydev, 0x11, 0x4c2a); - __phy_write(phydev, 0x12, 0x3e); - __phy_write(phydev, 0x10, 0x8fa4); + __mtk_tr_modify(phydev, 0x1, 0xf, 0x12, + VGA_DECIMATION_RATE_MASK, + FIELD_PREP(VGA_DECIMATION_RATE_MASK, 0x1)); /* MrvlTrFix100Kp = 3, MrvlTrFix100Kf = 2, * MrvlTrFix1000Kp = 3, MrvlTrFix1000Kf = 2 */ - __phy_write(phydev, 0x11, 0xd10a); - __phy_write(phydev, 0x12, 0x34); - __phy_write(phydev, 0x10, 0x8f82); + __mtk_tr_modify(phydev, 0x1, 0xf, 0x1, + MRVL_TR_FIX_100KP_MASK | MRVL_TR_FIX_100KF_MASK | + MRVL_TR_FIX_1000KP_MASK | MRVL_TR_FIX_1000KF_MASK, + FIELD_PREP(MRVL_TR_FIX_100KP_MASK, 0x3) | + FIELD_PREP(MRVL_TR_FIX_100KF_MASK, 0x2) | + FIELD_PREP(MRVL_TR_FIX_1000KP_MASK, 0x3) | + FIELD_PREP(MRVL_TR_FIX_1000KF_MASK, 0x2)); /* VcoSlicerThreshBitsHigh */ - __phy_write(phydev, 0x11, 0x5555); - __phy_write(phydev, 0x12, 0x55); - __phy_write(phydev, 0x10, 0x8ec0); + __mtk_tr_modify(phydev, 0x1, 0xd, 0x20, + VCO_SLICER_THRESH_HIGH_MASK, + FIELD_PREP(VCO_SLICER_THRESH_HIGH_MASK, 0x555555)); phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 9 */ @@ -828,25 +915,23 @@ static void mt7988_phy_finetune(struct phy_device *phydev) phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_TX_FILTER, 0x5); phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - /* ResetSyncOffset = 5 */ - __phy_write(phydev, 0x11, 0x500); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x8fc0); + __mtk_tr_modify(phydev, 0x1, 0xf, 0x20, + RESET_SYNC_OFFSET_MASK, + FIELD_PREP(RESET_SYNC_OFFSET_MASK, 0x5)); /* VgaDecRate is 1 at default on mt7988 */ - /* MrvlTrFix100Kp = 6, MrvlTrFix100Kf = 7, - * MrvlTrFix1000Kp = 6, MrvlTrFix1000Kf = 7 - */ - __phy_write(phydev, 0x11, 0xb90a); - __phy_write(phydev, 0x12, 0x6f); - __phy_write(phydev, 0x10, 0x8f82); - - /* RemAckCntLimitCtrl = 1 */ - __phy_write(phydev, 0x11, 0xfbba); - __phy_write(phydev, 0x12, 0xc3); - __phy_write(phydev, 0x10, 0x87f8); - + __mtk_tr_modify(phydev, 0x1, 0xf, 0x1, + MRVL_TR_FIX_100KP_MASK | MRVL_TR_FIX_100KF_MASK | + MRVL_TR_FIX_1000KP_MASK | MRVL_TR_FIX_1000KF_MASK, + FIELD_PREP(MRVL_TR_FIX_100KP_MASK, 0x6) | + FIELD_PREP(MRVL_TR_FIX_100KF_MASK, 0x7) | + FIELD_PREP(MRVL_TR_FIX_1000KP_MASK, 0x6) | + FIELD_PREP(MRVL_TR_FIX_1000KF_MASK, 0x7)); + + __mtk_tr_modify(phydev, 0x0, 0xf, 0x3c, + REMOTE_ACK_COUNT_LIMIT_CTRL_MASK, + FIELD_PREP(REMOTE_ACK_COUNT_LIMIT_CTRL_MASK, 0x1)); phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 10 */ @@ -927,40 +1012,36 @@ static void mt798x_phy_eee(struct phy_device *phydev) __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x9690); - /* REG_EEE_st2TrKf1000 = 2 */ - __phy_write(phydev, 0x11, 0x114f); - __phy_write(phydev, 0x12, 0x2); - __phy_write(phydev, 0x10, 0x969a); + __mtk_tr_modify(phydev, 0x2, 0xd, 0xd, + EEE1000_STAGE2_TR_KF_MASK, + FIELD_PREP(EEE1000_STAGE2_TR_KF_MASK, 0x2)); - /* RegEEE_slv_wake_tr_timer_tar = 6, RegEEE_slv_remtx_timer_tar = 20 */ - __phy_write(phydev, 0x11, 0x3028); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x969e); + __mtk_tr_modify(phydev, 0x2, 0xd, 0xf, + SLAVE_WAKETR_TIMER_MASK | SLAVE_REMTX_TIMER_MASK, + FIELD_PREP(SLAVE_WAKETR_TIMER_MASK, 0x6) | + FIELD_PREP(SLAVE_REMTX_TIMER_MASK, 0x14)); - /* RegEEE_slv_wake_int_timer_tar = 8 */ - __phy_write(phydev, 0x11, 0x5010); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x96a0); + __mtk_tr_modify(phydev, 0x2, 0xd, 0x10, + SLAVE_WAKEINT_TIMER_MASK, + FIELD_PREP(SLAVE_WAKEINT_TIMER_MASK, 0x8)); - /* RegEEE_trfreeze_timer2 = 586 */ - __phy_write(phydev, 0x11, 0x24a); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x96a8); + __mtk_tr_modify(phydev, 0x2, 0xd, 0x14, + TR_FREEZE_TIMER2_MASK, + FIELD_PREP(TR_FREEZE_TIMER2_MASK, 0x24a)); - /* RegEEE100Stg1_tar = 16 */ - __phy_write(phydev, 0x11, 0x3210); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x96b8); + __mtk_tr_modify(phydev, 0x2, 0xd, 0x1c, + EEE100_LPSYNC_STAGE1_UPDATE_TIMER_MASK, + FIELD_PREP(EEE100_LPSYNC_STAGE1_UPDATE_TIMER_MASK, + 0x10)); /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 0 */ __phy_write(phydev, 0x11, 0x1463); __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x96ca); - /* DfeTailEnableVgaThresh1000 = 27 */ - __phy_write(phydev, 0x11, 0x36); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x8f80); + __mtk_tr_modify(phydev, 0x1, 0xf, 0x0, + DFE_TAIL_EANBLE_VGA_TRHESH_1000, + FIELD_PREP(DFE_TAIL_EANBLE_VGA_TRHESH_1000, 0x1b)); phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_3); diff --git a/drivers/net/phy/mediatek/mtk-ge.c b/drivers/net/phy/mediatek/mtk-ge.c index 75e1b0387710..a24c5ba469ba 100644 --- a/drivers/net/phy/mediatek/mtk-ge.c +++ b/drivers/net/phy/mediatek/mtk-ge.c @@ -18,6 +18,10 @@ #define MTK_PHY_PAGE_EXTENDED_2A30 0x2a30 +/* Registers on Token Ring debug nodes */ +/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x17 */ +#define SLAVE_DSP_READY_TIME_MASK GENMASK(22, 15) + /* Registers on MDIO_MMD_VEND1 */ #define MTK_PHY_GBE_MODE_TX_DELAY_SEL 0x13 #define MTK_PHY_TEST_MODE_TX_DELAY_SEL 0x14 @@ -42,11 +46,8 @@ static void mtk_gephy_config_init(struct phy_device *phydev) 0, MTK_PHY_ENABLE_DOWNSHIFT); /* Increase SlvDPSready time */ - phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - __phy_write(phydev, 0x10, 0xafae); - __phy_write(phydev, 0x12, 0x2f); - __phy_write(phydev, 0x10, 0x8fae); - phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); + mtk_tr_modify(phydev, 0x1, 0xf, 0x17, SLAVE_DSP_READY_TIME_MASK, + FIELD_PREP(SLAVE_DSP_READY_TIME_MASK, 0x5e)); /* Adjust 100_mse_threshold */ phy_modify_mmd(phydev, MDIO_MMD_VEND1, diff --git a/drivers/net/phy/mediatek/mtk-phy-lib.c b/drivers/net/phy/mediatek/mtk-phy-lib.c index 98a09d670e9c..7275e4ee2298 100644 --- a/drivers/net/phy/mediatek/mtk-phy-lib.c +++ b/drivers/net/phy/mediatek/mtk-phy-lib.c @@ -6,6 +6,69 @@ #include "mtk.h" +/* Difference between functions with mtk_tr* and __mtk_tr* prefixes is + * mtk_tr* functions: wrapped by page switching operations + * __mtk_tr* functions: no page switching operations + */ + +static void __mtk_tr_access(struct phy_device *phydev, bool read, u8 ch_addr, + u8 node_addr, u8 data_addr) +{ + u16 tr_cmd = BIT(15); /* bit 14 & 0 are reserved */ + + if (read) + tr_cmd |= BIT(13); + + tr_cmd |= (((ch_addr & 0x3) << 11) | + ((node_addr & 0xf) << 7) | + ((data_addr & 0x3f) << 1)); + dev_dbg(&phydev->mdio.dev, "tr_cmd: 0x%x\n", tr_cmd); + __phy_write(phydev, 0x10, tr_cmd); +} + +static void __mtk_tr_read(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u16 *tr_high, u16 *tr_low) +{ + __mtk_tr_access(phydev, true, ch_addr, node_addr, data_addr); + *tr_low = __phy_read(phydev, 0x11); + *tr_high = __phy_read(phydev, 0x12); + dev_dbg(&phydev->mdio.dev, "tr_high read: 0x%x, tr_low read: 0x%x\n", + *tr_high, *tr_low); +} + +static void __mtk_tr_write(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 tr_data) +{ + __phy_write(phydev, 0x11, tr_data & 0xffff); + __phy_write(phydev, 0x12, tr_data >> 16); + dev_dbg(&phydev->mdio.dev, "tr_high write: 0x%x, tr_low write: 0x%x\n", + tr_data >> 16, tr_data & 0xffff); + __mtk_tr_access(phydev, false, ch_addr, node_addr, data_addr); +} + +void __mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 mask, u32 set) +{ + u32 tr_data; + u16 tr_high; + u16 tr_low; + + __mtk_tr_read(phydev, ch_addr, node_addr, data_addr, &tr_high, &tr_low); + tr_data = (tr_high << 16) | tr_low; + tr_data = (tr_data & ~mask) | set; + __mtk_tr_write(phydev, ch_addr, node_addr, data_addr, tr_data); +} +EXPORT_SYMBOL_GPL(__mtk_tr_modify); + +void mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 mask, u32 set) +{ + phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); + __mtk_tr_modify(phydev, ch_addr, node_addr, data_addr, mask, set); + phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); +} +EXPORT_SYMBOL_GPL(mtk_tr_modify); + int mtk_phy_read_page(struct phy_device *phydev) { return __phy_read(phydev, MTK_EXT_PAGE_ACCESS); diff --git a/drivers/net/phy/mediatek/mtk.h b/drivers/net/phy/mediatek/mtk.h index a888e2e1dd6b..af44d1ad8c9e 100644 --- a/drivers/net/phy/mediatek/mtk.h +++ b/drivers/net/phy/mediatek/mtk.h @@ -68,6 +68,11 @@ struct mtk_socphy_priv { unsigned long led_state; }; +void __mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 mask, u32 set); +void mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 mask, u32 set); + int mtk_phy_read_page(struct phy_device *phydev); int mtk_phy_write_page(struct phy_device *phydev, int page); -- 2.51.0 From 40d33d6d3c90eb104c66e05cdc00db61268c93f9 Mon Sep 17 00:00:00 2001 From: Sky Huang Date: Thu, 13 Feb 2025 16:05:51 +0800 Subject: [PATCH 13/16] net: phy: mediatek: Add token ring set bit operation support Previously in mtk-ge-soc.c, we set some register bits via token ring, which were implemented in three __phy_write(). Now we can do the same thing via __mtk_tr_set_bits() helper. Signed-off-by: Sky Huang Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250213080553.921434-4-SkyLake.Huang@mediatek.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mediatek/mtk-ge-soc.c | 10 ++++++---- drivers/net/phy/mediatek/mtk-phy-lib.c | 7 +++++++ drivers/net/phy/mediatek/mtk.h | 2 ++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/mediatek/mtk-ge-soc.c b/drivers/net/phy/mediatek/mtk-ge-soc.c index 5d7373793659..37777ad104d8 100644 --- a/drivers/net/phy/mediatek/mtk-ge-soc.c +++ b/drivers/net/phy/mediatek/mtk-ge-soc.c @@ -62,6 +62,10 @@ /* MasDSPreadyTime */ #define MASTER_DSP_READY_TIME_MASK GENMASK(14, 7) +/* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x18 */ +/* EnabRandUpdTrig */ +#define ENABLE_RANDOM_UPDOWN_COUNTER_TRIGGER BIT(8) + /* ch_addr = 0x1, node_addr = 0xf, data_addr = 0x20 */ /* ResetSyncOffset */ #define RESET_SYNC_OFFSET_MASK GENMASK(11, 8) @@ -789,10 +793,8 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev) FIELD_PREP(SLAVE_DSP_READY_TIME_MASK, 0x18) | FIELD_PREP(MASTER_DSP_READY_TIME_MASK, 0x18)); - /* EnabRandUpdTrig = 1 */ - __phy_write(phydev, 0x11, 0x2f00); - __phy_write(phydev, 0x12, 0xe); - __phy_write(phydev, 0x10, 0x8fb0); + __mtk_tr_set_bits(phydev, 0x1, 0xf, 0x18, + ENABLE_RANDOM_UPDOWN_COUNTER_TRIGGER); __mtk_tr_modify(phydev, 0x0, 0x7, 0x15, NORMAL_MSE_LO_THRESH_MASK, diff --git a/drivers/net/phy/mediatek/mtk-phy-lib.c b/drivers/net/phy/mediatek/mtk-phy-lib.c index 7275e4ee2298..df8fdadcc0f4 100644 --- a/drivers/net/phy/mediatek/mtk-phy-lib.c +++ b/drivers/net/phy/mediatek/mtk-phy-lib.c @@ -69,6 +69,13 @@ void mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr, } EXPORT_SYMBOL_GPL(mtk_tr_modify); +void __mtk_tr_set_bits(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 set) +{ + __mtk_tr_modify(phydev, ch_addr, node_addr, data_addr, 0, set); +} +EXPORT_SYMBOL_GPL(__mtk_tr_set_bits); + int mtk_phy_read_page(struct phy_device *phydev) { return __phy_read(phydev, MTK_EXT_PAGE_ACCESS); diff --git a/drivers/net/phy/mediatek/mtk.h b/drivers/net/phy/mediatek/mtk.h index af44d1ad8c9e..2d8e5b934a02 100644 --- a/drivers/net/phy/mediatek/mtk.h +++ b/drivers/net/phy/mediatek/mtk.h @@ -72,6 +72,8 @@ void __mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr, u8 data_addr, u32 mask, u32 set); void mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr, u8 data_addr, u32 mask, u32 set); +void __mtk_tr_set_bits(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 set); int mtk_phy_read_page(struct phy_device *phydev); int mtk_phy_write_page(struct phy_device *phydev, int page); -- 2.51.0 From 4786eff288bcc77a5dbc2be2308f46f70e58600d Mon Sep 17 00:00:00 2001 From: Sky Huang Date: Thu, 13 Feb 2025 16:05:52 +0800 Subject: [PATCH 14/16] net: phy: mediatek: Add token ring clear bit operation support Similar to __mtk_tr_set_bits() support. Previously in mtk-ge-soc.c, we clear some register bits via token ring, which were also implemented in three __phy_write(). Now we can do the same thing via __mtk_tr_clr_bits() helper. Signed-off-by: Sky Huang Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250213080553.921434-5-SkyLake.Huang@mediatek.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mediatek/mtk-ge-soc.c | 30 +++++++++++++++----------- drivers/net/phy/mediatek/mtk-phy-lib.c | 7 ++++++ drivers/net/phy/mediatek/mtk.h | 2 ++ 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/drivers/net/phy/mediatek/mtk-ge-soc.c b/drivers/net/phy/mediatek/mtk-ge-soc.c index 37777ad104d8..9de6fbb45564 100644 --- a/drivers/net/phy/mediatek/mtk-ge-soc.c +++ b/drivers/net/phy/mediatek/mtk-ge-soc.c @@ -76,6 +76,10 @@ /* FfeUpdGainForce */ #define FFE_UPDATE_GAIN_FORCE BIT(6) +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x3 */ +/* TrFreeze */ +#define TR_FREEZE_MASK GENMASK(11, 0) + /* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x6 */ /* SS: Steady-state, KP: Proportional Gain */ /* SSTrKp100 */ @@ -91,6 +95,11 @@ /* SSTrKf1000Slv */ #define SS_TR_KF1000_SLAVE_MASK GENMASK(6, 4) +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x8 */ +/* clear this bit if wanna select from AFE */ +/* Regsigdet_sel_1000 */ +#define EEE1000_SELECT_SIGNAL_DETECTION_FROM_DFE BIT(4) + /* ch_addr = 0x2, node_addr = 0xd, data_addr = 0xd */ /* RegEEE_st2TrKf1000 */ #define EEE1000_STAGE2_TR_KF_MASK GENMASK(13, 11) @@ -113,6 +122,10 @@ /* RegEEE100Stg1_tar */ #define EEE100_LPSYNC_STAGE1_UPDATE_TIMER_MASK GENMASK(8, 0) +/* ch_addr = 0x2, node_addr = 0xd, data_addr = 0x25 */ +/* REGEEE_wake_slv_tr_wait_dfesigdet_en */ +#define WAKE_SLAVE_TR_WAIT_DFE_DETECTION_EN BIT(11) + #define ANALOG_INTERNAL_OPERATION_MAX_US 20 #define TXRESERVE_MIN 0 #define TXRESERVE_MAX 7 @@ -805,10 +818,7 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev) FIELD_PREP(FFE_UPDATE_GAIN_FORCE_VAL_MASK, 0x4) | FFE_UPDATE_GAIN_FORCE); - /* TrFreeze = 0 (mt7988 default) */ - __phy_write(phydev, 0x11, 0x0); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x9686); + __mtk_tr_clr_bits(phydev, 0x2, 0xd, 0x3, TR_FREEZE_MASK); __mtk_tr_modify(phydev, 0x2, 0xd, 0x6, SS_TR_KP100_MASK | SS_TR_KF100_MASK | @@ -1009,10 +1019,8 @@ static void mt798x_phy_eee(struct phy_device *phydev) MTK_PHY_TR_READY_SKIP_AFE_WAKEUP); phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - /* Regsigdet_sel_1000 = 0 */ - __phy_write(phydev, 0x11, 0xb); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x9690); + __mtk_tr_clr_bits(phydev, 0x2, 0xd, 0x8, + EEE1000_SELECT_SIGNAL_DETECTION_FROM_DFE); __mtk_tr_modify(phydev, 0x2, 0xd, 0xd, EEE1000_STAGE2_TR_KF_MASK, @@ -1036,10 +1044,8 @@ static void mt798x_phy_eee(struct phy_device *phydev) FIELD_PREP(EEE100_LPSYNC_STAGE1_UPDATE_TIMER_MASK, 0x10)); - /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 0 */ - __phy_write(phydev, 0x11, 0x1463); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x96ca); + __mtk_tr_clr_bits(phydev, 0x2, 0xd, 0x25, + WAKE_SLAVE_TR_WAIT_DFE_DETECTION_EN); __mtk_tr_modify(phydev, 0x1, 0xf, 0x0, DFE_TAIL_EANBLE_VGA_TRHESH_1000, diff --git a/drivers/net/phy/mediatek/mtk-phy-lib.c b/drivers/net/phy/mediatek/mtk-phy-lib.c index df8fdadcc0f4..dfd0f4e439a2 100644 --- a/drivers/net/phy/mediatek/mtk-phy-lib.c +++ b/drivers/net/phy/mediatek/mtk-phy-lib.c @@ -76,6 +76,13 @@ void __mtk_tr_set_bits(struct phy_device *phydev, u8 ch_addr, u8 node_addr, } EXPORT_SYMBOL_GPL(__mtk_tr_set_bits); +void __mtk_tr_clr_bits(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 clr) +{ + __mtk_tr_modify(phydev, ch_addr, node_addr, data_addr, clr, 0); +} +EXPORT_SYMBOL_GPL(__mtk_tr_clr_bits); + int mtk_phy_read_page(struct phy_device *phydev) { return __phy_read(phydev, MTK_EXT_PAGE_ACCESS); diff --git a/drivers/net/phy/mediatek/mtk.h b/drivers/net/phy/mediatek/mtk.h index 2d8e5b934a02..4e4468dc0234 100644 --- a/drivers/net/phy/mediatek/mtk.h +++ b/drivers/net/phy/mediatek/mtk.h @@ -74,6 +74,8 @@ void mtk_tr_modify(struct phy_device *phydev, u8 ch_addr, u8 node_addr, u8 data_addr, u32 mask, u32 set); void __mtk_tr_set_bits(struct phy_device *phydev, u8 ch_addr, u8 node_addr, u8 data_addr, u32 set); +void __mtk_tr_clr_bits(struct phy_device *phydev, u8 ch_addr, u8 node_addr, + u8 data_addr, u32 clr); int mtk_phy_read_page(struct phy_device *phydev); int mtk_phy_write_page(struct phy_device *phydev, int page); -- 2.51.0 From be378ebd6cfb8e369d4aa03a551e594d00debda5 Mon Sep 17 00:00:00 2001 From: Sky Huang Date: Thu, 13 Feb 2025 16:05:53 +0800 Subject: [PATCH 15/16] net: phy: mediatek: Move some macros to phy-lib for later use Move some macros to phy-lib because MediaTek's 2.5G built-in ethernet PHY will also use them. Signed-off-by: Sky Huang Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250213080553.921434-6-SkyLake.Huang@mediatek.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mediatek/mtk-ge.c | 4 ---- drivers/net/phy/mediatek/mtk.h | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/mediatek/mtk-ge.c b/drivers/net/phy/mediatek/mtk-ge.c index a24c5ba469ba..73d9b72f9d9e 100644 --- a/drivers/net/phy/mediatek/mtk-ge.c +++ b/drivers/net/phy/mediatek/mtk-ge.c @@ -8,10 +8,6 @@ #define MTK_GPHY_ID_MT7530 0x03a29412 #define MTK_GPHY_ID_MT7531 0x03a29441 -#define MTK_PHY_PAGE_EXTENDED_1 0x0001 -#define MTK_PHY_AUX_CTRL_AND_STATUS 0x14 -#define MTK_PHY_ENABLE_DOWNSHIFT BIT(4) - #define MTK_PHY_PAGE_EXTENDED_2 0x0002 #define MTK_PHY_PAGE_EXTENDED_3 0x0003 #define MTK_PHY_RG_LPI_PCS_DSP_CTRL_REG11 0x11 diff --git a/drivers/net/phy/mediatek/mtk.h b/drivers/net/phy/mediatek/mtk.h index 4e4468dc0234..320f76ffa81f 100644 --- a/drivers/net/phy/mediatek/mtk.h +++ b/drivers/net/phy/mediatek/mtk.h @@ -8,7 +8,11 @@ #ifndef _MTK_EPHY_H_ #define _MTK_EPHY_H_ +#define MTK_PHY_AUX_CTRL_AND_STATUS 0x14 +#define MTK_PHY_ENABLE_DOWNSHIFT BIT(4) + #define MTK_EXT_PAGE_ACCESS 0x1f +#define MTK_PHY_PAGE_EXTENDED_1 0x0001 #define MTK_PHY_PAGE_STANDARD 0x0000 #define MTK_PHY_PAGE_EXTENDED_52B5 0x52b5 -- 2.51.0 From 9dd3d5d258aceb37bdf09c8b91fa448f58ea81f0 Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Thu, 13 Feb 2025 11:46:38 +0200 Subject: [PATCH 16/16] net/mlx5: Apply rate-limiting to high temperature warning Wrap the high temperature warning in a temperature event with a call to net_ratelimit() to prevent flooding the kernel log with repeated warning messages when temperature exceeds the threshold multiple times within a short duration. Signed-off-by: Shahar Shitrit Signed-off-by: Tariq Toukan Reviewed-by: Mateusz Polchlopek Link: https://patch.msgid.link/20250213094641.226501-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/events.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index d91ea53eb394..e8beb6289d01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -165,9 +165,10 @@ static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); - mlx5_core_warn(events->dev, - "High temperature on sensors with bit set %llx %llx", - value_msb, value_lsb); + if (net_ratelimit()) + mlx5_core_warn(events->dev, + "High temperature on sensors with bit set %llx %llx", + value_msb, value_lsb); return NOTIFY_OK; } -- 2.51.0