From 561939ed44932da639ba703ffcd4d4d5ff2c7569 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn
Date: Mon, 9 Jun 2025 11:32:35 -0400
Subject: [PATCH 01/16] net: remove unused sock_enable_timestamps

This function was introduced in commit 783da70e8396 ("net: add
sock_enable_timestamps"), with one caller in rxrpc. That lone caller
was removed in commit 7903d4438b3f ("rxrpc: Don't use received skbuff
timestamps").

Signed-off-by: Willem de Bruijn
Reviewed-by: Jason Xing
Link: https://patch.msgid.link/20250609153254.3504909-1-willemdebruijn.kernel@gmail.com
Signed-off-by: Jakub Kicinski
---
 include/net/sock.h | 1 -
 net/core/sock.c    | 8 --------
 2 files changed, 9 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 92e7c1aae3cc..85e17da5c9db 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2982,7 +2982,6 @@ void sock_set_timestamp(struct sock *sk, int optname, bool valbool);
 int sock_set_timestamping(struct sock *sk, int optname,
 			  struct so_timestamping timestamping);
 
-void sock_enable_timestamps(struct sock *sk);
 #if defined(CONFIG_CGROUP_BPF)
 void bpf_skops_tx_timestamping(struct sock *sk, struct sk_buff *skb, int op);
 #else
diff --git a/net/core/sock.c b/net/core/sock.c
index 3b409bc8ef6d..502042a0d3b5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -837,14 +837,6 @@ static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
 	}
 }
 
-void sock_enable_timestamps(struct sock *sk)
-{
-	lock_sock(sk);
-	__sock_set_timestamps(sk, true, false, true);
-	release_sock(sk);
-}
-EXPORT_SYMBOL(sock_enable_timestamps);
-
 void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
 {
 	switch (optname) {
-- 
2.51.0

From 2bc64b89c4c4073ee8f9543373c64da9b6bbe5e0 Mon Sep 17 00:00:00 2001
From: Gur Stavi
Date: Mon, 9 Jun 2025 18:07:52 +0300
Subject: [PATCH 02/16] queue_api: add subqueue variant netif_subqueue_sent

Add a new function, netif_subqueue_sent, which is a wrapper for
netdev_tx_sent_queue. Drivers that use the subqueue variant macros,
netif_subqueue_xxx, identify queues by index and are not required to
obtain struct netdev_queue explicitly. Such drivers still need to call
netdev_tx_sent_queue, which is the counterpart of
netif_subqueue_completed_wake. Allowing drivers to use a subqueue
variant for this purpose improves their code consistency by always
referring to queues by index.
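For illustration, a hypothetical driver TX path could then use only the
index-based helpers (my_ndo_start_xmit, my_free_descs and the MY_*
thresholds are made-up names, not part of this patch):

	static netdev_tx_t my_ndo_start_xmit(struct sk_buff *skb,
					     struct net_device *netdev)
	{
		unsigned int idx = skb_get_queue_mapping(skb);

		/* ... post descriptors for skb on hardware queue 'idx' ... */

		/* account the bytes for BQL, by queue index */
		netif_subqueue_sent(netdev, idx, skb->len);

		/* stop the same subqueue, also by index, if the ring is low */
		netif_subqueue_maybe_stop(netdev, idx,
					  my_free_descs(netdev, idx),
					  MY_STOP_THRS, MY_START_THRS);

		return NETDEV_TX_OK;
	}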
Signed-off-by: Gur Stavi
Link: https://patch.msgid.link/909a5c92db49cad39f0954d6cb86775e6480ef4c.1749038081.git.gur.stavi@huawei.com
Signed-off-by: Jakub Kicinski
---
 include/net/netdev_queues.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index ba2eaf39089b..6e835972abd1 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -294,6 +294,15 @@ netdev_txq_completed_mb(struct netdev_queue *dev_queue,
 		netif_txq_try_stop(_txq, get_desc, start_thrs);		\
 	})
 
+static inline void netif_subqueue_sent(const struct net_device *dev,
+				       unsigned int idx, unsigned int bytes)
+{
+	struct netdev_queue *txq;
+
+	txq = netdev_get_tx_queue(dev, idx);
+	netdev_tx_sent_queue(txq, bytes);
+}
+
 #define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \
 	({								\
 		struct netdev_queue *_txq;				\
-- 
2.51.0

From eb89bc3744f35383b34b9df055622831ce24dc40 Mon Sep 17 00:00:00 2001
From: Gur Stavi
Date: Mon, 9 Jun 2025 18:07:53 +0300
Subject: [PATCH 03/16] hinic3: use netif_subqueue_sent api

Improve code consistency by using only the netif_subqueue variant APIs.

Signed-off-by: Gur Stavi
Link: https://patch.msgid.link/5fd897b75729cf078385aacd9ed40091314ea63d.1749038081.git.gur.stavi@huawei.com
Signed-off-by: Jakub Kicinski
---
 drivers/net/ethernet/huawei/hinic3/hinic3_tx.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c
index ae08257dd1d2..7b6f101da313 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c
@@ -542,8 +542,7 @@ static netdev_tx_t hinic3_send_one_skb(struct sk_buff *skb,
 		goto err_drop_pkt;
 	}
 
-	netdev_tx_sent_queue(netdev_get_tx_queue(netdev, txq->sq->q_id),
-			     skb->len);
+	netif_subqueue_sent(netdev, txq->sq->q_id, skb->len);
 	netif_subqueue_maybe_stop(netdev, tx_q->sq->q_id,
 				  hinic3_wq_free_wqebbs(&tx_q->sq->wq),
 				  tx_q->tx_stop_thrs,
-- 
2.51.0

From 48b9ce0a7c721c4c65697de8396468fae67631de Mon Sep 17 00:00:00 2001
From: Gur Stavi
Date: Mon, 9 Jun 2025 18:07:54 +0300
Subject: [PATCH 04/16] hinic3: remove tx_q name collision hack

A local tx_q variable worked around a name collision with the internal
txq variable of the netif_subqueue macros. This workaround is no longer
needed.
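For context, a minimal sketch of the pitfall the hack worked around
(simplified macro, not the real netdev_queues.h definition): if the
macro's internal local were still named txq, a caller expression that
itself mentions txq would bind to the macro's own local instead of the
caller's variable:

	#define subqueue_op(dev, idx, get_desc)				\
	({								\
		struct netdev_queue *txq;				\
									\
		txq = netdev_get_tx_queue(dev, idx);			\
		/* 'get_desc' expands here: any 'txq' inside it now	\
		 * refers to the macro-local pointer above, not to the	\
		 * driver's 'struct hinic3_txq *txq'.			\
		 */							\
		(get_desc);						\
	})

With the internal local renamed to _txq, callers can safely pass
expressions such as hinic3_wq_free_wqebbs(&txq->sq->wq) directly.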
Signed-off-by: Gur Stavi
Link: https://patch.msgid.link/6376db2a39b8d3bf2fa893f58f56246bed128d5d.1749038081.git.gur.stavi@huawei.com
Signed-off-by: Jakub Kicinski
---
 .../net/ethernet/huawei/hinic3/hinic3_tx.c    | 20 +++++++++----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c
index 7b6f101da313..3f7f73430be4 100644
--- a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c
+++ b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c
@@ -482,7 +482,6 @@ static netdev_tx_t hinic3_send_one_skb(struct sk_buff *skb,
 {
 	struct hinic3_sq_wqe_combo wqe_combo = {};
 	struct hinic3_tx_info *tx_info;
-	struct hinic3_txq *tx_q = txq;
 	u32 offload, queue_info = 0;
 	struct hinic3_sq_task task;
 	u16 wqebb_cnt, num_sge;
@@ -506,9 +505,9 @@ static netdev_tx_t hinic3_send_one_skb(struct sk_buff *skb,
 		if (likely(wqebb_cnt > txq->tx_stop_thrs))
 			txq->tx_stop_thrs = min(wqebb_cnt, txq->tx_start_thrs);
 
-		netif_subqueue_try_stop(netdev, tx_q->sq->q_id,
-					hinic3_wq_free_wqebbs(&tx_q->sq->wq),
-					tx_q->tx_start_thrs);
+		netif_subqueue_try_stop(netdev, txq->sq->q_id,
+					hinic3_wq_free_wqebbs(&txq->sq->wq),
+					txq->tx_start_thrs);
 
 		return NETDEV_TX_BUSY;
 	}
@@ -543,10 +542,10 @@ static netdev_tx_t hinic3_send_one_skb(struct sk_buff *skb,
 	}
 
 	netif_subqueue_sent(netdev, txq->sq->q_id, skb->len);
-	netif_subqueue_maybe_stop(netdev, tx_q->sq->q_id,
-				  hinic3_wq_free_wqebbs(&tx_q->sq->wq),
-				  tx_q->tx_stop_thrs,
-				  tx_q->tx_start_thrs);
+	netif_subqueue_maybe_stop(netdev, txq->sq->q_id,
+				  hinic3_wq_free_wqebbs(&txq->sq->wq),
+				  txq->tx_stop_thrs,
+				  txq->tx_start_thrs);
 
 	hinic3_prepare_sq_ctrl(&wqe_combo, queue_info, num_sge, owner);
 	hinic3_write_db(txq->sq, 0, DB_CFLAG_DP_SQ,
@@ -630,7 +629,6 @@ bool hinic3_tx_poll(struct hinic3_txq *txq, int budget)
 	struct net_device *netdev = txq->netdev;
 	u16 hw_ci, sw_ci, q_id = txq->sq->q_id;
 	struct hinic3_tx_info *tx_info;
-	struct hinic3_txq *tx_q = txq;
 	unsigned int bytes_compl = 0;
 	unsigned int pkts = 0;
 	u16 wqebb_cnt = 0;
@@ -662,8 +660,8 @@ bool hinic3_tx_poll(struct hinic3_txq *txq, int budget)
 	hinic3_wq_put_wqebbs(&txq->sq->wq, wqebb_cnt);
 
 	netif_subqueue_completed_wake(netdev, q_id, pkts, bytes_compl,
-				      hinic3_wq_free_wqebbs(&tx_q->sq->wq),
-				      tx_q->tx_start_thrs);
+				      hinic3_wq_free_wqebbs(&txq->sq->wq),
+				      txq->tx_start_thrs);
 
 	return pkts == HINIC3_TX_POLL_WEIGHT;
 }
-- 
2.51.0

From d0976b43956ee8c8bd093223df9115bfcf63dfe5 Mon Sep 17 00:00:00 2001
From: Subbaraya Sundeep
Date: Mon, 9 Jun 2025 21:21:49 +0530
Subject: [PATCH 05/16] octeontx2: Annotate mmio regions as __iomem

This patch removes unnecessary typecasts by marking the mbox_regions
array as __iomem, since it is used to store pointers to memory-mapped
I/O (MMIO) regions. It also simplifies the call to readq() in the PF
driver by removing redundant casts.
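For background, a small before/after sketch of why the casts defeat
sparse's address-space checking (illustrative, not the driver code):

	/* Before: a plain void ** forces casts that launder the
	 * __iomem address space, so sparse can no longer flag direct
	 * dereferences of device memory.
	 */
	void **regions;

	regions[i] = (void *)ioremap_wc(bar, size);	/* cast hides __iomem */
	iounmap((void __iomem *)regions[i]);		/* cast it back */

	/* After: the array itself carries the annotation; no casts,
	 * and every use stays under sparse's checks.
	 */
	void __iomem **regions;

	regions[i] = ioremap_wc(bar, size);
	iounmap(regions[i]);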
Signed-off-by: Subbaraya Sundeep
Link: https://patch.msgid.link/1749484309-3434-1-git-send-email-sbhatta@marvell.com
Signed-off-by: Jakub Kicinski
---
 drivers/net/ethernet/marvell/octeontx2/af/rvu.c      | 12 ++++++------
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c |  3 +--
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
index a8025f0486c9..43eea74bf541 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
@@ -2364,7 +2364,7 @@ static inline void rvu_afvf_mbox_up_handler(struct work_struct *work)
 	__rvu_mbox_up_handler(mwork, TYPE_AFVF);
 }
 
-static int rvu_get_mbox_regions(struct rvu *rvu, void **mbox_addr,
+static int rvu_get_mbox_regions(struct rvu *rvu, void __iomem **mbox_addr,
 				int num, int type, unsigned long *pf_bmap)
 {
 	struct rvu_hwinfo *hw = rvu->hw;
@@ -2389,7 +2389,7 @@ static int rvu_get_mbox_regions(struct rvu *rvu, void **mbox_addr,
 			bar4 = rvupf_read64(rvu, RVU_PF_VF_BAR4_ADDR);
 			bar4 += region * MBOX_SIZE;
 		}
-		mbox_addr[region] = (void *)ioremap_wc(bar4, MBOX_SIZE);
+		mbox_addr[region] = ioremap_wc(bar4, MBOX_SIZE);
 		if (!mbox_addr[region])
 			goto error;
 	}
@@ -2412,7 +2412,7 @@ static int rvu_get_mbox_regions(struct rvu *rvu, void **mbox_addr,
 					  RVU_AF_PF_BAR4_ADDR);
 			bar4 += region * MBOX_SIZE;
 		}
-		mbox_addr[region] = (void *)ioremap_wc(bar4, MBOX_SIZE);
+		mbox_addr[region] = ioremap_wc(bar4, MBOX_SIZE);
 		if (!mbox_addr[region])
 			goto error;
 	}
@@ -2420,7 +2420,7 @@ static int rvu_get_mbox_regions(struct rvu *rvu, void **mbox_addr,
 
 error:
 	while (region--)
-		iounmap((void __iomem *)mbox_addr[region]);
+		iounmap(mbox_addr[region]);
 	return -ENOMEM;
 }
 
@@ -2430,10 +2430,10 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw,
 			 void (mbox_up_handler)(struct work_struct *))
 {
 	int err = -EINVAL, i, dir, dir_up;
+	void __iomem **mbox_regions;
 	void __iomem *reg_base;
 	struct rvu_work *mwork;
 	unsigned long *pf_bmap;
-	void **mbox_regions;
 	const char *name;
 	u64 cfg;
 
@@ -2456,7 +2456,7 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw,
 
 	mutex_init(&rvu->mbox_lock);
 
-	mbox_regions = kcalloc(num, sizeof(void *), GFP_KERNEL);
+	mbox_regions = kcalloc(num, sizeof(void __iomem *), GFP_KERNEL);
 	if (!mbox_regions) {
 		err = -ENOMEM;
 		goto free_bitmap;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index db7c466fdc39..83deebc37b34 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -602,8 +602,7 @@ static int otx2_pfvf_mbox_init(struct otx2_nic *pf, int numvfs)
 		base = pci_resource_start(pf->pdev, PCI_MBOX_BAR_NUM) +
 		       MBOX_SIZE;
 	else
-		base = readq((void __iomem *)((u64)pf->reg_base +
-		       RVU_PF_VF_BAR4_ADDR));
+		base = readq(pf->reg_base + RVU_PF_VF_BAR4_ADDR);
 
 	hwbase = ioremap_wc(base, MBOX_SIZE * pf->total_vfs);
 	if (!hwbase) {
-- 
2.51.0

From c4246f4cce05f1134fb9ea82460b5690ab6710a5 Mon Sep 17 00:00:00 2001
From: Subbaraya Sundeep
Date: Mon, 9 Jun 2025 21:23:41 +0530
Subject: [PATCH 06/16] octeontx2-pf: Avoid typecasts by simplifying
 otx2_atomic64_add macro

Because otx2_atomic64_add takes a u64 pointer as its argument, all
callers have to typecast their __iomem void pointers, which in turn
causes sparse warnings. Fix those by changing the otx2_atomic64_add
argument to a void __iomem pointer.
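For readers unfamiliar with this hardware idiom: the "add" is an arm64
LSE atomic issued against an MMIO register, where the operand selects
what to read (e.g. the queue index in bits 63:32) and the returned
pre-add value is the statistic. A sketch of a typical caller after this
change (illustrative; it mirrors otx2_nix_rq_op_stats in this patch):

	u64 incr = (u64)qidx << 32;	/* queue select in the upper word */
	void __iomem *ptr;

	ptr = otx2_get_regaddr(pfvf, NIX_LF_RQ_OP_OCTS);
	stats->bytes = otx2_atomic64_add(incr, ptr);	/* no casts needed */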
Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1749484421-3607-1-git-send-email-sbhatta@marvell.com Signed-off-by: Jakub Kicinski --- .../marvell/octeontx2/nic/otx2_common.c | 17 +++++++++-------- .../marvell/octeontx2/nic/otx2_common.h | 11 ++++++++--- .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 4 ++-- .../net/ethernet/marvell/octeontx2/nic/qos_sq.c | 5 +++-- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 6f572589f1e5..713928d81b9e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -28,12 +28,12 @@ static void otx2_nix_rq_op_stats(struct queue_stats *stats, struct otx2_nic *pfvf, int qidx) { u64 incr = (u64)qidx << 32; - u64 *ptr; + void __iomem *ptr; - ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_RQ_OP_OCTS); + ptr = otx2_get_regaddr(pfvf, NIX_LF_RQ_OP_OCTS); stats->bytes = otx2_atomic64_add(incr, ptr); - ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_RQ_OP_PKTS); + ptr = otx2_get_regaddr(pfvf, NIX_LF_RQ_OP_PKTS); stats->pkts = otx2_atomic64_add(incr, ptr); } @@ -41,12 +41,12 @@ static void otx2_nix_sq_op_stats(struct queue_stats *stats, struct otx2_nic *pfvf, int qidx) { u64 incr = (u64)qidx << 32; - u64 *ptr; + void __iomem *ptr; - ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_OCTS); + ptr = otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_OCTS); stats->bytes = otx2_atomic64_add(incr, ptr); - ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_PKTS); + ptr = otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_PKTS); stats->pkts = otx2_atomic64_add(incr, ptr); } @@ -860,9 +860,10 @@ void otx2_sqb_flush(struct otx2_nic *pfvf) { int qidx, sqe_tail, sqe_head; struct otx2_snd_queue *sq; - u64 incr, *ptr, val; + void __iomem *ptr; + u64 incr, val; - ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); + ptr = otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) { sq = &pfvf->qset.sq[qidx]; if (!sq->sqb_ptrs) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index ca0e6ab12ceb..a2a7fc99695d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -730,8 +730,9 @@ static inline void otx2_write128(u64 lo, u64 hi, void __iomem *addr) ::[x0]"r"(lo), [x1]"r"(hi), [p1]"r"(addr)); } -static inline u64 otx2_atomic64_add(u64 incr, u64 *ptr) +static inline u64 otx2_atomic64_add(u64 incr, void __iomem *addr) { + u64 __iomem *ptr = addr; u64 result; __asm__ volatile(".cpu generic+lse\n" @@ -744,7 +745,11 @@ static inline u64 otx2_atomic64_add(u64 incr, u64 *ptr) #else #define otx2_write128(lo, hi, addr) writeq((hi) | (lo), addr) -#define otx2_atomic64_add(incr, ptr) ({ *ptr += incr; }) + +static inline u64 otx2_atomic64_add(u64 incr, void __iomem *addr) +{ + return 0; +} #endif static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura, @@ -794,7 +799,7 @@ static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf) /* Alloc pointer from pool/aura */ static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura) { - u64 *ptr = (__force u64 *)otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_ALLOCX(0)); + void __iomem *ptr = otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_ALLOCX(0)); u64 incr = (u64)aura | BIT_ULL(63); return otx2_atomic64_add(incr, ptr); diff --git 
a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 83deebc37b34..07da4d6dbbc9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1322,8 +1322,8 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) { struct otx2_nic *pf = data; struct otx2_snd_queue *sq; - u64 val, *ptr; - u64 qidx = 0; + void __iomem *ptr; + u64 val, qidx = 0; /* CQ */ for (qidx = 0; qidx < pf->qset.cq_cnt; qidx++) { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c index 58d572ce08ef..2872adabc830 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c @@ -151,9 +151,10 @@ static void otx2_qos_sq_free_sqbs(struct otx2_nic *pfvf, int qidx) static void otx2_qos_sqb_flush(struct otx2_nic *pfvf, int qidx) { int sqe_tail, sqe_head; - u64 incr, *ptr, val; + void __iomem *ptr; + u64 incr, val; - ptr = (__force u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); + ptr = otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); incr = (u64)qidx << 32; val = otx2_atomic64_add(incr, ptr); sqe_head = (val >> 20) & 0x3F; -- 2.51.0 From 7fc18f9476256c03755d93b1cec0d42cf64d850a Mon Sep 17 00:00:00 2001 From: Moon Yeounsu Date: Tue, 10 Jun 2025 09:01:30 +0900 Subject: [PATCH 07/16] net: dlink: enable RMON MMIO access on supported devices Enable memory-mapped I/O access to RMON statistics registers for devices known to work correctly. Currently, only the D-Link DGE-550T (`0x4000`) with PCI revision A3 (`0x0c`) is allowed. To avoid issues on other hardware, a runtime check was added to restrict MMIO usage. The `MEM_MAPPING` macro was removed in favor of runtime detection. To access RMON registers, the code `dw32(RmonStatMask, 0x0007ffff);` must also be skipped, so this patch conditionally disables it as well. Tested-on: D-Link DGE-550T Rev-A3 Signed-off-by: Moon Yeounsu Link: https://patch.msgid.link/20250610000130.49065-2-yyyynoom@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/dlink/dl2k.c | 57 ++++++++++++++++--------------- drivers/net/ethernet/dlink/dl2k.h | 2 ++ 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 038a0400c1f9..ea8361ba6cad 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -99,6 +99,13 @@ static const struct net_device_ops netdev_ops = { .ndo_tx_timeout = rio_tx_timeout, }; +static bool is_support_rmon_mmio(struct pci_dev *pdev) +{ + return pdev->vendor == PCI_VENDOR_ID_DLINK && + pdev->device == 0x4000 && + pdev->revision == 0x0c; +} + static int rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -131,18 +138,22 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent) np = netdev_priv(dev); + if (is_support_rmon_mmio(pdev)) + np->rmon_enable = true; + /* IO registers range. */ ioaddr = pci_iomap(pdev, 0, 0); if (!ioaddr) goto err_out_dev; np->eeprom_addr = ioaddr; -#ifdef MEM_MAPPING - /* MM registers range. */ - ioaddr = pci_iomap(pdev, 1, 0); - if (!ioaddr) - goto err_out_iounmap; -#endif + if (np->rmon_enable) { + /* MM registers range. 
*/
+		ioaddr = pci_iomap(pdev, 1, 0);
+		if (!ioaddr)
+			goto err_out_iounmap;
+	}
+
 	np->ioaddr = ioaddr;
 	np->chip_id = chip_idx;
 	np->pdev = pdev;
@@ -289,9 +300,8 @@ err_out_unmap_tx:
 	dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, np->tx_ring,
 			  np->tx_ring_dma);
 err_out_iounmap:
-#ifdef MEM_MAPPING
-	pci_iounmap(pdev, np->ioaddr);
-#endif
+	if (np->rmon_enable)
+		pci_iounmap(pdev, np->ioaddr);
 	pci_iounmap(pdev, np->eeprom_addr);
 err_out_dev:
 	free_netdev (dev);
@@ -578,7 +588,8 @@ static void rio_hw_init(struct net_device *dev)
 	dw8(TxDMAPollPeriod, 0xff);
 	dw8(RxDMABurstThresh, 0x30);
 	dw8(RxDMAUrgentThresh, 0x30);
-	dw32(RmonStatMask, 0x0007ffff);
+	if (!np->rmon_enable)
+		dw32(RmonStatMask, 0x0007ffff);
 
 	/* clear statistics */
 	clear_stats (dev);
@@ -1076,9 +1087,6 @@ get_stats (struct net_device *dev)
 {
 	struct netdev_private *np = netdev_priv(dev);
 	void __iomem *ioaddr = np->ioaddr;
-#ifdef MEM_MAPPING
-	int i;
-#endif
 	unsigned int stat_reg;
 	unsigned long flags;
 
@@ -1123,10 +1131,10 @@ get_stats (struct net_device *dev)
 	dr16(MacControlFramesXmtd);
 	dr16(FramesWEXDeferal);
 
-#ifdef MEM_MAPPING
-	for (i = 0x100; i <= 0x150; i += 4)
-		dr32(i);
-#endif
+	if (np->rmon_enable)
+		for (int i = 0x100; i <= 0x150; i += 4)
+			dr32(i);
+
 	dr16(TxJumboFrames);
 	dr16(RxJumboFrames);
 	dr16(TCPCheckSumErrors);
@@ -1143,9 +1151,6 @@ clear_stats (struct net_device *dev)
 {
 	struct netdev_private *np = netdev_priv(dev);
 	void __iomem *ioaddr = np->ioaddr;
-#ifdef MEM_MAPPING
-	int i;
-#endif
 
 	/* All statistics registers need to be acknowledged,
 	   else statistic overflow could cause problems */
@@ -1181,10 +1186,9 @@ clear_stats (struct net_device *dev)
 	dr16(BcstFramesXmtdOk);
 	dr16(MacControlFramesXmtd);
 	dr16(FramesWEXDeferal);
-#ifdef MEM_MAPPING
-	for (i = 0x100; i <= 0x150; i += 4)
-		dr32(i);
-#endif
+	if (np->rmon_enable)
+		for (int i = 0x100; i <= 0x150; i += 4)
+			dr32(i);
 	dr16(TxJumboFrames);
 	dr16(RxJumboFrames);
 	dr16(TCPCheckSumErrors);
@@ -1810,9 +1814,8 @@ rio_remove1 (struct pci_dev *pdev)
 				  np->rx_ring_dma);
 		dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, np->tx_ring,
 				  np->tx_ring_dma);
-#ifdef MEM_MAPPING
-		pci_iounmap(pdev, np->ioaddr);
-#endif
+		if (np->rmon_enable)
+			pci_iounmap(pdev, np->ioaddr);
 		pci_iounmap(pdev, np->eeprom_addr);
 		free_netdev (dev);
 		pci_release_regions (pdev);
diff --git a/drivers/net/ethernet/dlink/dl2k.h b/drivers/net/ethernet/dlink/dl2k.h
index ba679025e866..4788cc94639d 100644
--- a/drivers/net/ethernet/dlink/dl2k.h
+++ b/drivers/net/ethernet/dlink/dl2k.h
@@ -403,6 +403,8 @@ struct netdev_private {
 	u16 negotiate;	/* Negotiated media */
 	int phy_addr;	/* PHY addresses. */
 	u16 led_mode;	/* LED mode read from EEPROM (IP1000A only) */
+
+	bool rmon_enable;
 };
 
 /* The station address location in the EEPROM. */
-- 
2.51.0

From 689883de94dd8a43a0ea77311327effab240afda Mon Sep 17 00:00:00 2001
From: Samiullah Khawaja
Date: Mon, 9 Jun 2025 17:30:15 +0000
Subject: [PATCH 08/16] net: stop napi kthreads when THREADED napi is disabled

Once the THREADED napi is disabled, the napi kthread should also be
stopped. Keeping the kthread around after disabling THREADED napi makes
the PID of this kthread show up in the output of netlink 'napi-get' and
of ps -ef. This is discussed in the thread below:
https://lore.kernel.org/all/20250502191548.559cc416@kernel.org

The NAPI kthread should stop only if,
- There are no pending napi polls scheduled for this thread.
- There are no new napi polls scheduled for this thread while it has
  stopped.
- ____napi_schedule can correctly fall back to the softirq for napi
  polling (see the sketch below).
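For orientation, a simplified paraphrase of that fallback path (not the
exact ____napi_schedule code in net/core/dev.c; the SCHED accounting
and error paths are elided):

	/* If STATE_THREADED is set and the kthread exists, hand the
	 * poll to the kthread; otherwise fall back to the NET_RX
	 * softirq.
	 */
	if (test_bit(NAPI_STATE_THREADED, &napi->state) && napi->thread) {
		set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
		wake_up_process(napi->thread);	/* kthread polls */
	} else {
		list_add_tail(&napi->poll_list, &sd->poll_list);
		__raise_softirq_irqoff(NET_RX_SOFTIRQ);	/* softirq polls */
	}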
Since napi_schedule_prep provides mutual exclusion over the STATE_SCHED
bit, it is safe to unset STATE_THREADED when SCHED_THREADED is set or
the SCHED bit is not set. SCHED_THREADED being set means that SCHED is
already set and the kthread owns this napi.

To disable threaded napi, unset the STATE_THREADED bit safely if
SCHED_THREADED is set or SCHED is unset. Once STATE_THREADED is unset
safely, wait for the kthread to unset the SCHED_THREADED bit so it is
safe to stop the kthread.

Add a new test in nl_netdev to verify this behaviour.

Tested:
./tools/testing/selftests/net/nl_netdev.py
TAP version 13
1..6
ok 1 nl_netdev.empty_check
ok 2 nl_netdev.lo_check
ok 3 nl_netdev.page_pool_check
ok 4 nl_netdev.napi_list_check
ok 5 nl_netdev.dev_set_threaded
ok 6 nl_netdev.nsim_rxq_reset_down
# Totals: pass:6 fail:0 xfail:0 xpass:0 skip:0 error:0

Ran neper for 300 seconds while enabling and disabling threaded napi in
a loop continuously.

Signed-off-by: Samiullah Khawaja
Link: https://patch.msgid.link/20250609173015.3851695-1-skhawaja@google.com
Signed-off-by: Jakub Kicinski
---
 net/core/dev.c                           | 45 ++++++++++++++++++++++--
 tools/testing/selftests/net/nl_netdev.py | 38 ++++++++++++++++++--
 2 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index be97c440ecd5..5baa4691074f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6926,6 +6926,43 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
+static void napi_stop_kthread(struct napi_struct *napi)
+{
+	unsigned long val, new;
+
+	/* Wait until the napi STATE_THREADED is unset. */
+	while (true) {
+		val = READ_ONCE(napi->state);
+
+		/* If napi kthread own this napi or the napi is idle,
+		 * STATE_THREADED can be unset here.
+		 */
+		if ((val & NAPIF_STATE_SCHED_THREADED) ||
+		    !(val & NAPIF_STATE_SCHED)) {
+			new = val & (~NAPIF_STATE_THREADED);
+		} else {
+			msleep(20);
+			continue;
+		}
+
+		if (try_cmpxchg(&napi->state, &val, new))
+			break;
+	}
+
+	/* Once STATE_THREADED is unset, wait for SCHED_THREADED to be unset by
+	 * the kthread.
+	 */
+	while (true) {
+		if (!test_bit(NAPIF_STATE_SCHED_THREADED, &napi->state))
+			break;
+
+		msleep(20);
+	}
+
+	kthread_stop(napi->thread);
+	napi->thread = NULL;
+}
+
 int dev_set_threaded(struct net_device *dev, bool threaded)
 {
 	struct napi_struct *napi;
@@ -6961,8 +6998,12 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
 	 * softirq mode will happen in the next round of napi_schedule().
 	 * This should not cause hiccups/stalls to the live traffic.
*/ - list_for_each_entry(napi, &dev->napi_list, dev_list) - assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); + list_for_each_entry(napi, &dev->napi_list, dev_list) { + if (!threaded && napi->thread) + napi_stop_kthread(napi); + else + assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); + } return err; } diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index beaee5e4e2aa..c9109627a741 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -2,8 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 import time +from os import system from lib.py import ksft_run, ksft_exit, ksft_pr -from lib.py import ksft_eq, ksft_ge, ksft_busy_wait +from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait from lib.py import NetdevFamily, NetdevSimDev, ip @@ -34,6 +35,39 @@ def napi_list_check(nf) -> None: ksft_eq(len(napis), 100, comment=f"queue count after reset queue {q} mode {i}") +def dev_set_threaded(nf) -> None: + """ + Test that verifies various cases of napi threaded + set and unset at device level using sysfs. + """ + with NetdevSimDev(queue_count=2) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + # set threaded + system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is set for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_ne(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_ne(napi1.get('pid'), None) + + # unset threaded + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1.get('pid'), None) def nsim_rxq_reset_down(nf) -> None: """ @@ -122,7 +156,7 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, - nsim_rxq_reset_down], + dev_set_threaded, nsim_rxq_reset_down], args=(nf, )) ksft_exit() -- 2.51.0 From 265c6ff0f8c2feef5981e9a1aedf6b5b476d7492 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Jun 2025 17:00:01 -0700 Subject: [PATCH 09/16] selftests/net: packetdrill: more xfail changes Most of the packetdrill tests have not flaked once last week. Add the few which did to the XFAIL list. Acked-by: Matthieu Baerts (NGI0) Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250610000001.1970934-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/packetdrill/ksft_runner.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index ef8b25a606d8..c5b01e1bd4c7 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -39,11 +39,15 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then # xfail tests that are known flaky with dbg config, not fixable. # still run them for coverage (and expect 100% pass without dbg). 
declare -ar xfail_list=( + "tcp_blocking_blocking-connect.pkt" + "tcp_blocking_blocking-read.pkt" "tcp_eor_no-coalesce-retrans.pkt" "tcp_fast_recovery_prr-ss.*.pkt" + "tcp_sack_sack-route-refresh-ip-tos.pkt" "tcp_slow_start_slow-start-after-win-update.pkt" "tcp_timestamping.*.pkt" "tcp_user_timeout_user-timeout-probe.pkt" + "tcp_zerocopy_cl.*.pkt" "tcp_zerocopy_epoll_.*.pkt" "tcp_tcp_info_tcp-info-.*-limited.pkt" ) -- 2.51.0 From 0097c4195b1d0ca57d15979626c769c74747b5a0 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 9 Jun 2025 22:28:40 +0200 Subject: [PATCH 10/16] net: airoha: Add PPPoE offload support Introduce flowtable hw acceleration for PPPoE traffic. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250609-b4-airoha-flowtable-pppoe-v1-1-1520fa7711b4@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_ppe.c | 31 ++++++++++++++++++------ 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 9067d2fc7706..50d816344b1f 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -232,6 +232,7 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, FIELD_PREP(AIROHA_FOE_IB1_BIND_UDP, l4proto == IPPROTO_UDP) | FIELD_PREP(AIROHA_FOE_IB1_BIND_VLAN_LAYER, data->vlan.num) | FIELD_PREP(AIROHA_FOE_IB1_BIND_VPM, data->vlan.num) | + FIELD_PREP(AIROHA_FOE_IB1_BIND_PPPOE, data->pppoe.num) | AIROHA_FOE_IB1_BIND_TTL; hwe->ib1 = val; @@ -281,33 +282,42 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, hwe->ipv6.data = qdata; hwe->ipv6.ib2 = val; l2 = &hwe->ipv6.l2; + l2->etype = ETH_P_IPV6; } else { hwe->ipv4.data = qdata; hwe->ipv4.ib2 = val; l2 = &hwe->ipv4.l2.common; + l2->etype = ETH_P_IP; } l2->dest_mac_hi = get_unaligned_be32(data->eth.h_dest); l2->dest_mac_lo = get_unaligned_be16(data->eth.h_dest + 4); if (type <= PPE_PKT_TYPE_IPV4_DSLITE) { + struct airoha_foe_mac_info *mac_info; + l2->src_mac_hi = get_unaligned_be32(data->eth.h_source); hwe->ipv4.l2.src_mac_lo = get_unaligned_be16(data->eth.h_source + 4); + + mac_info = (struct airoha_foe_mac_info *)l2; + mac_info->pppoe_id = data->pppoe.sid; } else { - l2->src_mac_hi = FIELD_PREP(AIROHA_FOE_MAC_SMAC_ID, smac_id); + l2->src_mac_hi = FIELD_PREP(AIROHA_FOE_MAC_SMAC_ID, smac_id) | + FIELD_PREP(AIROHA_FOE_MAC_PPPOE_ID, + data->pppoe.sid); } if (data->vlan.num) { - l2->etype = dsa_port >= 0 ? BIT(dsa_port) : 0; l2->vlan1 = data->vlan.hdr[0].id; if (data->vlan.num == 2) l2->vlan2 = data->vlan.hdr[1].id; - } else if (dsa_port >= 0) { - l2->etype = BIT(15) | BIT(dsa_port); - } else if (type >= PPE_PKT_TYPE_IPV6_ROUTE_3T) { - l2->etype = ETH_P_IPV6; - } else { - l2->etype = ETH_P_IP; + } + + if (dsa_port >= 0) { + l2->etype = BIT(dsa_port); + l2->etype |= !data->vlan.num ? 
BIT(15) : 0; + } else if (data->pppoe.num) { + l2->etype = ETH_P_PPP_SES; } return 0; @@ -957,6 +967,11 @@ static int airoha_ppe_flow_offload_replace(struct airoha_gdm_port *port, case FLOW_ACTION_VLAN_POP: break; case FLOW_ACTION_PPPOE_PUSH: + if (data.pppoe.num == 1 || data.vlan.num == 2) + return -EOPNOTSUPP; + + data.pppoe.sid = act->pppoe.sid; + data.pppoe.num++; break; default: return -EOPNOTSUPP; -- 2.51.0 From fe4d9e8394ff04af47cc3d040e03496d94916504 Mon Sep 17 00:00:00 2001 From: Faizal Rahim Date: Mon, 19 May 2025 03:19:05 -0400 Subject: [PATCH 11/16] igc: move TXDCTL and RXDCTL related macros Move and consolidate TXDCTL and RXDCTL macros in preparation for upcoming TXDCTL changes. This improves organization and readability. Reviewed-by: Simon Horman Signed-off-by: Faizal Rahim Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 11 ++++++++++- drivers/net/ethernet/intel/igc/igc_base.h | 8 -------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 859a15e4ccba..25695eada563 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -487,10 +487,19 @@ static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc) */ #define IGC_RX_PTHRESH 8 #define IGC_RX_HTHRESH 8 +#define IGC_RX_WTHRESH 4 +/* Ena specific Rx Queue */ +#define IGC_RXDCTL_QUEUE_ENABLE 0x02000000 +/* Receive Software Flush */ +#define IGC_RXDCTL_SWFLUSH 0x04000000 + #define IGC_TX_PTHRESH 8 #define IGC_TX_HTHRESH 1 -#define IGC_RX_WTHRESH 4 #define IGC_TX_WTHRESH 16 +/* Ena specific Tx Queue */ +#define IGC_TXDCTL_QUEUE_ENABLE 0x02000000 +/* Transmit Software Flush */ +#define IGC_TXDCTL_SWFLUSH 0x04000000 #define IGC_RX_DMA_ATTR \ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h index 6320eabb72fe..eaf17cd031c3 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.h +++ b/drivers/net/ethernet/intel/igc/igc_base.h @@ -86,14 +86,6 @@ union igc_adv_rx_desc { } wb; /* writeback */ }; -/* Additional Transmit Descriptor Control definitions */ -#define IGC_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */ -#define IGC_TXDCTL_SWFLUSH 0x04000000 /* Transmit Software Flush */ - -/* Additional Receive Descriptor Control definitions */ -#define IGC_RXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Rx Queue */ -#define IGC_RXDCTL_SWFLUSH 0x04000000 /* Receive Software Flush */ - /* SRRCTL bit definitions */ #define IGC_SRRCTL_BSIZEPKT_MASK GENMASK(6, 0) #define IGC_SRRCTL_BSIZEPKT(x) FIELD_PREP(IGC_SRRCTL_BSIZEPKT_MASK, \ -- 2.51.0 From 4cdb4ef8a9ff10b5ee829549561296b117a72bb1 Mon Sep 17 00:00:00 2001 From: Faizal Rahim Date: Mon, 19 May 2025 03:19:06 -0400 Subject: [PATCH 12/16] igc: add DCTL prefix to related macros Rename macros to use the DCTL prefix for consistency with existing macros that reference the same register. This prepares for an upcoming patch that adds new fields to TXDCTL. 
Reviewed-by: Simon Horman
Signed-off-by: Faizal Rahim
Tested-by: Mor Bar-Gabay
Signed-off-by: Tony Nguyen
---
 drivers/net/ethernet/intel/igc/igc.h      | 12 ++++++------
 drivers/net/ethernet/intel/igc/igc_main.c | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 25695eada563..db1e2db1619e 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -485,17 +485,17 @@ static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
  * descriptors until either it has this many to write back, or the
  * ITR timer expires.
  */
-#define IGC_RX_PTHRESH	8
-#define IGC_RX_HTHRESH	8
-#define IGC_RX_WTHRESH	4
+#define IGC_RXDCTL_PTHRESH	8
+#define IGC_RXDCTL_HTHRESH	8
+#define IGC_RXDCTL_WTHRESH	4
 /* Ena specific Rx Queue */
 #define IGC_RXDCTL_QUEUE_ENABLE	0x02000000
 /* Receive Software Flush */
 #define IGC_RXDCTL_SWFLUSH	0x04000000
 
-#define IGC_TX_PTHRESH	8
-#define IGC_TX_HTHRESH	1
-#define IGC_TX_WTHRESH	16
+#define IGC_TXDCTL_PTHRESH	8
+#define IGC_TXDCTL_HTHRESH	1
+#define IGC_TXDCTL_WTHRESH	16
 /* Ena specific Tx Queue */
 #define IGC_TXDCTL_QUEUE_ENABLE	0x02000000
 /* Transmit Software Flush */
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 27575a1e1777..4f1a8bc006c6 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -683,9 +683,9 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter,
 
 	wr32(IGC_SRRCTL(reg_idx), srrctl);
 
-	rxdctl |= IGC_RX_PTHRESH;
-	rxdctl |= IGC_RX_HTHRESH << 8;
-	rxdctl |= IGC_RX_WTHRESH << 16;
+	rxdctl |= IGC_RXDCTL_PTHRESH;
+	rxdctl |= IGC_RXDCTL_HTHRESH << 8;
+	rxdctl |= IGC_RXDCTL_WTHRESH << 16;
 
 	/* initialize rx_buffer_info */
 	memset(ring->rx_buffer_info, 0,
@@ -749,9 +749,9 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter,
 	wr32(IGC_TDH(reg_idx), 0);
 	writel(0, ring->tail);
 
-	txdctl |= IGC_TX_PTHRESH;
-	txdctl |= IGC_TX_HTHRESH << 8;
-	txdctl |= IGC_TX_WTHRESH << 16;
+	txdctl |= IGC_TXDCTL_PTHRESH;
+	txdctl |= IGC_TXDCTL_HTHRESH << 8;
+	txdctl |= IGC_TXDCTL_WTHRESH << 16;
 
 	txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
 	wr32(IGC_TXDCTL(reg_idx), txdctl);
-- 
2.51.0

From e35ba6d3c6c3464cf12da6fd0b6380c90af81d27 Mon Sep 17 00:00:00 2001
From: Faizal Rahim
Date: Mon, 19 May 2025 03:19:07 -0400
Subject: [PATCH 13/16] igc: refactor TXDCTL macros to use FIELD_PREP and
 GENMASK

Refactor TXDCTL macro handling to use the FIELD_PREP and GENMASK
macros. This prepares the code for adding a new TXDCTL priority field
in an upcoming patch.

Verified that the macro values remain unchanged before and after the
refactoring.
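A quick way to see that the refactor is value-preserving (a sketch for
illustration only; the driver does not carry these helpers):

	/* Old encoding: bare constants shifted at the call site. */
	static u32 txdctl_old(void)
	{
		return 8 | (1 << 8) | (16 << 16) | 0x02000000;
	}

	/* New encoding: GENMASK-defined fields filled via FIELD_PREP. */
	static u32 txdctl_new(void)
	{
		return IGC_TXDCTL_PTHRESH(8) | IGC_TXDCTL_HTHRESH(1) |
		       IGC_TXDCTL_WTHRESH(16) | IGC_TXDCTL_QUEUE_ENABLE;
	}

	/* Both return 0x02100108: PTHRESH in bits 4:0, HTHRESH in bits
	 * 12:8, WTHRESH in bits 20:16, queue enable in bit 25.
	 */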
Reviewed-by: Simon Horman Signed-off-by: Faizal Rahim Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 15 ++++++++++----- drivers/net/ethernet/intel/igc/igc_main.c | 6 ++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index db1e2db1619e..daab06fc3f80 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -493,13 +493,18 @@ static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc) /* Receive Software Flush */ #define IGC_RXDCTL_SWFLUSH 0x04000000 -#define IGC_TXDCTL_PTHRESH 8 -#define IGC_TXDCTL_HTHRESH 1 -#define IGC_TXDCTL_WTHRESH 16 +#define IGC_TXDCTL_PTHRESH_MASK GENMASK(4, 0) +#define IGC_TXDCTL_HTHRESH_MASK GENMASK(12, 8) +#define IGC_TXDCTL_WTHRESH_MASK GENMASK(20, 16) +#define IGC_TXDCTL_QUEUE_ENABLE_MASK GENMASK(25, 25) +#define IGC_TXDCTL_SWFLUSH_MASK GENMASK(26, 26) +#define IGC_TXDCTL_PTHRESH(x) FIELD_PREP(IGC_TXDCTL_PTHRESH_MASK, (x)) +#define IGC_TXDCTL_HTHRESH(x) FIELD_PREP(IGC_TXDCTL_HTHRESH_MASK, (x)) +#define IGC_TXDCTL_WTHRESH(x) FIELD_PREP(IGC_TXDCTL_WTHRESH_MASK, (x)) /* Ena specific Tx Queue */ -#define IGC_TXDCTL_QUEUE_ENABLE 0x02000000 +#define IGC_TXDCTL_QUEUE_ENABLE FIELD_PREP(IGC_TXDCTL_QUEUE_ENABLE_MASK, 1) /* Transmit Software Flush */ -#define IGC_TXDCTL_SWFLUSH 0x04000000 +#define IGC_TXDCTL_SWFLUSH FIELD_PREP(IGC_TXDCTL_SWFLUSH_MASK, 1) #define IGC_RX_DMA_ATTR \ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 4f1a8bc006c6..f3a312c9413b 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -749,11 +749,9 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter, wr32(IGC_TDH(reg_idx), 0); writel(0, ring->tail); - txdctl |= IGC_TXDCTL_PTHRESH; - txdctl |= IGC_TXDCTL_HTHRESH << 8; - txdctl |= IGC_TXDCTL_WTHRESH << 16; + txdctl |= IGC_TXDCTL_PTHRESH(8) | IGC_TXDCTL_HTHRESH(1) | + IGC_TXDCTL_WTHRESH(16) | IGC_TXDCTL_QUEUE_ENABLE; - txdctl |= IGC_TXDCTL_QUEUE_ENABLE; wr32(IGC_TXDCTL(reg_idx), txdctl); } -- 2.51.0 From 650a2fe79538bd61d294b7041ed700316f025a32 Mon Sep 17 00:00:00 2001 From: Faizal Rahim Date: Mon, 19 May 2025 03:19:08 -0400 Subject: [PATCH 14/16] igc: assign highest TX queue number as highest priority in mqprio Previously, TX arbitration prioritized queues based on the TC they were mapped to. A queue mapped to TC 3 had higher priority than one mapped to TC 0. To improve code reuse for upcoming patches and align with typical NIC behavior, this patch updates the logic to prioritize higher queue numbers when mqprio is used. As a result, queue 0 becomes the lowest priority and queue 3 becomes the highest. This patch also introduces igc_tsn_is_tc_to_queue_priority_ordered() to preserve the original TC-based priority rule and reject configurations where a higher TC maps to a lower queue offset. 
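For example, with four TCs and one queue per TC (hypothetical offset
layouts, mirroring the check in
igc_tsn_is_tc_to_queue_priority_ordered()):

	/* offset[] = { 0, 1, 2, 3 };  tc0->q0 ... tc3->q3
	 *	accepted: offsets increase with the TC
	 *
	 * offset[] = { 3, 2, 1, 0 };  tc0->q3 ... tc3->q0
	 *	rejected with -EOPNOTSUPP: a higher TC would map to a
	 *	lower queue offset
	 */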
Reviewed-by: Simon Horman Signed-off-by: Faizal Rahim Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 20 +++++++++++++ drivers/net/ethernet/intel/igc/igc_tsn.c | 35 ++++++++++++++--------- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index f3a312c9413b..1322a2db6dba 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6724,6 +6724,20 @@ static void igc_save_mqprio_params(struct igc_adapter *adapter, u8 num_tc, adapter->queue_per_tc[i] = offset[i]; } +static bool +igc_tsn_is_tc_to_queue_priority_ordered(struct tc_mqprio_qopt_offload *mqprio) +{ + int num_tc = mqprio->qopt.num_tc; + int i; + + for (i = 1; i < num_tc; i++) { + if (mqprio->qopt.offset[i - 1] > mqprio->qopt.offset[i]) + return false; + } + + return true; +} + static int igc_tsn_enable_mqprio(struct igc_adapter *adapter, struct tc_mqprio_qopt_offload *mqprio) { @@ -6756,6 +6770,12 @@ static int igc_tsn_enable_mqprio(struct igc_adapter *adapter, } } + if (!igc_tsn_is_tc_to_queue_priority_ordered(mqprio)) { + NL_SET_ERR_MSG_MOD(mqprio->extack, + "tc to queue mapping must preserve increasing priority (higher tc -> higher queue)"); + return -EOPNOTSUPP; + } + /* Preemption is not supported yet. */ if (mqprio->preemptible_tcs) { NL_SET_ERR_MSG_MOD(mqprio->extack, diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index f22cc4d4f459..78a4a9cf5f96 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -13,6 +13,13 @@ #define TX_MAX_FRAG_SIZE (TX_MIN_FRAG_SIZE * \ (MAX_MULTPLIER_TX_MIN_FRAG + 1)) +enum tx_queue { + TX_QUEUE_0 = 0, + TX_QUEUE_1, + TX_QUEUE_2, + TX_QUEUE_3, +}; + DEFINE_STATIC_KEY_FALSE(igc_fpe_enabled); static int igc_fpe_init_smd_frame(struct igc_ring *ring, @@ -238,7 +245,7 @@ bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter) adapter->taprio_offload_enable; } -static void igc_tsn_tx_arb(struct igc_adapter *adapter, u16 *queue_per_tc) +static void igc_tsn_tx_arb(struct igc_adapter *adapter, bool reverse_prio) { struct igc_hw *hw = &adapter->hw; u32 txarb; @@ -250,10 +257,17 @@ static void igc_tsn_tx_arb(struct igc_adapter *adapter, u16 *queue_per_tc) IGC_TXARB_TXQ_PRIO_2_MASK | IGC_TXARB_TXQ_PRIO_3_MASK); - txarb |= IGC_TXARB_TXQ_PRIO_0(queue_per_tc[3]); - txarb |= IGC_TXARB_TXQ_PRIO_1(queue_per_tc[2]); - txarb |= IGC_TXARB_TXQ_PRIO_2(queue_per_tc[1]); - txarb |= IGC_TXARB_TXQ_PRIO_3(queue_per_tc[0]); + if (reverse_prio) { + txarb |= IGC_TXARB_TXQ_PRIO_0(TX_QUEUE_3); + txarb |= IGC_TXARB_TXQ_PRIO_1(TX_QUEUE_2); + txarb |= IGC_TXARB_TXQ_PRIO_2(TX_QUEUE_1); + txarb |= IGC_TXARB_TXQ_PRIO_3(TX_QUEUE_0); + } else { + txarb |= IGC_TXARB_TXQ_PRIO_0(TX_QUEUE_0); + txarb |= IGC_TXARB_TXQ_PRIO_1(TX_QUEUE_1); + txarb |= IGC_TXARB_TXQ_PRIO_2(TX_QUEUE_2); + txarb |= IGC_TXARB_TXQ_PRIO_3(TX_QUEUE_3); + } wr32(IGC_TXARB, txarb); } @@ -286,7 +300,6 @@ static void igc_tsn_set_rxpbsize(struct igc_adapter *adapter, */ static int igc_tsn_disable_offload(struct igc_adapter *adapter) { - u16 queue_per_tc[4] = { 3, 2, 1, 0 }; struct igc_hw *hw = &adapter->hw; u32 tqavctrl; int i; @@ -319,7 +332,7 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) /* Restore the default Tx arbitration: Priority 0 has the highest * priority and is assigned to queue 0 and so on and so forth. 
*/ - igc_tsn_tx_arb(adapter, queue_per_tc); + igc_tsn_tx_arb(adapter, false); adapter->flags &= ~IGC_FLAG_TSN_QBV_ENABLED; @@ -385,12 +398,8 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) if (igc_is_device_id_i226(hw)) igc_tsn_set_retx_qbvfullthreshold(adapter); - if (adapter->strict_priority_enable) { - /* Configure queue priorities according to the user provided - * mapping. - */ - igc_tsn_tx_arb(adapter, adapter->queue_per_tc); - } + if (adapter->strict_priority_enable) + igc_tsn_tx_arb(adapter, true); for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; -- 2.51.0 From e395f6a690d8e490049e87a777c7fd5e7f6f8c0e Mon Sep 17 00:00:00 2001 From: Faizal Rahim Date: Mon, 19 May 2025 03:19:09 -0400 Subject: [PATCH 15/16] igc: add private flag to reverse TX queue priority in TSN mode By default, igc assigns TX hw queue 0 the highest priority and queue 3 the lowest. This is opposite of most NICs, where TX hw queue 3 has the highest priority and queue 0 the lowest. mqprio in igc already uses TX arbitration unconditionally to reverse TX queue priority when mqprio is enabled. The TX arbitration logic does not require a private flag, because mqprio was added recently and no known users depend on the default queue ordering, which differs from the typical convention. taprio does not use TX arbitration, so it inherits the default igc TX queue priority order. This causes tc command inconsistencies when configuring frame preemption with taprio compared to mqprio in igc. Other tc command inconsistencies and configuration issues already exist when using taprio on igc compared to other network controllers. These issues are described in a later section. To harmonize TX queue priority behavior between taprio and mqprio, and to fix these issues without breaking long-standing taprio use cases, this patch adds a new private flag, called reverse-tsn-txq-prio, to reverse the TX queue priority. It makes queue 3 the highest and queue 0 the lowest, reusing the TX arbitration logic already used by mqprio. Users must set the private flag when enabling frame preemption with taprio to follow the standard convention. Doing so promotes adoption of the correct priority model for new features while preserving compatibility with legacy configurations. This new private flag addresses: 1. Non-standard socket -> tc -> TX hw queue mapping for taprio in igc Without the private flag: - taprio maps (socket -> tc -> TX hardware queue) differently on igc compared to other network controllers - On igc, mqprio maps tc differently from taprio, since mqprio already uses TX arbitration The following examples compare taprio configuration on igc and other network controllers: a) On other NICs (TX hw queue 3 is highest priority): taprio num_tc 4 map 0 1 2 3 .... \ queues 1@0 1@1 1@2 1@3 Mapping translates to: socket 0 -> tc 0 -> queue 0 socket 3 -> tc 3 -> queue 3 This is the normal mapping that respects the standard convention: higher socket number -> higher tc -> higher priority TX hw queue b) On igc (TX hw queue 0 is highest priority by default): taprio num_tc 4 map 3 2 1 0 .... \ queues 1@0 1@1 1@2 1@3 Mapping translates to: socket 0 -> tc 3 -> queue 3 socket 3 -> tc 0 -> queue 0 This igc tc mapping example is based on Intel's TSN validation test case, where a higher socket priority maps to a higher priority queue. 
It respects the mapping: higher socket number -> higher priority TX hw queue but breaks the expected ordering: higher tc -> higher priority TX hw queue as defined in [Ref1]. This custom mapping complicates common taprio setup across NICs. 2. Non-standard frame preemption mapping for taprio in igc Without the private flag: - Compared to other network controllers, taprio on igc must flip the expected fp sequence, since express traffic is expected to map to the highest priority queue and preemptible traffic to lower ones - On igc, frame preemption configuration for mqprio differs from taprio, since mqprio already uses TX arbitration The following examples compare taprio frame preemption configuration on igc and other network controllers: a) On other NICs (TX hw queue 3 is highest priority): taprio num_tc 4 map ..... \ queues 1@0 1@1 1@2 1@3 \ fp P P P E Mapping translates to: tc0, tc1, tc2 -> preemptible -> queue 0, 1, 2 tc3 -> express -> queue 3 This is the normal mapping that respects the standard convention: higher tc -> express traffic -> higher priority TX hw queue lower tc -> preemptible traffic -> lower priority TX hw queue b) On igc (TX hw queue 0 is highest priority by default): taprio num_tc 4 map ...... \ queues 1@0 1@1 1@2 1@3 \ fp E P P P Mapping translates to: tc0 -> express -> queue 0 tc1, tc2, tc3 -> preemptible -> queue 1, 2, 3 This inversion respects the mapping of: express traffic -> higher priority TX hw queue but breaks the expected ordering: higher tc -> express traffic as defined in [Ref1] where higher tc indicates higher priority. In this case, the lower tc0 is assigned to express traffic. This custom mapping further complicates common preemption setup across NICs. Tests were performed on taprio with the following combinations, where two apps send traffic simultaneously on different queues: Private Flag Traffic Sent By Traffic Sent By ---------------------------------------------------------------- enabled iperf3 (queue 3) iperf3 (queue 0) disabled iperf3 (queue 0) iperf3 (queue 3) enabled iperf3 (queue 3) real-time app (queue 0) disabled iperf3 (queue 0) real-time app (queue 3) enabled real-time app (queue 3) iperf3 (queue 0) disabled real-time app (queue 0) iperf3 (queue 3) enabled real-time app (queue 3) real-time app (queue 0) disabled real-time app (queue 0) real-time app (queue 3) Private flag is controlled with: ethtool --set-priv-flags enp1s0 reverse-tsn-txq-prio [Ref1] IEEE 802.1Q clause 8.6.8 Transmission selection: "For a given Port and traffic class, frames are selected from the corresponding queue for transmission if and only if: ... b) For each queue corresponding to a numerically higher value of traffic class supported by the Port, the operation of the transmission selection algorithm supported by that queue determines that there is no frame available for transmission." 
Reviewed-by: Simon Horman Signed-off-by: Faizal Rahim Tested-by: Mor Bar-Gabay Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_ethtool.c | 12 ++++++++++-- drivers/net/ethernet/intel/igc/igc_main.c | 3 ++- drivers/net/ethernet/intel/igc/igc_tsn.c | 3 ++- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index daab06fc3f80..023ff8a5b285 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -395,6 +395,7 @@ extern char igc_driver_name[]; #define IGC_FLAG_TSN_QBV_ENABLED BIT(17) #define IGC_FLAG_TSN_QAV_ENABLED BIT(18) #define IGC_FLAG_TSN_PREEMPT_ENABLED BIT(19) +#define IGC_FLAG_TSN_REVERSE_TXQ_PRIO BIT(20) #define IGC_FLAG_TSN_ANY_ENABLED \ (IGC_FLAG_TSN_QBV_ENABLED | IGC_FLAG_TSN_QAV_ENABLED | \ diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 3fc1eded9605..054b7390cb4b 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -122,9 +122,11 @@ static const char igc_gstrings_test[][ETH_GSTRING_LEN] = { #define IGC_STATS_LEN \ (IGC_GLOBAL_STATS_LEN + IGC_NETDEV_STATS_LEN + IGC_QUEUE_STATS_LEN) +#define IGC_PRIV_FLAGS_LEGACY_RX BIT(0) +#define IGC_PRIV_FLAGS_REVERSE_TSN_TXQ_PRIO BIT(1) static const char igc_priv_flags_strings[][ETH_GSTRING_LEN] = { -#define IGC_PRIV_FLAGS_LEGACY_RX BIT(0) "legacy-rx", + "reverse-tsn-txq-prio", }; #define IGC_PRIV_FLAGS_STR_LEN ARRAY_SIZE(igc_priv_flags_strings) @@ -1600,6 +1602,9 @@ static u32 igc_ethtool_get_priv_flags(struct net_device *netdev) if (adapter->flags & IGC_FLAG_RX_LEGACY) priv_flags |= IGC_PRIV_FLAGS_LEGACY_RX; + if (adapter->flags & IGC_FLAG_TSN_REVERSE_TXQ_PRIO) + priv_flags |= IGC_PRIV_FLAGS_REVERSE_TSN_TXQ_PRIO; + return priv_flags; } @@ -1608,10 +1613,13 @@ static int igc_ethtool_set_priv_flags(struct net_device *netdev, u32 priv_flags) struct igc_adapter *adapter = netdev_priv(netdev); unsigned int flags = adapter->flags; - flags &= ~IGC_FLAG_RX_LEGACY; + flags &= ~(IGC_FLAG_RX_LEGACY | IGC_FLAG_TSN_REVERSE_TXQ_PRIO); if (priv_flags & IGC_PRIV_FLAGS_LEGACY_RX) flags |= IGC_FLAG_RX_LEGACY; + if (priv_flags & IGC_PRIV_FLAGS_REVERSE_TSN_TXQ_PRIO) + flags |= IGC_FLAG_TSN_REVERSE_TXQ_PRIO; + if (flags != adapter->flags) { adapter->flags = flags; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 1322a2db6dba..82d53fad6b6a 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6698,7 +6698,8 @@ static int igc_tc_query_caps(struct igc_adapter *adapter, case TC_SETUP_QDISC_TAPRIO: { struct tc_taprio_caps *caps = base->caps; - caps->broken_mqprio = true; + if (!(adapter->flags & IGC_FLAG_TSN_REVERSE_TXQ_PRIO)) + caps->broken_mqprio = true; if (hw->mac.type == igc_i225) { caps->supports_queue_max_sdu = true; diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 78a4a9cf5f96..43151ab4c1b7 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -398,7 +398,8 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) if (igc_is_device_id_i226(hw)) igc_tsn_set_retx_qbvfullthreshold(adapter); - if (adapter->strict_priority_enable) + if (adapter->strict_priority_enable || + adapter->flags & IGC_FLAG_TSN_REVERSE_TXQ_PRIO) igc_tsn_tx_arb(adapter, 
true);
 
 	for (i = 0; i < adapter->num_tx_queues; i++) {
-- 
2.51.0

From 17643482e9ff7bd192cce5c46bbbf607f5b64573 Mon Sep 17 00:00:00 2001
From: Faizal Rahim
Date: Mon, 19 May 2025 03:19:10 -0400
Subject: [PATCH 16/16] igc: add preemptible queue support in taprio

Changes:
1. Introduce tx_enabled flag to control the preemptible queues.
   tx_enabled is set via the mmsv module based on multiple factors,
   including link up/down status, to determine if FPE is active or
   inactive.

2. Add a priority field to TXDCTL for the express queue to improve
   data fetch performance.

3. Block preemptible queue setup in taprio unless the
   reverse-tsn-txq-prio private flag is set. This encourages adoption
   of the standard queue priority scheme for new features.

4. Hardware-padded frames from preemptible queues result in incorrect
   mCRC values, as padding bytes are excluded from the computation.
   Pad frames to at least 60 bytes using skb_padto() before
   transmission to ensure the hardware includes padding in the mCRC
   calculation.

Tested preemption with taprio by:

1. Enable FPE:
   ethtool --set-mm enp1s0 pmac-enabled on tx-enabled on verify-enabled on

2. Enable the private flag to reverse TX queue priority:
   ethtool --set-priv-flags enp1s0 reverse-tsn-txq-prio on

3. Enable preemptible queues in taprio:
   taprio num_tc 4 map 0 1 2 3 0 0 0 0 0 0 0 0 0 0 0 0 \
      queues 1@0 1@1 1@2 1@3 \
      fp P P P E

Reviewed-by: Aleksandr Loktionov
Co-developed-by: Chwee-Lin Choong
Signed-off-by: Chwee-Lin Choong
Signed-off-by: Faizal Rahim
Reviewed-by: Simon Horman
Tested-by: Mor Bar-Gabay
Signed-off-by: Tony Nguyen
---
 drivers/net/ethernet/intel/igc/igc.h         |  6 ++
 drivers/net/ethernet/intel/igc/igc_defines.h |  1 +
 drivers/net/ethernet/intel/igc/igc_main.c    | 21 +++++-
 drivers/net/ethernet/intel/igc/igc_tsn.c     | 71 ++++++++++++++++++++
 drivers/net/ethernet/intel/igc/igc_tsn.h     |  4 ++
 5 files changed, 100 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 023ff8a5b285..1525ae25fd3e 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -43,6 +43,7 @@ void igc_ethtool_set_ops(struct net_device *);
 struct igc_fpe_t {
 	struct ethtool_mmsv mmsv;
 	u32 tx_min_frag_size;
+	bool tx_enabled;
 };
 
 enum igc_mac_filter_type {
@@ -163,6 +164,7 @@ struct igc_ring {
 	bool launchtime_enable;	/* true if LaunchTime is enabled */
 	ktime_t last_tx_cycle;	/* end of the cycle with a launchtime transmission */
 	ktime_t last_ff_cycle;	/* Last cycle with an active first flag */
+	bool preemptible;	/* True if preemptible queue, false if express queue */
 
 	u32 start_time;
 	u32 end_time;
@@ -499,6 +501,8 @@ static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
 #define IGC_TXDCTL_WTHRESH_MASK	GENMASK(20, 16)
 #define IGC_TXDCTL_QUEUE_ENABLE_MASK	GENMASK(25, 25)
 #define IGC_TXDCTL_SWFLUSH_MASK	GENMASK(26, 26)
+#define IGC_TXDCTL_PRIORITY_MASK	GENMASK(27, 27)
+
 #define IGC_TXDCTL_PTHRESH(x)	FIELD_PREP(IGC_TXDCTL_PTHRESH_MASK, (x))
 #define IGC_TXDCTL_HTHRESH(x)	FIELD_PREP(IGC_TXDCTL_HTHRESH_MASK, (x))
 #define IGC_TXDCTL_WTHRESH(x)	FIELD_PREP(IGC_TXDCTL_WTHRESH_MASK, (x))
@@ -506,6 +510,8 @@ static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
 #define IGC_TXDCTL_QUEUE_ENABLE	FIELD_PREP(IGC_TXDCTL_QUEUE_ENABLE_MASK, 1)
 /* Transmit Software Flush */
 #define IGC_TXDCTL_SWFLUSH	FIELD_PREP(IGC_TXDCTL_SWFLUSH_MASK, 1)
+#define IGC_TXDCTL_PRIORITY(x)	FIELD_PREP(IGC_TXDCTL_PRIORITY_MASK, (x))
+#define IGC_TXDCTL_PRIORITY_HIGH	IGC_TXDCTL_PRIORITY(1)
 
 #define IGC_RX_DMA_ATTR \
 	(DMA_ATTR_SKIP_CPU_SYNC | 
DMA_ATTR_WEAK_ORDERING) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 7189dfc389ad..86b346687196 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -588,6 +588,7 @@ #define IGC_TXQCTL_QUEUE_MODE_LAUNCHT 0x00000001 #define IGC_TXQCTL_STRICT_CYCLE 0x00000002 #define IGC_TXQCTL_STRICT_END 0x00000004 +#define IGC_TXQCTL_PREEMPTIBLE 0x00000008 #define IGC_TXQCTL_QAV_SEL_MASK 0x000000C0 #define IGC_TXQCTL_QAV_SEL_CBS0 0x00000080 #define IGC_TXQCTL_QAV_SEL_CBS1 0x000000C0 diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 82d53fad6b6a..23cbe02ee238 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1685,6 +1685,15 @@ done: first->tx_flags = tx_flags; first->protocol = protocol; + /* For preemptible queue, manually pad the skb so that HW includes + * padding bytes in mCRC calculation + */ + if (tx_ring->preemptible && skb->len < ETH_ZLEN) { + if (skb_padto(skb, ETH_ZLEN)) + goto out_drop; + skb_put(skb, ETH_ZLEN - skb->len); + } + tso = igc_tso(tx_ring, first, launch_time, first_flag, &hdr_len); if (tso < 0) goto out_drop; @@ -6419,6 +6428,7 @@ static int igc_qbv_clear_schedule(struct igc_adapter *adapter) ring->start_time = 0; ring->end_time = NSEC_PER_SEC; ring->max_sdu = 0; + ring->preemptible = false; } spin_lock_irqsave(&adapter->qbv_tx_lock, flags); @@ -6484,9 +6494,12 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, if (!validate_schedule(adapter, qopt)) return -EINVAL; - /* preemptible isn't supported yet */ - if (qopt->mqprio.preemptible_tcs) - return -EOPNOTSUPP; + if (qopt->mqprio.preemptible_tcs && + !(adapter->flags & IGC_FLAG_TSN_REVERSE_TXQ_PRIO)) { + NL_SET_ERR_MSG_MOD(qopt->extack, + "reverse-tsn-txq-prio private flag must be enabled before setting preemptible tc"); + return -ENODEV; + } igc_ptp_read(adapter, &now); @@ -6579,6 +6592,8 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, ring->max_sdu = 0; } + igc_fpe_save_preempt_queue(adapter, &qopt->mqprio); + return 0; } diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 43151ab4c1b7..811856d66571 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -116,6 +116,18 @@ static int igc_fpe_xmit_smd_frame(struct igc_adapter *adapter, return err; } +static void igc_fpe_configure_tx(struct ethtool_mmsv *mmsv, bool tx_enable) +{ + struct igc_fpe_t *fpe = container_of(mmsv, struct igc_fpe_t, mmsv); + struct igc_adapter *adapter; + + adapter = container_of(fpe, struct igc_adapter, fpe); + adapter->fpe.tx_enabled = tx_enable; + + /* Update config since tx_enabled affects preemptible queue configuration */ + igc_tsn_offload_apply(adapter); +} + static void igc_fpe_send_mpacket(struct ethtool_mmsv *mmsv, enum ethtool_mpacket type) { @@ -137,15 +149,50 @@ static void igc_fpe_send_mpacket(struct ethtool_mmsv *mmsv, } static const struct ethtool_mmsv_ops igc_mmsv_ops = { + .configure_tx = igc_fpe_configure_tx, .send_mpacket = igc_fpe_send_mpacket, }; void igc_fpe_init(struct igc_adapter *adapter) { adapter->fpe.tx_min_frag_size = TX_MIN_FRAG_SIZE; + adapter->fpe.tx_enabled = false; ethtool_mmsv_init(&adapter->fpe.mmsv, adapter->netdev, &igc_mmsv_ops); } +static u32 igc_fpe_map_preempt_tc_to_queue(const struct igc_adapter *adapter, + unsigned long preemptible_tcs) +{ + struct 
net_device *dev = adapter->netdev;
+	u32 i, queue = 0;
+
+	for (i = 0; i < dev->num_tc; i++) {
+		u32 offset, count;
+
+		if (!(preemptible_tcs & BIT(i)))
+			continue;
+
+		offset = dev->tc_to_txq[i].offset;
+		count = dev->tc_to_txq[i].count;
+		queue |= GENMASK(offset + count - 1, offset);
+	}
+
+	return queue;
+}
+
+void igc_fpe_save_preempt_queue(struct igc_adapter *adapter,
+				const struct tc_mqprio_qopt_offload *mqprio)
+{
+	u32 preemptible_queue = igc_fpe_map_preempt_tc_to_queue(adapter,
+							mqprio->preemptible_tcs);
+
+	for (int i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+		tx_ring->preemptible = !!(preemptible_queue & BIT(i));
+	}
+}
+
 static bool is_any_launchtime(struct igc_adapter *adapter)
 {
 	int i;
@@ -321,9 +368,16 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter)
 	wr32(IGC_TQAVCTRL, tqavctrl);
 
 	for (i = 0; i < adapter->num_tx_queues; i++) {
+		int reg_idx = adapter->tx_ring[i]->reg_idx;
+		u32 txdctl;
+
 		wr32(IGC_TXQCTL(i), 0);
 		wr32(IGC_STQT(i), 0);
 		wr32(IGC_ENDQT(i), NSEC_PER_SEC);
+
+		txdctl = rd32(IGC_TXDCTL(reg_idx));
+		txdctl &= ~IGC_TXDCTL_PRIORITY_HIGH;
+		wr32(IGC_TXDCTL(reg_idx), txdctl);
 	}
 
 	wr32(IGC_QBVCYCLET_S, 0);
@@ -404,6 +458,7 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
 
 	for (i = 0; i < adapter->num_tx_queues; i++) {
 		struct igc_ring *ring = adapter->tx_ring[i];
+		u32 txdctl = rd32(IGC_TXDCTL(ring->reg_idx));
 		u32 txqctl = 0;
 		u16 cbs_value;
 		u32 tqavcc;
@@ -437,6 +492,22 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
 		if (ring->launchtime_enable)
 			txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT;
 
+		if (!adapter->fpe.tx_enabled) {
+			/* fpe inactive: clear both flags */
+			txqctl &= ~IGC_TXQCTL_PREEMPTIBLE;
+			txdctl &= ~IGC_TXDCTL_PRIORITY_HIGH;
+		} else if (ring->preemptible) {
+			/* fpe active + preemptible: enable preemptible queue + set low priority */
+			txqctl |= IGC_TXQCTL_PREEMPTIBLE;
+			txdctl &= ~IGC_TXDCTL_PRIORITY_HIGH;
+		} else {
+			/* fpe active + express: enable express queue + set high priority */
+			txqctl &= ~IGC_TXQCTL_PREEMPTIBLE;
+			txdctl |= IGC_TXDCTL_PRIORITY_HIGH;
+		}
+
+		wr32(IGC_TXDCTL(ring->reg_idx), txdctl);
+
 		/* Skip configuring CBS for Q2 and Q3 */
 		if (i > 1)
 			goto skip_cbs;
diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h
index c2a77229207b..f2e8bfef4871 100644
--- a/drivers/net/ethernet/intel/igc/igc_tsn.h
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.h
@@ -4,6 +4,8 @@
 #ifndef _IGC_TSN_H_
 #define _IGC_TSN_H_
 
+#include <net/pkt_sched.h>
+
 #define IGC_RX_MIN_FRAG_SIZE	60
 #define SMD_FRAME_SIZE		60
 
@@ -15,6 +17,8 @@ enum igc_txd_popts_type {
 DECLARE_STATIC_KEY_FALSE(igc_fpe_enabled);
 
 void igc_fpe_init(struct igc_adapter *adapter);
+void igc_fpe_save_preempt_queue(struct igc_adapter *adapter,
+				const struct tc_mqprio_qopt_offload *mqprio);
 u32 igc_fpe_get_supported_frag_size(u32 frag_size);
 int igc_tsn_offload_apply(struct igc_adapter *adapter);
 int igc_tsn_reset(struct igc_adapter *adapter);
-- 
2.51.0