From 00b5b7aab9e422d00d5a9d03d7e0760a76b5d57f Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 20 Nov 2024 09:51:07 +0000 Subject: [PATCH 01/16] net/ipv6: delete temporary address if mngtmpaddr is removed or unmanaged MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit RFC8981 section 3.4 says that existing temporary addresses must have their lifetimes adjusted so that no temporary addresses should ever remain "valid" or "preferred" longer than the incoming SLAAC Prefix Information. This would strongly imply in Linux's case that if the "mngtmpaddr" address is deleted or un-flagged as such, its corresponding temporary addresses must be cleared out right away. But now the temporary address is renewed even after ‘mngtmpaddr’ is removed or becomes unmanaged as manage_tempaddrs() set temporary addresses prefered/valid time to 0, and later in addrconf_verify_rtnl() all checkings failed to remove the addresses. Fix this by deleting the temporary address directly for these situations. Fixes: 778964f2fdf0 ("ipv6/addrconf: fix timing bug in tempaddr regen") Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- net/ipv6/addrconf.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 96b5b2b0d507..c489a1e6aec9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2570,6 +2570,24 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) return idev; } +static void delete_tempaddrs(struct inet6_dev *idev, + struct inet6_ifaddr *ifp) +{ + struct inet6_ifaddr *ift, *tmp; + + write_lock_bh(&idev->lock); + list_for_each_entry_safe(ift, tmp, &idev->tempaddr_list, tmp_list) { + if (ift->ifpub != ifp) + continue; + + in6_ifa_hold(ift); + write_unlock_bh(&idev->lock); + ipv6_del_addr(ift); + write_lock_bh(&idev->lock); + } + write_unlock_bh(&idev->lock); +} + static void manage_tempaddrs(struct inet6_dev *idev, struct inet6_ifaddr *ifp, __u32 valid_lft, __u32 prefered_lft, @@ -3124,11 +3142,12 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, in6_ifa_hold(ifp); read_unlock_bh(&idev->lock); - if (!(ifp->flags & IFA_F_TEMPORARY) && - (ifa_flags & IFA_F_MANAGETEMPADDR)) - manage_tempaddrs(idev, ifp, 0, 0, false, - jiffies); ipv6_del_addr(ifp); + + if (!(ifp->flags & IFA_F_TEMPORARY) && + (ifp->flags & IFA_F_MANAGETEMPADDR)) + delete_tempaddrs(idev, ifp); + addrconf_verify_rtnl(net); if (ipv6_addr_is_multicast(pfx)) { ipv6_mc_config(net->ipv6.mc_autojoin_sk, @@ -4952,14 +4971,12 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp, } if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) { - if (was_managetempaddr && - !(ifp->flags & IFA_F_MANAGETEMPADDR)) { - cfg->valid_lft = 0; - cfg->preferred_lft = 0; - } - manage_tempaddrs(ifp->idev, ifp, cfg->valid_lft, - cfg->preferred_lft, !was_managetempaddr, - jiffies); + if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR)) + delete_tempaddrs(ifp->idev, ifp); + else + manage_tempaddrs(ifp->idev, ifp, cfg->valid_lft, + cfg->preferred_lft, !was_managetempaddr, + jiffies); } addrconf_verify_rtnl(net); -- 2.51.0 From f6e1dcd6444485356d1df9d29df702acc7bba00c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 20 Nov 2024 09:51:08 +0000 Subject: [PATCH 02/16] selftests/rtnetlink.sh: add mngtempaddr test Add a test to check the temporary address could be added/removed correctly when mngtempaddr is set or removed/unmanaged. Signed-off-by: Sam Edwards Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/rtnetlink.sh | 95 ++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 7f05b5f9b76f..2e8243a65b50 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -29,6 +29,7 @@ ALL_TESTS=" kci_test_bridge_parent_id kci_test_address_proto kci_test_enslave_bonding + kci_test_mngtmpaddr " devdummy="test-dummy0" @@ -44,6 +45,7 @@ check_err() if [ $ret -eq 0 ]; then ret=$1 fi + [ -n "$2" ] && echo "$2" } # same but inverted -- used when command must fail for test to pass @@ -1239,6 +1241,99 @@ kci_test_enslave_bonding() ip netns del "$testns" } +# Called to validate the addresses on $IFNAME: +# +# 1. Every `temporary` address must have a matching `mngtmpaddr` +# 2. Every `mngtmpaddr` address must have some un`deprecated` `temporary` +# +# If the mngtmpaddr or tempaddr checking failed, return 0 and stop slowwait +validate_mngtmpaddr() +{ + local dev=$1 + local prefix="" + local addr_list=$(ip -j -n $testns addr show dev ${dev}) + local temp_addrs=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.temporary == true) | .local') + local mng_prefixes=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.mngtmpaddr == true) | .local' | \ + cut -d: -f1-4 | tr '\n' ' ') + local undep_prefixes=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.temporary == true and .deprecated != true) | .local' | \ + cut -d: -f1-4 | tr '\n' ' ') + + # 1. All temporary addresses (temp and dep) must have a matching mngtmpaddr + for address in ${temp_addrs}; do + prefix=$(echo ${address} | cut -d: -f1-4) + if [[ ! " ${mng_prefixes} " =~ " $prefix " ]]; then + check_err 1 "FAIL: Temporary $address with no matching mngtmpaddr!"; + return 0 + fi + done + + # 2. All mngtmpaddr addresses must have a temporary address (not dep) + for prefix in ${mng_prefixes}; do + if [[ ! " ${undep_prefixes} " =~ " $prefix " ]]; then + check_err 1 "FAIL: No undeprecated temporary in $prefix!"; + return 0 + fi + done + + return 1 +} + +kci_test_mngtmpaddr() +{ + local ret=0 + + setup_ns testns + if [ $? -ne 0 ]; then + end_test "SKIP mngtmpaddr tests: cannot add net namespace $testns" + return $ksft_skip + fi + + # 1. Create a dummy Ethernet interface + run_cmd ip -n $testns link add ${devdummy} type dummy + run_cmd ip -n $testns link set ${devdummy} up + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.use_tempaddr=1 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_prefered_lft=10 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_valid_lft=25 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.max_desync_factor=1 + + # 2. Create several mngtmpaddr addresses on that interface. + # with temp_*_lft configured to be pretty short (10 and 35 seconds + # for prefer/valid respectively) + for i in $(seq 1 9); do + run_cmd ip -n $testns addr add 2001:db8:7e57:${i}::1/64 mngtmpaddr dev ${devdummy} + done + + # 3. Confirm that a preferred temporary address exists for each mngtmpaddr + # address at all times, polling once per second for 30 seconds. + slowwait 30 validate_mngtmpaddr ${devdummy} + + # 4. Delete each mngtmpaddr address, one at a time (alternating between + # deleting and merely un-mngtmpaddr-ing), and confirm that the other + # mngtmpaddr addresses still have preferred temporaries. + for i in $(seq 1 9); do + (( $i % 4 == 0 )) && mng_flag="mngtmpaddr" || mng_flag="" + if (( $i % 2 == 0 )); then + run_cmd ip -n $testns addr del 2001:db8:7e57:${i}::1/64 $mng_flag dev ${devdummy} + else + run_cmd ip -n $testns addr change 2001:db8:7e57:${i}::1/64 dev ${devdummy} + fi + # the temp addr should be deleted + validate_mngtmpaddr ${devdummy} + done + + if [ $ret -ne 0 ]; then + end_test "FAIL: mngtmpaddr add/remove incorrect" + else + end_test "PASS: mngtmpaddr add/remove correctly" + fi + + ip netns del "$testns" + return $ret +} + kci_test_rtnl() { local current_test -- 2.51.0 From 9cc8d0ecdd2aad42e377e971e3bb114339df609e Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Thu, 21 Nov 2024 11:31:52 -0800 Subject: [PATCH 03/16] net: mdio-ipq4019: add missing error check If an optional resource is found but fails to remap, return on failure. Avoids any potential problems when using the iomapped resource as the assumption is that it's available. Fixes: 23a890d493e3 ("net: mdio: Add the reset function for IPQ MDIO driver") Signed-off-by: Rosen Penev Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241121193152.8966-1-rosenp@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/mdio/mdio-ipq4019.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c index dd3ed2d6430b..d9a94df482d9 100644 --- a/drivers/net/mdio/mdio-ipq4019.c +++ b/drivers/net/mdio/mdio-ipq4019.c @@ -352,8 +352,11 @@ static int ipq4019_mdio_probe(struct platform_device *pdev) /* The platform resource is provided on the chipset IPQ5018 */ /* This resource is optional */ res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (res) + if (res) { priv->eth_ldo_rdy = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->eth_ldo_rdy)) + return PTR_ERR(priv->eth_ldo_rdy); + } bus->name = "ipq4019_mdio"; bus->read = ipq4019_mdio_read_c22; -- 2.51.0 From b032ae57d4fe2b2445e3bc190db6fcaa8c102f68 Mon Sep 17 00:00:00 2001 From: Vitalii Mordan Date: Thu, 21 Nov 2024 23:06:58 +0300 Subject: [PATCH 04/16] marvell: pxa168_eth: fix call balance of pep->clk handling routines If the clock pep->clk was not enabled in pxa168_eth_probe, it should not be disabled in any path. Conversely, if it was enabled in pxa168_eth_probe, it must be disabled in all error paths to ensure proper cleanup. Use the devm_clk_get_enabled helper function to ensure proper call balance for pep->clk. Found by Linux Verification Center (linuxtesting.org) with Klever. Fixes: a49f37eed22b ("net: add Fast Ethernet driver for PXA168.") Signed-off-by: Vitalii Mordan Link: https://patch.msgid.link/20241121200658.2203871-1-mordan@ispras.ru Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/pxa168_eth.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index fe38426ec42d..2bf426cea6dd 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1394,18 +1394,15 @@ static int pxa168_eth_probe(struct platform_device *pdev) printk(KERN_NOTICE "PXA168 10/100 Ethernet Driver\n"); - clk = devm_clk_get(&pdev->dev, NULL); + clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(clk)) { - dev_err(&pdev->dev, "Fast Ethernet failed to get clock\n"); + dev_err(&pdev->dev, "Fast Ethernet failed to get and enable clock\n"); return -ENODEV; } - clk_prepare_enable(clk); dev = alloc_etherdev(sizeof(struct pxa168_eth_private)); - if (!dev) { - err = -ENOMEM; - goto err_clk; - } + if (!dev) + return -ENOMEM; platform_set_drvdata(pdev, dev); pep = netdev_priv(dev); @@ -1523,8 +1520,6 @@ err_free_mdio: mdiobus_free(pep->smi_bus); err_netdev: free_netdev(dev); -err_clk: - clk_disable_unprepare(clk); return err; } @@ -1542,7 +1537,6 @@ static void pxa168_eth_remove(struct platform_device *pdev) if (dev->phydev) phy_disconnect(dev->phydev); - clk_disable_unprepare(pep->clk); mdiobus_unregister(pep->smi_bus); mdiobus_free(pep->smi_bus); unregister_netdev(dev); -- 2.51.0 From 407618d66dba55e7db1278872e8be106808bbe91 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 22 Nov 2024 15:12:55 +0100 Subject: [PATCH 05/16] net: stmmac: dwmac-socfpga: Set RX watchdog interrupt as broken On DWMAC3 and later, there's a RX Watchdog interrupt that's used for interrupt coalescing. It's known to be buggy on some platforms, and dwmac-socfpga appears to be one of them. Changing the interrupt coalescing from ethtool doesn't appear to have any effect here. Without disabling RIWT (Received Interrupt Watchdog Timer, I believe...), we observe latencies while receiving traffic that amount to around ~0.4ms. This was discovered with NTP but can be easily reproduced with a simple ping. Without this patch : 64 bytes from 192.168.5.2: icmp_seq=1 ttl=64 time=0.657 ms With this patch : 64 bytes from 192.168.5.2: icmp_seq=1 ttl=64 time=0.254 ms Fixes: 801d233b7302 ("net: stmmac: Add SOCFPGA glue driver") Signed-off-by: Maxime Chevallier Link: https://patch.msgid.link/20241122141256.764578-1-maxime.chevallier@bootlin.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 248b30d7b864..16020b72dec8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -487,6 +487,8 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) plat_dat->select_pcs = socfpga_dwmac_select_pcs; plat_dat->has_gmac = true; + plat_dat->riwt_off = 1; + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) return ret; -- 2.51.0 From 7ebbbb23ea5b6d051509cb11399afac5042c9266 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 22 Nov 2024 21:50:31 +0530 Subject: [PATCH 06/16] octeontx2-af: RPM: Fix mismatch in lmac type Due to a bug in the previous patch, there is a mismatch between the lmac type reported by the driver and the actual hardware configuration. Fixes: 3ad3f8f93c81 ("octeontx2-af: cn10k: MAC internal loopback support") Signed-off-by: Hariprasad Kelam Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index 1b34cf9c9703..9e8c5e4389f8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -467,7 +467,7 @@ u8 rpm_get_lmac_type(void *rpmd, int lmac_id) int err; req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_LINK_STS, req); - err = cgx_fwi_cmd_generic(req, &resp, rpm, 0); + err = cgx_fwi_cmd_generic(req, &resp, rpm, lmac_id); if (!err) return FIELD_GET(RESP_LINKSTAT_LMAC_TYPE, resp); return err; -- 2.51.0 From d1e8884e050c1255a9ceb477f5ff926ee9214a23 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 22 Nov 2024 21:50:32 +0530 Subject: [PATCH 07/16] octeontx2-af: RPM: Fix low network performance Low network performance is observed even on RPMs with larger FIFO lengths. The cn10kb silicon has three RPM blocks with the following FIFO sizes: -------------------- | RPM0 | 256KB | | RPM1 | 256KB | | RPM2 | 128KB | -------------------- The current design stores the FIFO length in a common structure for all RPMs (mac_ops). As a result, the FIFO length of the last RPM is applied to all RPMs, leading to reduced network performance. This patch resolved the problem by storing the fifo length in per MAC structure (cgx). Fixes: b9d0fedc6234 ("octeontx2-af: cn10kb: Add RPM_USX MAC support") Signed-off-by: Hariprasad Kelam Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 9 +++++++-- drivers/net/ethernet/marvell/octeontx2/af/cgx.h | 1 + drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h | 5 ++++- drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 6 +++--- drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 9 ++++----- 5 files changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 27935c54b91b..2f621714c54e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -112,6 +112,11 @@ struct mac_ops *get_mac_ops(void *cgxd) return ((struct cgx *)cgxd)->mac_ops; } +u32 cgx_get_fifo_len(void *cgxd) +{ + return ((struct cgx *)cgxd)->fifo_len; +} + void cgx_write(struct cgx *cgx, u64 lmac, u64 offset, u64 val) { writeq(val, cgx->reg_base + (lmac << cgx->mac_ops->lmac_offset) + @@ -501,7 +506,7 @@ static u32 cgx_get_lmac_fifo_len(void *cgxd, int lmac_id) u8 num_lmacs; u32 fifo_len; - fifo_len = cgx->mac_ops->fifo_len; + fifo_len = cgx->fifo_len; num_lmacs = cgx->mac_ops->get_nr_lmacs(cgx); switch (num_lmacs) { @@ -1764,7 +1769,7 @@ static void cgx_populate_features(struct cgx *cgx) u64 cfg; cfg = cgx_read(cgx, 0, CGX_CONST); - cgx->mac_ops->fifo_len = FIELD_GET(CGX_CONST_RXFIFO_SIZE, cfg); + cgx->fifo_len = FIELD_GET(CGX_CONST_RXFIFO_SIZE, cfg); cgx->max_lmac_per_mac = FIELD_GET(CGX_CONST_MAX_LMACS, cfg); if (is_dev_rpm(cgx)) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index dc9ace30554a..f9cd4b58f0c0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -185,4 +185,5 @@ int cgx_lmac_get_pfc_frm_cfg(void *cgxd, int lmac_id, u8 *tx_pause, int verify_lmac_fc_cfg(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause, int pfvf_idx); int cgx_lmac_reset(void *cgxd, int lmac_id, u8 pf_req_flr); +u32 cgx_get_fifo_len(void *cgxd); #endif /* CGX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index 9ffc6790c513..c43ff68ef140 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -72,7 +72,6 @@ struct mac_ops { u8 irq_offset; u8 int_ena_bit; u8 lmac_fwi; - u32 fifo_len; bool non_contiguous_serdes_lane; /* RPM & CGX differs in number of Receive/transmit stats */ u8 rx_stats_cnt; @@ -142,6 +141,10 @@ struct cgx { u8 lmac_count; /* number of LMACs per MAC could be 4 or 8 */ u8 max_lmac_per_mac; + /* length of fifo varies depending on the number + * of LMACS + */ + u32 fifo_len; #define MAX_LMAC_COUNT 8 struct lmac *lmac_idmap[MAX_LMAC_COUNT]; struct work_struct cgx_cmd_work; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index 9e8c5e4389f8..22dd50a3fcd3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -480,7 +480,7 @@ u32 rpm_get_lmac_fifo_len(void *rpmd, int lmac_id) u8 num_lmacs; u32 fifo_len; - fifo_len = rpm->mac_ops->fifo_len; + fifo_len = rpm->fifo_len; num_lmacs = rpm->mac_ops->get_nr_lmacs(rpm); switch (num_lmacs) { @@ -533,9 +533,9 @@ u32 rpm2_get_lmac_fifo_len(void *rpmd, int lmac_id) */ max_lmac = (rpm_read(rpm, 0, CGX_CONST) >> 24) & 0xFF; if (max_lmac > 4) - fifo_len = rpm->mac_ops->fifo_len / 2; + fifo_len = rpm->fifo_len / 2; else - fifo_len = rpm->mac_ops->fifo_len; + fifo_len = rpm->fifo_len; if (lmac_id < 4) { num_lmacs = hweight8(lmac_info & 0xF); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 266ecbc1b97a..4dcd7bfcad4e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -923,13 +923,12 @@ int rvu_mbox_handler_cgx_features_get(struct rvu *rvu, u32 rvu_cgx_get_fifolen(struct rvu *rvu) { - struct mac_ops *mac_ops; - u32 fifo_len; + void *cgxd = rvu_first_cgx_pdata(rvu); - mac_ops = get_mac_ops(rvu_first_cgx_pdata(rvu)); - fifo_len = mac_ops ? mac_ops->fifo_len : 0; + if (!cgxd) + return 0; - return fifo_len; + return cgx_get_fifo_len(cgxd); } u32 rvu_cgx_get_lmac_fifolen(struct rvu *rvu, int cgx, int lmac) -- 2.51.0 From 07cd1eb166a3fa7244afa74d48bd13c9df7c559d Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 22 Nov 2024 21:50:33 +0530 Subject: [PATCH 08/16] octeontx2-af: RPM: fix stale RSFEC counters The earlier patch sets the 'Stats control register' for RPM receive/transmit statistics instead of RSFEC statistics, causing the driver to return stale FEC counters. Fixes: 84ad3642115d ("octeontx2-af: Add FEC stats for RPM/RPM_USX block") Signed-off-by: Hariprasad Kelam Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 13 +++++++++---- drivers/net/ethernet/marvell/octeontx2/af/rpm.h | 4 +++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index 22dd50a3fcd3..70629f94c27e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -699,6 +699,10 @@ int rpm_get_fec_stats(void *rpmd, int lmac_id, struct cgx_fec_stats_rsp *rsp) if (rpm->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_NONE) return 0; + /* latched registers FCFECX_CW_HI/RSFEC_STAT_FAST_DATA_HI_CDC are common + * for all counters. Acquire lock to ensure serialized reads + */ + mutex_lock(&rpm->lock); if (rpm->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_BASER) { val_lo = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_VL0_CCW_LO); val_hi = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_CW_HI); @@ -725,20 +729,21 @@ int rpm_get_fec_stats(void *rpmd, int lmac_id, struct cgx_fec_stats_rsp *rsp) } } else { /* enable RS-FEC capture */ - cfg = rpm_read(rpm, 0, RPMX_MTI_STAT_STATN_CONTROL); + cfg = rpm_read(rpm, 0, RPMX_MTI_RSFEC_STAT_STATN_CONTROL); cfg |= RPMX_RSFEC_RX_CAPTURE | BIT(lmac_id); - rpm_write(rpm, 0, RPMX_MTI_STAT_STATN_CONTROL, cfg); + rpm_write(rpm, 0, RPMX_MTI_RSFEC_STAT_STATN_CONTROL, cfg); val_lo = rpm_read(rpm, 0, RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_2); - val_hi = rpm_read(rpm, 0, RPMX_MTI_STAT_DATA_HI_CDC); + val_hi = rpm_read(rpm, 0, RPMX_MTI_RSFEC_STAT_FAST_DATA_HI_CDC); rsp->fec_corr_blks = (val_hi << 32 | val_lo); val_lo = rpm_read(rpm, 0, RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_3); - val_hi = rpm_read(rpm, 0, RPMX_MTI_STAT_DATA_HI_CDC); + val_hi = rpm_read(rpm, 0, RPMX_MTI_RSFEC_STAT_FAST_DATA_HI_CDC); rsp->fec_uncorr_blks = (val_hi << 32 | val_lo); } + mutex_unlock(&rpm->lock); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index 34b11deb0f3c..a5773fbacaff 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -84,9 +84,11 @@ /* FEC stats */ #define RPMX_MTI_STAT_STATN_CONTROL 0x10018 #define RPMX_MTI_STAT_DATA_HI_CDC 0x10038 -#define RPMX_RSFEC_RX_CAPTURE BIT_ULL(27) +#define RPMX_RSFEC_RX_CAPTURE BIT_ULL(28) #define RPMX_CMD_CLEAR_RX BIT_ULL(30) #define RPMX_CMD_CLEAR_TX BIT_ULL(31) +#define RPMX_MTI_RSFEC_STAT_STATN_CONTROL 0x40018 +#define RPMX_MTI_RSFEC_STAT_FAST_DATA_HI_CDC 0x40000 #define RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_2 0x40050 #define RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_3 0x40058 #define RPMX_MTI_FCFECX_VL0_CCW_LO 0x38618 -- 2.51.0 From 6fc2164108462b913a1290fa2c44054c70b060ef Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 22 Nov 2024 21:50:34 +0530 Subject: [PATCH 09/16] octeontx2-af: RPM: fix stale FCFEC counters The corrected words register(FCFECX_VL0_CCW_LO)/Uncorrected words register (FCFECX_VL0_NCCW_LO) of FCFEC counter has different LMAC offset which needs to be accessed differently. Fixes: 84ad3642115d ("octeontx2-af: Add FEC stats for RPM/RPM_USX block") Signed-off-by: Hariprasad Kelam Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/octeontx2/af/rpm.c | 24 +++++++++---------- .../net/ethernet/marvell/octeontx2/af/rpm.h | 10 ++++---- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index 70629f94c27e..e97fcc51d7f2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -704,27 +704,27 @@ int rpm_get_fec_stats(void *rpmd, int lmac_id, struct cgx_fec_stats_rsp *rsp) */ mutex_lock(&rpm->lock); if (rpm->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_BASER) { - val_lo = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_VL0_CCW_LO); - val_hi = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_CW_HI); + val_lo = rpm_read(rpm, 0, RPMX_MTI_FCFECX_VL0_CCW_LO(lmac_id)); + val_hi = rpm_read(rpm, 0, RPMX_MTI_FCFECX_CW_HI(lmac_id)); rsp->fec_corr_blks = (val_hi << 16 | val_lo); - val_lo = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_VL0_NCCW_LO); - val_hi = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_CW_HI); + val_lo = rpm_read(rpm, 0, RPMX_MTI_FCFECX_VL0_NCCW_LO(lmac_id)); + val_hi = rpm_read(rpm, 0, RPMX_MTI_FCFECX_CW_HI(lmac_id)); rsp->fec_uncorr_blks = (val_hi << 16 | val_lo); /* 50G uses 2 Physical serdes lines */ if (rpm->lmac_idmap[lmac_id]->link_info.lmac_type_id == LMAC_MODE_50G_R) { - val_lo = rpm_read(rpm, lmac_id, - RPMX_MTI_FCFECX_VL1_CCW_LO); - val_hi = rpm_read(rpm, lmac_id, - RPMX_MTI_FCFECX_CW_HI); + val_lo = rpm_read(rpm, 0, + RPMX_MTI_FCFECX_VL1_CCW_LO(lmac_id)); + val_hi = rpm_read(rpm, 0, + RPMX_MTI_FCFECX_CW_HI(lmac_id)); rsp->fec_corr_blks += (val_hi << 16 | val_lo); - val_lo = rpm_read(rpm, lmac_id, - RPMX_MTI_FCFECX_VL1_NCCW_LO); - val_hi = rpm_read(rpm, lmac_id, - RPMX_MTI_FCFECX_CW_HI); + val_lo = rpm_read(rpm, 0, + RPMX_MTI_FCFECX_VL1_NCCW_LO(lmac_id)); + val_hi = rpm_read(rpm, 0, + RPMX_MTI_FCFECX_CW_HI(lmac_id)); rsp->fec_uncorr_blks += (val_hi << 16 | val_lo); } } else { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index a5773fbacaff..5194fec4c3b8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -91,11 +91,11 @@ #define RPMX_MTI_RSFEC_STAT_FAST_DATA_HI_CDC 0x40000 #define RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_2 0x40050 #define RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_3 0x40058 -#define RPMX_MTI_FCFECX_VL0_CCW_LO 0x38618 -#define RPMX_MTI_FCFECX_VL0_NCCW_LO 0x38620 -#define RPMX_MTI_FCFECX_VL1_CCW_LO 0x38628 -#define RPMX_MTI_FCFECX_VL1_NCCW_LO 0x38630 -#define RPMX_MTI_FCFECX_CW_HI 0x38638 +#define RPMX_MTI_FCFECX_VL0_CCW_LO(a) (0x38618 + ((a) * 0x40)) +#define RPMX_MTI_FCFECX_VL0_NCCW_LO(a) (0x38620 + ((a) * 0x40)) +#define RPMX_MTI_FCFECX_VL1_CCW_LO(a) (0x38628 + ((a) * 0x40)) +#define RPMX_MTI_FCFECX_VL1_NCCW_LO(a) (0x38630 + ((a) * 0x40)) +#define RPMX_MTI_FCFECX_CW_HI(a) (0x38638 + ((a) * 0x40)) /* CN10KB CSR Declaration */ #define RPM2_CMRX_SW_INT 0x1b0 -- 2.51.0 From 762ca6eed026346d9d41ed5ac633083c4f1e5071 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 22 Nov 2024 21:50:35 +0530 Subject: [PATCH 10/16] octeontx2-af: Quiesce traffic before NIX block reset During initialization, the AF driver resets all blocks. The RPM (MAC) block and NIX block operate on a credit-based model. When the NIX block resets during active traffic flow, it doesn't release credits to the RPM block. This causes the RPM FIFO to overflow, leading to receive traffic struck. To address this issue, the patch introduces the following changes: 1. Stop receiving traffic at the MAC level during AF driver initialization. 2. Perform an X2P reset (prevents RXFIFO of all LMACS from pushing data) 3. Reset the NIX block. 4. Clear the X2P reset and re-enable receiving traffic. Fixes: 54d557815e15 ("octeontx2-af: Reset all RVU blocks") Signed-off-by: Hariprasad Kelam Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/octeontx2/af/cgx.c | 61 +++++++++++++++++++ .../net/ethernet/marvell/octeontx2/af/cgx.h | 4 ++ .../marvell/octeontx2/af/lmac_common.h | 2 + .../net/ethernet/marvell/octeontx2/af/rpm.c | 42 +++++++++++++ .../net/ethernet/marvell/octeontx2/af/rpm.h | 4 ++ .../net/ethernet/marvell/octeontx2/af/rvu.c | 1 + .../net/ethernet/marvell/octeontx2/af/rvu.h | 1 + .../ethernet/marvell/octeontx2/af/rvu_cgx.c | 36 +++++++++-- 8 files changed, 145 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 2f621714c54e..8216f843a7cd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -214,6 +214,24 @@ u8 cgx_lmac_get_p2x(int cgx_id, int lmac_id) return (cfg & CMR_P2X_SEL_MASK) >> CMR_P2X_SEL_SHIFT; } +static u8 cgx_get_nix_resetbit(struct cgx *cgx) +{ + int first_lmac; + u8 p2x; + + /* non 98XX silicons supports only NIX0 block */ + if (cgx->pdev->subsystem_device != PCI_SUBSYS_DEVID_98XX) + return CGX_NIX0_RESET; + + first_lmac = find_first_bit(&cgx->lmac_bmap, cgx->max_lmac_per_mac); + p2x = cgx_lmac_get_p2x(cgx->cgx_id, first_lmac); + + if (p2x == CMR_P2X_SEL_NIX1) + return CGX_NIX1_RESET; + else + return CGX_NIX0_RESET; +} + /* Ensure the required lock for event queue(where asynchronous events are * posted) is acquired before calling this API. Else an asynchronous event(with * latest link status) can reach the destination before this function returns @@ -1724,6 +1742,8 @@ static int cgx_lmac_init(struct cgx *cgx) lmac->lmac_type = cgx->mac_ops->get_lmac_type(cgx, lmac->lmac_id); } + /* Start X2P reset on given MAC block */ + cgx->mac_ops->mac_x2p_reset(cgx, true); return cgx_lmac_verify_fwi_version(cgx); err_bitmap_free: @@ -1789,6 +1809,45 @@ static u8 cgx_get_rxid_mapoffset(struct cgx *cgx) return 0x60; } +static void cgx_x2p_reset(void *cgxd, bool enable) +{ + struct cgx *cgx = cgxd; + int lmac_id; + u64 cfg; + + if (enable) { + for_each_set_bit(lmac_id, &cgx->lmac_bmap, cgx->max_lmac_per_mac) + cgx->mac_ops->mac_enadis_rx(cgx, lmac_id, false); + + usleep_range(1000, 2000); + + cfg = cgx_read(cgx, 0, CGXX_CMR_GLOBAL_CONFIG); + cfg |= cgx_get_nix_resetbit(cgx) | CGX_NSCI_DROP; + cgx_write(cgx, 0, CGXX_CMR_GLOBAL_CONFIG, cfg); + } else { + cfg = cgx_read(cgx, 0, CGXX_CMR_GLOBAL_CONFIG); + cfg &= ~(cgx_get_nix_resetbit(cgx) | CGX_NSCI_DROP); + cgx_write(cgx, 0, CGXX_CMR_GLOBAL_CONFIG, cfg); + } +} + +static int cgx_enadis_rx(void *cgxd, int lmac_id, bool enable) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + + cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); + if (enable) + cfg |= DATA_PKT_RX_EN; + else + cfg &= ~DATA_PKT_RX_EN; + cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg); + return 0; +} + static struct mac_ops cgx_mac_ops = { .name = "cgx", .csr_offset = 0, @@ -1820,6 +1879,8 @@ static struct mac_ops cgx_mac_ops = { .mac_get_pfc_frm_cfg = cgx_lmac_get_pfc_frm_cfg, .mac_reset = cgx_lmac_reset, .mac_stats_reset = cgx_stats_reset, + .mac_x2p_reset = cgx_x2p_reset, + .mac_enadis_rx = cgx_enadis_rx, }; static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index f9cd4b58f0c0..1cf12e5c7da8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -32,6 +32,10 @@ #define CGX_LMAC_TYPE_MASK 0xF #define CGXX_CMRX_INT 0x040 #define FW_CGX_INT BIT_ULL(1) +#define CGXX_CMR_GLOBAL_CONFIG 0x08 +#define CGX_NIX0_RESET BIT_ULL(2) +#define CGX_NIX1_RESET BIT_ULL(3) +#define CGX_NSCI_DROP BIT_ULL(9) #define CGXX_CMRX_INT_ENA_W1S 0x058 #define CGXX_CMRX_RX_ID_MAP 0x060 #define CGXX_CMRX_RX_STAT0 0x070 diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index c43ff68ef140..6180e68e1765 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -132,6 +132,8 @@ struct mac_ops { int (*get_fec_stats)(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp *rsp); int (*mac_stats_reset)(void *cgxd, int lmac_id); + void (*mac_x2p_reset)(void *cgxd, bool enable); + int (*mac_enadis_rx)(void *cgxd, int lmac_id, bool enable); }; struct cgx { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index e97fcc51d7f2..2e9945446199 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -39,6 +39,8 @@ static struct mac_ops rpm_mac_ops = { .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, .mac_reset = rpm_lmac_reset, .mac_stats_reset = rpm_stats_reset, + .mac_x2p_reset = rpm_x2p_reset, + .mac_enadis_rx = rpm_enadis_rx, }; static struct mac_ops rpm2_mac_ops = { @@ -72,6 +74,8 @@ static struct mac_ops rpm2_mac_ops = { .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, .mac_reset = rpm_lmac_reset, .mac_stats_reset = rpm_stats_reset, + .mac_x2p_reset = rpm_x2p_reset, + .mac_enadis_rx = rpm_enadis_rx, }; bool is_dev_rpm2(void *rpmd) @@ -768,3 +772,41 @@ int rpm_lmac_reset(void *rpmd, int lmac_id, u8 pf_req_flr) return 0; } + +void rpm_x2p_reset(void *rpmd, bool enable) +{ + rpm_t *rpm = rpmd; + int lmac_id; + u64 cfg; + + if (enable) { + for_each_set_bit(lmac_id, &rpm->lmac_bmap, rpm->max_lmac_per_mac) + rpm->mac_ops->mac_enadis_rx(rpm, lmac_id, false); + + usleep_range(1000, 2000); + + cfg = rpm_read(rpm, 0, RPMX_CMR_GLOBAL_CFG); + rpm_write(rpm, 0, RPMX_CMR_GLOBAL_CFG, cfg | RPM_NIX0_RESET); + } else { + cfg = rpm_read(rpm, 0, RPMX_CMR_GLOBAL_CFG); + cfg &= ~RPM_NIX0_RESET; + rpm_write(rpm, 0, RPMX_CMR_GLOBAL_CFG, cfg); + } +} + +int rpm_enadis_rx(void *rpmd, int lmac_id, bool enable) +{ + rpm_t *rpm = rpmd; + u64 cfg; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + if (enable) + cfg |= RPM_RX_EN; + else + cfg &= ~RPM_RX_EN; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index 5194fec4c3b8..b8d3972e096a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -17,6 +17,8 @@ /* Registers */ #define RPMX_CMRX_CFG 0x00 +#define RPMX_CMR_GLOBAL_CFG 0x08 +#define RPM_NIX0_RESET BIT_ULL(3) #define RPMX_RX_TS_PREPEND BIT_ULL(22) #define RPMX_TX_PTP_1S_SUPPORT BIT_ULL(17) #define RPMX_CMRX_RX_ID_MAP 0x80 @@ -139,4 +141,6 @@ bool is_dev_rpm2(void *rpmd); int rpm_get_fec_stats(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp *rsp); int rpm_lmac_reset(void *rpmd, int lmac_id, u8 pf_req_flr); int rpm_stats_reset(void *rpmd, int lmac_id); +void rpm_x2p_reset(void *rpmd, bool enable); +int rpm_enadis_rx(void *rpmd, int lmac_id, bool enable); #endif /* RPM_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 1a97fb9032fa..cd0d7b7774f1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1162,6 +1162,7 @@ cpt: } rvu_program_channels(rvu); + cgx_start_linkup(rvu); err = rvu_mcs_init(rvu); if (err) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index b897845e25fd..a383b5ef5b2d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -1025,6 +1025,7 @@ int rvu_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_ int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause); void rvu_mac_reset(struct rvu *rvu, u16 pcifunc); u32 rvu_cgx_get_lmac_fifolen(struct rvu *rvu, int cgx, int lmac); +void cgx_start_linkup(struct rvu *rvu); int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, int type); bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 4dcd7bfcad4e..992fa0b82e8d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -349,6 +349,7 @@ static void rvu_cgx_wq_destroy(struct rvu *rvu) int rvu_cgx_init(struct rvu *rvu) { + struct mac_ops *mac_ops; int cgx, err; void *cgxd; @@ -375,6 +376,15 @@ int rvu_cgx_init(struct rvu *rvu) if (err) return err; + /* Clear X2P reset on all MAC blocks */ + for (cgx = 0; cgx < rvu->cgx_cnt_max; cgx++) { + cgxd = rvu_cgx_pdata(cgx, rvu); + if (!cgxd) + continue; + mac_ops = get_mac_ops(cgxd); + mac_ops->mac_x2p_reset(cgxd, false); + } + /* Register for CGX events */ err = cgx_lmac_event_handler_init(rvu); if (err) @@ -382,10 +392,26 @@ int rvu_cgx_init(struct rvu *rvu) mutex_init(&rvu->cgx_cfg_lock); - /* Ensure event handler registration is completed, before - * we turn on the links - */ - mb(); + return 0; +} + +void cgx_start_linkup(struct rvu *rvu) +{ + unsigned long lmac_bmap; + struct mac_ops *mac_ops; + int cgx, lmac, err; + void *cgxd; + + /* Enable receive on all LMACS */ + for (cgx = 0; cgx <= rvu->cgx_cnt_max; cgx++) { + cgxd = rvu_cgx_pdata(cgx, rvu); + if (!cgxd) + continue; + mac_ops = get_mac_ops(cgxd); + lmac_bmap = cgx_get_lmac_bmap(cgxd); + for_each_set_bit(lmac, &lmac_bmap, rvu->hw->lmac_per_cgx) + mac_ops->mac_enadis_rx(cgxd, lmac, true); + } /* Do link up for all CGX ports */ for (cgx = 0; cgx <= rvu->cgx_cnt_max; cgx++) { @@ -398,8 +424,6 @@ int rvu_cgx_init(struct rvu *rvu) "Link up process failed to start on cgx %d\n", cgx); } - - return 0; } int rvu_cgx_exit(struct rvu *rvu) -- 2.51.0 From 9cfb5e7f0ded2bfaabc270ceb5f91d13f0e805b9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Nov 2024 17:13:43 +0000 Subject: [PATCH 11/16] net: hsr: fix hsr_init_sk() vs network/transport headers. Following sequence in hsr_init_sk() is invalid : skb_reset_mac_header(skb); skb_reset_mac_len(skb); skb_reset_network_header(skb); skb_reset_transport_header(skb); It is invalid because skb_reset_mac_len() needs the correct network header, which should be after the mac header. This patch moves the skb_reset_network_header() and skb_reset_transport_header() before the call to dev_hard_header(). As a result skb->mac_len is no longer set to a value close to 65535. Fixes: 48b491a5cc74 ("net: hsr: fix mac_len checks") Signed-off-by: Eric Dumazet Cc: George McCollister Link: https://patch.msgid.link/20241122171343.897551-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/hsr/hsr_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 9e64496a5c1c..31a416ee21ad 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -268,6 +268,8 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master) skb->dev = master->dev; skb->priority = TC_PRIO_CONTROL; + skb_reset_network_header(skb); + skb_reset_transport_header(skb); if (dev_hard_header(skb, skb->dev, ETH_P_PRP, hsr->sup_multicast_addr, skb->dev->dev_addr, skb->len) <= 0) @@ -275,8 +277,6 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master) skb_reset_mac_header(skb); skb_reset_mac_len(skb); - skb_reset_network_header(skb); - skb_reset_transport_header(skb); return skb; out: -- 2.51.0 From 5311598f7f3293683cdc761df71ae3469327332c Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Fri, 22 Nov 2024 14:45:41 -0800 Subject: [PATCH 12/16] bnxt_en: Reserve rings after PCIe AER recovery if NIC interface is down After successful PCIe AER recovery, FW will reset all resource reservations. If it is IF_UP, the driver will call bnxt_open() and all resources will be reserved again. It it is IF_DOWN, we should call bnxt_reserve_rings() so that we can reserve resources including RoCE resources to allow RoCE to resume after AER. Without this patch, RoCE fails to resume in this IF_DOWN scenario. Later, if it becomes IF_UP, bnxt_open() will see that resources have been reserved and will not reserve again. Fixes: fb1e6e562b37 ("bnxt_en: Fix AER recovery.") Reviewed-by: Somnath Kotur Reviewed-by: Pavan Chebbi Reviewed-by: Kashyap Desai Signed-off-by: Saravanan Vajravel Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5f7bdafcf05d..3eeaceb3ff38 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -16478,8 +16478,12 @@ static void bnxt_io_resume(struct pci_dev *pdev) rtnl_lock(); err = bnxt_hwrm_func_qcaps(bp); - if (!err && netif_running(netdev)) - err = bnxt_open(netdev); + if (!err) { + if (netif_running(netdev)) + err = bnxt_open(netdev); + else + err = bnxt_reserve_rings(bp, true); + } if (!err) netif_device_attach(netdev); -- 2.51.0 From 5007991670941c132fb3bc0484c009cf4bcea30f Mon Sep 17 00:00:00 2001 From: Shravya KN Date: Fri, 22 Nov 2024 14:45:42 -0800 Subject: [PATCH 13/16] bnxt_en: Set backplane link modes correctly for ethtool Use the return value from bnxt_get_media() to determine the port and link modes. bnxt_get_media() returns the proper BNXT_MEDIA_KR when the PHY is backplane. This will correct the ethtool settings for backplane devices. Fixes: 5d4e1bf60664 ("bnxt_en: extend media types to supported and autoneg modes") Reviewed-by: Somnath Kotur Signed-off-by: Shravya KN Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 2f4987ec7464..f1f6bb328a55 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2837,19 +2837,24 @@ static int bnxt_get_link_ksettings(struct net_device *dev, } base->port = PORT_NONE; - if (link_info->media_type == PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP) { + if (media == BNXT_MEDIA_TP) { base->port = PORT_TP; linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, lk_ksettings->link_modes.supported); linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, lk_ksettings->link_modes.advertising); + } else if (media == BNXT_MEDIA_KR) { + linkmode_set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, + lk_ksettings->link_modes.supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, + lk_ksettings->link_modes.advertising); } else { linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, lk_ksettings->link_modes.supported); linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, lk_ksettings->link_modes.advertising); - if (link_info->media_type == PORT_PHY_QCFG_RESP_MEDIA_TYPE_DAC) + if (media == BNXT_MEDIA_CR) base->port = PORT_DA; else base->port = PORT_FIBRE; -- 2.51.0 From 5ac066b7b062ee753a14557ea11bdc62364c8090 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Fri, 22 Nov 2024 14:45:43 -0800 Subject: [PATCH 14/16] bnxt_en: Fix queue start to update vnic RSS table HWRM_RING_FREE followed by a HWRM_RING_ALLOC is not guaranteed to have the same FW ring ID as before. So we must reinitialize the RSS table with the correct ring IDs. Otherwise, traffic may not resume properly if the restarted ring ID is stale. Since this feature is only supported on P5_PLUS chips, we call bnxt_vnic_set_rss_p5() to update the HW RSS table. Fixes: 2d694c27d32e ("bnxt_en: implement netdev_queue_mgmt_ops") Cc: David Wei Reviewed-by: Kalesh AP Reviewed-by: Andy Gospodarek Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3eeaceb3ff38..3bee485b50f0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15477,6 +15477,13 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { vnic = &bp->vnic_info[i]; + + rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); + if (rc) { + netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n", + vnic->vnic_id, rc); + return rc; + } vnic->mru = bp->dev->mtu + ETH_HLEN + VLAN_HLEN; bnxt_hwrm_vnic_update(bp, vnic, VNIC_UPDATE_REQ_ENABLES_MRU_VALID); -- 2.51.0 From 3051a77a09dfe3022aa012071346937fdf059033 Mon Sep 17 00:00:00 2001 From: Shravya KN Date: Fri, 22 Nov 2024 14:45:44 -0800 Subject: [PATCH 15/16] bnxt_en: Fix receive ring space parameters when XDP is active The MTU setting at the time an XDP multi-buffer is attached determines whether the aggregation ring will be used and the rx_skb_func handler. This is done in bnxt_set_rx_skb_mode(). If the MTU is later changed, the aggregation ring setting may need to be changed and it may become out-of-sync with the settings initially done in bnxt_set_rx_skb_mode(). This may result in random memory corruption and crashes as the HW may DMA data larger than the allocated buffer size, such as: BUG: kernel NULL pointer dereference, address: 00000000000003c0 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 17 PID: 0 Comm: swapper/17 Kdump: loaded Tainted: G S OE 6.1.0-226bf9805506 #1 Hardware name: Wiwynn Delta Lake PVT BZA.02601.0150/Delta Lake-Class1, BIOS F0E_3A12 08/26/2021 RIP: 0010:bnxt_rx_pkt+0xe97/0x1ae0 [bnxt_en] Code: 8b 95 70 ff ff ff 4c 8b 9d 48 ff ff ff 66 41 89 87 b4 00 00 00 e9 0b f7 ff ff 0f b7 43 0a 49 8b 95 a8 04 00 00 25 ff 0f 00 00 <0f> b7 14 42 48 c1 e2 06 49 03 95 a0 04 00 00 0f b6 42 33f RSP: 0018:ffffa19f40cc0d18 EFLAGS: 00010202 RAX: 00000000000001e0 RBX: ffff8e2c805c6100 RCX: 00000000000007ff RDX: 0000000000000000 RSI: ffff8e2c271ab990 RDI: ffff8e2c84f12380 RBP: ffffa19f40cc0e48 R08: 000000000001000d R09: 974ea2fcddfa4cbf R10: 0000000000000000 R11: ffffa19f40cc0ff8 R12: ffff8e2c94b58980 R13: ffff8e2c952d6600 R14: 0000000000000016 R15: ffff8e2c271ab990 FS: 0000000000000000(0000) GS:ffff8e3b3f840000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000003c0 CR3: 0000000e8580a004 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: __bnxt_poll_work+0x1c2/0x3e0 [bnxt_en] To address the issue, we now call bnxt_set_rx_skb_mode() within bnxt_change_mtu() to properly set the AGG rings configuration and update rx_skb_func based on the new MTU value. Additionally, BNXT_FLAG_NO_AGG_RINGS is cleared at the beginning of bnxt_set_rx_skb_mode() to make sure it gets set or cleared based on the current MTU. Fixes: 08450ea98ae9 ("bnxt_en: Fix max_mtu setting for multi-buf XDP") Co-developed-by: Somnath Kotur Signed-off-by: Somnath Kotur Signed-off-by: Shravya KN Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3bee485b50f0..b7541156fe46 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4661,7 +4661,7 @@ int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode) struct net_device *dev = bp->dev; if (page_mode) { - bp->flags &= ~BNXT_FLAG_AGG_RINGS; + bp->flags &= ~(BNXT_FLAG_AGG_RINGS | BNXT_FLAG_NO_AGG_RINGS); bp->flags |= BNXT_FLAG_RX_PAGE_MODE; if (bp->xdp_prog->aux->xdp_has_frags) @@ -14740,6 +14740,14 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) bnxt_close_nic(bp, true, false); WRITE_ONCE(dev->mtu, new_mtu); + + /* MTU change may change the AGG ring settings if an XDP multi-buffer + * program is attached. We need to set the AGG rings settings and + * rx_skb_func accordingly. + */ + if (READ_ONCE(bp->xdp_prog)) + bnxt_set_rx_skb_mode(bp, true); + bnxt_set_ring_params(bp); if (netif_running(dev)) -- 2.51.0 From 1e9614cd956268e10a669c0593e7e54d03d0c087 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 22 Nov 2024 14:45:45 -0800 Subject: [PATCH 16/16] bnxt_en: Refactor bnxt_ptp_init() Instead of passing the 2nd parameter phc_cfg to bnxt_ptp_init(). Store it in bp->ptp_cfg so that the caller doesn't need to know what the value should be. In the next patch, we'll need to call bnxt_ptp_init() in bnxt_resume() and this will make it easier. Reviewed-by: Somnath Kotur Reviewed-by: Pavan Chebbi Reviewed-by: Kalesh AP Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++--- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 3 ++- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b7541156fe46..9c4b8ea22bf9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9296,7 +9296,6 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) struct hwrm_port_mac_ptp_qcfg_output *resp; struct hwrm_port_mac_ptp_qcfg_input *req; struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; - bool phc_cfg; u8 flags; int rc; @@ -9343,8 +9342,9 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) rc = -ENODEV; goto exit; } - phc_cfg = (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED) != 0; - rc = bnxt_ptp_init(bp, phc_cfg); + ptp->rtc_configured = + (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED) != 0; + rc = bnxt_ptp_init(bp); if (rc) netdev_warn(bp->dev, "PTP initialization failed.\n"); exit: diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 075ccd589845..2d4e19b96ee7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -1038,7 +1038,7 @@ static void bnxt_ptp_free(struct bnxt *bp) } } -int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) +int bnxt_ptp_init(struct bnxt *bp) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; int rc; @@ -1061,7 +1061,7 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) if (BNXT_PTP_USE_RTC(bp)) { bnxt_ptp_timecounter_init(bp, false); - rc = bnxt_ptp_init_rtc(bp, phc_cfg); + rc = bnxt_ptp_init_rtc(bp, ptp->rtc_configured); if (rc) goto out; } else { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index c7851f8c971c..a95f05e9c579 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -135,6 +135,7 @@ struct bnxt_ptp_cfg { BNXT_PTP_MSG_PDELAY_REQ | \ BNXT_PTP_MSG_PDELAY_RESP) u8 tx_tstamp_en:1; + u8 rtc_configured:1; int rx_filter; u32 tstamp_filters; @@ -168,7 +169,7 @@ void bnxt_tx_ts_cmp(struct bnxt *bp, struct bnxt_napi *bnapi, struct tx_ts_cmp *tscmp); void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns); int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg); -int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg); +int bnxt_ptp_init(struct bnxt *bp); void bnxt_ptp_clear(struct bnxt *bp); static inline u64 bnxt_timecounter_cyc2time(struct bnxt_ptp_cfg *ptp, u64 ts) { -- 2.51.0