From 20d9b73217c6109ae69679ebb28ccfaf87e55c14 Mon Sep 17 00:00:00 2001 From: Sumanth Gavini Date: Fri, 16 May 2025 18:59:37 -0700 Subject: [PATCH 01/16] selftests: nci: Fix "Electrnoics" to "Electronics" Fix misspelling reported by codespell Signed-off-by: Sumanth Gavini Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250517020003.1159640-1-sumanth.gavini@yahoo.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/nci/nci_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c index 1562aa7d60b0..6dec59d64083 100644 --- a/tools/testing/selftests/nci/nci_dev.c +++ b/tools/testing/selftests/nci/nci_dev.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2021 Samsung Electrnoics + * Copyright (C) 2021 Samsung Electronics * Bongsu Jeon * * Test code for nci -- 2.51.0 From 6a7e8b5d632834f2722cdabf81bd0b9eef3a214d Mon Sep 17 00:00:00 2001 From: Sumanth Gavini Date: Fri, 16 May 2025 20:25:33 -0700 Subject: [PATCH 02/16] selftests: net: Fix spellings Fix "withouth" to "without" Fix "instaces" to "instances" Signed-off-by: Sumanth Gavini Reviewed-by: Andrea Mayer Link: https://patch.msgid.link/20250517032535.1176351-1-sumanth.gavini@yahoo.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/srv6_end_flavors_test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh index 50563443a4ad..318487eda671 100755 --- a/tools/testing/selftests/net/srv6_end_flavors_test.sh +++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh @@ -399,7 +399,7 @@ __get_srv6_rtcfg_id() # Given the description of a router as an input, the function returns # the token which represents the operation (e.g. End behavior with or -# withouth flavors) configured for the node. +# without flavors) configured for the node. 
# Note that when the operation represents an End behavior with a list of # flavors, the output is the ordered version of that list. @@ -480,7 +480,7 @@ setup_rt_local_sids() # all SIDs start with a common locator. Routes and SRv6 Endpoint - # behavior instaces are grouped together in the 'localsid' table. + # behavior instances are grouped together in the 'localsid' table. ip -netns "${nsname}" -6 rule \ add to "${LOCATOR_SERVICE}::/16" \ lookup "${LOCALSID_TABLE_ID}" prio 999 -- 2.51.0 From 4c2bd7913f52b1e5c978edf56cdef39c30a1f603 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 17 May 2025 13:08:10 -0700 Subject: [PATCH 03/16] net: let lockdep compare instance locks AFAIU always returning -1 from lockdep's compare function basically disables checking of dependencies between given locks. Try to be a little more precise about what guarantees that instance locks won't deadlock. Right now we only nest them under protection of rtnl_lock. Mostly in unregister_netdevice_many() and dev_close_many(). Acked-by: Stanislav Fomichev Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250517200810.466531-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netdev_lock.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index 2a753813f849..c345afecd4c5 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -99,16 +99,15 @@ static inline void netdev_unlock_ops_compat(struct net_device *dev) static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b) { - /* Only lower devices currently grab the instance lock, so no - * real ordering issues can occur. In the near future, only - * hardware devices will grab instance lock which also does not - * involve any ordering. Suppress lockdep ordering warnings - * until (if) we start grabbing instance lock on pure SW - * devices (bond/team/veth/etc). 
- */ if (a == b) return 0; - return -1; + + /* Allow locking multiple devices only under rtnl_lock, + * the exact order doesn't matter. + * Note that upper devices don't lock their ops, so nesting + * mostly happens in batched device removal for now. + */ + return lockdep_rtnl_is_held() ? -1 : 1; } #define netdev_lockdep_set_classes(dev) \ -- 2.51.0 From 3f1716ee0f6c63795e6d225e3f5ec3825cd2bd57 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 17 May 2025 22:34:32 +0200 Subject: [PATCH 04/16] net: phy: fixed_phy: remove irq argument from fixed_phy_add All callers pass PHY_POLL, therefore remove irq argument from fixed_phy_add(). Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Acked-by: Greg Ungerer Link: https://patch.msgid.link/b3b9b3bc-c310-4a54-b376-c909c83575de@gmail.com Signed-off-by: Jakub Kicinski --- arch/m68k/coldfire/m5272.c | 2 +- arch/mips/bcm47xx/setup.c | 2 +- drivers/net/phy/fixed_phy.c | 5 ++--- include/linux/phy_fixed.h | 5 ++--- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/m68k/coldfire/m5272.c b/arch/m68k/coldfire/m5272.c index 734dab657fe3..5b70dfdab368 100644 --- a/arch/m68k/coldfire/m5272.c +++ b/arch/m68k/coldfire/m5272.c @@ -119,7 +119,7 @@ static struct fixed_phy_status nettel_fixed_phy_status __initdata = { static int __init init_BSP(void) { m5272_uarts_init(); - fixed_phy_add(PHY_POLL, 0, &nettel_fixed_phy_status); + fixed_phy_add(0, &nettel_fixed_phy_status); clkdev_add_table(m5272_clk_lookup, ARRAY_SIZE(m5272_clk_lookup)); return 0; } diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index 247be207f293..de426a474b5b 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -282,7 +282,7 @@ static int __init bcm47xx_register_bus_complete(void) bcm47xx_leds_register(); bcm47xx_workarounds(); - fixed_phy_add(PHY_POLL, 0, &bcm47xx_fixed_phy_status); + fixed_phy_add(0, &bcm47xx_fixed_phy_status); return 0; } 
device_initcall(bcm47xx_register_bus_complete); diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index c91adf2464c5..34a71f223f0f 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -160,10 +160,9 @@ static int fixed_phy_add_gpiod(unsigned int irq, int phy_addr, return 0; } -int fixed_phy_add(unsigned int irq, int phy_addr, - struct fixed_phy_status *status) +int fixed_phy_add(int phy_addr, struct fixed_phy_status *status) { - return fixed_phy_add_gpiod(irq, phy_addr, status, NULL); + return fixed_phy_add_gpiod(PHY_POLL, phy_addr, status, NULL); } EXPORT_SYMBOL_GPL(fixed_phy_add); diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 3392c09b5d24..316bb4deda37 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -17,8 +17,7 @@ struct net_device; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); -extern int fixed_phy_add(unsigned int irq, int phy_id, - struct fixed_phy_status *status); +int fixed_phy_add(int phy_id, struct fixed_phy_status *status); extern struct phy_device *fixed_phy_register(unsigned int irq, struct fixed_phy_status *status, struct device_node *np); @@ -28,7 +27,7 @@ extern int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, struct fixed_phy_status *)); #else -static inline int fixed_phy_add(unsigned int irq, int phy_id, +static inline int fixed_phy_add(int phy_id, struct fixed_phy_status *status) { return -ENODEV; -- 2.51.0 From d23b4af5df3900fb0b4e1a05cb8119dd1c395519 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 17 May 2025 22:35:56 +0200 Subject: [PATCH 05/16] net: phy: fixed_phy: remove irq argument from fixed_phy_register All callers pass PHY_POLL, therefore remove irq argument from fixed_phy_register(). 
Note: I keep the irq argument in fixed_phy_add_gpiod() for now, for the case that somebody may want to use a GPIO interrupt in the future, by e.g. adding a call to fwnode_irq_get() to fixed_phy_get_gpiod(). Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/31cdb232-a5e9-4997-a285-cb9a7d208124@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/dsa_loop.c | 2 +- drivers/net/ethernet/broadcom/bgmac.c | 2 +- drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 +- drivers/net/ethernet/faraday/ftgmac100.c | 2 +- drivers/net/mdio/of_mdio.c | 2 +- drivers/net/phy/fixed_phy.c | 5 ++--- drivers/net/usb/lan78xx.c | 2 +- include/linux/phy_fixed.h | 11 +++++------ 8 files changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index adbab544c60f..d8a35f25a4c8 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -405,7 +405,7 @@ static int __init dsa_loop_init(void) unsigned int i, ret; for (i = 0; i < NUM_FIXED_PHYS; i++) - phydevs[i] = fixed_phy_register(PHY_POLL, &status, NULL); + phydevs[i] = fixed_phy_register(&status, NULL); ret = mdio_driver_register(&dsa_loop_drv); if (ret) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index a461ec612e95..3e9c57196a39 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1446,7 +1446,7 @@ int bgmac_phy_connect_direct(struct bgmac *bgmac) struct phy_device *phy_dev; int err; - phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, NULL); + phy_dev = fixed_phy_register(&fphy_status, NULL); if (IS_ERR(phy_dev)) { dev_err(bgmac->dev, "Failed to register fixed PHY device\n"); return PTR_ERR(phy_dev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 71c619d2bea5..b6437ba7a2eb 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ 
b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -625,7 +625,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) .asym_pause = 0, }; - phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL); + phydev = fixed_phy_register(&fphy_status, NULL); if (IS_ERR(phydev)) { dev_err(kdev, "failed to register fixed PHY device\n"); return PTR_ERR(phydev); diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 17ec35e75a65..a98d5af3f9e3 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1906,7 +1906,7 @@ static int ftgmac100_probe(struct platform_device *pdev) goto err_phy_connect; } - phydev = fixed_phy_register(PHY_POLL, &ncsi_phy_status, np); + phydev = fixed_phy_register(&ncsi_phy_status, np); if (IS_ERR(phydev)) { dev_err(&pdev->dev, "failed to register fixed PHY device\n"); err = PTR_ERR(phydev); diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c index 2f4fc664d2e1..98f667b121f7 100644 --- a/drivers/net/mdio/of_mdio.c +++ b/drivers/net/mdio/of_mdio.c @@ -458,7 +458,7 @@ int of_phy_register_fixed_link(struct device_node *np) return -ENODEV; register_phy: - return PTR_ERR_OR_ZERO(fixed_phy_register(PHY_POLL, &status, np)); + return PTR_ERR_OR_ZERO(fixed_phy_register(&status, np)); } EXPORT_SYMBOL(of_phy_register_fixed_link); diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 34a71f223f0f..ea002a137a7d 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -222,8 +222,7 @@ static struct gpio_desc *fixed_phy_get_gpiod(struct device_node *np) } #endif -struct phy_device *fixed_phy_register(unsigned int irq, - struct fixed_phy_status *status, +struct phy_device *fixed_phy_register(struct fixed_phy_status *status, struct device_node *np) { struct fixed_mdio_bus *fmb = &platform_fmb; @@ -245,7 +244,7 @@ struct phy_device *fixed_phy_register(unsigned int irq, if (phy_addr < 0) return 
ERR_PTR(phy_addr); - ret = fixed_phy_add_gpiod(irq, phy_addr, status, gpiod); + ret = fixed_phy_add_gpiod(PHY_POLL, phy_addr, status, gpiod); if (ret < 0) { ida_free(&phy_fixed_ida, phy_addr); return ERR_PTR(ret); diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 58e3589e3b89..480bbc0f2d8f 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2640,7 +2640,7 @@ static struct phy_device *lan78xx_register_fixed_phy(struct lan78xx_net *dev) netdev_info(dev->net, "No PHY found on LAN7801 – registering fixed PHY (e.g. EVB-KSZ9897-1)\n"); - return fixed_phy_register(PHY_POLL, &fphy_status, NULL); + return fixed_phy_register(&fphy_status, NULL); } /** diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 316bb4deda37..634149a73c2a 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -18,9 +18,8 @@ struct net_device; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); int fixed_phy_add(int phy_id, struct fixed_phy_status *status); -extern struct phy_device *fixed_phy_register(unsigned int irq, - struct fixed_phy_status *status, - struct device_node *np); +struct phy_device *fixed_phy_register(struct fixed_phy_status *status, + struct device_node *np); extern void fixed_phy_unregister(struct phy_device *phydev); extern int fixed_phy_set_link_update(struct phy_device *phydev, @@ -32,9 +31,9 @@ static inline int fixed_phy_add(int phy_id, { return -ENODEV; } -static inline struct phy_device *fixed_phy_register(unsigned int irq, - struct fixed_phy_status *status, - struct device_node *np) +static inline struct phy_device * +fixed_phy_register(struct fixed_phy_status *status, + struct device_node *np) { return ERR_PTR(-ENODEV); } -- 2.51.0 From 4ba1c5bb4811f560a86697311cb4e9741e047a5d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 17 May 2025 22:37:29 +0200 Subject: [PATCH 06/16] net: phy: fixed_phy: constify status argument 
where possible Constify the passed struct fixed_phy_status *status where possible. Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/d1764b62-8538-408b-a4e3-b63715481a38@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/fixed_phy.c | 6 +++--- include/linux/phy_fixed.h | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index ea002a137a7d..033656d574b8 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -131,7 +131,7 @@ int fixed_phy_set_link_update(struct phy_device *phydev, EXPORT_SYMBOL_GPL(fixed_phy_set_link_update); static int fixed_phy_add_gpiod(unsigned int irq, int phy_addr, - struct fixed_phy_status *status, + const struct fixed_phy_status *status, struct gpio_desc *gpiod) { int ret; @@ -160,7 +160,7 @@ static int fixed_phy_add_gpiod(unsigned int irq, int phy_addr, return 0; } -int fixed_phy_add(int phy_addr, struct fixed_phy_status *status) +int fixed_phy_add(int phy_addr, const struct fixed_phy_status *status) { return fixed_phy_add_gpiod(PHY_POLL, phy_addr, status, NULL); } @@ -222,7 +222,7 @@ static struct gpio_desc *fixed_phy_get_gpiod(struct device_node *np) } #endif -struct phy_device *fixed_phy_register(struct fixed_phy_status *status, +struct phy_device *fixed_phy_register(const struct fixed_phy_status *status, struct device_node *np) { struct fixed_mdio_bus *fmb = &platform_fmb; diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 634149a73c2a..5399b9e41e35 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -17,8 +17,8 @@ struct net_device; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); -int fixed_phy_add(int phy_id, struct fixed_phy_status *status); -struct phy_device *fixed_phy_register(struct fixed_phy_status *status, +int fixed_phy_add(int phy_id, const 
struct fixed_phy_status *status); +struct phy_device *fixed_phy_register(const struct fixed_phy_status *status, struct device_node *np); extern void fixed_phy_unregister(struct phy_device *phydev); @@ -27,12 +27,12 @@ extern int fixed_phy_set_link_update(struct phy_device *phydev, struct fixed_phy_status *)); #else static inline int fixed_phy_add(int phy_id, - struct fixed_phy_status *status) + const struct fixed_phy_status *status) { return -ENODEV; } static inline struct phy_device * -fixed_phy_register(struct fixed_phy_status *status, +fixed_phy_register(const struct fixed_phy_status *status, struct device_node *np) { return ERR_PTR(-ENODEV); -- 2.51.0 From 59aa6e3072aa7e51e9040e8c342d0c0825c5f48f Mon Sep 17 00:00:00 2001 From: Zak Kemble Date: Mon, 19 May 2025 12:32:55 +0100 Subject: [PATCH 07/16] net: bcmgenet: switch to use 64bit statistics Update the driver to use ndo_get_stats64, rtnl_link_stats64 and u64_stats_t counters for statistics. Signed-off-by: Zak Kemble Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250519113257.1031-2-zakkemble@gmail.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/genet/bcmgenet.c | 246 ++++++++++++------ .../net/ethernet/broadcom/genet/bcmgenet.h | 29 ++- 2 files changed, 187 insertions(+), 88 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 73d78dcb774d..101ba6b2f70f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -969,12 +969,13 @@ static int bcmgenet_set_pauseparam(struct net_device *dev, /* standard ethtool support functions. 
*/ enum bcmgenet_stat_type { - BCMGENET_STAT_NETDEV = -1, + BCMGENET_STAT_RTNL = -1, BCMGENET_STAT_MIB_RX, BCMGENET_STAT_MIB_TX, BCMGENET_STAT_RUNT, BCMGENET_STAT_MISC, BCMGENET_STAT_SOFT, + BCMGENET_STAT_SOFT64, }; struct bcmgenet_stats { @@ -984,13 +985,15 @@ struct bcmgenet_stats { enum bcmgenet_stat_type type; /* reg offset from UMAC base for misc counters */ u16 reg_offset; + /* sync for u64 stats counters */ + int syncp_offset; }; -#define STAT_NETDEV(m) { \ +#define STAT_RTNL(m) { \ .stat_string = __stringify(m), \ - .stat_sizeof = sizeof(((struct net_device_stats *)0)->m), \ - .stat_offset = offsetof(struct net_device_stats, m), \ - .type = BCMGENET_STAT_NETDEV, \ + .stat_sizeof = sizeof(((struct rtnl_link_stats64 *)0)->m), \ + .stat_offset = offsetof(struct rtnl_link_stats64, m), \ + .type = BCMGENET_STAT_RTNL, \ } #define STAT_GENET_MIB(str, m, _type) { \ @@ -1000,6 +1003,14 @@ struct bcmgenet_stats { .type = _type, \ } +#define STAT_GENET_SOFT_MIB64(str, s, m) { \ + .stat_string = str, \ + .stat_sizeof = sizeof(((struct bcmgenet_priv *)0)->s.m), \ + .stat_offset = offsetof(struct bcmgenet_priv, s.m), \ + .type = BCMGENET_STAT_SOFT64, \ + .syncp_offset = offsetof(struct bcmgenet_priv, s.syncp), \ +} + #define STAT_GENET_MIB_RX(str, m) STAT_GENET_MIB(str, m, BCMGENET_STAT_MIB_RX) #define STAT_GENET_MIB_TX(str, m) STAT_GENET_MIB(str, m, BCMGENET_STAT_MIB_TX) #define STAT_GENET_RUNT(str, m) STAT_GENET_MIB(str, m, BCMGENET_STAT_RUNT) @@ -1014,18 +1025,18 @@ struct bcmgenet_stats { } #define STAT_GENET_Q(num) \ - STAT_GENET_SOFT_MIB("txq" __stringify(num) "_packets", \ - tx_rings[num].packets), \ - STAT_GENET_SOFT_MIB("txq" __stringify(num) "_bytes", \ - tx_rings[num].bytes), \ - STAT_GENET_SOFT_MIB("rxq" __stringify(num) "_bytes", \ - rx_rings[num].bytes), \ - STAT_GENET_SOFT_MIB("rxq" __stringify(num) "_packets", \ - rx_rings[num].packets), \ - STAT_GENET_SOFT_MIB("rxq" __stringify(num) "_errors", \ - rx_rings[num].errors), \ - STAT_GENET_SOFT_MIB("rxq" 
__stringify(num) "_dropped", \ - rx_rings[num].dropped) + STAT_GENET_SOFT_MIB64("txq" __stringify(num) "_packets", \ + tx_rings[num].stats64, packets), \ + STAT_GENET_SOFT_MIB64("txq" __stringify(num) "_bytes", \ + tx_rings[num].stats64, bytes), \ + STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_bytes", \ + rx_rings[num].stats64, bytes), \ + STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_packets", \ + rx_rings[num].stats64, packets), \ + STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_errors", \ + rx_rings[num].stats64, errors), \ + STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_dropped", \ + rx_rings[num].stats64, dropped) /* There is a 0xC gap between the end of RX and beginning of TX stats and then * between the end of TX stats and the beginning of the RX RUNT @@ -1037,15 +1048,15 @@ struct bcmgenet_stats { */ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = { /* general stats */ - STAT_NETDEV(rx_packets), - STAT_NETDEV(tx_packets), - STAT_NETDEV(rx_bytes), - STAT_NETDEV(tx_bytes), - STAT_NETDEV(rx_errors), - STAT_NETDEV(tx_errors), - STAT_NETDEV(rx_dropped), - STAT_NETDEV(tx_dropped), - STAT_NETDEV(multicast), + STAT_RTNL(rx_packets), + STAT_RTNL(tx_packets), + STAT_RTNL(rx_bytes), + STAT_RTNL(tx_bytes), + STAT_RTNL(rx_errors), + STAT_RTNL(tx_errors), + STAT_RTNL(rx_dropped), + STAT_RTNL(tx_dropped), + STAT_RTNL(multicast), /* UniMAC RSV counters */ STAT_GENET_MIB_RX("rx_64_octets", mib.rx.pkt_cnt.cnt_64), STAT_GENET_MIB_RX("rx_65_127_oct", mib.rx.pkt_cnt.cnt_127), @@ -1133,6 +1144,20 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = { #define BCMGENET_STATS_LEN ARRAY_SIZE(bcmgenet_gstrings_stats) +#define BCMGENET_STATS64_ADD(stats, m, v) \ + do { \ + u64_stats_update_begin(&stats->syncp); \ + u64_stats_add(&stats->m, v); \ + u64_stats_update_end(&stats->syncp); \ + } while (0) + +#define BCMGENET_STATS64_INC(stats, m) \ + do { \ + u64_stats_update_begin(&stats->syncp); \ + u64_stats_inc(&stats->m); \ + 
u64_stats_update_end(&stats->syncp); \ + } while (0) + static void bcmgenet_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { @@ -1216,8 +1241,9 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv) s = &bcmgenet_gstrings_stats[i]; switch (s->type) { - case BCMGENET_STAT_NETDEV: + case BCMGENET_STAT_RTNL: case BCMGENET_STAT_SOFT: + case BCMGENET_STAT_SOFT64: continue; case BCMGENET_STAT_RUNT: offset += BCMGENET_STAT_OFFSET; @@ -1255,28 +1281,40 @@ static void bcmgenet_get_ethtool_stats(struct net_device *dev, u64 *data) { struct bcmgenet_priv *priv = netdev_priv(dev); + struct rtnl_link_stats64 stats64; + struct u64_stats_sync *syncp; + unsigned int start; int i; if (netif_running(dev)) bcmgenet_update_mib_counters(priv); - dev->netdev_ops->ndo_get_stats(dev); + dev_get_stats(dev, &stats64); for (i = 0; i < BCMGENET_STATS_LEN; i++) { const struct bcmgenet_stats *s; char *p; s = &bcmgenet_gstrings_stats[i]; - if (s->type == BCMGENET_STAT_NETDEV) - p = (char *)&dev->stats; - else - p = (char *)priv; - p += s->stat_offset; - if (sizeof(unsigned long) != sizeof(u32) && - s->stat_sizeof == sizeof(unsigned long)) - data[i] = *(unsigned long *)p; - else - data[i] = *(u32 *)p; + p = (char *)priv; + + if (s->type == BCMGENET_STAT_SOFT64) { + syncp = (struct u64_stats_sync *)(p + s->syncp_offset); + do { + start = u64_stats_fetch_begin(syncp); + data[i] = u64_stats_read((u64_stats_t *)(p + s->stat_offset)); + } while (u64_stats_fetch_retry(syncp, start)); + } else { + if (s->type == BCMGENET_STAT_RTNL) + p = (char *)&stats64; + + p += s->stat_offset; + if (sizeof(unsigned long) != sizeof(u32) && + s->stat_sizeof == sizeof(unsigned long)) + data[i] = *(unsigned long *)p; + else + data[i] = *(u32 *)p; + } } } @@ -1856,6 +1894,7 @@ static struct sk_buff *bcmgenet_free_rx_cb(struct device *dev, static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, struct bcmgenet_tx_ring *ring) { + struct bcmgenet_tx_stats64 *stats = 
&ring->stats64; struct bcmgenet_priv *priv = netdev_priv(dev); unsigned int txbds_processed = 0; unsigned int bytes_compl = 0; @@ -1896,8 +1935,10 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, ring->free_bds += txbds_processed; ring->c_index = c_index; - ring->packets += pkts_compl; - ring->bytes += bytes_compl; + u64_stats_update_begin(&stats->syncp); + u64_stats_add(&stats->packets, pkts_compl); + u64_stats_add(&stats->bytes, bytes_compl); + u64_stats_update_end(&stats->syncp); netdev_tx_completed_queue(netdev_get_tx_queue(dev, ring->index), pkts_compl, bytes_compl); @@ -1983,8 +2024,10 @@ static void bcmgenet_tx_reclaim_all(struct net_device *dev) * the transmit checksum offsets in the descriptors */ static struct sk_buff *bcmgenet_add_tsb(struct net_device *dev, - struct sk_buff *skb) + struct sk_buff *skb, + struct bcmgenet_tx_ring *ring) { + struct bcmgenet_tx_stats64 *stats = &ring->stats64; struct bcmgenet_priv *priv = netdev_priv(dev); struct status_64 *status = NULL; struct sk_buff *new_skb; @@ -2001,7 +2044,7 @@ static struct sk_buff *bcmgenet_add_tsb(struct net_device *dev, if (!new_skb) { dev_kfree_skb_any(skb); priv->mib.tx_realloc_tsb_failed++; - dev->stats.tx_dropped++; + BCMGENET_STATS64_INC(stats, dropped); return NULL; } dev_consume_skb_any(skb); @@ -2089,7 +2132,7 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) GENET_CB(skb)->bytes_sent = skb->len; /* add the Transmit Status Block */ - skb = bcmgenet_add_tsb(dev, skb); + skb = bcmgenet_add_tsb(dev, skb, ring); if (!skb) { ret = NETDEV_TX_OK; goto out; @@ -2231,6 +2274,7 @@ static struct sk_buff *bcmgenet_rx_refill(struct bcmgenet_priv *priv, static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, unsigned int budget) { + struct bcmgenet_rx_stats64 *stats = &ring->stats64; struct bcmgenet_priv *priv = ring->priv; struct net_device *dev = priv->dev; struct enet_cb *cb; @@ -2253,7 +2297,7 @@ static unsigned int 
bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, DMA_P_INDEX_DISCARD_CNT_MASK; if (discards > ring->old_discards) { discards = discards - ring->old_discards; - ring->errors += discards; + BCMGENET_STATS64_ADD(stats, errors, discards); ring->old_discards += discards; /* Clear HW register when we reach 75% of maximum 0xFFFF */ @@ -2279,7 +2323,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, skb = bcmgenet_rx_refill(priv, cb); if (unlikely(!skb)) { - ring->dropped++; + BCMGENET_STATS64_INC(stats, dropped); goto next; } @@ -2306,8 +2350,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, if (unlikely(len > RX_BUF_LENGTH)) { netif_err(priv, rx_status, dev, "oversized packet\n"); - dev->stats.rx_length_errors++; - dev->stats.rx_errors++; + BCMGENET_STATS64_INC(stats, length_errors); dev_kfree_skb_any(skb); goto next; } @@ -2315,7 +2358,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, if (unlikely(!(dma_flag & DMA_EOP) || !(dma_flag & DMA_SOP))) { netif_err(priv, rx_status, dev, "dropping fragmented packet!\n"); - ring->errors++; + BCMGENET_STATS64_INC(stats, errors); dev_kfree_skb_any(skb); goto next; } @@ -2328,15 +2371,22 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, DMA_RX_RXER))) { netif_err(priv, rx_status, dev, "dma_flag=0x%x\n", (unsigned int)dma_flag); + u64_stats_update_begin(&stats->syncp); if (dma_flag & DMA_RX_CRC_ERROR) - dev->stats.rx_crc_errors++; + u64_stats_inc(&stats->crc_errors); if (dma_flag & DMA_RX_OV) - dev->stats.rx_over_errors++; + u64_stats_inc(&stats->over_errors); if (dma_flag & DMA_RX_NO) - dev->stats.rx_frame_errors++; + u64_stats_inc(&stats->frame_errors); if (dma_flag & DMA_RX_LG) - dev->stats.rx_length_errors++; - dev->stats.rx_errors++; + u64_stats_inc(&stats->length_errors); + if ((dma_flag & (DMA_RX_CRC_ERROR | + DMA_RX_OV | + DMA_RX_NO | + DMA_RX_LG | + DMA_RX_RXER)) == DMA_RX_RXER) + u64_stats_inc(&stats->errors); + 
u64_stats_update_end(&stats->syncp); dev_kfree_skb_any(skb); goto next; } /* error packet */ @@ -2356,10 +2406,13 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, /*Finish setting up the received SKB and send it to the kernel*/ skb->protocol = eth_type_trans(skb, priv->dev); - ring->packets++; - ring->bytes += len; + + u64_stats_update_begin(&stats->syncp); + u64_stats_inc(&stats->packets); + u64_stats_add(&stats->bytes, len); if (dma_flag & DMA_RX_MULT) - dev->stats.multicast++; + u64_stats_inc(&stats->multicast); + u64_stats_update_end(&stats->syncp); /* Notify kernel */ napi_gro_receive(&ring->napi, skb); @@ -3420,7 +3473,7 @@ static void bcmgenet_timeout(struct net_device *dev, unsigned int txqueue) netif_trans_update(dev); - dev->stats.tx_errors++; + BCMGENET_STATS64_INC((&priv->tx_rings[txqueue].stats64), errors); netif_tx_wake_all_queues(dev); } @@ -3509,39 +3562,68 @@ static int bcmgenet_set_mac_addr(struct net_device *dev, void *p) return 0; } -static struct net_device_stats *bcmgenet_get_stats(struct net_device *dev) +static void bcmgenet_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct bcmgenet_priv *priv = netdev_priv(dev); - unsigned long tx_bytes = 0, tx_packets = 0; - unsigned long rx_bytes = 0, rx_packets = 0; - unsigned long rx_errors = 0, rx_dropped = 0; - struct bcmgenet_tx_ring *tx_ring; - struct bcmgenet_rx_ring *rx_ring; + struct bcmgenet_tx_stats64 *tx_stats; + struct bcmgenet_rx_stats64 *rx_stats; + u64 rx_length_errors, rx_over_errors; + u64 rx_crc_errors, rx_frame_errors; + u64 tx_errors, tx_dropped; + u64 rx_errors, rx_dropped; + u64 tx_bytes, tx_packets; + u64 rx_bytes, rx_packets; + unsigned int start; unsigned int q; + u64 multicast; for (q = 0; q <= priv->hw_params->tx_queues; q++) { - tx_ring = &priv->tx_rings[q]; - tx_bytes += tx_ring->bytes; - tx_packets += tx_ring->packets; + tx_stats = &priv->tx_rings[q].stats64; + do { + start = u64_stats_fetch_begin(&tx_stats->syncp); + 
tx_bytes = u64_stats_read(&tx_stats->bytes); + tx_packets = u64_stats_read(&tx_stats->packets); + tx_errors = u64_stats_read(&tx_stats->errors); + tx_dropped = u64_stats_read(&tx_stats->dropped); + } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); + + stats->tx_bytes += tx_bytes; + stats->tx_packets += tx_packets; + stats->tx_errors += tx_errors; + stats->tx_dropped += tx_dropped; } for (q = 0; q <= priv->hw_params->rx_queues; q++) { - rx_ring = &priv->rx_rings[q]; - - rx_bytes += rx_ring->bytes; - rx_packets += rx_ring->packets; - rx_errors += rx_ring->errors; - rx_dropped += rx_ring->dropped; + rx_stats = &priv->rx_rings[q].stats64; + do { + start = u64_stats_fetch_begin(&rx_stats->syncp); + rx_bytes = u64_stats_read(&rx_stats->bytes); + rx_packets = u64_stats_read(&rx_stats->packets); + rx_errors = u64_stats_read(&rx_stats->errors); + rx_dropped = u64_stats_read(&rx_stats->dropped); + rx_length_errors = u64_stats_read(&rx_stats->length_errors); + rx_over_errors = u64_stats_read(&rx_stats->over_errors); + rx_crc_errors = u64_stats_read(&rx_stats->crc_errors); + rx_frame_errors = u64_stats_read(&rx_stats->frame_errors); + multicast = u64_stats_read(&rx_stats->multicast); + } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); + + rx_errors += rx_length_errors; + rx_errors += rx_crc_errors; + rx_errors += rx_frame_errors; + + stats->rx_bytes += rx_bytes; + stats->rx_packets += rx_packets; + stats->rx_errors += rx_errors; + stats->rx_dropped += rx_dropped; + stats->rx_missed_errors += rx_errors; + stats->rx_length_errors += rx_length_errors; + stats->rx_over_errors += rx_over_errors; + stats->rx_crc_errors += rx_crc_errors; + stats->rx_frame_errors += rx_frame_errors; + stats->multicast += multicast; } - - dev->stats.tx_bytes = tx_bytes; - dev->stats.tx_packets = tx_packets; - dev->stats.rx_bytes = rx_bytes; - dev->stats.rx_packets = rx_packets; - dev->stats.rx_errors = rx_errors; - dev->stats.rx_missed_errors = rx_errors; - dev->stats.rx_dropped = 
rx_dropped; - return &dev->stats; } static int bcmgenet_change_carrier(struct net_device *dev, bool new_carrier) @@ -3569,7 +3651,7 @@ static const struct net_device_ops bcmgenet_netdev_ops = { .ndo_set_mac_address = bcmgenet_set_mac_addr, .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_features = bcmgenet_set_features, - .ndo_get_stats = bcmgenet_get_stats, + .ndo_get_stats64 = bcmgenet_get_stats64, .ndo_change_carrier = bcmgenet_change_carrier, }; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 10c631bbe964..27d4fcecca8b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -155,6 +155,27 @@ struct bcmgenet_mib_counters { u32 tx_realloc_tsb_failed; }; +struct bcmgenet_tx_stats64 { + struct u64_stats_sync syncp; + u64_stats_t packets; + u64_stats_t bytes; + u64_stats_t errors; + u64_stats_t dropped; +}; + +struct bcmgenet_rx_stats64 { + struct u64_stats_sync syncp; + u64_stats_t bytes; + u64_stats_t packets; + u64_stats_t errors; + u64_stats_t dropped; + u64_stats_t multicast; + u64_stats_t length_errors; + u64_stats_t over_errors; + u64_stats_t crc_errors; + u64_stats_t frame_errors; +}; + #define UMAC_MIB_START 0x400 #define UMAC_MDIO_CMD 0x614 @@ -515,8 +536,7 @@ struct bcmgenet_skb_cb { struct bcmgenet_tx_ring { spinlock_t lock; /* ring lock */ struct napi_struct napi; /* NAPI per tx queue */ - unsigned long packets; - unsigned long bytes; + struct bcmgenet_tx_stats64 stats64; unsigned int index; /* ring index */ struct enet_cb *cbs; /* tx ring buffer control block*/ unsigned int size; /* size of each tx ring */ @@ -540,10 +560,7 @@ struct bcmgenet_net_dim { struct bcmgenet_rx_ring { struct napi_struct napi; /* Rx NAPI struct */ - unsigned long bytes; - unsigned long packets; - unsigned long errors; - unsigned long dropped; + struct bcmgenet_rx_stats64 stats64; unsigned int index; /* Rx ring index */ struct enet_cb *cbs; /* Rx ring 
buffer control block */ unsigned int size; /* Rx ring size */ -- 2.51.0 From e985b97ac1b13e9653b326f62eab1d44cd34e203 Mon Sep 17 00:00:00 2001 From: Zak Kemble Date: Mon, 19 May 2025 12:32:56 +0100 Subject: [PATCH 08/16] net: bcmgenet: count hw discarded packets in missed stat Hardware discarded packets are now counted in their own missed stat instead of being lumped in with general errors. Signed-off-by: Zak Kemble Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250519113257.1031-3-zakkemble@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 6 ++++-- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 101ba6b2f70f..578db62304e4 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2297,7 +2297,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, DMA_P_INDEX_DISCARD_CNT_MASK; if (discards > ring->old_discards) { discards = discards - ring->old_discards; - BCMGENET_STATS64_ADD(stats, errors, discards); + BCMGENET_STATS64_ADD(stats, missed, discards); ring->old_discards += discards; /* Clear HW register when we reach 75% of maximum 0xFFFF */ @@ -3577,6 +3577,7 @@ static void bcmgenet_get_stats64(struct net_device *dev, unsigned int start; unsigned int q; u64 multicast; + u64 rx_missed; for (q = 0; q <= priv->hw_params->tx_queues; q++) { tx_stats = &priv->tx_rings[q].stats64; @@ -3602,6 +3603,7 @@ static void bcmgenet_get_stats64(struct net_device *dev, rx_packets = u64_stats_read(&rx_stats->packets); rx_errors = u64_stats_read(&rx_stats->errors); rx_dropped = u64_stats_read(&rx_stats->dropped); + rx_missed = u64_stats_read(&rx_stats->missed); rx_length_errors = u64_stats_read(&rx_stats->length_errors); rx_over_errors = 
u64_stats_read(&rx_stats->over_errors); rx_crc_errors = u64_stats_read(&rx_stats->crc_errors); @@ -3617,7 +3619,7 @@ static void bcmgenet_get_stats64(struct net_device *dev, stats->rx_packets += rx_packets; stats->rx_errors += rx_errors; stats->rx_dropped += rx_dropped; - stats->rx_missed_errors += rx_errors; + stats->rx_missed_errors += rx_missed; stats->rx_length_errors += rx_length_errors; stats->rx_over_errors += rx_over_errors; stats->rx_crc_errors += rx_crc_errors; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 27d4fcecca8b..10bbb3eb8efd 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -170,6 +170,7 @@ struct bcmgenet_rx_stats64 { u64_stats_t errors; u64_stats_t dropped; u64_stats_t multicast; + u64_stats_t missed; u64_stats_t length_errors; u64_stats_t over_errors; u64_stats_t crc_errors; -- 2.51.0 From bbdf9ec61053ae0e3731634905c51964d3fc43f1 Mon Sep 17 00:00:00 2001 From: Zak Kemble Date: Mon, 19 May 2025 12:32:57 +0100 Subject: [PATCH 09/16] net: bcmgenet: expose more stats in ethtool Expose more per-queue and overall stats in ethtool Signed-off-by: Zak Kemble Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20250519113257.1031-4-zakkemble@gmail.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/genet/bcmgenet.c | 35 +++++++++++++++++-- .../net/ethernet/broadcom/genet/bcmgenet.h | 2 ++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 578db62304e4..fa0077bc67b7 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1029,6 +1029,10 @@ struct bcmgenet_stats { tx_rings[num].stats64, packets), \ STAT_GENET_SOFT_MIB64("txq" __stringify(num) "_bytes", \ tx_rings[num].stats64, bytes), \ + 
STAT_GENET_SOFT_MIB64("txq" __stringify(num) "_errors", \ + tx_rings[num].stats64, errors), \ + STAT_GENET_SOFT_MIB64("txq" __stringify(num) "_dropped", \ + tx_rings[num].stats64, dropped), \ STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_bytes", \ rx_rings[num].stats64, bytes), \ STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_packets", \ @@ -1036,7 +1040,23 @@ struct bcmgenet_stats { STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_errors", \ rx_rings[num].stats64, errors), \ STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_dropped", \ - rx_rings[num].stats64, dropped) + rx_rings[num].stats64, dropped), \ + STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_multicast", \ + rx_rings[num].stats64, multicast), \ + STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_missed", \ + rx_rings[num].stats64, missed), \ + STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_length_errors", \ + rx_rings[num].stats64, length_errors), \ + STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_over_errors", \ + rx_rings[num].stats64, over_errors), \ + STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_crc_errors", \ + rx_rings[num].stats64, crc_errors), \ + STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_frame_errors", \ + rx_rings[num].stats64, frame_errors), \ + STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_fragmented_errors", \ + rx_rings[num].stats64, fragmented_errors), \ + STAT_GENET_SOFT_MIB64("rxq" __stringify(num) "_broadcast", \ + rx_rings[num].stats64, broadcast) /* There is a 0xC gap between the end of RX and beginning of TX stats and then * between the end of TX stats and the beginning of the RX RUNT @@ -1057,6 +1077,11 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = { STAT_RTNL(rx_dropped), STAT_RTNL(tx_dropped), STAT_RTNL(multicast), + STAT_RTNL(rx_missed_errors), + STAT_RTNL(rx_length_errors), + STAT_RTNL(rx_over_errors), + STAT_RTNL(rx_crc_errors), + STAT_RTNL(rx_frame_errors), /* UniMAC RSV counters */ STAT_GENET_MIB_RX("rx_64_octets", mib.rx.pkt_cnt.cnt_64), 
STAT_GENET_MIB_RX("rx_65_127_oct", mib.rx.pkt_cnt.cnt_127), @@ -2358,7 +2383,7 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, if (unlikely(!(dma_flag & DMA_EOP) || !(dma_flag & DMA_SOP))) { netif_err(priv, rx_status, dev, "dropping fragmented packet!\n"); - BCMGENET_STATS64_INC(stats, errors); + BCMGENET_STATS64_INC(stats, fragmented_errors); dev_kfree_skb_any(skb); goto next; } @@ -2412,6 +2437,8 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, u64_stats_add(&stats->bytes, len); if (dma_flag & DMA_RX_MULT) u64_stats_inc(&stats->multicast); + else if (dma_flag & DMA_RX_BRDCAST) + u64_stats_inc(&stats->broadcast); u64_stats_update_end(&stats->syncp); /* Notify kernel */ @@ -3569,6 +3596,7 @@ static void bcmgenet_get_stats64(struct net_device *dev, struct bcmgenet_tx_stats64 *tx_stats; struct bcmgenet_rx_stats64 *rx_stats; u64 rx_length_errors, rx_over_errors; + u64 rx_missed, rx_fragmented_errors; u64 rx_crc_errors, rx_frame_errors; u64 tx_errors, tx_dropped; u64 rx_errors, rx_dropped; @@ -3577,7 +3605,6 @@ static void bcmgenet_get_stats64(struct net_device *dev, unsigned int start; unsigned int q; u64 multicast; - u64 rx_missed; for (q = 0; q <= priv->hw_params->tx_queues; q++) { tx_stats = &priv->tx_rings[q].stats64; @@ -3608,12 +3635,14 @@ static void bcmgenet_get_stats64(struct net_device *dev, rx_over_errors = u64_stats_read(&rx_stats->over_errors); rx_crc_errors = u64_stats_read(&rx_stats->crc_errors); rx_frame_errors = u64_stats_read(&rx_stats->frame_errors); + rx_fragmented_errors = u64_stats_read(&rx_stats->fragmented_errors); multicast = u64_stats_read(&rx_stats->multicast); } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); rx_errors += rx_length_errors; rx_errors += rx_crc_errors; rx_errors += rx_frame_errors; + rx_errors += rx_fragmented_errors; stats->rx_bytes += rx_bytes; stats->rx_packets += rx_packets; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h 
b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 10bbb3eb8efd..5ec3979779ec 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -170,11 +170,13 @@ struct bcmgenet_rx_stats64 { u64_stats_t errors; u64_stats_t dropped; u64_stats_t multicast; + u64_stats_t broadcast; u64_stats_t missed; u64_stats_t length_errors; u64_stats_t over_errors; u64_stats_t crc_errors; u64_stats_t frame_errors; + u64_stats_t fragmented_errors; }; #define UMAC_MIB_START 0x400 -- 2.51.0 From f1a8d107d91db7923518abd987ddcb3cd6ea6af4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 May 2025 19:27:17 -0700 Subject: [PATCH 10/16] ipv6: Remove rcu_read_lock() in fib6_get_table(). Once allocated, the IPv6 routing table is not freed until netns is dismantled. fib6_get_table() uses rcu_read_lock() while iterating net->ipv6.fib_table_hash[], but it's not needed and rather confusing. Because some callers have this pattern, table = fib6_get_table(); rcu_read_lock(); /* ... use table here ... */ rcu_read_unlock(); [ See: addrconf_get_prefix_route(), ip6_route_del(), rt6_get_route_info(), rt6_get_dflt_router() ] and this looks illegal but is actually safe. Let's remove rcu_read_lock() in fib6_get_table() and pass true to the last argument of hlist_for_each_entry_rcu() to bypass the RCU check. Note that protection is not needed but RCU helper is used to avoid data-race. 
Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250516022759.44392-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_fib.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1f860340690c..88770ecd2da1 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -281,22 +281,20 @@ EXPORT_SYMBOL_GPL(fib6_new_table); struct fib6_table *fib6_get_table(struct net *net, u32 id) { - struct fib6_table *tb; struct hlist_head *head; - unsigned int h; + struct fib6_table *tb; - if (id == 0) + if (!id) id = RT6_TABLE_MAIN; - h = id & (FIB6_TABLE_HASHSZ - 1); - rcu_read_lock(); - head = &net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, head, tb6_hlist) { - if (tb->tb6_id == id) { - rcu_read_unlock(); + + head = &net->ipv6.fib_table_hash[id & (FIB6_TABLE_HASHSZ - 1)]; + + /* See comment in fib6_link_table(). RCU is not required, + * but rcu_dereference_raw() is used to avoid data-race. + */ + hlist_for_each_entry_rcu(tb, head, tb6_hlist, true) + if (tb->tb6_id == id) return tb; - } - } - rcu_read_unlock(); return NULL; } -- 2.51.0 From f0a56c17e64bb5e7cdb9295df2b5fc21e4949005 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 May 2025 19:27:18 -0700 Subject: [PATCH 11/16] inet: Remove rtnl_is_held arg of lwtunnel_valid_encap_type(_attr)?(). Commit f130a0cc1b4f ("inet: fix lwtunnel_valid_encap_type() lock imbalance") added the rtnl_is_held argument as a temporary fix while I'm converting nexthop and IPv6 routing table to per-netns RTNL or RCU. Now all callers of lwtunnel_valid_encap_type() do not hold RTNL. Let's remove the argument. 
Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250516022759.44392-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/lwtunnel.h | 13 +++++-------- net/core/lwtunnel.c | 15 +++------------ net/ipv4/fib_frontend.c | 4 ++-- net/ipv4/nexthop.c | 3 +-- net/ipv6/route.c | 6 ++---- 5 files changed, 13 insertions(+), 28 deletions(-) diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 39cd50300a18..c306ebe379a0 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -116,11 +116,9 @@ int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_valid_encap_type(u16 encap_type, - struct netlink_ext_ack *extack, - bool rtnl_is_held); + struct netlink_ext_ack *extack); int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, - struct netlink_ext_ack *extack, - bool rtnl_is_held); + struct netlink_ext_ack *extack); int lwtunnel_build_state(struct net *net, u16 encap_type, struct nlattr *encap, unsigned int family, const void *cfg, @@ -203,15 +201,14 @@ static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, } static inline int lwtunnel_valid_encap_type(u16 encap_type, - struct netlink_ext_ack *extack, - bool rtnl_is_held) + struct netlink_ext_ack *extack) { NL_SET_ERR_MSG(extack, "CONFIG_LWTUNNEL is not enabled in this kernel"); return -EOPNOTSUPP; } + static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len, - struct netlink_ext_ack *extack, - bool rtnl_is_held) + struct netlink_ext_ack *extack) { /* return 0 since we are not walking attr looking for * RTA_ENCAP_TYPE attribute on nexthops. 
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 60f27cb4e54f..f9d76d85d04f 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -149,8 +149,7 @@ int lwtunnel_build_state(struct net *net, u16 encap_type, } EXPORT_SYMBOL_GPL(lwtunnel_build_state); -int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack, - bool rtnl_is_held) +int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) { const struct lwtunnel_encap_ops *ops; int ret = -EINVAL; @@ -167,12 +166,7 @@ int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack, const char *encap_type_str = lwtunnel_encap_str(encap_type); if (encap_type_str) { - if (rtnl_is_held) - __rtnl_unlock(); request_module("rtnl-lwt-%s", encap_type_str); - if (rtnl_is_held) - rtnl_lock(); - ops = rcu_access_pointer(lwtun_encaps[encap_type]); } } @@ -186,8 +180,7 @@ int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack, EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type); int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, - struct netlink_ext_ack *extack, - bool rtnl_is_held) + struct netlink_ext_ack *extack) { struct rtnexthop *rtnh = (struct rtnexthop *)attr; struct nlattr *nla_entype; @@ -208,9 +201,7 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, } encap_type = nla_get_u16(nla_entype); - if (lwtunnel_valid_encap_type(encap_type, - extack, - rtnl_is_held) != 0) + if (lwtunnel_valid_encap_type(encap_type, extack)) return -EOPNOTSUPP; } } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 57f088e5540e..fd1e1507a224 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -807,7 +807,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, case RTA_MULTIPATH: err = lwtunnel_valid_encap_type_attr(nla_data(attr), nla_len(attr), - extack, false); + extack); if (err < 0) goto errout; cfg->fc_mp = nla_data(attr); @@ -825,7 +825,7 @@ static int 
rtm_to_fib_config(struct net *net, struct sk_buff *skb, case RTA_ENCAP_TYPE: cfg->fc_encap_type = nla_get_u16(attr); err = lwtunnel_valid_encap_type(cfg->fc_encap_type, - extack, false); + extack); if (err < 0) goto errout; break; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 823e4a783d2b..4397e89d3123 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3180,8 +3180,7 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, } cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]); - err = lwtunnel_valid_encap_type(cfg->nh_encap_type, - extack, false); + err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack); if (err < 0) goto out; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 44300962230b..6baf177c529b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5172,8 +5172,7 @@ static int rtm_to_fib6_multipath_config(struct fib6_config *cfg, rtnh = rtnh_next(rtnh, &remaining); } while (rtnh_ok(rtnh, remaining)); - return lwtunnel_valid_encap_type_attr(cfg->fc_mp, cfg->fc_mp_len, - extack, false); + return lwtunnel_valid_encap_type_attr(cfg->fc_mp, cfg->fc_mp_len, extack); } static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -5310,8 +5309,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_ENCAP_TYPE]) { cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); - err = lwtunnel_valid_encap_type(cfg->fc_encap_type, - extack, false); + err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack); if (err < 0) goto errout; } -- 2.51.0 From 8e5f1bb812741821e2a8ac221fba45cab6c73e43 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 May 2025 19:27:19 -0700 Subject: [PATCH 12/16] ipv6: Narrow down RCU critical section in inet6_rtm_newroute(). Commit 169fd62799e8 ("ipv6: Get rid of RTNL for SIOCADDRT and RTM_NEWROUTE.") added rcu_read_lock() covering ip6_route_info_create_nh() and __ip6_ins_rt() to guarantee that nexthop and netdev will not go away. 
However, as reported by syzkaller [0], ip_tun_build_state() calls dst_cache_init() with GFP_KERNEL during the RCU critical section. ip6_route_info_create_nh() fetches nexthop or netdev depending on whether RTA_NH_ID is set, and struct fib6_info holds a refcount of either of them by nexthop_get() or netdev_get_by_index(). netdev_get_by_index() looks up a dev and calls dev_hold() under RCU. So, we need RCU only around nexthop_find_by_id() and nexthop_get() (and a little more nexthop code). Let's add rcu_read_lock() there and remove rcu_read_lock() in ip6_route_add() and ip6_route_multipath_add(). Now these functions called from fib6_add() need RCU: - inet6_rt_notify() - fib6_drop_pcpu_from() (via fib6_purge_rt()) - rt6_flush_exceptions() (via fib6_purge_rt()) - ip6_ignore_linkdown() (via rt6_multipath_rebalance()) All callers of inet6_rt_notify() need RCU, so rcu_read_lock() is added there. [0]: [ BUG: Invalid wait context ] 6.15.0-rc4-syzkaller-00746-g836b313a14a3 #0 Tainted: G W ---------------------------- syz-executor234/5832 is trying to lock: ffffffff8e021688 (pcpu_alloc_mutex){+.+.}-{4:4}, at: pcpu_alloc_noprof+0x284/0x16b0 mm/percpu.c:1782 other info that might help us debug this: context-{5:5} 1 lock held by syz-executor234/5832: 0: ffffffff8df3b860 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire include/linux/rcupdate.h:331 [inline] 0: ffffffff8df3b860 (rcu_read_lock){....}-{1:3}, at: rcu_read_lock include/linux/rcupdate.h:841 [inline] 0: ffffffff8df3b860 (rcu_read_lock){....}-{1:3}, at: ip6_route_add+0x4d/0x2f0 net/ipv6/route.c:3913 stack backtrace: CPU: 0 UID: 0 PID: 5832 Comm: syz-executor234 Tainted: G W 6.15.0-rc4-syzkaller-00746-g836b313a14a3 #0 PREEMPT(full) Tainted: [W]=WARN Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/29/2025 Call Trace: dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120 print_lock_invalid_wait_context kernel/locking/lockdep.c:4831 [inline] check_wait_context kernel/locking/lockdep.c:4903 [inline] 
__lock_acquire+0xbcf/0xd20 kernel/locking/lockdep.c:5185 lock_acquire+0x120/0x360 kernel/locking/lockdep.c:5866 __mutex_lock_common kernel/locking/mutex.c:601 [inline] __mutex_lock+0x182/0xe80 kernel/locking/mutex.c:746 pcpu_alloc_noprof+0x284/0x16b0 mm/percpu.c:1782 dst_cache_init+0x37/0xc0 net/core/dst_cache.c:145 ip_tun_build_state+0x193/0x6b0 net/ipv4/ip_tunnel_core.c:687 lwtunnel_build_state+0x381/0x4c0 net/core/lwtunnel.c:137 fib_nh_common_init+0x129/0x460 net/ipv4/fib_semantics.c:635 fib6_nh_init+0x15e4/0x2030 net/ipv6/route.c:3669 ip6_route_info_create_nh+0x139/0x870 net/ipv6/route.c:3866 ip6_route_add+0xf6/0x2f0 net/ipv6/route.c:3915 inet6_rtm_newroute+0x284/0x1c50 net/ipv6/route.c:5732 rtnetlink_rcv_msg+0x7cc/0xb70 net/core/rtnetlink.c:6955 netlink_rcv_skb+0x219/0x490 net/netlink/af_netlink.c:2534 netlink_unicast_kernel net/netlink/af_netlink.c:1313 [inline] netlink_unicast+0x758/0x8d0 net/netlink/af_netlink.c:1339 netlink_sendmsg+0x805/0xb30 net/netlink/af_netlink.c:1883 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x219/0x270 net/socket.c:727 ____sys_sendmsg+0x505/0x830 net/socket.c:2566 ___sys_sendmsg+0x21f/0x2a0 net/socket.c:2620 __sys_sendmsg net/socket.c:2652 [inline] __do_sys_sendmsg net/socket.c:2657 [inline] __se_sys_sendmsg net/socket.c:2655 [inline] __x64_sys_sendmsg+0x19b/0x260 net/socket.c:2655 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xf6/0x210 arch/x86/entry/syscall_64.c:94 Fixes: 169fd62799e8 ("ipv6: Get rid of RTNL for SIOCADDRT and RTM_NEWROUTE.") Reported-by: syzbot+bcc12d6799364500fbec@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=bcc12d6799364500fbec Reported-by: Eric Dumazet Closes: https://lore.kernel.org/netdev/CANn89i+r1cGacVC_6n3-A-WSkAa_Nr+pmxJ7Gt+oP-P9by2aGw@mail.gmail.com/ Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250516022759.44392-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_fib.c | 9 +++++++-- 
net/ipv6/route.c | 31 ++++++++++++++++++------------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 88770ecd2da1..7094d7708686 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1027,8 +1027,9 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i, .table = table }; - nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from, - &arg); + rcu_read_lock(); + nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from, &arg); + rcu_read_unlock(); } else { struct fib6_nh *fib6_nh; @@ -1221,7 +1222,9 @@ next_iter: fib6_nsiblings++; } BUG_ON(fib6_nsiblings != rt->fib6_nsiblings); + rcu_read_lock(); rt6_multipath_rebalance(temp_sibling); + rcu_read_unlock(); } /* @@ -1264,7 +1267,9 @@ add: sibling->fib6_nsiblings--; rt->fib6_nsiblings = 0; list_del_rcu(&rt->fib6_siblings); + rcu_read_lock(); rt6_multipath_rebalance(next_sibling); + rcu_read_unlock(); return err; } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6baf177c529b..a87091dd06b1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1820,11 +1820,13 @@ static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg) void rt6_flush_exceptions(struct fib6_info *f6i) { - if (f6i->nh) - nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions, - f6i); - else + if (f6i->nh) { + rcu_read_lock(); + nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions, f6i); + rcu_read_unlock(); + } else { fib6_nh_flush_exceptions(f6i->fib6_nh, f6i); + } } /* Find cached rt in the hash table inside passed in rt @@ -3841,6 +3843,8 @@ static int ip6_route_info_create_nh(struct fib6_info *rt, if (cfg->fc_nh_id) { struct nexthop *nh; + rcu_read_lock(); + nh = nexthop_find_by_id(net, cfg->fc_nh_id); if (!nh) { err = -EINVAL; @@ -3860,6 +3864,8 @@ static int ip6_route_info_create_nh(struct fib6_info *rt, rt->nh = nh; fib6_nh = nexthop_fib6_nh(rt->nh); + + rcu_read_unlock(); } else { int addr_type; @@ -3895,6 +3901,7 @@ out_release: fib6_info_release(rt); 
return err; out_free: + rcu_read_unlock(); ip_fib_metrics_put(rt->fib6_metrics); kfree(rt); return err; @@ -3910,16 +3917,12 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, if (IS_ERR(rt)) return PTR_ERR(rt); - rcu_read_lock(); - err = ip6_route_info_create_nh(rt, cfg, extack); if (err) - goto unlock; + return err; err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack); fib6_info_release(rt); -unlock: - rcu_read_unlock(); return err; } @@ -5534,8 +5537,6 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, if (err) return err; - rcu_read_lock(); - err = ip6_route_mpath_info_create_nh(&rt6_nh_list, extack); if (err) goto cleanup; @@ -5627,8 +5628,6 @@ add_errout: } cleanup: - rcu_read_unlock(); - list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, list) { fib6_info_release(nh->fib6_info); list_del(&nh->list); @@ -6410,6 +6409,8 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, err = -ENOBUFS; seq = info->nlh ? info->nlh->nlmsg_seq : 0; + rcu_read_lock(); + skb = nlmsg_new(rt6_nlmsg_size(rt), GFP_ATOMIC); if (!skb) goto errout; @@ -6422,10 +6423,14 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, kfree_skb(skb); goto errout; } + + rcu_read_unlock(); + rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, info->nlh, GFP_ATOMIC); return; errout: + rcu_read_unlock(); rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); } -- 2.51.0 From cefe6e131cc4f032110efe1687295e133f3d5964 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 May 2025 19:27:20 -0700 Subject: [PATCH 13/16] Revert "ipv6: sr: switch to GFP_ATOMIC flag to allocate memory during seg6local LWT setup" The previous patch fixed the same issue mentioned in commit 14a0087e7236 ("ipv6: sr: switch to GFP_ATOMIC flag to allocate memory during seg6local LWT setup"). Let's revert it. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Andrea Mayer Link: https://patch.msgid.link/20250516022759.44392-5-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/seg6_local.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index ee5e448cc7a8..ac1dbd492c22 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -1671,7 +1671,7 @@ static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt, if (!seg6_validate_srh(srh, len, false)) return -EINVAL; - slwt->srh = kmemdup(srh, len, GFP_ATOMIC); + slwt->srh = kmemdup(srh, len, GFP_KERNEL); if (!slwt->srh) return -ENOMEM; @@ -1911,7 +1911,7 @@ static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt, if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME]) return -EINVAL; - slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_ATOMIC); + slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL); if (!slwt->bpf.name) return -ENOMEM; @@ -1994,7 +1994,7 @@ static int parse_nla_counters(struct nlattr **attrs, return -EINVAL; /* counters are always zero initialized */ - pcounters = seg6_local_alloc_pcpu_counters(GFP_ATOMIC); + pcounters = seg6_local_alloc_pcpu_counters(GFP_KERNEL); if (!pcounters) return -ENOMEM; -- 2.51.0 From 5e4a8cc7beb8567293e6d4230b14e95167759214 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 May 2025 19:27:21 -0700 Subject: [PATCH 14/16] Revert "ipv6: Factorise ip6_route_multipath_add()." Commit 71c0efb6d12f ("ipv6: Factorise ip6_route_multipath_add().") split a loop in ip6_route_multipath_add() so that we can put rcu_read_lock() between ip6_route_info_create() and ip6_route_info_create_nh(). We no longer need to do so as ip6_route_info_create_nh() does not require RCU now. Let's revert the commit to simplify the code. 
Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250516022759.44392-6-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 193 +++++++++++++++++------------------------------ 1 file changed, 70 insertions(+), 123 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a87091dd06b1..96ae21da9961 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5335,131 +5335,29 @@ struct rt6_nh { struct fib6_info *fib6_info; struct fib6_config r_cfg; struct list_head list; - int weight; }; -static void ip6_route_mpath_info_cleanup(struct list_head *rt6_nh_list) +static int ip6_route_info_append(struct list_head *rt6_nh_list, + struct fib6_info *rt, + struct fib6_config *r_cfg) { - struct rt6_nh *nh, *nh_next; + struct rt6_nh *nh; - list_for_each_entry_safe(nh, nh_next, rt6_nh_list, list) { - struct fib6_info *rt = nh->fib6_info; - - if (rt) { - free_percpu(rt->fib6_nh->nh_common.nhc_pcpu_rth_output); - free_percpu(rt->fib6_nh->rt6i_pcpu); - ip_fib_metrics_put(rt->fib6_metrics); - kfree(rt); - } - - list_del(&nh->list); - kfree(nh); + list_for_each_entry(nh, rt6_nh_list, list) { + /* check if fib6_info already exists */ + if (rt6_duplicate_nexthop(nh->fib6_info, rt)) + return -EEXIST; } -} - -static int ip6_route_mpath_info_create(struct list_head *rt6_nh_list, - struct fib6_config *cfg, - struct netlink_ext_ack *extack) -{ - struct rtnexthop *rtnh; - int remaining; - int err; - - remaining = cfg->fc_mp_len; - rtnh = (struct rtnexthop *)cfg->fc_mp; - - /* Parse a Multipath Entry and build a list (rt6_nh_list) of - * fib6_info structs per nexthop - */ - while (rtnh_ok(rtnh, remaining)) { - struct fib6_config r_cfg; - struct fib6_info *rt; - struct rt6_nh *nh; - int attrlen; - - nh = kzalloc(sizeof(*nh), GFP_KERNEL); - if (!nh) { - err = -ENOMEM; - goto err; - } - list_add_tail(&nh->list, rt6_nh_list); - - memcpy(&r_cfg, cfg, sizeof(*cfg)); - if (rtnh->rtnh_ifindex) - r_cfg.fc_ifindex = rtnh->rtnh_ifindex; - - attrlen = 
rtnh_attrlen(rtnh); - if (attrlen > 0) { - struct nlattr *nla, *attrs = rtnh_attrs(rtnh); - - nla = nla_find(attrs, attrlen, RTA_GATEWAY); - if (nla) { - r_cfg.fc_gateway = nla_get_in6_addr(nla); - r_cfg.fc_flags |= RTF_GATEWAY; - } - - r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); - nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); - if (nla) - r_cfg.fc_encap_type = nla_get_u16(nla); - } - - r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); - - rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - goto err; - } - - nh->fib6_info = rt; - nh->weight = rtnh->rtnh_hops + 1; - memcpy(&nh->r_cfg, &r_cfg, sizeof(r_cfg)); + nh = kzalloc(sizeof(*nh), GFP_KERNEL); + if (!nh) + return -ENOMEM; - rtnh = rtnh_next(rtnh, &remaining); - } + nh->fib6_info = rt; + memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); + list_add_tail(&nh->list, rt6_nh_list); return 0; -err: - ip6_route_mpath_info_cleanup(rt6_nh_list); - return err; -} - -static int ip6_route_mpath_info_create_nh(struct list_head *rt6_nh_list, - struct netlink_ext_ack *extack) -{ - struct rt6_nh *nh, *nh_next, *nh_tmp; - LIST_HEAD(tmp); - int err; - - list_for_each_entry_safe(nh, nh_next, rt6_nh_list, list) { - struct fib6_info *rt = nh->fib6_info; - - err = ip6_route_info_create_nh(rt, &nh->r_cfg, extack); - if (err) { - nh->fib6_info = NULL; - goto err; - } - - rt->fib6_nh->fib_nh_weight = nh->weight; - - list_move_tail(&nh->list, &tmp); - - list_for_each_entry(nh_tmp, rt6_nh_list, list) { - /* check if fib6_info already exists */ - if (rt6_duplicate_nexthop(nh_tmp->fib6_info, rt)) { - err = -EEXIST; - goto err; - } - } - } -out: - list_splice(&tmp, rt6_nh_list); - return err; -err: - ip6_route_mpath_info_cleanup(rt6_nh_list); - goto out; } static void ip6_route_mpath_notify(struct fib6_info *rt, @@ -5519,11 +5417,16 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, struct fib6_info *rt_notif = NULL, *rt_last = NULL; struct nl_info *info = &cfg->fc_nlinfo; 
struct rt6_nh *nh, *nh_safe; + struct fib6_config r_cfg; + struct rtnexthop *rtnh; LIST_HEAD(rt6_nh_list); struct rt6_nh *err_nh; + struct fib6_info *rt; __u16 nlflags; - int nhn = 0; + int remaining; + int attrlen; int replace; + int nhn = 0; int err; replace = (cfg->fc_nlinfo.nlh && @@ -5533,13 +5436,57 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND) nlflags |= NLM_F_APPEND; - err = ip6_route_mpath_info_create(&rt6_nh_list, cfg, extack); - if (err) - return err; + remaining = cfg->fc_mp_len; + rtnh = (struct rtnexthop *)cfg->fc_mp; - err = ip6_route_mpath_info_create_nh(&rt6_nh_list, extack); - if (err) - goto cleanup; + /* Parse a Multipath Entry and build a list (rt6_nh_list) of + * fib6_info structs per nexthop + */ + while (rtnh_ok(rtnh, remaining)) { + memcpy(&r_cfg, cfg, sizeof(*cfg)); + if (rtnh->rtnh_ifindex) + r_cfg.fc_ifindex = rtnh->rtnh_ifindex; + + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + if (nla) { + r_cfg.fc_gateway = nla_get_in6_addr(nla); + r_cfg.fc_flags |= RTF_GATEWAY; + } + + r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); + nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); + if (nla) + r_cfg.fc_encap_type = nla_get_u16(nla); + } + + r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); + rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + rt = NULL; + goto cleanup; + } + + err = ip6_route_info_create_nh(rt, &r_cfg, extack); + if (err) { + rt = NULL; + goto cleanup; + } + + rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1; + + err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); + if (err) { + fib6_info_release(rt); + goto cleanup; + } + + rtnh = rtnh_next(rtnh, &remaining); + } /* for add and replace send one notification with all nexthops. 
* Skip the notification in fib6_add_rt2node and send one with -- 2.51.0 From d465bd07d16e37cd3aa25539ab187b372853808d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 May 2025 19:27:22 -0700 Subject: [PATCH 15/16] ipv6: Pass gfp_flags down to ip6_route_info_create_nh(). Since commit c4837b9853e5 ("ipv6: Split ip6_route_info_create()."), ip6_route_info_create_nh() uses GFP_ATOMIC as it was expected to be called under RCU. Now, we can call it without RCU and use GFP_KERNEL. Let's pass gfp_flags to ip6_route_info_create_nh(). Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250516022759.44392-7-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 96ae21da9961..dda913ebd2d3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3834,6 +3834,7 @@ err: static int ip6_route_info_create_nh(struct fib6_info *rt, struct fib6_config *cfg, + gfp_t gfp_flags, struct netlink_ext_ack *extack) { struct net *net = cfg->fc_nlinfo.nl_net; @@ -3869,7 +3870,7 @@ static int ip6_route_info_create_nh(struct fib6_info *rt, } else { int addr_type; - err = fib6_nh_init(net, rt->fib6_nh, cfg, GFP_ATOMIC, extack); + err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack); if (err) goto out_release; @@ -3917,7 +3918,7 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, if (IS_ERR(rt)) return PTR_ERR(rt); - err = ip6_route_info_create_nh(rt, cfg, extack); + err = ip6_route_info_create_nh(rt, cfg, gfp_flags, extack); if (err) return err; @@ -4707,7 +4708,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, if (IS_ERR(f6i)) return f6i; - err = ip6_route_info_create_nh(f6i, &cfg, extack); + err = ip6_route_info_create_nh(f6i, &cfg, gfp_flags, extack); if (err) return ERR_PTR(err); @@ -5471,7 +5472,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, goto cleanup; } - err = ip6_route_info_create_nh(rt, 
&r_cfg, extack); + err = ip6_route_info_create_nh(rt, &r_cfg, GFP_KERNEL, extack); if (err) { rt = NULL; goto cleanup; -- 2.51.0 From 002dba13c824f1cf86f618f0d23d1f0ad3c93bbb Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 May 2025 19:27:23 -0700 Subject: [PATCH 16/16] ipv6: Revert two per-cpu var allocation for RTM_NEWROUTE. These two commits preallocated two per-cpu variables in ip6_route_info_create() as fib_nh_common_init() and fib6_nh_init() were expected to be called under RCU. * commit d27b9c40dbd6 ("ipv6: Preallocate nhc_pcpu_rth_output in ip6_route_info_create().") * commit 5720a328c3e9 ("ipv6: Preallocate rt->fib6_nh->rt6i_pcpu in ip6_route_info_create().") Now these functions can be called without RCU and can use GFP_KERNEL. Let's revert the commits. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250516022759.44392-8-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_semantics.c | 10 ++++------ net/ipv6/route.c | 34 +++------------------------------- 2 files changed, 7 insertions(+), 37 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index dabe2b7044ab..d643bd1a0d9d 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -617,12 +617,10 @@ int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc, { int err; - if (!nhc->nhc_pcpu_rth_output) { - nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *, - gfp_flags); - if (!nhc->nhc_pcpu_rth_output) - return -ENOMEM; - } + nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *, + gfp_flags); + if (!nhc->nhc_pcpu_rth_output) + return -ENOMEM; if (encap) { struct lwtunnel_state *lwtstate; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dda913ebd2d3..0143262094b0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3674,12 +3674,10 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, goto out; pcpu_alloc: + fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, 
gfp_flags); if (!fib6_nh->rt6i_pcpu) { - fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags); - if (!fib6_nh->rt6i_pcpu) { - err = -ENOMEM; - goto out; - } + err = -ENOMEM; + goto out; } fib6_nh->fib_nh_dev = dev; @@ -3739,24 +3737,6 @@ void fib6_nh_release_dsts(struct fib6_nh *fib6_nh) } } -static int fib6_nh_prealloc_percpu(struct fib6_nh *fib6_nh, gfp_t gfp_flags) -{ - struct fib_nh_common *nhc = &fib6_nh->nh_common; - - fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags); - if (!fib6_nh->rt6i_pcpu) - return -ENOMEM; - - nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *, - gfp_flags); - if (!nhc->nhc_pcpu_rth_output) { - free_percpu(fib6_nh->rt6i_pcpu); - return -ENOMEM; - } - - return 0; -} - static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) @@ -3794,12 +3774,6 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, goto free; } - if (!cfg->fc_nh_id) { - err = fib6_nh_prealloc_percpu(&rt->fib6_nh[0], gfp_flags); - if (err) - goto free_metrics; - } - if (cfg->fc_flags & RTF_ADDRCONF) rt->dst_nocount = true; @@ -3824,8 +3798,6 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, rt->fib6_src.plen = cfg->fc_src_len; #endif return rt; -free_metrics: - ip_fib_metrics_put(rt->fib6_metrics); free: kfree(rt); err: -- 2.51.0