From 7213a1c417d2c690de2c5aaa05b9dbec0d68a1b1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Oct 2024 10:47:32 -0700 Subject: [PATCH 01/16] ip6mr: Add __init to ip6_mr_cleanup(). kernel test robot reported a section mismatch in ip6_mr_cleanup(). WARNING: modpost: vmlinux: section mismatch in reference: ip6_mr_cleanup+0x0 (section: .text) -> 0xffffffff (section: .init.rodata) WARNING: modpost: vmlinux: section mismatch in reference: ip6_mr_cleanup+0x14 (section: .text) -> ip6mr_rtnl_msg_handlers (section: .init.rodata) ip6_mr_cleanup() uses ip6mr_rtnl_msg_handlers[] that has __initconst_or_module qualifier. ip6_mr_cleanup() is only called from inet6_init() but does not have __init qualifier. Let's add __init to ip6_mr_cleanup(). Fixes: 3ac84e31b33e ("ipmr: Use rtnl_register_many().") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202410180139.B3HeemsC-lkp@intel.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20241017174732.39487-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 437a9fdb67f5..8add0f45aa52 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1411,7 +1411,7 @@ reg_pernet_fail: return err; } -void ip6_mr_cleanup(void) +void __init ip6_mr_cleanup(void) { rtnl_unregister_many(ip6mr_rtnl_msg_handlers); #ifdef CONFIG_IPV6_PIMSM_V2 -- 2.51.0 From c972c1c41d9b20fb38b54e77dcee763e27e715a9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Oct 2024 18:41:00 -0700 Subject: [PATCH 02/16] ipv4: Switch inet_addr_hash() to less predictable hash. Recently, commit 4a0ec2aa0704 ("ipv6: switch inet6_addr_hash() to less predictable hash") and commit 4daf4dc275f1 ("ipv6: switch inet6_acaddr_hash() to less predictable hash") hardened IPv6 address hash functions. 
inet_addr_hash() is also highly predictable, and a malicious user could abuse a specific bucket. Let's follow the change on IPv4 by using jhash_1word(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241018014100.93776-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- include/net/ip.h | 5 +++++ net/ipv4/devinet.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/ip.h b/include/net/ip.h index 4be0a6a603b2..0e548c1f2a0e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -690,6 +690,11 @@ static inline unsigned int ipv4_addr_hash(__be32 ip) return (__force unsigned int) ip; } +static inline u32 __ipv4_addr_hash(const __be32 ip, const u32 initval) +{ + return jhash_1word((__force u32)ip, initval); +} + static inline u32 ipv4_portaddr_hash(const struct net *net, __be32 saddr, unsigned int port) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 0ff9c0abfaa0..5f859d01cbbe 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -121,7 +121,7 @@ struct inet_fill_args { static u32 inet_addr_hash(const struct net *net, __be32 addr) { - u32 val = (__force u32) addr ^ net_hash_mix(net); + u32 val = __ipv4_addr_hash(addr, net_hash_mix(net)); return hash_32(val, IN4_ADDR_HSIZE_SHIFT); } -- 2.51.0 From e44ef3f66c5472c2cbc6957c684d7279c26b0db1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Oct 2024 05:21:08 +0000 Subject: [PATCH 03/16] netpoll: remove ndo_netpoll_setup() second argument npinfo is not used in any of the ndo_netpoll_setup() methods. 
Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241018052108.2610827-1-edumazet@google.com Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/macvlan.c | 2 +- drivers/net/team/team_core.c | 3 +-- include/linux/netdevice.h | 3 +-- net/8021q/vlan_dev.c | 2 +- net/bridge/br_device.c | 2 +- net/core/netpoll.c | 2 +- net/dsa/user.c | 3 +-- 8 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b1bffd8e9a95..3928287f5865 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1476,7 +1476,7 @@ static void bond_netpoll_cleanup(struct net_device *bond_dev) slave_disable_netpoll(slave); } -static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) +static int bond_netpoll_setup(struct net_device *dev) { struct bonding *bond = netdev_priv(dev); struct list_head *iter; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index cf18e66de142..edbd5afcec41 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1117,7 +1117,7 @@ static void macvlan_dev_poll_controller(struct net_device *dev) return; } -static int macvlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) +static int macvlan_dev_netpoll_setup(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *real_dev = vlan->lowerdev; diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index 18191d5a8bd4..a1b27b69f010 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -1946,8 +1946,7 @@ static void team_netpoll_cleanup(struct net_device *dev) mutex_unlock(&team->lock); } -static int team_netpoll_setup(struct net_device *dev, - struct netpoll_info *npifo) +static int team_netpoll_setup(struct net_device *dev) { struct team *team = netdev_priv(dev); struct team_port *port; diff --git 
a/include/linux/netdevice.h b/include/linux/netdevice.h index 8feaca12655e..86a0b7eb9461 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1425,8 +1425,7 @@ struct net_device_ops { __be16 proto, u16 vid); #ifdef CONFIG_NET_POLL_CONTROLLER void (*ndo_poll_controller)(struct net_device *dev); - int (*ndo_netpoll_setup)(struct net_device *dev, - struct netpoll_info *info); + int (*ndo_netpoll_setup)(struct net_device *dev); void (*ndo_netpoll_cleanup)(struct net_device *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 458040e8a0e0..91d134961357 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -725,7 +725,7 @@ static void vlan_dev_poll_controller(struct net_device *dev) return; } -static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) +static int vlan_dev_netpoll_setup(struct net_device *dev) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 26b79feb385d..0ab4613aa07a 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -328,7 +328,7 @@ int br_netpoll_enable(struct net_bridge_port *p) return __br_netpoll_enable(p); } -static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) +static int br_netpoll_setup(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index aa49b92e9194..94b7f07a952f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -641,7 +641,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) ops = ndev->netdev_ops; if (ops->ndo_netpoll_setup) { - err = ops->ndo_netpoll_setup(ndev, npinfo); + err = ops->ndo_netpoll_setup(ndev); if (err) goto free_npinfo; } diff --git a/net/dsa/user.c b/net/dsa/user.c index 64f660d2334b..91a1fa5f8ab0 100644 --- 
a/net/dsa/user.c +++ b/net/dsa/user.c @@ -1308,8 +1308,7 @@ static int dsa_user_set_pauseparam(struct net_device *dev, } #ifdef CONFIG_NET_POLL_CONTROLLER -static int dsa_user_netpoll_setup(struct net_device *dev, - struct netpoll_info *ni) +static int dsa_user_netpoll_setup(struct net_device *dev) { struct net_device *conduit = dsa_user_to_conduit(dev); struct dsa_user_priv *p = netdev_priv(dev); -- 2.51.0 From 7cfc1b1fa8673fe386304194d4cc2c8fe555bbf9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Oct 2024 05:23:10 +0000 Subject: [PATCH 04/16] net: netdev_tx_sent_queue() small optimization Replace the smp_mb() immediately following a set_bit() with smp_mb__after_atomic(). Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20241018052310.2612084-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 86a0b7eb9461..bbd30f3c5d29 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3517,7 +3517,7 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, * because in netdev_tx_completed_queue we update the dql_completed * before checking the XOFF flag. */ - smp_mb(); + smp_mb__after_atomic(); /* check again in case another CPU has just made room avail */ if (unlikely(dql_avail(&dev_queue->dql) >= 0)) -- 2.51.0 From b0b3683419b45e2971b6d413c506cb818b268d35 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 18 Oct 2024 10:06:30 +0100 Subject: [PATCH 05/16] netlink: specs: Add missing bitset attrs to ethtool spec There are a couple of attributes missing from the 'bitset' attribute-set in the ethtool netlink spec. Add them to the spec. 
Reported-by: Kory Maincent Closes: https://lore.kernel.org/netdev/20241017180551.1259bf5c@kmaincent-XPS-13-7390/ Signed-off-by: Donald Hunter Reviewed-by: Kory Maincent Tested-by: Kory Maincent Link: https://patch.msgid.link/20241018090630.22212-1-donald.hunter@gmail.com Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/ethtool.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 6a050d755b9c..f6c5d8214c7e 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -96,7 +96,12 @@ attribute-sets: name: bits type: nest nested-attributes: bitset-bits - + - + name: value + type: binary + - + name: mask + type: binary - name: string attributes: -- 2.51.0 From 1d2709d6d3902786bfc3e9ede627e7364633cff7 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 17 Oct 2024 12:52:39 +0100 Subject: [PATCH 06/16] net: pcs: xpcs: use generic register definitions As a general policy, we prefer our generic register definitions over vendor-specific definitions. In XPCS, it appears that the register layout follows a BMCR, BMSR and ADVERTISE register definition. We already refer to this BMCR register using several different macros which is confusing. 
Convert the following register definitions to generic versions: DW_VR_MII_MMD_CTRL => MII_BMCR MDIO_CTRL1 => MII_BMCR AN_CL37_EN => BMCR_ANENABLE SGMII_SPEED_SS6 => BMCR_SPEED1000 SGMII_SPEED_SS13 => BMCR_SPEED100 MDIO_CTRL1_RESET => BMCR_RESET DW_VR_MII_MMD_STS => MII_BMSR DW_VR_MII_MMD_STS_LINK_STS => BMSR_LSTATUS DW_FULL_DUPLEX => ADVERTISE_1000XFULL DW_HALF_DUPLEX => ADVERTISE_1000XHALF Signed-off-by: Russell King (Oracle) Tested-by: Serge Semin Signed-off-by: Paolo Abeni --- drivers/net/pcs/pcs-xpcs.c | 64 ++++++++++++++++++++------------------ drivers/net/pcs/pcs-xpcs.h | 12 ------- 2 files changed, 34 insertions(+), 42 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index c69421e80d19..a5e2d93db285 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -223,8 +223,8 @@ static int xpcs_poll_reset(struct dw_xpcs *xpcs, int dev) int ret, val; ret = read_poll_timeout(xpcs_read, val, - val < 0 || !(val & MDIO_CTRL1_RESET), - 50000, 600000, true, xpcs, dev, MDIO_CTRL1); + val < 0 || !(val & BMCR_RESET), + 50000, 600000, true, xpcs, dev, MII_BMCR); if (val < 0) ret = val; @@ -250,7 +250,7 @@ static int xpcs_soft_reset(struct dw_xpcs *xpcs, return -EINVAL; } - ret = xpcs_write(xpcs, dev, MDIO_CTRL1, MDIO_CTRL1_RESET); + ret = xpcs_write(xpcs, dev, MII_BMCR, BMCR_RESET); if (ret < 0) return ret; @@ -343,7 +343,7 @@ static void xpcs_config_usxgmii(struct dw_xpcs *xpcs, int speed) if (ret < 0) goto out; - ret = xpcs_modify(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1, DW_USXGMII_SS_MASK, + ret = xpcs_modify(xpcs, MDIO_MMD_VEND2, MII_BMCR, DW_USXGMII_SS_MASK, speed_sel | DW_USXGMII_FULL); if (ret < 0) goto out; @@ -646,19 +646,21 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs, * speed/duplex mode change by HW after SGMII AN complete) * 5) VR_MII_MMD_CTRL Bit(12) [AN_ENABLE] = 1b (Enable SGMII AN) * + * Note that VR_MII_MMD_CTRL is MII_BMCR. 
+ * * Note: Since it is MAC side SGMII, there is no need to set * SR_MII_AN_ADV. MAC side SGMII receives AN Tx Config from * PHY about the link state change after C28 AN is completed * between PHY and Link Partner. There is also no need to * trigger AN restart for MAC-side SGMII. */ - mdio_ctrl = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL); + mdio_ctrl = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_BMCR); if (mdio_ctrl < 0) return mdio_ctrl; - if (mdio_ctrl & AN_CL37_EN) { - ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL, - mdio_ctrl & ~AN_CL37_EN); + if (mdio_ctrl & BMCR_ANENABLE) { + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MII_BMCR, + mdio_ctrl & ~BMCR_ANENABLE); if (ret < 0) return ret; } @@ -696,8 +698,8 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs, return ret; if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) - ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL, - mdio_ctrl | AN_CL37_EN); + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MII_BMCR, + mdio_ctrl | BMCR_ANENABLE); return ret; } @@ -715,14 +717,16 @@ static int xpcs_config_aneg_c37_1000basex(struct dw_xpcs *xpcs, * be disabled first:- * 1) VR_MII_MMD_CTRL Bit(12)[AN_ENABLE] = 0b * 2) VR_MII_AN_CTRL Bit(2:1)[PCS_MODE] = 00b (1000BASE-X C37) + * + * Note that VR_MII_MMD_CTRL is MII_BMCR. 
*/ - mdio_ctrl = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL); + mdio_ctrl = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_BMCR); if (mdio_ctrl < 0) return mdio_ctrl; - if (mdio_ctrl & AN_CL37_EN) { - ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL, - mdio_ctrl & ~AN_CL37_EN); + if (mdio_ctrl & BMCR_ANENABLE) { + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MII_BMCR, + mdio_ctrl & ~BMCR_ANENABLE); if (ret < 0) return ret; } @@ -760,8 +764,8 @@ static int xpcs_config_aneg_c37_1000basex(struct dw_xpcs *xpcs, return ret; if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) { - ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL, - mdio_ctrl | AN_CL37_EN); + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MII_BMCR, + mdio_ctrl | BMCR_ANENABLE); if (ret < 0) return ret; } @@ -780,9 +784,9 @@ static int xpcs_config_2500basex(struct dw_xpcs *xpcs) if (ret < 0) return ret; - return xpcs_modify(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_CTRL, - AN_CL37_EN | SGMII_SPEED_SS6 | SGMII_SPEED_SS13, - SGMII_SPEED_SS6); + return xpcs_modify(xpcs, MDIO_MMD_VEND2, MII_BMCR, + BMCR_ANENABLE | BMCR_SPEED1000 | BMCR_SPEED100, + BMCR_SPEED1000); } static int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, @@ -972,14 +976,14 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs, state->link = true; - speed = xpcs_read(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1); + speed = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_BMCR); if (speed < 0) return speed; - speed &= SGMII_SPEED_SS13 | SGMII_SPEED_SS6; - if (speed == SGMII_SPEED_SS6) + speed &= BMCR_SPEED100 | BMCR_SPEED1000; + if (speed == BMCR_SPEED1000) state->speed = SPEED_1000; - else if (speed == SGMII_SPEED_SS13) + else if (speed == BMCR_SPEED100) state->speed = SPEED_100; else if (speed == 0) state->speed = SPEED_10; @@ -988,9 +992,9 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs, if (duplex < 0) return duplex; - if (duplex & DW_FULL_DUPLEX) + if (duplex & ADVERTISE_1000XFULL) state->duplex = DUPLEX_FULL; - else if (duplex & 
DW_HALF_DUPLEX) + else if (duplex & ADVERTISE_1000XHALF) state->duplex = DUPLEX_HALF; xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS, 0); @@ -1039,13 +1043,13 @@ static int xpcs_get_state_2500basex(struct dw_xpcs *xpcs, { int ret; - ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_STS); + ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_BMSR); if (ret < 0) { state->link = 0; return ret; } - state->link = !!(ret & DW_VR_MII_MMD_STS_LINK_STS); + state->link = !!(ret & BMSR_LSTATUS); if (!state->link) return 0; @@ -1109,7 +1113,7 @@ static void xpcs_link_up_sgmii(struct dw_xpcs *xpcs, unsigned int neg_mode, return; val = mii_bmcr_encode_fixed(speed, duplex); - ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1, val); + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MII_BMCR, val); if (ret) dev_err(&xpcs->mdiodev->dev, "%s: xpcs_write returned %pe\n", __func__, ERR_PTR(ret)); @@ -1141,7 +1145,7 @@ static void xpcs_link_up_1000basex(struct dw_xpcs *xpcs, unsigned int neg_mode, dev_err(&xpcs->mdiodev->dev, "%s: half duplex not supported\n", __func__); - ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1, val); + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MII_BMCR, val); if (ret) dev_err(&xpcs->mdiodev->dev, "%s: xpcs_write returned %pe\n", __func__, ERR_PTR(ret)); @@ -1164,7 +1168,7 @@ static void xpcs_an_restart(struct phylink_pcs *pcs) { struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs); - xpcs_modify(xpcs, MDIO_MMD_VEND2, MDIO_CTRL1, BMCR_ANRESTART, + xpcs_modify(xpcs, MDIO_MMD_VEND2, MII_BMCR, BMCR_ANRESTART, BMCR_ANRESTART); } diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h index 9a22eed4404d..adc5a0b3c883 100644 --- a/drivers/net/pcs/pcs-xpcs.h +++ b/drivers/net/pcs/pcs-xpcs.h @@ -54,9 +54,6 @@ /* Clause 37 Defines */ /* VR MII MMD registers offsets */ -#define DW_VR_MII_MMD_CTRL 0x0000 -#define DW_VR_MII_MMD_STS 0x0001 -#define DW_VR_MII_MMD_STS_LINK_STS BIT(2) #define DW_VR_MII_DIG_CTRL1 0x8000 #define DW_VR_MII_AN_CTRL 0x8001 #define 
DW_VR_MII_AN_INTR_STS 0x8002 @@ -93,15 +90,6 @@ #define DW_VR_MII_C37_ANSGM_SP_1000 0x2 #define DW_VR_MII_C37_ANSGM_SP_LNKSTS BIT(4) -/* SR MII MMD Control defines */ -#define AN_CL37_EN BIT(12) /* Enable Clause 37 auto-nego */ -#define SGMII_SPEED_SS13 BIT(13) /* SGMII speed along with SS6 */ -#define SGMII_SPEED_SS6 BIT(6) /* SGMII speed along with SS13 */ - -/* SR MII MMD AN Advertisement defines */ -#define DW_HALF_DUPLEX BIT(6) -#define DW_FULL_DUPLEX BIT(5) - /* VR MII EEE Control 0 defines */ #define DW_VR_MII_EEE_LTX_EN BIT(0) /* LPI Tx Enable */ #define DW_VR_MII_EEE_LRX_EN BIT(1) /* LPI Rx Enable */ -- 2.51.0 From 8d2aeab4ce782df9d7cd035938f4545af7db260e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 17 Oct 2024 12:52:44 +0100 Subject: [PATCH 07/16] net: pcs: xpcs: remove switch() in xpcs_link_up_1000basex() Remove an unnecessary switch() statement in xpcs_link_up_1000basex(). The only value this switch statement is interested in is SPEED_1000, all other values lead to an error. Replace this with a simple if() statement. 
Signed-off-by: Russell King (Oracle) Tested-by: Serge Semin Signed-off-by: Paolo Abeni --- drivers/net/pcs/pcs-xpcs.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index a5e2d93db285..183df8f8c50f 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -1127,18 +1127,13 @@ static void xpcs_link_up_1000basex(struct dw_xpcs *xpcs, unsigned int neg_mode, if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) return; - switch (speed) { - case SPEED_1000: - val = BMCR_SPEED1000; - break; - case SPEED_100: - case SPEED_10: - default: - dev_err(&xpcs->mdiodev->dev, "%s: speed = %d\n", + if (speed != SPEED_1000) { + dev_err(&xpcs->mdiodev->dev, "%s: speed %dMbps not supported\n", __func__, speed); return; } + val = BMCR_SPEED1000; if (duplex == DUPLEX_FULL) val |= BMCR_FULLDPLX; else -- 2.51.0 From b61a465a761921d11f99492ce41b85cfba7d6161 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 17 Oct 2024 12:52:49 +0100 Subject: [PATCH 08/16] net: pcs: xpcs: rearrange xpcs_link_up_1000basex() Rearrange xpcs_link_up_1000basex() to make it more obvious what will happen in the following commit. 
Signed-off-by: Russell King (Oracle) Tested-by: Serge Semin Signed-off-by: Paolo Abeni --- drivers/net/pcs/pcs-xpcs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 183df8f8c50f..3222b8851bff 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -1133,12 +1133,13 @@ static void xpcs_link_up_1000basex(struct dw_xpcs *xpcs, unsigned int neg_mode, return; } + if (duplex != DUPLEX_FULL) + dev_err(&xpcs->mdiodev->dev, "%s: half duplex not supported\n", + __func__); + val = BMCR_SPEED1000; if (duplex == DUPLEX_FULL) val |= BMCR_FULLDPLX; - else - dev_err(&xpcs->mdiodev->dev, "%s: half duplex not supported\n", - __func__); ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MII_BMCR, val); if (ret) -- 2.51.0 From 1c17f9d3fe17d296ff2d93740ee96a52a2343628 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 17 Oct 2024 12:52:54 +0100 Subject: [PATCH 09/16] net: pcs: xpcs: replace open-coded mii_bmcr_encode_fixed() We can now see that we have an open-coded version of mii_bmcr_encode_fixed() when this is called with SPEED_1000: val = BMCR_SPEED1000; if (duplex == DUPLEX_FULL) val |= BMCR_FULLDPLX; Replace this with a call to mii_bmcr_encode_fixed(). 
Signed-off-by: Russell King (Oracle) Tested-by: Serge Semin Signed-off-by: Paolo Abeni --- drivers/net/pcs/pcs-xpcs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 3222b8851bff..5b38f9019f83 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -1137,10 +1137,7 @@ static void xpcs_link_up_1000basex(struct dw_xpcs *xpcs, unsigned int neg_mode, dev_err(&xpcs->mdiodev->dev, "%s: half duplex not supported\n", __func__); - val = BMCR_SPEED1000; - if (duplex == DUPLEX_FULL) - val |= BMCR_FULLDPLX; - + val = mii_bmcr_encode_fixed(speed, duplex); ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MII_BMCR, val); if (ret) dev_err(&xpcs->mdiodev->dev, "%s: xpcs_write returned %pe\n", -- 2.51.0 From 4145921c305545cf86d49c0dd665084fb7245225 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 17 Oct 2024 12:52:59 +0100 Subject: [PATCH 10/16] net: pcs: xpcs: combine xpcs_link_up_{1000basex,sgmii}() xpcs_link_up_sgmii() and xpcs_link_up_1000basex() are almost identical with the exception of checking the speed and duplex for 1000BASE-X. Combine the two functions. 
Signed-off-by: Russell King (Oracle) Tested-by: Serge Semin Signed-off-by: Paolo Abeni --- drivers/net/pcs/pcs-xpcs.c | 54 ++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 31 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 5b38f9019f83..6cc658f8366c 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -1104,41 +1104,32 @@ static void xpcs_get_state(struct phylink_pcs *pcs, } } -static void xpcs_link_up_sgmii(struct dw_xpcs *xpcs, unsigned int neg_mode, - int speed, int duplex) +static void xpcs_link_up_sgmii_1000basex(struct dw_xpcs *xpcs, + unsigned int neg_mode, + phy_interface_t interface, + int speed, int duplex) { - int val, ret; + int ret; if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) return; - val = mii_bmcr_encode_fixed(speed, duplex); - ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MII_BMCR, val); - if (ret) - dev_err(&xpcs->mdiodev->dev, "%s: xpcs_write returned %pe\n", - __func__, ERR_PTR(ret)); -} - -static void xpcs_link_up_1000basex(struct dw_xpcs *xpcs, unsigned int neg_mode, - int speed, int duplex) -{ - int val, ret; - - if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) - return; + if (interface == PHY_INTERFACE_MODE_1000BASEX) { + if (speed != SPEED_1000) { + dev_err(&xpcs->mdiodev->dev, + "%s: speed %dMbps not supported\n", + __func__, speed); + return; + } - if (speed != SPEED_1000) { - dev_err(&xpcs->mdiodev->dev, "%s: speed %dMbps not supported\n", - __func__, speed); - return; + if (duplex != DUPLEX_FULL) + dev_err(&xpcs->mdiodev->dev, + "%s: half duplex not supported\n", + __func__); } - if (duplex != DUPLEX_FULL) - dev_err(&xpcs->mdiodev->dev, "%s: half duplex not supported\n", - __func__); - - val = mii_bmcr_encode_fixed(speed, duplex); - ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MII_BMCR, val); + ret = xpcs_write(xpcs, MDIO_MMD_VEND2, MII_BMCR, + mii_bmcr_encode_fixed(speed, duplex)); if (ret) dev_err(&xpcs->mdiodev->dev, "%s: xpcs_write returned %pe\n", 
__func__, ERR_PTR(ret)); @@ -1151,10 +1142,11 @@ static void xpcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, if (interface == PHY_INTERFACE_MODE_USXGMII) return xpcs_config_usxgmii(xpcs, speed); - if (interface == PHY_INTERFACE_MODE_SGMII) - return xpcs_link_up_sgmii(xpcs, neg_mode, speed, duplex); - if (interface == PHY_INTERFACE_MODE_1000BASEX) - return xpcs_link_up_1000basex(xpcs, neg_mode, speed, duplex); + + if (interface == PHY_INTERFACE_MODE_SGMII || + interface == PHY_INTERFACE_MODE_1000BASEX) + return xpcs_link_up_sgmii_1000basex(xpcs, neg_mode, interface, + speed, duplex); } static void xpcs_an_restart(struct phylink_pcs *pcs) -- 2.51.0 From 11afdf3b2ecee038dda8a38b6b6e6d232e64a210 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 17 Oct 2024 12:53:05 +0100 Subject: [PATCH 11/16] net: pcs: xpcs: rename xpcs_config_usxgmii() xpcs_config_usxgmii() is only called from the xpcs_link_up() method, so let's name it similarly to the SGMII and 1000BASEX functions. 
Signed-off-by: Russell King (Oracle) Tested-by: Serge Semin Signed-off-by: Paolo Abeni --- drivers/net/pcs/pcs-xpcs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 6cc658f8366c..89ceedc0f18b 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -311,7 +311,7 @@ static int xpcs_read_fault_c73(struct dw_xpcs *xpcs, return 0; } -static void xpcs_config_usxgmii(struct dw_xpcs *xpcs, int speed) +static void xpcs_link_up_usxgmii(struct dw_xpcs *xpcs, int speed) { int ret, speed_sel; @@ -1141,7 +1141,7 @@ static void xpcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs); if (interface == PHY_INTERFACE_MODE_USXGMII) - return xpcs_config_usxgmii(xpcs, speed); + return xpcs_link_up_usxgmii(xpcs, speed); if (interface == PHY_INTERFACE_MODE_SGMII || interface == PHY_INTERFACE_MODE_1000BASEX) -- 2.51.0 From fd4056db7aee901677a3c62534b2d31b38678cb4 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 17 Oct 2024 12:53:10 +0100 Subject: [PATCH 12/16] net: pcs: xpcs: remove return statements in void function While using "return" when calling a void returning function inside a function that returns void doesn't cause a compiler warning, it looks weird. Convert the bunch of if() statements to a switch() and remove these return statements. 
Signed-off-by: Russell King (Oracle) Tested-by: Serge Semin Signed-off-by: Paolo Abeni --- drivers/net/pcs/pcs-xpcs.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 89ceedc0f18b..7246a910728d 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -1140,13 +1140,20 @@ static void xpcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, { struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs); - if (interface == PHY_INTERFACE_MODE_USXGMII) - return xpcs_link_up_usxgmii(xpcs, speed); + switch (interface) { + case PHY_INTERFACE_MODE_USXGMII: + xpcs_link_up_usxgmii(xpcs, speed); + break; + + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: + xpcs_link_up_sgmii_1000basex(xpcs, neg_mode, interface, speed, + duplex); + break; - if (interface == PHY_INTERFACE_MODE_SGMII || - interface == PHY_INTERFACE_MODE_1000BASEX) - return xpcs_link_up_sgmii_1000basex(xpcs, neg_mode, interface, - speed, duplex); + default: + break; + } } static void xpcs_an_restart(struct phylink_pcs *pcs) -- 2.51.0 From 1bf70e6c3a5346966c25e0a1ff492945b25d3f80 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 18 Oct 2024 10:32:28 +0100 Subject: [PATCH 13/16] tools/net/ynl: improve async notification handling The notification handling in ynl is currently very simple, using sleep() to wait a period of time and then handling all the buffered messages in a single batch. This patch changes the notification handling so that messages are processed as they are received. This makes it possible to use ynl as a library that supplies notifications in a timely manner. - Change check_ntf() to be a generator that yields 1 notification at a time and blocks until a notification is available. - Use the --sleep parameter to set an alarm and exit when it fires. 
This means that the CLI has the same interface, but notifications get printed as they are received: ./tools/net/ynl/cli.py --spec --subscribe [ --sleep ] Here is an example python snippet that shows how to use ynl as a library for receiving notifications: ynl = YnlFamily(f"{dir}/rt_route.yaml") ynl.ntf_subscribe('rtnlgrp-ipv4-route') for event in ynl.check_ntf(): handle(event) Signed-off-by: Donald Hunter Tested-by: Kory Maincent Link: https://patch.msgid.link/20241018093228.25477-1-donald.hunter@gmail.com Signed-off-by: Paolo Abeni --- tools/net/ynl/cli.py | 10 +++++--- tools/net/ynl/lib/ynl.py | 49 ++++++++++++++++++++++++---------------- 2 files changed, 36 insertions(+), 23 deletions(-) diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/cli.py index b8481f401376..9e95016b85b3 100755 --- a/tools/net/ynl/cli.py +++ b/tools/net/ynl/cli.py @@ -5,6 +5,7 @@ import argparse import json import pprint import time +import signal from lib import YnlFamily, Netlink, NlError @@ -17,6 +18,8 @@ class YnlEncoder(json.JSONEncoder): return list(obj) return json.JSONEncoder.default(self, obj) +def handle_timeout(sig, frame): + exit(0) def main(): description = """ @@ -81,7 +84,8 @@ def main(): ynl.ntf_subscribe(args.ntf) if args.sleep: - time.sleep(args.sleep) + signal.signal(signal.SIGALRM, handle_timeout) + signal.alarm(args.sleep) if args.list_ops: for op_name, op in ynl.ops.items(): @@ -106,8 +110,8 @@ def main(): exit(1) if args.ntf: - ynl.check_ntf() - output(ynl.async_msg_queue) + for msg in ynl.check_ntf(): + output(msg) if __name__ == "__main__": diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index c22c22bf2cb7..92f85698c50e 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -12,6 +12,8 @@ import sys import yaml import ipaddress import uuid +import queue +import time from .nlspec import SpecFamily @@ -489,7 +491,7 @@ class YnlFamily(SpecFamily): self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_GET_STRICT_CHK, 1) 
self.async_msg_ids = set() - self.async_msg_queue = [] + self.async_msg_queue = queue.Queue() for msg in self.msgs.values(): if msg.is_async: @@ -903,32 +905,39 @@ class YnlFamily(SpecFamily): msg['name'] = op['name'] msg['msg'] = attrs - self.async_msg_queue.append(msg) + self.async_msg_queue.put(msg) - def check_ntf(self): + def check_ntf(self, interval=0.1): while True: try: reply = self.sock.recv(self._recv_size, socket.MSG_DONTWAIT) - except BlockingIOError: - return + nms = NlMsgs(reply) + self._recv_dbg_print(reply, nms) + for nl_msg in nms: + if nl_msg.error: + print("Netlink error in ntf!?", os.strerror(-nl_msg.error)) + print(nl_msg) + continue + if nl_msg.done: + print("Netlink done while checking for ntf!?") + continue - nms = NlMsgs(reply) - self._recv_dbg_print(reply, nms) - for nl_msg in nms: - if nl_msg.error: - print("Netlink error in ntf!?", os.strerror(-nl_msg.error)) - print(nl_msg) - continue - if nl_msg.done: - print("Netlink done while checking for ntf!?") - continue + decoded = self.nlproto.decode(self, nl_msg, None) + if decoded.cmd() not in self.async_msg_ids: + print("Unexpected msg id while checking for ntf", decoded) + continue - decoded = self.nlproto.decode(self, nl_msg, None) - if decoded.cmd() not in self.async_msg_ids: - print("Unexpected msg id done while checking for ntf", decoded) - continue + self.handle_ntf(decoded) + except BlockingIOError: + pass - self.handle_ntf(decoded) + try: + yield self.async_msg_queue.get_nowait() + except queue.Empty: + try: + time.sleep(interval) + except KeyboardInterrupt: + return def operation_do_attributes(self, name): """ -- 2.51.0 From 08a9572be36819b5d9011604edfa5db6c5062a7a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Oct 2024 11:31:32 -0700 Subject: [PATCH 14/16] phonet: Pass ifindex to fill_addr(). We will convert addr_doit() and getaddr_dumpit() to RCU, both of which call fill_addr(). 
The former will call phonet_address_notify() outside of RCU due to GFP_KERNEL, so dev will not be available in fill_addr(). Let's pass ifindex directly to fill_addr(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/phonet/pn_netlink.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 894e5c72d6bf..3205d2457477 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -19,7 +19,7 @@ /* Device address handling */ -static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, +static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr, u32 portid, u32 seq, int event); void phonet_address_notify(int event, struct net_device *dev, u8 addr) @@ -31,7 +31,8 @@ void phonet_address_notify(int event, struct net_device *dev, u8 addr) nla_total_size(1), GFP_KERNEL); if (skb == NULL) goto errout; - err = fill_addr(skb, dev, addr, 0, 0, event); + + err = fill_addr(skb, dev->ifindex, addr, 0, 0, event); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(skb); @@ -92,8 +93,8 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } -static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, - u32 portid, u32 seq, int event) +static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr, + u32 portid, u32 seq, int event) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; @@ -107,7 +108,7 @@ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, ifm->ifa_prefixlen = 0; ifm->ifa_flags = IFA_F_PERMANENT; ifm->ifa_scope = RT_SCOPE_LINK; - ifm->ifa_index = dev->ifindex; + ifm->ifa_index = ifindex; if (nla_put_u8(skb, IFA_LOCAL, addr)) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -140,7 +141,7 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (addr_idx++ < addr_start_idx) continue; - if (fill_addr(skb, pnd->netdev, addr << 2, + if 
(fill_addr(skb, pnd->netdev->ifindex, addr << 2, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR) < 0) goto out; -- 2.51.0 From 68ed5c38b512b734caf3da1f87db4a99fcfe3002 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Oct 2024 11:31:33 -0700 Subject: [PATCH 15/16] phonet: Pass net and ifindex to phonet_address_notify(). Currently, phonet_address_notify() fetches netns and ifindex from dev. Once addr_doit() is converted to RCU, phonet_address_notify() will be called outside of RCU due to GFP_KERNEL, and dev will be unavailable there. Let's pass net and ifindex to phonet_address_notify(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/phonet/pn_dev.h | 2 +- net/phonet/pn_dev.c | 10 +++++++--- net/phonet/pn_netlink.c | 12 ++++++------ 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index e9dc8dca5817..6b2102b4ece3 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -38,7 +38,7 @@ int phonet_address_add(struct net_device *dev, u8 addr); int phonet_address_del(struct net_device *dev, u8 addr); u8 phonet_address_get(struct net_device *dev, u8 addr); int phonet_address_lookup(struct net *net, u8 addr); -void phonet_address_notify(int event, struct net_device *dev, u8 addr); +void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr); int phonet_route_add(struct net_device *dev, u8 daddr); int phonet_route_del(struct net_device *dev, u8 daddr); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index cde671d29d5d..2e7d850dc726 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -98,10 +98,13 @@ static void phonet_device_destroy(struct net_device *dev) mutex_unlock(&pndevs->lock); if (pnd) { + struct net *net = dev_net(dev); + u32 ifindex = dev->ifindex; u8 addr; for_each_set_bit(addr, pnd->addrs, 64) - phonet_address_notify(RTM_DELADDR, dev, addr); + 
phonet_address_notify(net, RTM_DELADDR, ifindex, addr); + kfree(pnd); } } @@ -244,8 +247,9 @@ static int phonet_device_autoconf(struct net_device *dev) ret = phonet_address_add(dev, req.ifr_phonet_autoconf.device); if (ret) return ret; - phonet_address_notify(RTM_NEWADDR, dev, - req.ifr_phonet_autoconf.device); + + phonet_address_notify(dev_net(dev), RTM_NEWADDR, dev->ifindex, + req.ifr_phonet_autoconf.device); return 0; } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 3205d2457477..23097085ad38 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -22,7 +22,7 @@ static int fill_addr(struct sk_buff *skb, u32 ifindex, u8 addr, u32 portid, u32 seq, int event); -void phonet_address_notify(int event, struct net_device *dev, u8 addr) +void phonet_address_notify(struct net *net, int event, u32 ifindex, u8 addr) { struct sk_buff *skb; int err = -ENOBUFS; @@ -32,17 +32,17 @@ void phonet_address_notify(int event, struct net_device *dev, u8 addr) if (skb == NULL) goto errout; - err = fill_addr(skb, dev->ifindex, addr, 0, 0, event); + err = fill_addr(skb, ifindex, addr, 0, 0, event); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - rtnl_notify(skb, dev_net(dev), 0, - RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); + + rtnl_notify(skb, net, 0, RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); return; errout: - rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); + rtnl_set_sk_err(net, RTNLGRP_PHONET_IFADDR, err); } static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = { @@ -89,7 +89,7 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, else err = phonet_address_del(dev, pnaddr); if (!err) - phonet_address_notify(nlh->nlmsg_type, dev, pnaddr); + phonet_address_notify(net, nlh->nlmsg_type, ifm->ifa_index, pnaddr); return err; } -- 2.51.0 From 42f5fe1dc4babad1c49bcc4121983fffccee3cd9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Oct 2024 11:31:34 -0700 Subject: [PATCH 16/16] phonet: 
Convert phonet_device_list.lock to spinlock_t. addr_doit() calls phonet_address_add() or phonet_address_del() for RTM_NEWADDR or RTM_DELADDR, respectively. Both functions only touch phonet_device_list(dev_net(dev)), which is currently protected by RTNL and its dedicated mutex, phonet_device_list.lock. We will convert addr_doit() to RCU and cannot use mutex inside RCU. Let's convert the mutex to spinlock_t. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/phonet/pn_dev.h | 3 ++- net/phonet/pn_dev.c | 26 +++++++++++++++++--------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 6b2102b4ece3..ac0331d83a81 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -12,12 +12,13 @@ #include #include +#include struct net; struct phonet_device_list { struct list_head list; - struct mutex lock; + spinlock_t lock; }; struct phonet_device_list *phonet_device_list(struct net *net); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 2e7d850dc726..545279ef5910 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -54,7 +54,7 @@ static struct phonet_device *__phonet_device_alloc(struct net_device *dev) pnd->netdev = dev; bitmap_zero(pnd->addrs, 64); - BUG_ON(!mutex_is_locked(&pndevs->lock)); + lockdep_assert_held(&pndevs->lock); list_add_rcu(&pnd->list, &pndevs->list); return pnd; } @@ -64,7 +64,8 @@ static struct phonet_device *__phonet_get(struct net_device *dev) struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; - BUG_ON(!mutex_is_locked(&pndevs->lock)); + lockdep_assert_held(&pndevs->lock); + list_for_each_entry(pnd, &pndevs->list, list) { if (pnd->netdev == dev) return pnd; @@ -91,11 +92,13 @@ static void phonet_device_destroy(struct net_device *dev) ASSERT_RTNL(); - mutex_lock(&pndevs->lock); + spin_lock(&pndevs->lock); + pnd = __phonet_get(dev); if (pnd) 
list_del_rcu(&pnd->list); - mutex_unlock(&pndevs->lock); + + spin_unlock(&pndevs->lock); if (pnd) { struct net *net = dev_net(dev); @@ -136,7 +139,8 @@ int phonet_address_add(struct net_device *dev, u8 addr) struct phonet_device *pnd; int err = 0; - mutex_lock(&pndevs->lock); + spin_lock(&pndevs->lock); + /* Find or create Phonet-specific device data */ pnd = __phonet_get(dev); if (pnd == NULL) @@ -145,7 +149,9 @@ int phonet_address_add(struct net_device *dev, u8 addr) err = -ENOMEM; else if (test_and_set_bit(addr >> 2, pnd->addrs)) err = -EEXIST; - mutex_unlock(&pndevs->lock); + + spin_unlock(&pndevs->lock); + return err; } @@ -155,7 +161,8 @@ int phonet_address_del(struct net_device *dev, u8 addr) struct phonet_device *pnd; int err = 0; - mutex_lock(&pndevs->lock); + spin_lock(&pndevs->lock); + pnd = __phonet_get(dev); if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) { err = -EADDRNOTAVAIL; @@ -164,7 +171,8 @@ int phonet_address_del(struct net_device *dev, u8 addr) list_del_rcu(&pnd->list); else pnd = NULL; - mutex_unlock(&pndevs->lock); + + spin_unlock(&pndevs->lock); if (pnd) kfree_rcu(pnd, rcu); @@ -313,7 +321,7 @@ static int __net_init phonet_init_net(struct net *net) return -ENOMEM; INIT_LIST_HEAD(&pnn->pndevs.list); - mutex_init(&pnn->pndevs.lock); + spin_lock_init(&pnn->pndevs.lock); mutex_init(&pnn->routes.lock); return 0; } -- 2.51.0