From 70b6f46a4ed8bd56c85ffff22df91e20e8c85e33 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 17 Dec 2024 20:56:55 +0100 Subject: [PATCH 01/16] netfilter: ipset: Fix for recursive locking warning With CONFIG_PROVE_LOCKING, when creating a set of type bitmap:ip, adding it to a set of type list:set and populating it from iptables SET target triggers a kernel warning: | WARNING: possible recursive locking detected | 6.12.0-rc7-01692-g5e9a28f41134-dirty #594 Not tainted | -------------------------------------------- | ping/4018 is trying to acquire lock: | ffff8881094a6848 (&set->lock){+.-.}-{2:2}, at: ip_set_add+0x28c/0x360 [ip_set] | | but task is already holding lock: | ffff88811034c048 (&set->lock){+.-.}-{2:2}, at: ip_set_add+0x28c/0x360 [ip_set] This is a false alarm: ipset does not allow nested list:set type, so the loop in list_set_kadd() can never encounter the outer set itself. No other set type supports embedded sets, so this is the only case to consider. To avoid the false report, create a distinct lock class for list:set type ipset locks. Fixes: f830837f0eed ("netfilter: ipset: list:set set type support") Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_list_set.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index bfae7066936b..db794fe1300e 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -611,6 +611,8 @@ init_list_set(struct net *net, struct ip_set *set, u32 size) return true; } +static struct lock_class_key list_set_lockdep_key; + static int list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[], u32 flags) @@ -627,6 +629,7 @@ list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (size < IP_SET_LIST_MIN_SIZE) size = IP_SET_LIST_MIN_SIZE; + lockdep_set_class(&set->lock, &list_set_lockdep_key); set->variant = &set_variant; set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem), __alignof__(struct set_elem)); -- 2.51.0 From cff865c700711ecc3824b2dfe181637f3ed23c80 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Dec 2024 09:10:34 +0100 Subject: [PATCH 02/16] net: phy: avoid undefined behavior in *_led_polarity_set() gcc runs into undefined behavior at the end of the three led_polarity_set() callback functions if it were called with a zero 'modes' argument and it just ends the function there without returning from it. This gets flagged by 'objtool' as a function that continues on to the next one: drivers/net/phy/aquantia/aquantia_leds.o: warning: objtool: aqr_phy_led_polarity_set+0xf: can't find jump dest instruction at .text+0x5d9 drivers/net/phy/intel-xway.o: warning: objtool: xway_gphy_led_polarity_set() falls through to next function xway_gphy_config_init() drivers/net/phy/mxl-gpy.o: warning: objtool: gpy_led_polarity_set() falls through to next function gpy_led_hw_control_get() There is no point to micro-optimize the behavior here to save a single-digit number of bytes in the kernel, so just change this to a "return -EINVAL" as we do when any unexpected bits are set. Fixes: 1758af47b98c ("net: phy: intel-xway: add support for PHY LEDs") Fixes: 9d55e68b19f2 ("net: phy: aquantia: correctly describe LED polarity override") Fixes: eb89c79c1b8f ("net: phy: mxl-gpy: correctly describe LED polarity") Signed-off-by: Arnd Bergmann Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241217081056.238792-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/aquantia/aquantia_leds.c | 2 +- drivers/net/phy/intel-xway.c | 2 +- drivers/net/phy/mxl-gpy.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/aquantia/aquantia_leds.c b/drivers/net/phy/aquantia/aquantia_leds.c index 00ad2313fed3..951f46104eff 100644 --- a/drivers/net/phy/aquantia/aquantia_leds.c +++ b/drivers/net/phy/aquantia/aquantia_leds.c @@ -156,5 +156,5 @@ int aqr_phy_led_polarity_set(struct phy_device *phydev, int index, unsigned long if (force_active_high || force_active_low) return aqr_phy_led_active_low_set(phydev, index, force_active_low); - unreachable(); + return -EINVAL; } diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c index b672c55a7a4e..e6ed2413e514 100644 --- a/drivers/net/phy/intel-xway.c +++ b/drivers/net/phy/intel-xway.c @@ -529,7 +529,7 @@ static int xway_gphy_led_polarity_set(struct phy_device *phydev, int index, if (force_active_high) return phy_clear_bits(phydev, XWAY_MDIO_LED, XWAY_GPHY_LED_INV(index)); - unreachable(); + return -EINVAL; } static struct phy_driver xway_gphy[] = { diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c index db3c1f72b407..a8ccf257c109 100644 --- a/drivers/net/phy/mxl-gpy.c +++ b/drivers/net/phy/mxl-gpy.c @@ -1014,7 +1014,7 @@ static int gpy_led_polarity_set(struct phy_device *phydev, int index, if (force_active_high) return phy_clear_bits(phydev, PHY_LED, PHY_LED_POLARITY(index)); - unreachable(); + return -EINVAL; } static struct phy_driver gpy_drivers[] = { -- 2.51.0 From 5c964c8a97c12145104f5d2782aa1ffccf3a93dd Mon Sep 17 00:00:00 2001 From: Martin Hou Date: Mon, 16 Dec 2024 11:06:18 +0800 Subject: [PATCH 03/16] net: usb: qmi_wwan: add Quectel RG255C Add support for Quectel RG255C which is based on Qualcomm SDX35 chip. The composition is DM / NMEA / AT / QMI. T: Bus=01 Lev=01 Prnt=01 Port=04 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.01 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0316 Rev= 5.15 S: Manufacturer=Quectel S: Product=RG255C-CN S: SerialNumber=c68192c1 C:* #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=86(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Martin Hou Link: https://patch.msgid.link/tencent_17DDD787B48E8A5AB8379ED69E23A0CD9309@qq.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 9fe7f704a2f7..e9208a8d2bfa 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1429,6 +1429,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x2c7c, 0x0195, 4)}, /* Quectel EG95 */ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x030e, 4)}, /* Quectel EM05GV2 */ + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0316, 3)}, /* Quectel RG255C */ {QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */ {QMI_QUIRK_SET_DTR(0x2cb7, 0x0112, 0)}, /* Fibocom FG132 */ {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ -- 2.51.0 From a17975992cc11588767175247ccaae1213a8b582 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Tue, 17 Dec 2024 22:16:51 +0100 Subject: [PATCH 04/16] selftests: openvswitch: fix tcpdump execution Fix the way tcpdump is executed by: - Using the right variable for the namespace. Currently the use of the empty "ns" makes the command fail. - Waiting until it starts to capture to ensure the interesting traffic is caught on slow systems. - Using line-buffered output to ensure logs are available when the test is paused with "-p". Otherwise the last chunk of data might only be written when tcpdump is killed. Fixes: 74cc26f416b9 ("selftests: openvswitch: add interface support") Signed-off-by: Adrian Moreno Acked-by: Eelco Chaudron Link: https://patch.msgid.link/20241217211652.483016-1-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/openvswitch/openvswitch.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index cc0bfae2bafa..960e1ab4dd04 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -171,8 +171,10 @@ ovs_add_netns_and_veths () { ovs_add_if "$1" "$2" "$4" -u || return 1 fi - [ $TRACING -eq 1 ] && ovs_netns_spawn_daemon "$1" "$ns" \ - tcpdump -i any -s 65535 + if [ $TRACING -eq 1 ]; then + ovs_netns_spawn_daemon "$1" "$3" tcpdump -l -i any -s 6553 + ovs_wait grep -q "listening on any" ${ovs_dir}/stderr + fi return 0 } -- 2.51.0 From 16f027cd40eeedd2325f7e720689462ca8d9d13e Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 16 Dec 2024 15:50:59 +0200 Subject: [PATCH 05/16] net: dsa: restore dsa_software_vlan_untag() ability to operate on VLAN-untagged traffic Robert Hodaszi reports that locally terminated traffic towards VLAN-unaware bridge ports is broken with ocelot-8021q. He is describing the same symptoms as for commit 1f9fc48fd302 ("net: dsa: sja1105: fix reception from VLAN-unaware bridges"). For context, the set merged as "VLAN fixes for Ocelot driver": https://lore.kernel.org/netdev/20240815000707.2006121-1-vladimir.oltean@nxp.com/ was developed in a slightly different form earlier this year, in January. Initially, the switch was unconditionally configured to set OCELOT_ES0_TAG when using ocelot-8021q, regardless of port operating mode. This led to the situation where VLAN-unaware bridge ports would always push their PVID - see ocelot_vlan_unaware_pvid() - a negligible value anyway - into RX packets. To strip this in software, we would have needed DSA to know what private VID the switch chose for VLAN-unaware bridge ports, and pushed into the packets. This was implemented downstream, and a remnant of it remains in the form of a comment mentioning ds->ops->get_private_vid(), as something which would maybe need to be considered in the future. However, for upstream, it was deemed inappropriate, because it would mean introducing yet another behavior for stripping VLAN tags from VLAN-unaware bridge ports, when one already existed (ds->untag_bridge_pvid). The latter has been marked as obsolete along with an explanation why it is logically broken, but still, it would have been confusing. So, for upstream, felix_update_tag_8021q_rx_rule() was developed, which essentially changed the state of affairs from "Felix with ocelot-8021q delivers all packets as VLAN-tagged towards the CPU" into "Felix with ocelot-8021q delivers all packets from VLAN-aware bridge ports towards the CPU". This was done on the premise that in VLAN-unaware mode, there's nothing useful in the VLAN tags, and we can avoid introducing ds->ops->get_private_vid() in the DSA receive path if we configure the switch to not push those VLAN tags into packets in the first place. Unfortunately, and this is when the trainwreck started, the selftests developed initially and posted with the series were not re-ran. dsa_software_vlan_untag() was initially written given the assumption that users of this feature would send _all_ traffic as VLAN-tagged. It was only partially adapted to the new scheme, by removing ds->ops->get_private_vid(), which also used to be necessary in standalone ports mode. Where the trainwreck became even worse is that I had a second opportunity to think about this, when the dsa_software_vlan_untag() logic change initially broke sja1105, in commit 1f9fc48fd302 ("net: dsa: sja1105: fix reception from VLAN-unaware bridges"). I did not connect the dots that it also breaks ocelot-8021q, for pretty much the same reason that not all received packets will be VLAN-tagged. To be compatible with the optimized Felix control path which runs felix_update_tag_8021q_rx_rule() to only push VLAN tags when useful (in VLAN-aware mode), we need to restore the old dsa_software_vlan_untag() logic. The blamed commit introduced the assumption that dsa_software_vlan_untag() will see only VLAN-tagged packets, assumption which is false. What corrupts RX traffic is the fact that we call skb_vlan_untag() on packets which are not VLAN-tagged in the first place. Fixes: 93e4649efa96 ("net: dsa: provide a software untagging function on RX for VLAN-aware bridges") Reported-by: Robert Hodaszi Closes: https://lore.kernel.org/netdev/20241215163334.615427-1-robert.hodaszi@digi.com/ Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20241216135059.1258266-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/tag.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/dsa/tag.h b/net/dsa/tag.h index d5707870906b..5d80ddad4ff6 100644 --- a/net/dsa/tag.h +++ b/net/dsa/tag.h @@ -138,9 +138,10 @@ static inline void dsa_software_untag_vlan_unaware_bridge(struct sk_buff *skb, * dsa_software_vlan_untag: Software VLAN untagging in DSA receive path * @skb: Pointer to socket buffer (packet) * - * Receive path method for switches which cannot avoid tagging all packets - * towards the CPU port. Called when ds->untag_bridge_pvid (legacy) or - * ds->untag_vlan_aware_bridge_pvid is set to true. + * Receive path method for switches which send some packets as VLAN-tagged + * towards the CPU port (generally from VLAN-aware bridge ports) even when the + * packet was not tagged on the wire. Called when ds->untag_bridge_pvid + * (legacy) or ds->untag_vlan_aware_bridge_pvid is set to true. * * As a side effect of this method, any VLAN tag from the skb head is moved * to hwaccel. @@ -149,14 +150,19 @@ static inline struct sk_buff *dsa_software_vlan_untag(struct sk_buff *skb) { struct dsa_port *dp = dsa_user_to_port(skb->dev); struct net_device *br = dsa_port_bridge_dev_get(dp); - u16 vid; + u16 vid, proto; + int err; /* software untagging for standalone ports not yet necessary */ if (!br) return skb; + err = br_vlan_get_proto(br, &proto); + if (err) + return skb; + /* Move VLAN tag from data to hwaccel */ - if (!skb_vlan_tag_present(skb)) { + if (!skb_vlan_tag_present(skb) && skb->protocol == htons(proto)) { skb = skb_vlan_untag(skb); if (!skb) return NULL; -- 2.51.0 From 5eb70dbebf32c2fd1f2814c654ae17fc47d6e859 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 17 Dec 2024 18:25:08 -0800 Subject: [PATCH 06/16] netdev-genl: avoid empty messages in queue dump Empty netlink responses from do() are not correct (as opposed to dump() where not dumping anything is perfectly fine). We should return an error if the target object does not exist, in this case if the netdev is down it has no queues. Fixes: 6b6171db7fc8 ("netdev-genl: Add netlink framework functions for queue") Reported-by: syzbot+0a884bc2d304ce4af70f@syzkaller.appspotmail.com Reviewed-by: Eric Dumazet Reviewed-by: Joe Damato Link: https://patch.msgid.link/20241218022508.815344-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/netdev-genl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 1be8c7c21d19..2d3ae0cd3ad2 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -430,10 +430,10 @@ static int netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { - int err = 0; + int err; if (!(netdev->flags & IFF_UP)) - return err; + return -ENOENT; err = netdev_nl_queue_validate(netdev, q_idx, q_type); if (err) -- 2.51.0 From 5eecd85c77a254a43bde3212da8047b001745c9f Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Tue, 17 Dec 2024 12:37:39 +0100 Subject: [PATCH 07/16] psample: adjust size if rate_as_probability is set If PSAMPLE_ATTR_SAMPLE_PROBABILITY flag is to be sent, the available size for the packet data has to be adjusted accordingly. Also, check the error code returned by nla_put_flag. Fixes: 7b1b2b60c63f ("net: psample: allow using rate as probability") Signed-off-by: Adrian Moreno Reviewed-by: Aaron Conole Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20241217113739.3929300-1-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- net/psample/psample.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/psample/psample.c b/net/psample/psample.c index a0ddae8a65f9..25f92ba0840c 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -393,7 +393,9 @@ void psample_sample_packet(struct psample_group *group, nla_total_size_64bit(sizeof(u64)) + /* timestamp */ nla_total_size(sizeof(u16)) + /* protocol */ (md->user_cookie_len ? - nla_total_size(md->user_cookie_len) : 0); /* user cookie */ + nla_total_size(md->user_cookie_len) : 0) + /* user cookie */ + (md->rate_as_probability ? + nla_total_size(0) : 0); /* rate as probability */ #ifdef CONFIG_INET tun_info = skb_tunnel_info(skb); @@ -498,8 +500,9 @@ void psample_sample_packet(struct psample_group *group, md->user_cookie)) goto error; - if (md->rate_as_probability) - nla_put_flag(nl_skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY); + if (md->rate_as_probability && + nla_put_flag(nl_skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY)) + goto error; genlmsg_end(nl_skb, data); genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, -- 2.51.0 From 51df947678360faf1967fe0bd1a40c681f634104 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 16 Dec 2024 21:23:24 -0800 Subject: [PATCH 08/16] octeontx2-pf: fix netdev memory leak in rvu_rep_create() When rvu_rep_devlink_port_register() fails, free_netdev(ndev) for this incomplete iteration before going to "exit:" label. Fixes: 9ed0343f561e ("octeontx2-pf: Add devlink port support") Reviewed-by: Przemek Kitszel Signed-off-by: Harshit Mogalapalli Link: https://patch.msgid.link/20241217052326.1086191-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/rep.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c index 232b10740c13..9e3fcbae5dee 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c @@ -680,8 +680,10 @@ int rvu_rep_create(struct otx2_nic *priv, struct netlink_ext_ack *extack) ndev->features |= ndev->hw_features; eth_hw_addr_random(ndev); err = rvu_rep_devlink_port_register(rep); - if (err) + if (err) { + free_netdev(ndev); goto exit; + } SET_NETDEV_DEVLINK_PORT(ndev, &rep->dl_port); err = register_netdev(ndev); -- 2.51.0 From b95c8c33ae687fcd3007cefa93907a6bd270119b Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 16 Dec 2024 21:23:25 -0800 Subject: [PATCH 09/16] octeontx2-pf: fix error handling of devlink port in rvu_rep_create() Unregister the devlink port when register_netdev() fails. Fixes: 9ed0343f561e ("octeontx2-pf: Add devlink port support") Reviewed-by: Przemek Kitszel Signed-off-by: Harshit Mogalapalli Link: https://patch.msgid.link/20241217052326.1086191-2-harshit.m.mogalapalli@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/rep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c index 9e3fcbae5dee..04e08e06f30f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c @@ -690,6 +690,7 @@ int rvu_rep_create(struct otx2_nic *priv, struct netlink_ext_ack *extack) if (err) { NL_SET_ERR_MSG_MOD(extack, "PFVF representor registration failed"); + rvu_rep_devlink_port_unregister(rep); free_netdev(ndev); goto exit; } -- 2.51.0 From 572af9f284669d31d9175122bbef9bc62cea8ded Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Wed, 18 Dec 2024 12:51:06 +0900 Subject: [PATCH 10/16] net: mdiobus: fix an OF node reference leak fwnode_find_mii_timestamper() calls of_parse_phandle_with_fixed_args() but does not decrement the refcount of the obtained OF node. Add an of_node_put() call before returning from the function. This bug was detected by an experimental static analysis tool that I am developing. Fixes: bc1bee3b87ee ("net: mdiobus: Introduce fwnode_mdiobus_register_phy()") Signed-off-by: Joe Hattori Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241218035106.1436405-1-joe@pf.is.s.u-tokyo.ac.jp Signed-off-by: Paolo Abeni --- drivers/net/mdio/fwnode_mdio.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index b156493d7084..aea0f0357568 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -40,6 +40,7 @@ fwnode_find_pse_control(struct fwnode_handle *fwnode) static struct mii_timestamper * fwnode_find_mii_timestamper(struct fwnode_handle *fwnode) { + struct mii_timestamper *mii_ts; struct of_phandle_args arg; int err; @@ -53,10 +54,16 @@ fwnode_find_mii_timestamper(struct fwnode_handle *fwnode) else if (err) return ERR_PTR(err); - if (arg.args_count != 1) - return ERR_PTR(-EINVAL); + if (arg.args_count != 1) { + mii_ts = ERR_PTR(-EINVAL); + goto put_node; + } + + mii_ts = register_mii_timestamper(arg.np, arg.args[0]); - return register_mii_timestamper(arg.np, arg.args[0]); +put_node: + of_node_put(arg.np); + return mii_ts; } int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio, -- 2.51.0 From ce1219c3f76bb131d095e90521506d3c6ccfa086 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 18 Dec 2024 11:53:01 +0800 Subject: [PATCH 11/16] net: mctp: handle skb cleanup on sock_queue failures Currently, we don't use the return value from sock_queue_rcv_skb, which means we may leak skbs if a message is not successfully queued to a socket. Instead, ensure that we're freeing the skb where the sock hasn't otherwise taken ownership of the skb by adding checks on the sock_queue_rcv_skb() to invoke a kfree on failure. In doing so, rather than using the 'rc' value to trigger the kfree_skb(), use the skb pointer itself, which is more explicit. Also, add a kunit test for the sock delivery failure cases. Fixes: 4a992bbd3650 ("mctp: Implement message fragmentation & reassembly") Cc: stable@vger.kernel.org Signed-off-by: Jeremy Kerr Link: https://patch.msgid.link/20241218-mctp-next-v2-1-1c1729645eaa@codeconstruct.com.au Signed-off-by: Paolo Abeni --- net/mctp/route.c | 36 +++++++++++----- net/mctp/test/route-test.c | 86 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 10 deletions(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index 597e9cf5aa64..3f2bd65ff5e3 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -374,8 +374,13 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) msk = NULL; rc = -EINVAL; - /* we may be receiving a locally-routed packet; drop source sk - * accounting + /* We may be receiving a locally-routed packet; drop source sk + * accounting. + * + * From here, we will either queue the skb - either to a frag_queue, or + * to a receiving socket. When that succeeds, we clear the skb pointer; + * a non-NULL skb on exit will be otherwise unowned, and hence + * kfree_skb()-ed. */ skb_orphan(skb); @@ -434,7 +439,9 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * pending key. */ if (flags & MCTP_HDR_FLAG_EOM) { - sock_queue_rcv_skb(&msk->sk, skb); + rc = sock_queue_rcv_skb(&msk->sk, skb); + if (!rc) + skb = NULL; if (key) { /* we've hit a pending reassembly; not much we * can do but drop it @@ -443,7 +450,6 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) MCTP_TRACE_KEY_REPLIED); key = NULL; } - rc = 0; goto out_unlock; } @@ -470,8 +476,10 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * this function. */ rc = mctp_key_add(key, msk); - if (!rc) + if (!rc) { trace_mctp_key_acquire(key); + skb = NULL; + } /* we don't need to release key->lock on exit, so * clean up here and suppress the unlock via @@ -489,6 +497,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) key = NULL; } else { rc = mctp_frag_queue(key, skb); + if (!rc) + skb = NULL; } } @@ -503,12 +513,19 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) else rc = mctp_frag_queue(key, skb); + if (rc) + goto out_unlock; + + /* we've queued; the queue owns the skb now */ + skb = NULL; + /* end of message? deliver to socket, and we're done with * the reassembly/response key */ - if (!rc && flags & MCTP_HDR_FLAG_EOM) { - sock_queue_rcv_skb(key->sk, key->reasm_head); - key->reasm_head = NULL; + if (flags & MCTP_HDR_FLAG_EOM) { + rc = sock_queue_rcv_skb(key->sk, key->reasm_head); + if (!rc) + key->reasm_head = NULL; __mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED); key = NULL; } @@ -527,8 +544,7 @@ out_unlock: if (any_key) mctp_key_unref(any_key); out: - if (rc) - kfree_skb(skb); + kfree_skb(skb); return rc; } diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 8551dab1d1e6..17165b86ce22 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -837,6 +837,90 @@ static void mctp_test_route_input_multiple_nets_key(struct kunit *test) mctp_test_route_input_multiple_nets_key_fini(test, &t2); } +/* Input route to socket, using a single-packet message, where sock delivery + * fails. Ensure we're handling the failure appropriately. + */ +static void mctp_test_route_input_sk_fail_single(struct kunit *test) +{ + const struct mctp_hdr hdr = RX_HDR(1, 10, 8, FL_S | FL_E | FL_TO); + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct socket *sock; + struct sk_buff *skb; + int rc; + + __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + + /* No rcvbuf space, so delivery should fail. __sock_set_rcvbuf will + * clamp the minimum to SOCK_MIN_RCVBUF, so we open-code this. + */ + lock_sock(sock->sk); + WRITE_ONCE(sock->sk->sk_rcvbuf, 0); + release_sock(sock->sk); + + skb = mctp_test_create_skb(&hdr, 10); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + skb_get(skb); + + mctp_test_skb_set_dev(skb, dev); + + /* do route input, which should fail */ + rc = mctp_route_input(&rt->rt, skb); + KUNIT_EXPECT_NE(test, rc, 0); + + /* we should hold the only reference to skb */ + KUNIT_EXPECT_EQ(test, refcount_read(&skb->users), 1); + kfree_skb(skb); + + __mctp_route_test_fini(test, dev, rt, sock); +} + +/* Input route to socket, using a fragmented message, where sock delivery fails. + */ +static void mctp_test_route_input_sk_fail_frag(struct kunit *test) +{ + const struct mctp_hdr hdrs[2] = { RX_FRAG(FL_S, 0), RX_FRAG(FL_E, 1) }; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct sk_buff *skbs[2]; + struct socket *sock; + unsigned int i; + int rc; + + __mctp_route_test_init(test, &dev, &rt, &sock, MCTP_NET_ANY); + + lock_sock(sock->sk); + WRITE_ONCE(sock->sk->sk_rcvbuf, 0); + release_sock(sock->sk); + + for (i = 0; i < ARRAY_SIZE(skbs); i++) { + skbs[i] = mctp_test_create_skb(&hdrs[i], 10); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skbs[i]); + skb_get(skbs[i]); + + mctp_test_skb_set_dev(skbs[i], dev); + } + + /* first route input should succeed, we're only queueing to the + * frag list + */ + rc = mctp_route_input(&rt->rt, skbs[0]); + KUNIT_EXPECT_EQ(test, rc, 0); + + /* final route input should fail to deliver to the socket */ + rc = mctp_route_input(&rt->rt, skbs[1]); + KUNIT_EXPECT_NE(test, rc, 0); + + /* we should hold the only reference to both skbs */ + KUNIT_EXPECT_EQ(test, refcount_read(&skbs[0]->users), 1); + kfree_skb(skbs[0]); + + KUNIT_EXPECT_EQ(test, refcount_read(&skbs[1]->users), 1); + kfree_skb(skbs[1]); + + __mctp_route_test_fini(test, dev, rt, sock); +} + #if IS_ENABLED(CONFIG_MCTP_FLOWS) static void mctp_test_flow_init(struct kunit *test, @@ -1053,6 +1137,8 @@ static struct kunit_case mctp_test_cases[] = { mctp_route_input_sk_reasm_gen_params), KUNIT_CASE_PARAM(mctp_test_route_input_sk_keys, mctp_route_input_sk_keys_gen_params), + KUNIT_CASE(mctp_test_route_input_sk_fail_single), + KUNIT_CASE(mctp_test_route_input_sk_fail_frag), KUNIT_CASE(mctp_test_route_input_multiple_nets_bind), KUNIT_CASE(mctp_test_route_input_multiple_nets_key), KUNIT_CASE(mctp_test_packet_flow), -- 2.51.0 From 25c6a5ab151fb9c886552bf5aa7cbf2a5c6e96af Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Tue, 17 Dec 2024 14:35:00 +0800 Subject: [PATCH 12/16] net: phy: micrel: Dynamically control external clock of KSZ PHY On the i.MX6ULL-14x14-EVK board, enet1_ref and enet2_ref are used as the clock sources for two external KSZ PHYs. However, after closing the two FEC ports, the clk_enable_count of the enet1_ref and enet2_ref clocks is not 0. The root cause is that since the commit 985329462723 ("net: phy: micrel: use devm_clk_get_optional_enabled for the rmii-ref clock"), the external clock of KSZ PHY has been enabled when the PHY driver probes, and it can only be disabled when the PHY driver is removed. This causes the clock to continue working when the system is suspended or the network port is down. Although Heiko explained in the commit message that the patch was because some clock suppliers need to enable the clock to get the valid clock rate , it seems that the simple fix is to disable the clock after getting the clock rate to solve the current problem. This is indeed true, but we need to admit that Heiko's patch has been applied for more than a year, and we cannot guarantee whether there are platforms that only enable rmii-ref in the KSZ PHY driver during this period. If this is the case, disabling rmii-ref will cause RMII on these platforms to not work. Secondly, commit 99ac4cbcc2a5 ("net: phy: micrel: allow usage of generic ethernet-phy clock") just simply enables the generic clock permanently, which seems like the generic clock may only be enabled in the PHY driver. If we simply disable the generic clock, RMII may not work. If we keep it as it is, the platform using the generic clock will have the same problem as the i.MX6ULL platform. To solve this problem, the clock is enabled when phy_driver::resume() is called, and the clock is disabled when phy_driver::suspend() is called. Since phy_driver::resume() and phy_driver::suspend() are not called in pairs, an additional clk_enable flag is added. When phy_driver::suspend() is called, the clock is disabled only if clk_enable is true. Conversely, when phy_driver::resume() is called, the clock is enabled if clk_enable is false. The changes that introduced the problem were only a few lines, while the current fix is about a hundred lines, which seems out of proportion, but it is necessary because kszphy_probe() is used by multiple KSZ PHYs and we need to fix all of them. Fixes: 985329462723 ("net: phy: micrel: use devm_clk_get_optional_enabled for the rmii-ref clock") Fixes: 99ac4cbcc2a5 ("net: phy: micrel: allow usage of generic ethernet-phy clock") Signed-off-by: Wei Fang Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20241217063500.1424011-1-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 114 ++++++++++++++++++++++++++++++++++----- 1 file changed, 101 insertions(+), 13 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 3ef508840674..eeb33eb181ac 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -432,10 +432,12 @@ struct kszphy_ptp_priv { struct kszphy_priv { struct kszphy_ptp_priv ptp_priv; const struct kszphy_type *type; + struct clk *clk; int led_mode; u16 vct_ctrl1000; bool rmii_ref_clk_sel; bool rmii_ref_clk_sel_val; + bool clk_enable; u64 stats[ARRAY_SIZE(kszphy_hw_stats)]; }; @@ -2050,6 +2052,46 @@ static void kszphy_get_stats(struct phy_device *phydev, data[i] = kszphy_get_stat(phydev, i); } +static void kszphy_enable_clk(struct phy_device *phydev) +{ + struct kszphy_priv *priv = phydev->priv; + + if (!priv->clk_enable && priv->clk) { + clk_prepare_enable(priv->clk); + priv->clk_enable = true; + } +} + +static void kszphy_disable_clk(struct phy_device *phydev) +{ + struct kszphy_priv *priv = phydev->priv; + + if (priv->clk_enable && priv->clk) { + clk_disable_unprepare(priv->clk); + priv->clk_enable = false; + } +} + +static int kszphy_generic_resume(struct phy_device *phydev) +{ + kszphy_enable_clk(phydev); + + return genphy_resume(phydev); +} + +static int kszphy_generic_suspend(struct phy_device *phydev) +{ + int ret; + + ret = genphy_suspend(phydev); + if (ret) + return ret; + + kszphy_disable_clk(phydev); + + return 0; +} + static int kszphy_suspend(struct phy_device *phydev) { /* Disable PHY Interrupts */ @@ -2059,7 +2101,7 @@ static int kszphy_suspend(struct phy_device *phydev) phydev->drv->config_intr(phydev); } - return genphy_suspend(phydev); + return kszphy_generic_suspend(phydev); } static void kszphy_parse_led_mode(struct phy_device *phydev) @@ -2090,7 +2132,9 @@ static int kszphy_resume(struct phy_device *phydev) { int ret; - genphy_resume(phydev); + ret = kszphy_generic_resume(phydev); + if (ret) + return ret; /* After switching from power-down to normal mode, an internal global * reset is automatically generated. Wait a minimum of 1 ms before @@ -2112,6 +2156,24 @@ static int kszphy_resume(struct phy_device *phydev) return 0; } +/* Because of errata DS80000700A, receiver error following software + * power down. Suspend and resume callbacks only disable and enable + * external rmii reference clock. + */ +static int ksz8041_resume(struct phy_device *phydev) +{ + kszphy_enable_clk(phydev); + + return 0; +} + +static int ksz8041_suspend(struct phy_device *phydev) +{ + kszphy_disable_clk(phydev); + + return 0; +} + static int ksz9477_resume(struct phy_device *phydev) { int ret; @@ -2159,7 +2221,10 @@ static int ksz8061_resume(struct phy_device *phydev) if (!(ret & BMCR_PDOWN)) return 0; - genphy_resume(phydev); + ret = kszphy_generic_resume(phydev); + if (ret) + return ret; + usleep_range(1000, 2000); /* Re-program the value after chip is reset. */ @@ -2177,6 +2242,11 @@ static int ksz8061_resume(struct phy_device *phydev) return 0; } +static int ksz8061_suspend(struct phy_device *phydev) +{ + return kszphy_suspend(phydev); +} + static int kszphy_probe(struct phy_device *phydev) { const struct kszphy_type *type = phydev->drv->driver_data; @@ -2217,10 +2287,14 @@ static int kszphy_probe(struct phy_device *phydev) } else if (!clk) { /* unnamed clock from the generic ethernet-phy binding */ clk = devm_clk_get_optional_enabled(&phydev->mdio.dev, NULL); - if (IS_ERR(clk)) - return PTR_ERR(clk); } + if (IS_ERR(clk)) + return PTR_ERR(clk); + + clk_disable_unprepare(clk); + priv->clk = clk; + if (ksz8041_fiber_mode(phydev)) phydev->port = PORT_FIBRE; @@ -5290,6 +5364,21 @@ static int lan8841_probe(struct phy_device *phydev) return 0; } +static int lan8804_resume(struct phy_device *phydev) +{ + return kszphy_resume(phydev); +} + +static int lan8804_suspend(struct phy_device *phydev) +{ + return kszphy_generic_suspend(phydev); +} + +static int lan8841_resume(struct phy_device *phydev) +{ + return kszphy_generic_resume(phydev); +} + static int lan8841_suspend(struct phy_device *phydev) { struct kszphy_priv *priv = phydev->priv; @@ -5298,7 +5387,7 @@ static int lan8841_suspend(struct phy_device *phydev) if (ptp_priv->ptp_clock) ptp_cancel_worker_sync(ptp_priv->ptp_clock); - return genphy_suspend(phydev); + return kszphy_generic_suspend(phydev); } static struct phy_driver ksphy_driver[] = { @@ -5358,9 +5447,8 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, - /* No suspend/resume callbacks because of errata DS80000700A, - * receiver error following software power down. - */ + .suspend = ksz8041_suspend, + .resume = ksz8041_resume, }, { .phy_id = PHY_ID_KSZ8041RNLI, .phy_id_mask = MICREL_PHY_ID_MASK, @@ -5436,7 +5524,7 @@ static struct phy_driver ksphy_driver[] = { .soft_reset = genphy_soft_reset, .config_intr = kszphy_config_intr, .handle_interrupt = kszphy_handle_interrupt, - .suspend = kszphy_suspend, + .suspend = ksz8061_suspend, .resume = ksz8061_resume, }, { .phy_id = PHY_ID_KSZ9021, @@ -5507,8 +5595,8 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, - .suspend = genphy_suspend, - .resume = kszphy_resume, + .suspend = lan8804_suspend, + .resume = lan8804_resume, .config_intr = lan8804_config_intr, .handle_interrupt = lan8804_handle_interrupt, }, { @@ -5526,7 +5614,7 @@ static struct phy_driver ksphy_driver[] = { .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, .suspend = lan8841_suspend, - .resume = genphy_resume, + .resume = lan8841_resume, .cable_test_start = lan8814_cable_test_start, .cable_test_get_status = ksz886x_cable_test_get_status, }, { -- 2.51.0 From 262bfba8ab820641c8cfbbf03b86d6c00242c078 Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Tue, 17 Dec 2024 18:02:23 -0800 Subject: [PATCH 13/16] net: dsa: microchip: Fix KSZ9477 set_ageing_time function The aging count is not a simple 11-bit value but comprises a 3-bit multiplier and an 8-bit second count. The code tries to use the original multiplier which is 4 as the second count is still 300 seconds by default. Fixes: 2c119d9982b1 ("net: dsa: microchip: add the support for set_ageing_time") Signed-off-by: Tristram Ha Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241218020224.70590-2-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz9477.c | 47 +++++++++++++++++++------ drivers/net/dsa/microchip/ksz9477_reg.h | 4 +-- 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index d16817e0476f..29fe79ea74cd 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -2,7 +2,7 @@ /* * Microchip KSZ9477 switch driver main logic * - * Copyright (C) 2017-2019 Microchip Technology Inc. + * Copyright (C) 2017-2024 Microchip Technology Inc. */ #include @@ -983,26 +983,51 @@ void ksz9477_get_caps(struct ksz_device *dev, int port, int ksz9477_set_ageing_time(struct ksz_device *dev, unsigned int msecs) { u32 secs = msecs / 1000; - u8 value; - u8 data; + u8 data, mult, value; + u32 max_val; int ret; - value = FIELD_GET(SW_AGE_PERIOD_7_0_M, secs); +#define MAX_TIMER_VAL ((1 << 8) - 1) - ret = ksz_write8(dev, REG_SW_LUE_CTRL_3, value); - if (ret < 0) - return ret; + /* The aging timer comprises a 3-bit multiplier and an 8-bit second + * value. Either of them cannot be zero. The maximum timer is then + * 7 * 255 = 1785 seconds. + */ + if (!secs) + secs = 1; - data = FIELD_GET(SW_AGE_PERIOD_10_8_M, secs); + /* Return error if too large. */ + else if (secs > 7 * MAX_TIMER_VAL) + return -EINVAL; ret = ksz_read8(dev, REG_SW_LUE_CTRL_0, &value); if (ret < 0) return ret; - value &= ~SW_AGE_CNT_M; - value |= FIELD_PREP(SW_AGE_CNT_M, data); + /* Check whether there is need to update the multiplier. */ + mult = FIELD_GET(SW_AGE_CNT_M, value); + max_val = MAX_TIMER_VAL; + if (mult > 0) { + /* Try to use the same multiplier already in the register as + * the hardware default uses multiplier 4 and 75 seconds for + * 300 seconds. + */ + max_val = DIV_ROUND_UP(secs, mult); + if (max_val > MAX_TIMER_VAL || max_val * mult != secs) + max_val = MAX_TIMER_VAL; + } + + data = DIV_ROUND_UP(secs, max_val); + if (mult != data) { + value &= ~SW_AGE_CNT_M; + value |= FIELD_PREP(SW_AGE_CNT_M, data); + ret = ksz_write8(dev, REG_SW_LUE_CTRL_0, value); + if (ret < 0) + return ret; + } - return ksz_write8(dev, REG_SW_LUE_CTRL_0, value); + value = DIV_ROUND_UP(secs, data); + return ksz_write8(dev, REG_SW_LUE_CTRL_3, value); } void ksz9477_port_queue_split(struct ksz_device *dev, int port) diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index 04235c22bf40..ff579920078e 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -2,7 +2,7 @@ /* * Microchip KSZ9477 register definitions * - * Copyright (C) 2017-2018 Microchip Technology Inc. + * Copyright (C) 2017-2024 Microchip Technology Inc. */ #ifndef __KSZ9477_REGS_H @@ -165,8 +165,6 @@ #define SW_VLAN_ENABLE BIT(7) #define SW_DROP_INVALID_VID BIT(6) #define SW_AGE_CNT_M GENMASK(5, 3) -#define SW_AGE_CNT_S 3 -#define SW_AGE_PERIOD_10_8_M GENMASK(10, 8) #define SW_RESV_MCAST_ENABLE BIT(2) #define SW_HASH_OPTION_M 0x03 #define SW_HASH_OPTION_CRC 1 -- 2.51.0 From bb9869043438af5b94230f94fb4c39206525d758 Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Tue, 17 Dec 2024 18:02:24 -0800 Subject: [PATCH 14/16] net: dsa: microchip: Fix LAN937X set_ageing_time function The aging count is not a simple 20-bit value but comprises a 3-bit multiplier and a 20-bit second time. The code tries to use the original multiplier which is 4 as the second count is still 300 seconds by default. As the 20-bit number is now too large for practical use there is an option to interpret it as microseconds instead of seconds. Fixes: 2c119d9982b1 ("net: dsa: microchip: add the support for set_ageing_time") Signed-off-by: Tristram Ha Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241218020224.70590-3-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/lan937x_main.c | 62 ++++++++++++++++++++++-- drivers/net/dsa/microchip/lan937x_reg.h | 9 ++-- 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/microchip/lan937x_main.c b/drivers/net/dsa/microchip/lan937x_main.c index b7652efd632e..b1ae3b9de3d1 100644 --- a/drivers/net/dsa/microchip/lan937x_main.c +++ b/drivers/net/dsa/microchip/lan937x_main.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Microchip LAN937X switch driver main logic - * Copyright (C) 2019-2022 Microchip Technology Inc. + * Copyright (C) 2019-2024 Microchip Technology Inc. */ #include #include @@ -461,10 +461,66 @@ int lan937x_change_mtu(struct ksz_device *dev, int port, int new_mtu) int lan937x_set_ageing_time(struct ksz_device *dev, unsigned int msecs) { - u32 secs = msecs / 1000; - u32 value; + u8 data, mult, value8; + bool in_msec = false; + u32 max_val, value; + u32 secs = msecs; int ret; +#define MAX_TIMER_VAL ((1 << 20) - 1) + + /* The aging timer comprises a 3-bit multiplier and a 20-bit second + * value. Either of them cannot be zero. The maximum timer is then + * 7 * 1048575 = 7340025 seconds. As this value is too large for + * practical use it can be interpreted as microseconds, making the + * maximum timer 7340 seconds with finer control. This allows for + * maximum 122 minutes compared to 29 minutes in KSZ9477 switch. + */ + if (msecs % 1000) + in_msec = true; + else + secs /= 1000; + if (!secs) + secs = 1; + + /* Return error if too large. */ + else if (secs > 7 * MAX_TIMER_VAL) + return -EINVAL; + + /* Configure how to interpret the number value. */ + ret = ksz_rmw8(dev, REG_SW_LUE_CTRL_2, SW_AGE_CNT_IN_MICROSEC, + in_msec ? SW_AGE_CNT_IN_MICROSEC : 0); + if (ret < 0) + return ret; + + ret = ksz_read8(dev, REG_SW_LUE_CTRL_0, &value8); + if (ret < 0) + return ret; + + /* Check whether there is need to update the multiplier. */ + mult = FIELD_GET(SW_AGE_CNT_M, value8); + max_val = MAX_TIMER_VAL; + if (mult > 0) { + /* Try to use the same multiplier already in the register as + * the hardware default uses multiplier 4 and 75 seconds for + * 300 seconds. + */ + max_val = DIV_ROUND_UP(secs, mult); + if (max_val > MAX_TIMER_VAL || max_val * mult != secs) + max_val = MAX_TIMER_VAL; + } + + data = DIV_ROUND_UP(secs, max_val); + if (mult != data) { + value8 &= ~SW_AGE_CNT_M; + value8 |= FIELD_PREP(SW_AGE_CNT_M, data); + ret = ksz_write8(dev, REG_SW_LUE_CTRL_0, value8); + if (ret < 0) + return ret; + } + + secs = DIV_ROUND_UP(secs, data); + value = FIELD_GET(SW_AGE_PERIOD_7_0_M, secs); ret = ksz_write8(dev, REG_SW_AGE_PERIOD__1, value); diff --git a/drivers/net/dsa/microchip/lan937x_reg.h b/drivers/net/dsa/microchip/lan937x_reg.h index 4ec93e421da4..72042fd64e5b 100644 --- a/drivers/net/dsa/microchip/lan937x_reg.h +++ b/drivers/net/dsa/microchip/lan937x_reg.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Microchip LAN937X switch register definitions - * Copyright (C) 2019-2021 Microchip Technology Inc. + * Copyright (C) 2019-2024 Microchip Technology Inc. */ #ifndef __LAN937X_REG_H #define __LAN937X_REG_H @@ -56,8 +56,7 @@ #define SW_VLAN_ENABLE BIT(7) #define SW_DROP_INVALID_VID BIT(6) -#define SW_AGE_CNT_M 0x7 -#define SW_AGE_CNT_S 3 +#define SW_AGE_CNT_M GENMASK(5, 3) #define SW_RESV_MCAST_ENABLE BIT(2) #define REG_SW_LUE_CTRL_1 0x0311 @@ -70,6 +69,10 @@ #define SW_FAST_AGING BIT(1) #define SW_LINK_AUTO_AGING BIT(0) +#define REG_SW_LUE_CTRL_2 0x0312 + +#define SW_AGE_CNT_IN_MICROSEC BIT(7) + #define REG_SW_AGE_PERIOD__1 0x0313 #define SW_AGE_PERIOD_7_0_M GENMASK(7, 0) -- 2.51.0 From 6321f5fb70d502d95de8a212a7b484c297ec9644 Mon Sep 17 00:00:00 2001 From: Joshua Washington Date: Wed, 18 Dec 2024 05:34:11 -0800 Subject: [PATCH 15/16] gve: clean XDP queues in gve_tx_stop_ring_gqi When stopping XDP TX rings, the XDP clean function needs to be called to clean out the entire queue, similar to what happens in the normal TX queue case. Otherwise, the FIFO won't be cleared correctly, and xsk_tx_completed won't be reported. Fixes: 75eaae158b1b ("gve: Add XDP DROP and TX support for GQI-QPL format") Cc: stable@vger.kernel.org Signed-off-by: Joshua Washington Signed-off-by: Praveen Kaligineedi Reviewed-by: Praveen Kaligineedi Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_tx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index e7fb7d6d283d..83ad278ec91f 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -206,7 +206,10 @@ void gve_tx_stop_ring_gqi(struct gve_priv *priv, int idx) return; gve_remove_napi(priv, ntfy_idx); - gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false); + if (tx->q_num < priv->tx_cfg.num_queues) + gve_clean_tx_done(priv, tx, priv->tx_desc_cnt, false); + else + gve_clean_xdp_done(priv, tx, priv->tx_desc_cnt); netdev_tx_reset_queue(tx->netdev_txq); gve_tx_remove_from_block(priv, idx); } -- 2.51.0 From ff7c2dea9dd1a436fc79d6273adffdcc4a7ffea3 Mon Sep 17 00:00:00 2001 From: Joshua Washington Date: Wed, 18 Dec 2024 05:34:12 -0800 Subject: [PATCH 16/16] gve: guard XDP xmit NDO on existence of xdp queues In GVE, dedicated XDP queues only exist when an XDP program is installed and the interface is up. As such, the NDO XDP XMIT callback should return early if either of these conditions are false. In the case of no loaded XDP program, priv->num_xdp_queues=0 which can cause a divide-by-zero error, and in the case of interface down, num_xdp_queues remains untouched to persist XDP queue count for the next interface up, but the TX pointer itself would be NULL. The XDP xmit callback also needs to synchronize with a device transitioning from open to close. This synchronization will happen via the GVE_PRIV_FLAGS_NAPI_ENABLED bit along with a synchronize_net() call, which waits for any RCU critical sections at call-time to complete. Fixes: 39a7f4aa3e4a ("gve: Add XDP REDIRECT support for GQI-QPL format") Cc: stable@vger.kernel.org Signed-off-by: Joshua Washington Signed-off-by: Praveen Kaligineedi Reviewed-by: Praveen Kaligineedi Reviewed-by: Shailend Chand Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/ethernet/google/gve/gve_main.c | 3 +++ drivers/net/ethernet/google/gve/gve_tx.c | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index e171ca248f9a..5d7b0cc59959 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1899,6 +1899,9 @@ static void gve_turndown(struct gve_priv *priv) gve_clear_napi_enabled(priv); gve_clear_report_stats(priv); + + /* Make sure that all traffic is finished processing. */ + synchronize_net(); } static void gve_turnup(struct gve_priv *priv) diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 83ad278ec91f..852f8c7e39d2 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -837,9 +837,12 @@ int gve_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, struct gve_tx_ring *tx; int i, err = 0, qid; - if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK) || !priv->xdp_prog) return -EINVAL; + if (!gve_get_napi_enabled(priv)) + return -ENETDOWN; + qid = gve_xdp_tx_queue_id(priv, smp_processor_id() % priv->num_xdp_queues); -- 2.51.0