From 5cff9d1756fbe5c3f15e0284f6713f49b5c5164a Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 16 Jan 2025 18:29:49 -0500 Subject: [PATCH 01/16] net: xilinx: axienet: Add some symbolic constants for IRQ delay timer Instead of using literals, add some symbolic constants for the IRQ delay timer calculation. Signed-off-by: Sean Anderson Reviewed by: Shannon Nelson Link: https://patch.msgid.link/20250116232954.2696930-2-sean.anderson@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xilinx/xilinx_axienet.h | 3 +++ drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 7 ++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index d64b8abcf018..a3f4f3e42587 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -120,6 +120,9 @@ #define XAXIDMA_IRQ_ERROR_MASK 0x00004000 /* Error interrupt */ #define XAXIDMA_IRQ_ALL_MASK 0x00007000 /* All interrupts */ +/* Constant to convert delay counts to microseconds */ +#define XAXIDMA_DELAY_SCALE (125ULL * USEC_PER_SEC) + /* Default TX/RX Threshold and delay timer values for SGDMA mode */ #define XAXIDMA_DFT_TX_THRESHOLD 24 #define XAXIDMA_DFT_TX_USEC 50 diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 07850ad331dd..0532dc94ee93 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -238,11 +238,8 @@ static u32 axienet_usec_to_timer(struct axienet_local *lp, u32 coalesce_usec) /* 1 Timeout Interval = 125 * (clock period of SG clock) */ result = DIV64_U64_ROUND_CLOSEST((u64)coalesce_usec * clk_rate, - (u64)125000000); - if (result > 255) - result = 255; - - return result; + XAXIDMA_DELAY_SCALE); + return min(result, FIELD_MAX(XAXIDMA_DELAY_MASK)); } /** -- 2.51.0 From 9d301a53a532e60586316f3401ca4912601dbb65 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 16 Jan 2025 18:29:50 -0500 Subject: [PATCH 02/16] net: xilinx: axienet: Report an error for bad coalesce settings Instead of silently ignoring invalid/unsupported settings, report an error. Additionally, relax the check for non-zero usecs to apply only when it will be used (i.e. when frames != 1). Signed-off-by: Sean Anderson Reviewed by: Shannon Nelson Link: https://patch.msgid.link/20250116232954.2696930-3-sean.anderson@linux.dev Signed-off-by: Jakub Kicinski --- .../net/ethernet/xilinx/xilinx_axienet_main.c | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 0532dc94ee93..9e7fa012e4fa 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2059,14 +2059,25 @@ axienet_ethtools_set_coalesce(struct net_device *ndev, return -EINVAL; } - if (ecoalesce->rx_max_coalesced_frames) - lp->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames; - if (ecoalesce->rx_coalesce_usecs) - lp->coalesce_usec_rx = ecoalesce->rx_coalesce_usecs; - if (ecoalesce->tx_max_coalesced_frames) - lp->coalesce_count_tx = ecoalesce->tx_max_coalesced_frames; - if (ecoalesce->tx_coalesce_usecs) - lp->coalesce_usec_tx = ecoalesce->tx_coalesce_usecs; + if (!ecoalesce->rx_max_coalesced_frames || + !ecoalesce->tx_max_coalesced_frames) { + NL_SET_ERR_MSG(extack, "frames must be non-zero"); + return -EINVAL; + } + + if ((ecoalesce->rx_max_coalesced_frames > 1 && + !ecoalesce->rx_coalesce_usecs) || + (ecoalesce->tx_max_coalesced_frames > 1 && + !ecoalesce->tx_coalesce_usecs)) { + NL_SET_ERR_MSG(extack, + "usecs must be non-zero when frames is greater than one"); + return -EINVAL; + } + + lp->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames; + lp->coalesce_usec_rx = ecoalesce->rx_coalesce_usecs; + lp->coalesce_count_tx = ecoalesce->tx_max_coalesced_frames; + lp->coalesce_usec_tx = ecoalesce->tx_coalesce_usecs; return 0; } -- 2.51.0 From 5b4fd35343d72f4d4f964de2a9fe36143cc18f39 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 17 Jan 2025 18:40:31 +0100 Subject: [PATCH 03/16] mptcp: sysctl: add syn_retrans_before_tcp_fallback The number of SYN + MPC retransmissions before falling back to TCP was fixed to 2. This is certainly a good default value, but having a fixed number can be a problem in some environments. The current behaviour means that if all packets are dropped, there will be: - The initial SYN + MPC - 2 retransmissions with MPC - The next ones will be without MPTCP. So typically ~3 seconds before falling back to TCP. In some networks where some temporally blackholes are unfortunately frequent, or when a client tries to initiate connections while the network is not ready yet, this can cause new connections not to have MPTCP connections. In such environments, it is now possible to increase the number of SYN retransmissions with MPTCP options to make sure MPTCP is used. Interesting values are: - 0: the first retransmission will be done without MPTCP options: quite aggressive, but also a higher risk of detecting false-positive MPTCP blackholes. - >= 128: all SYN retransmissions will keep the MPTCP options: back to the < 6.12 behaviour. The default behaviour is not changed here. Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250117-net-next-mptcp-syn_retrans_before_tcp_fallback-v1-1-ab4b187099b0@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/networking/mptcp-sysctl.rst | 16 ++++++++++++++++ net/mptcp/ctrl.c | 21 +++++++++++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst index 95598c21fc8e..dc45c0211353 100644 --- a/Documentation/networking/mptcp-sysctl.rst +++ b/Documentation/networking/mptcp-sysctl.rst @@ -108,3 +108,19 @@ stale_loss_cnt - INTEGER This is a per-namespace sysctl. Default: 4 + +syn_retrans_before_tcp_fallback - INTEGER + The number of SYN + MP_CAPABLE retransmissions before falling back to + TCP, i.e. dropping the MPTCP options. In other words, if all the packets + are dropped on the way, there will be: + + * The initial SYN with MPTCP support + * This number of SYN retransmitted with MPTCP support + * The next SYN retransmissions will be without MPTCP support + + 0 means the first retransmission will be done without MPTCP options. + >= 128 means that all SYN retransmissions will keep the MPTCP options. A + lower number might increase false-positive MPTCP blackholes detections. + This is a per-namespace sysctl. + + Default: 2 diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index b0dd008e2114..3999e0ba2c35 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -32,6 +32,7 @@ struct mptcp_pernet { unsigned int close_timeout; unsigned int stale_loss_cnt; atomic_t active_disable_times; + u8 syn_retrans_before_tcp_fallback; unsigned long active_disable_stamp; u8 mptcp_enabled; u8 checksum_enabled; @@ -92,6 +93,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) pernet->mptcp_enabled = 1; pernet->add_addr_timeout = TCP_RTO_MAX; pernet->blackhole_timeout = 3600; + pernet->syn_retrans_before_tcp_fallback = 2; atomic_set(&pernet->active_disable_times, 0); pernet->close_timeout = TCP_TIMEWAIT_LEN; pernet->checksum_enabled = 0; @@ -245,6 +247,12 @@ static struct ctl_table mptcp_sysctl_table[] = { .proc_handler = proc_blackhole_detect_timeout, .extra1 = SYSCTL_ZERO, }, + { + .procname = "syn_retrans_before_tcp_fallback", + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + }, }; static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) @@ -269,6 +277,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) /* table[7] is for available_schedulers which is read-only info */ table[8].data = &pernet->close_timeout; table[9].data = &pernet->blackhole_timeout; + table[10].data = &pernet->syn_retrans_before_tcp_fallback; hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table, ARRAY_SIZE(mptcp_sysctl_table)); @@ -392,17 +401,21 @@ void mptcp_active_enable(struct sock *sk) void mptcp_active_detect_blackhole(struct sock *ssk, bool expired) { struct mptcp_subflow_context *subflow; - u32 timeouts; if (!sk_is_mptcp(ssk)) return; - timeouts = inet_csk(ssk)->icsk_retransmits; subflow = mptcp_subflow_ctx(ssk); if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) { - if (timeouts == 2 || (timeouts < 2 && expired)) { - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP); + struct net *net = sock_net(ssk); + u8 timeouts, to_max; + + timeouts = inet_csk(ssk)->icsk_retransmits; + to_max = mptcp_get_pernet(net)->syn_retrans_before_tcp_fallback; + + if (timeouts == to_max || (timeouts < to_max && expired)) { + MPTCP_INC_STATS(net, MPTCP_MIB_MPCAPABLEACTIVEDROP); subflow->mpc_drop = 1; mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow); } else { -- 2.51.0 From 17656eb5cfe5b7287dc7f1b21ad02acc25ecdb15 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 17 Jan 2025 10:37:26 -0800 Subject: [PATCH 04/16] eth: bnxt: fix string truncation warning in FW version MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit W=1 builds with gcc 14.2.1 report: drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c:4193:32: error: ‘%s’ directive output may be truncated writing up to 31 bytes into a region of size 27 [-Werror=format-truncation=] 4193 | "/pkg %s", buf); It's upset that we let buf be full length but then we use 5 characters for "/pkg ". The builds is also clear with clang version 19.1.5 now. Reviewed-by: Michael Chan Link: https://patch.msgid.link/20250117183726.1481524-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 540c140d52dc..65a20931c579 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -4184,7 +4184,7 @@ err: static void bnxt_get_pkgver(struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); - char buf[FW_VER_STR_LEN]; + char buf[FW_VER_STR_LEN - 5]; int len; if (!bnxt_get_pkginfo(dev, buf, sizeof(buf))) { -- 2.51.0 From bff406bc042408c021e41a439698a346119c2f11 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Jan 2025 22:46:26 +0000 Subject: [PATCH 05/16] net: destroy dev->lock later in free_netdev() syzbot complained that free_netdev() was calling netif_napi_del() after dev->lock mutex has been destroyed. This fires a warning for CONFIG_DEBUG_MUTEXES=y builds. Move mutex_destroy(&dev->lock) near the end of free_netdev(). [1] DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 0 PID: 5971 at kernel/locking/mutex.c:564 __mutex_lock_common kernel/locking/mutex.c:564 [inline] WARNING: CPU: 0 PID: 5971 at kernel/locking/mutex.c:564 __mutex_lock+0xdac/0xee0 kernel/locking/mutex.c:735 Modules linked in: CPU: 0 UID: 0 PID: 5971 Comm: syz-executor Not tainted 6.13.0-rc7-syzkaller-01131-g8d20dcda404d #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 12/27/2024 RIP: 0010:__mutex_lock_common kernel/locking/mutex.c:564 [inline] RIP: 0010:__mutex_lock+0xdac/0xee0 kernel/locking/mutex.c:735 Code: 0f b6 04 38 84 c0 0f 85 1a 01 00 00 83 3d 6f 40 4c 04 00 75 19 90 48 c7 c7 60 84 0a 8c 48 c7 c6 00 85 0a 8c e8 f5 dc 91 f5 90 <0f> 0b 90 90 90 e9 c7 f3 ff ff 90 0f 0b 90 e9 29 f8 ff ff 90 0f 0b RSP: 0018:ffffc90003317580 EFLAGS: 00010246 RAX: ee0f97edaf7b7d00 RBX: ffff8880299f8cb0 RCX: ffff8880323c9e00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffc90003317710 R08: ffffffff81602ac2 R09: 1ffff110170c519a R10: dffffc0000000000 R11: ffffed10170c519b R12: 0000000000000000 R13: 0000000000000000 R14: 1ffff92000662ec4 R15: dffffc0000000000 FS: 000055557a046500(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd581d46ff8 CR3: 000000006f870000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: netdev_lock include/linux/netdevice.h:2691 [inline] __netif_napi_del include/linux/netdevice.h:2829 [inline] netif_napi_del include/linux/netdevice.h:2848 [inline] free_netdev+0x2d9/0x610 net/core/dev.c:11621 netdev_run_todo+0xf21/0x10d0 net/core/dev.c:11189 nsim_destroy+0x3c3/0x620 drivers/net/netdevsim/netdev.c:1028 __nsim_dev_port_del+0x14b/0x1b0 drivers/net/netdevsim/dev.c:1428 nsim_dev_port_del_all drivers/net/netdevsim/dev.c:1440 [inline] nsim_dev_reload_destroy+0x28a/0x490 drivers/net/netdevsim/dev.c:1661 nsim_drv_remove+0x58/0x160 drivers/net/netdevsim/dev.c:1676 device_remove drivers/base/dd.c:567 [inline] Fixes: 1b23cdbd2bbc ("net: protect netdev->napi_list with netdev_lock()") Reported-by: syzbot+85ff1051228a04613a32@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/678add43.050a0220.303755.0016.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250117224626.1427577-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index fe5f5855593d..fab4899b83f7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11593,8 +11593,6 @@ void free_netdev(struct net_device *dev) return; } - mutex_destroy(&dev->lock); - kfree(dev->ethtool); netif_free_tx_queues(dev); netif_free_rx_queues(dev); @@ -11621,6 +11619,8 @@ void free_netdev(struct net_device *dev) netdev_free_phy_link_topology(dev); + mutex_destroy(&dev->lock); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED || dev->reg_state == NETREG_DUMMY) { -- 2.51.0 From 12d5151be01017401b8d2681f3c975265a233eaf Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 16 Jan 2025 22:38:40 +0100 Subject: [PATCH 06/16] net: phy: remove leftovers from switch to linkmode bitmaps We have some leftovers from the switch to linkmode bitmaps which - have never been used - are not used any longer - have no user outside phy_device.c So remove them. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/5493b96e-88bb-4230-a911-322659ec5167@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 54 ++---------------------------------- include/linux/phy.h | 18 ------------ 2 files changed, 2 insertions(+), 70 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 5b34d39d1d52..46713d27412b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -60,15 +60,9 @@ EXPORT_SYMBOL_GPL(phy_gbit_features); __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_fibre_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_gbit_fibre_features); -__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_all_ports_features) __ro_after_init; -EXPORT_SYMBOL_GPL(phy_gbit_all_ports_features); - __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init; EXPORT_SYMBOL_GPL(phy_10gbit_features); -__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init; -EXPORT_SYMBOL_GPL(phy_10gbit_fec_features); - const int phy_basic_ports_array[3] = { ETHTOOL_LINK_MODE_Autoneg_BIT, ETHTOOL_LINK_MODE_TP_BIT, @@ -76,12 +70,7 @@ const int phy_basic_ports_array[3] = { }; EXPORT_SYMBOL_GPL(phy_basic_ports_array); -const int phy_fibre_port_array[1] = { - ETHTOOL_LINK_MODE_FIBRE_BIT, -}; -EXPORT_SYMBOL_GPL(phy_fibre_port_array); - -const int phy_all_ports_features_array[7] = { +static const int phy_all_ports_features_array[7] = { ETHTOOL_LINK_MODE_Autoneg_BIT, ETHTOOL_LINK_MODE_TP_BIT, ETHTOOL_LINK_MODE_MII_BIT, @@ -90,7 +79,6 @@ const int phy_all_ports_features_array[7] = { ETHTOOL_LINK_MODE_BNC_BIT, ETHTOOL_LINK_MODE_Backplane_BIT, }; -EXPORT_SYMBOL_GPL(phy_all_ports_features_array); const int phy_10_100_features_array[4] = { ETHTOOL_LINK_MODE_10baseT_Half_BIT, @@ -124,20 +112,6 @@ const int phy_10gbit_features_array[1] = { }; EXPORT_SYMBOL_GPL(phy_10gbit_features_array); -static const int phy_10gbit_fec_features_array[1] = { - ETHTOOL_LINK_MODE_10000baseR_FEC_BIT, -}; - -__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; -EXPORT_SYMBOL_GPL(phy_10gbit_full_features); - -static const int phy_10gbit_full_features_array[] = { - ETHTOOL_LINK_MODE_10baseT_Full_BIT, - ETHTOOL_LINK_MODE_100baseT_Full_BIT, - ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - ETHTOOL_LINK_MODE_10000baseT_Full_BIT, -}; - static const int phy_eee_cap1_features_array[] = { ETHTOOL_LINK_MODE_100baseT_Full_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT, @@ -199,20 +173,7 @@ static void features_init(void) linkmode_set_bit_array(phy_gbit_features_array, ARRAY_SIZE(phy_gbit_features_array), phy_gbit_fibre_features); - linkmode_set_bit_array(phy_fibre_port_array, - ARRAY_SIZE(phy_fibre_port_array), - phy_gbit_fibre_features); - - /* 10/100 half/full + 1000 half/full + TP/MII/FIBRE/AUI/BNC/Backplane*/ - linkmode_set_bit_array(phy_all_ports_features_array, - ARRAY_SIZE(phy_all_ports_features_array), - phy_gbit_all_ports_features); - linkmode_set_bit_array(phy_10_100_features_array, - ARRAY_SIZE(phy_10_100_features_array), - phy_gbit_all_ports_features); - linkmode_set_bit_array(phy_gbit_features_array, - ARRAY_SIZE(phy_gbit_features_array), - phy_gbit_all_ports_features); + linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, phy_gbit_fibre_features); /* 10/100 half/full + 1000 half/full + 10G full*/ linkmode_set_bit_array(phy_all_ports_features_array, @@ -228,17 +189,6 @@ static void features_init(void) ARRAY_SIZE(phy_10gbit_features_array), phy_10gbit_features); - /* 10/100/1000/10G full */ - linkmode_set_bit_array(phy_all_ports_features_array, - ARRAY_SIZE(phy_all_ports_features_array), - phy_10gbit_full_features); - linkmode_set_bit_array(phy_10gbit_full_features_array, - ARRAY_SIZE(phy_10gbit_full_features_array), - phy_10gbit_full_features); - /* 10G FEC only */ - linkmode_set_bit_array(phy_10gbit_fec_features_array, - ARRAY_SIZE(phy_10gbit_fec_features_array), - phy_10gbit_fec_features); linkmode_set_bit_array(phy_eee_cap1_features_array, ARRAY_SIZE(phy_eee_cap1_features_array), phy_eee_cap1_features); diff --git a/include/linux/phy.h b/include/linux/phy.h index 244f747b3cd9..19f076a71f94 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -32,19 +32,6 @@ #include #include -#define PHY_DEFAULT_FEATURES (SUPPORTED_Autoneg | \ - SUPPORTED_TP | \ - SUPPORTED_MII) - -#define PHY_10BT_FEATURES (SUPPORTED_10baseT_Half | \ - SUPPORTED_10baseT_Full) - -#define PHY_100BT_FEATURES (SUPPORTED_100baseT_Half | \ - SUPPORTED_100baseT_Full) - -#define PHY_1000BT_FEATURES (SUPPORTED_1000baseT_Half | \ - SUPPORTED_1000baseT_Full) - extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1s_p2mp_features) __ro_after_init; @@ -62,16 +49,11 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap2_features) __ro_after_init; #define PHY_BASIC_T1S_P2MP_FEATURES ((unsigned long *)&phy_basic_t1s_p2mp_features) #define PHY_GBIT_FEATURES ((unsigned long *)&phy_gbit_features) #define PHY_GBIT_FIBRE_FEATURES ((unsigned long *)&phy_gbit_fibre_features) -#define PHY_GBIT_ALL_PORTS_FEATURES ((unsigned long *)&phy_gbit_all_ports_features) #define PHY_10GBIT_FEATURES ((unsigned long *)&phy_10gbit_features) -#define PHY_10GBIT_FEC_FEATURES ((unsigned long *)&phy_10gbit_fec_features) -#define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features) #define PHY_EEE_CAP1_FEATURES ((unsigned long *)&phy_eee_cap1_features) #define PHY_EEE_CAP2_FEATURES ((unsigned long *)&phy_eee_cap2_features) extern const int phy_basic_ports_array[3]; -extern const int phy_fibre_port_array[1]; -extern const int phy_all_ports_features_array[7]; extern const int phy_10_100_features_array[4]; extern const int phy_basic_t1_features_array[3]; extern const int phy_basic_t1s_p2mp_features_array[2]; -- 2.51.0 From 185e5b869071812bcb278cf0973b8a7bbce0dc27 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Jan 2025 23:21:13 +0000 Subject: [PATCH 07/16] net: introduce netdev_napi_exit() After 1b23cdbd2bbc ("net: protect netdev->napi_list with netdev_lock()") it makes sense to iterate through dev->napi_list while holding the device lock. Also call synchronize_net() at most one time. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250117232113.1612899-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index fab4899b83f7..d7cbe6ff5249 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11568,6 +11568,22 @@ free_dev: } EXPORT_SYMBOL(alloc_netdev_mqs); +static void netdev_napi_exit(struct net_device *dev) +{ + if (!list_empty(&dev->napi_list)) { + struct napi_struct *p, *n; + + netdev_lock(dev); + list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) + __netif_napi_del_locked(p); + netdev_unlock(dev); + + synchronize_net(); + } + + kvfree(dev->napi_config); +} + /** * free_netdev - free network device * @dev: device @@ -11579,8 +11595,6 @@ EXPORT_SYMBOL(alloc_netdev_mqs); */ void free_netdev(struct net_device *dev) { - struct napi_struct *p, *n; - might_sleep(); /* When called immediately after register_netdevice() failed the unwind @@ -11602,10 +11616,7 @@ void free_netdev(struct net_device *dev) /* Flush device addresses */ dev_addr_flush(dev); - list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) - netif_napi_del(p); - - kvfree(dev->napi_config); + netdev_napi_exit(dev); ref_tracker_dir_exit(&dev->refcnt_tracker); #ifdef CONFIG_PCPU_DEV_REFCNT -- 2.51.0 From a12c76a03386e32413ae8eaaefa337e491880632 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 15 Jan 2025 09:27:54 -0500 Subject: [PATCH 08/16] net: sched: refine software bypass handling in tc_run MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This patch addresses issues with filter counting in block (tcf_block), particularly for software bypass scenarios, by introducing a more accurate mechanism using useswcnt. Previously, filtercnt and skipswcnt were introduced by: Commit 2081fd3445fe ("net: sched: cls_api: add filter counter") and Commit f631ef39d819 ("net: sched: cls_api: add skip_sw counter") filtercnt tracked all tp (tcf_proto) objects added to a block, and skipswcnt counted tp objects with the skipsw attribute set. The problem is: a single tp can contain multiple filters, some with skipsw and others without. The current implementation fails in the case: When the first filter in a tp has skipsw, both skipswcnt and filtercnt are incremented, then adding a second filter without skipsw to the same tp does not modify these counters because tp->counted is already set. This results in bypass software behavior based solely on skipswcnt equaling filtercnt, even when the block includes filters without skipsw. Consequently, filters without skipsw are inadvertently bypassed. To address this, the patch introduces useswcnt in block to explicitly count tp objects containing at least one filter without skipsw. Key changes include: Whenever a filter without skipsw is added, its tp is marked with usesw and counted in useswcnt. tc_run() now uses useswcnt to determine software bypass, eliminating reliance on filtercnt and skipswcnt. This refined approach prevents software bypass for blocks containing mixed filters, ensuring correct behavior in tc_run(). Additionally, as atomic operations on useswcnt ensure thread safety and tp->lock guards access to tp->usesw and tp->counted, the broader lock down_write(&block->cb_lock) is no longer required in tc_new_tfilter(), and this resolves a performance regression caused by the filter counting mechanism during parallel filter insertions. The improvement can be demonstrated using the following script: # cat insert_tc_rules.sh tc qdisc add dev ens1f0np0 ingress for i in $(seq 16); do taskset -c $i tc -b rules_$i.txt & done wait Each of rules_$i.txt files above includes 100000 tc filter rules to a mlx5 driver NIC ens1f0np0. Without this patch: # time sh insert_tc_rules.sh real 0m50.780s user 0m23.556s sys 4m13.032s With this patch: # time sh insert_tc_rules.sh real 0m17.718s user 0m7.807s sys 3m45.050s Fixes: 047f340b36fc ("net: sched: make skip_sw actually skip software") Reported-by: Shuang Li Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Reviewed-by: Asbjørn Sloth Tønnesen Tested-by: Asbjørn Sloth Tønnesen Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 13 +++++++-- include/net/sch_generic.h | 5 ++-- net/core/dev.c | 15 ++++++----- net/sched/cls_api.c | 57 ++++++++++++++++----------------------- net/sched/cls_bpf.c | 2 ++ net/sched/cls_flower.c | 2 ++ net/sched/cls_matchall.c | 2 ++ net/sched/cls_u32.c | 4 +++ 8 files changed, 55 insertions(+), 45 deletions(-) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 22c5ab4269d7..c64fd896b1f9 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -75,11 +75,11 @@ static inline bool tcf_block_non_null_shared(struct tcf_block *block) } #ifdef CONFIG_NET_CLS_ACT -DECLARE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key); +DECLARE_STATIC_KEY_FALSE(tcf_sw_enabled_key); static inline bool tcf_block_bypass_sw(struct tcf_block *block) { - return block && block->bypass_wanted; + return block && !atomic_read(&block->useswcnt); } #endif @@ -760,6 +760,15 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common, cls_common->extack = extack; } +static inline void tcf_proto_update_usesw(struct tcf_proto *tp, u32 flags) +{ + if (tp->usesw) + return; + if (tc_skip_sw(flags) && tc_in_hw(flags)) + return; + tp->usesw = true; +} + #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb) { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 8074322dd636..d635c5b47eba 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -425,6 +425,7 @@ struct tcf_proto { spinlock_t lock; bool deleting; bool counted; + bool usesw; refcount_t refcnt; struct rcu_head rcu; struct hlist_node destroy_ht_node; @@ -474,9 +475,7 @@ struct tcf_block { struct flow_block flow_block; struct list_head owner_list; bool keep_dst; - bool bypass_wanted; - atomic_t filtercnt; /* Number of filters */ - atomic_t skipswcnt; /* Number of skip_sw filters */ + atomic_t useswcnt; atomic_t offloadcnt; /* Number of oddloaded filters */ unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */ unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */ diff --git a/net/core/dev.c b/net/core/dev.c index d7cbe6ff5249..3dab6699b1c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2248,8 +2248,8 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue); #endif #ifdef CONFIG_NET_CLS_ACT -DEFINE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key); -EXPORT_SYMBOL(tcf_bypass_check_needed_key); +DEFINE_STATIC_KEY_FALSE(tcf_sw_enabled_key); +EXPORT_SYMBOL(tcf_sw_enabled_key); #endif DEFINE_STATIC_KEY_FALSE(netstamp_needed_key); @@ -4144,10 +4144,13 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb, if (!miniq) return ret; - if (static_branch_unlikely(&tcf_bypass_check_needed_key)) { - if (tcf_block_bypass_sw(miniq->block)) - return ret; - } + /* Global bypass */ + if (!static_branch_likely(&tcf_sw_enabled_key)) + return ret; + + /* Block-wise bypass */ + if (tcf_block_bypass_sw(miniq->block)) + return ret; tc_skb_cb(skb)->mru = 0; tc_skb_cb(skb)->post_ct = false; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 7578e27260c9..8e47e5355be6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -390,6 +390,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, tp->protocol = protocol; tp->prio = prio; tp->chain = chain; + tp->usesw = !tp->ops->reoffload; spin_lock_init(&tp->lock); refcount_set(&tp->refcnt, 1); @@ -410,39 +411,31 @@ static void tcf_proto_get(struct tcf_proto *tp) refcount_inc(&tp->refcnt); } -static void tcf_maintain_bypass(struct tcf_block *block) +static void tcf_proto_count_usesw(struct tcf_proto *tp, bool add) { - int filtercnt = atomic_read(&block->filtercnt); - int skipswcnt = atomic_read(&block->skipswcnt); - bool bypass_wanted = filtercnt > 0 && filtercnt == skipswcnt; - - if (bypass_wanted != block->bypass_wanted) { #ifdef CONFIG_NET_CLS_ACT - if (bypass_wanted) - static_branch_inc(&tcf_bypass_check_needed_key); - else - static_branch_dec(&tcf_bypass_check_needed_key); -#endif - block->bypass_wanted = bypass_wanted; + struct tcf_block *block = tp->chain->block; + bool counted = false; + + if (!add) { + if (tp->usesw && tp->counted) { + if (!atomic_dec_return(&block->useswcnt)) + static_branch_dec(&tcf_sw_enabled_key); + tp->counted = false; + } + return; } -} - -static void tcf_block_filter_cnt_update(struct tcf_block *block, bool *counted, bool add) -{ - lockdep_assert_not_held(&block->cb_lock); - down_write(&block->cb_lock); - if (*counted != add) { - if (add) { - atomic_inc(&block->filtercnt); - *counted = true; - } else { - atomic_dec(&block->filtercnt); - *counted = false; - } + spin_lock(&tp->lock); + if (tp->usesw && !tp->counted) { + counted = true; + tp->counted = true; } - tcf_maintain_bypass(block); - up_write(&block->cb_lock); + spin_unlock(&tp->lock); + + if (counted && atomic_inc_return(&block->useswcnt) == 1) + static_branch_inc(&tcf_sw_enabled_key); +#endif } static void tcf_chain_put(struct tcf_chain *chain); @@ -451,7 +444,7 @@ static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, bool sig_destroy, struct netlink_ext_ack *extack) { tp->ops->destroy(tp, rtnl_held, extack); - tcf_block_filter_cnt_update(tp->chain->block, &tp->counted, false); + tcf_proto_count_usesw(tp, false); if (sig_destroy) tcf_proto_signal_destroyed(tp->chain, tp); tcf_chain_put(tp->chain); @@ -2409,7 +2402,7 @@ replay: tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, false, rtnl_held, extack); tfilter_put(tp, fh); - tcf_block_filter_cnt_update(block, &tp->counted, true); + tcf_proto_count_usesw(tp, true); /* q pointer is NULL for shared blocks */ if (q) q->flags &= ~TCQ_F_CAN_BYPASS; @@ -3532,8 +3525,6 @@ static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) if (*flags & TCA_CLS_FLAGS_IN_HW) return; *flags |= TCA_CLS_FLAGS_IN_HW; - if (tc_skip_sw(*flags)) - atomic_inc(&block->skipswcnt); atomic_inc(&block->offloadcnt); } @@ -3542,8 +3533,6 @@ static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) if (!(*flags & TCA_CLS_FLAGS_IN_HW)) return; *flags &= ~TCA_CLS_FLAGS_IN_HW; - if (tc_skip_sw(*flags)) - atomic_dec(&block->skipswcnt); atomic_dec(&block->offloadcnt); } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 1941ebec23ff..7fbe42f0e5c2 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -509,6 +509,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (!tc_in_hw(prog->gen_flags)) prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; + tcf_proto_update_usesw(tp, prog->gen_flags); + if (oldprog) { idr_replace(&head->handle_idr, prog, handle); list_replace_rcu(&oldprog->link, &prog->link); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 1008ec8a464c..03505673d523 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2503,6 +2503,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (!tc_in_hw(fnew->flags)) fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + tcf_proto_update_usesw(tp, fnew->flags); + spin_lock(&tp->lock); /* tp was deleted concurrently. -EAGAIN will cause caller to lookup diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 9f1e62ca508d..f03bf5da39ee 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -228,6 +228,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (!tc_in_hw(new->flags)) new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + tcf_proto_update_usesw(tp, new->flags); + *arg = head; rcu_assign_pointer(tp->root, new); return 0; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d3a03c57545b..2a1c00048fd6 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -951,6 +951,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (!tc_in_hw(new->flags)) new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + tcf_proto_update_usesw(tp, new->flags); + u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); tcf_exts_get_net(&n->exts); @@ -1164,6 +1166,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (!tc_in_hw(n->flags)) n->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + tcf_proto_update_usesw(tp, n->flags); + ins = &ht->ht[TC_U32_HASH(handle)]; for (pins = rtnl_dereference(*ins); pins; ins = &pins->next, pins = rtnl_dereference(*ins)) -- 2.51.0 From 544c9394065fdbb733349bcd1464087d6e12d28e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 15 Jan 2025 20:47:03 +0100 Subject: [PATCH 09/16] dsa: Use str_enable_disable-like helpers Replace ternary (condition ? "enable" : "disable") syntax with helpers from string_choices.h because: 1. Simple function call with one argument is easier to read. Ternary operator has three arguments and with wrapping might lead to quite long code. 2. Is slightly shorter thus also easier to read. 3. It brings uniformity in the text - same string. 4. Allows deduping by the linker, which results in a smaller binary file. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Linus Walleij Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/pcs-639x.c | 3 ++- drivers/net/dsa/mv88e6xxx/port.c | 3 ++- drivers/net/dsa/realtek/rtl8366rb.c | 7 ++++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/pcs-639x.c b/drivers/net/dsa/mv88e6xxx/pcs-639x.c index 026b7bfb7ee5..59f63d6beec8 100644 --- a/drivers/net/dsa/mv88e6xxx/pcs-639x.c +++ b/drivers/net/dsa/mv88e6xxx/pcs-639x.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "chip.h" #include "global2.h" @@ -750,7 +751,7 @@ static int mv88e6393x_sgmii_apply_2500basex_an(struct mv88e639x_pcs *mpcs, if (err) dev_err(mpcs->mdio.dev.parent, "failed to %s 2500basex fix: %pe\n", - enable ? "enable" : "disable", ERR_PTR(err)); + str_enable_disable(enable), ERR_PTR(err)); return err; } diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index dc777ddce1f3..66b1b7277281 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "chip.h" #include "global2.h" @@ -176,7 +177,7 @@ int mv88e6xxx_port_set_link(struct mv88e6xxx_chip *chip, int port, int link) dev_dbg(chip->dev, "p%d: %s link %s\n", port, reg & MV88E6XXX_PORT_MAC_CTL_FORCE_LINK ? "Force" : "Unforce", - reg & MV88E6XXX_PORT_MAC_CTL_LINK_UP ? "up" : "down"); + str_up_down(reg & MV88E6XXX_PORT_MAC_CTL_LINK_UP)); return 0; } diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c index 23374178a176..4c4a95d4380c 100644 --- a/drivers/net/dsa/realtek/rtl8366rb.c +++ b/drivers/net/dsa/realtek/rtl8366rb.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "realtek.h" #include "realtek-smi.h" @@ -1522,7 +1523,7 @@ static int rtl8366rb_vlan_filtering(struct dsa_switch *ds, int port, rb = priv->chip_data; dev_dbg(priv->dev, "port %d: %s VLAN filtering\n", port, - vlan_filtering ? "enable" : "disable"); + str_enable_disable(vlan_filtering)); /* If the port is not in the member set, the frame will be dropped */ ret = regmap_update_bits(priv->map, RTL8366RB_VLAN_INGRESS_CTRL2_REG, @@ -1884,7 +1885,7 @@ static bool rtl8366rb_is_vlan_valid(struct realtek_priv *priv, unsigned int vlan static int rtl8366rb_enable_vlan(struct realtek_priv *priv, bool enable) { - dev_dbg(priv->dev, "%s VLAN\n", enable ? "enable" : "disable"); + dev_dbg(priv->dev, "%s VLAN\n", str_enable_disable(enable)); return regmap_update_bits(priv->map, RTL8366RB_SGCR, RTL8366RB_SGCR_EN_VLAN, enable ? RTL8366RB_SGCR_EN_VLAN : 0); @@ -1892,7 +1893,7 @@ static int rtl8366rb_enable_vlan(struct realtek_priv *priv, bool enable) static int rtl8366rb_enable_vlan4k(struct realtek_priv *priv, bool enable) { - dev_dbg(priv->dev, "%s VLAN 4k\n", enable ? "enable" : "disable"); + dev_dbg(priv->dev, "%s VLAN 4k\n", str_enable_disable(enable)); return regmap_update_bits(priv->map, RTL8366RB_SGCR, RTL8366RB_SGCR_EN_VLAN_4KTB, enable ? RTL8366RB_SGCR_EN_VLAN_4KTB : 0); -- 2.51.0 From 45bd1c5ba7580f612e46f3c6cb430c64adfd0294 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=E8=B0=A2=E8=87=B4=E9=82=A6=20=28XIE=20Zhibang=29?= Date: Fri, 17 Jan 2025 01:41:40 +0000 Subject: [PATCH 10/16] net: appletalk: Drop aarp_send_probe_phase1() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit aarp_send_probe_phase1() used to work by calling ndo_do_ioctl of appletalk drivers ltpc or cops, but these two drivers have been removed since the following commits: commit 03dcb90dbf62 ("net: appletalk: remove Apple/Farallon LocalTalk PC support") commit 00f3696f7555 ("net: appletalk: remove cops support") Thus aarp_send_probe_phase1() no longer works, so drop it. (found by code inspection) Signed-off-by: 谢致邦 (XIE Zhibang) Signed-off-by: David S. Miller --- net/appletalk/aarp.c | 45 +++++++------------------------------------- 1 file changed, 7 insertions(+), 38 deletions(-) diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 9fa0b246902b..05cbb3c227c5 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -432,49 +432,18 @@ static struct atalk_addr *__aarp_proxy_find(struct net_device *dev, return a ? sa : NULL; } -/* - * Probe a Phase 1 device or a device that requires its Net:Node to - * be set via an ioctl. - */ -static void aarp_send_probe_phase1(struct atalk_iface *iface) -{ - struct ifreq atreq; - struct sockaddr_at *sa = (struct sockaddr_at *)&atreq.ifr_addr; - const struct net_device_ops *ops = iface->dev->netdev_ops; - - sa->sat_addr.s_node = iface->address.s_node; - sa->sat_addr.s_net = ntohs(iface->address.s_net); - - /* We pass the Net:Node to the drivers/cards by a Device ioctl. */ - if (!(ops->ndo_do_ioctl(iface->dev, &atreq, SIOCSIFADDR))) { - ops->ndo_do_ioctl(iface->dev, &atreq, SIOCGIFADDR); - if (iface->address.s_net != htons(sa->sat_addr.s_net) || - iface->address.s_node != sa->sat_addr.s_node) - iface->status |= ATIF_PROBE_FAIL; - - iface->address.s_net = htons(sa->sat_addr.s_net); - iface->address.s_node = sa->sat_addr.s_node; - } -} - - void aarp_probe_network(struct atalk_iface *atif) { - if (atif->dev->type == ARPHRD_LOCALTLK || - atif->dev->type == ARPHRD_PPP) - aarp_send_probe_phase1(atif); - else { - unsigned int count; + unsigned int count; - for (count = 0; count < AARP_RETRANSMIT_LIMIT; count++) { - aarp_send_probe(atif->dev, &atif->address); + for (count = 0; count < AARP_RETRANSMIT_LIMIT; count++) { + aarp_send_probe(atif->dev, &atif->address); - /* Defer 1/10th */ - msleep(100); + /* Defer 1/10th */ + msleep(100); - if (atif->status & ATIF_PROBE_FAIL) - break; - } + if (atif->status & ATIF_PROBE_FAIL) + break; } } -- 2.51.0 From af10e092b77aaa11f056765d979e9be7e8276a3a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 17 Jan 2025 08:44:25 +0000 Subject: [PATCH 11/16] net: phylink: always do a major config when attaching a SFP PHY Background: https://lore.kernel.org/r/20250107123615.161095-1-ericwouds@gmail.com Since adding negotiation of in-band capabilities, it is no longer sufficient to just look at the MLO_AN_xxx mode and PHY interface to decide whether to do a major configuration, since the result now depends on the capabilities of the attaching PHY. Always trigger a major configuration in this case. Testing log: https://lore.kernel.org/r/f20c9744-3953-40e7-a9c9-5534b25d2e2a@gmail.com Reported-by: Eric Woudstra Tested-by: Eric Woudstra Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 66eea3f963d3..d130634d3bc7 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -3541,12 +3541,11 @@ static phy_interface_t phylink_choose_sfp_interface(struct phylink *pl, return interface; } -static void phylink_sfp_set_config(struct phylink *pl, - unsigned long *supported, - struct phylink_link_state *state) +static void phylink_sfp_set_config(struct phylink *pl, unsigned long *supported, + struct phylink_link_state *state, + bool changed) { u8 mode = MLO_AN_INBAND; - bool changed = false; phylink_dbg(pl, "requesting link mode %s/%s with support %*pb\n", phylink_an_mode_str(mode), phy_modes(state->interface), @@ -3623,7 +3622,7 @@ static int phylink_sfp_config_phy(struct phylink *pl, struct phy_device *phy) pl->link_port = pl->sfp_port; - phylink_sfp_set_config(pl, support, &config); + phylink_sfp_set_config(pl, support, &config, true); return 0; } @@ -3698,7 +3697,7 @@ static int phylink_sfp_config_optical(struct phylink *pl) pl->link_port = pl->sfp_port; - phylink_sfp_set_config(pl, pl->sfp_support, &config); + phylink_sfp_set_config(pl, pl->sfp_support, &config, false); return 0; } -- 2.51.0 From 5fe71fda89745fc3cd95f70d06e9162b595c3702 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 17 Jan 2025 12:36:14 +0300 Subject: [PATCH 12/16] tipc: re-order conditions in tipc_crypto_key_rcv() On a 32bit system the "keylen + sizeof(struct tipc_aead_key)" math could have an integer wrapping issue. It doesn't matter because the "keylen" is checked on the next line, but just to make life easier for static analysis tools, let's re-order these conditions and avoid the integer overflow. Signed-off-by: Dan Carpenter Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tipc/crypto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index 43c3f1c971b8..c524421ec652 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -2293,8 +2293,8 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr) keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME))); /* Verify the supplied size values */ - if (unlikely(size != keylen + sizeof(struct tipc_aead_key) || - keylen > TIPC_AEAD_KEY_SIZE_MAX)) { + if (unlikely(keylen > TIPC_AEAD_KEY_SIZE_MAX || + size != keylen + sizeof(struct tipc_aead_key))) { pr_debug("%s: invalid MSG_CRYPTO key size\n", rx->name); goto exit; } -- 2.51.0 From 457bb7970a0f10effd6b4ca9e1057727963c473a Mon Sep 17 00:00:00 2001 From: Ales Nezbeda Date: Fri, 17 Jan 2025 12:22:28 +0100 Subject: [PATCH 13/16] net: macsec: Add endianness annotations in salt struct This change resolves warning produced by sparse tool as currently there is a mismatch between normal generic type in salt and endian annotated type in macsec driver code. Endian annotated types should be used here. Sparse output: warning: restricted ssci_t degrades to integer warning: incorrect type in assignment (different base types) expected restricted ssci_t [usertype] ssci got unsigned int warning: restricted __be64 degrades to integer warning: incorrect type in assignment (different base types) expected restricted __be64 [usertype] pn got unsigned long long Signed-off-by: Ales Nezbeda Reviewed-by: Simon Horman Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/net/macsec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/macsec.h b/include/net/macsec.h index de216cbc6b05..bc7de5b53e54 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -38,8 +38,8 @@ struct metadata_dst; typedef union salt { struct { - u32 ssci; - u64 pn; + ssci_t ssci; + __be64 pn; } __packed; u8 bytes[MACSEC_SALT_LEN]; } __packed salt_t; -- 2.51.0 From 64ff63aeefb03139ae27454bd4208244579ae88e Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Fri, 17 Jan 2025 23:24:21 +0100 Subject: [PATCH 14/16] net: phy: realtek: HWMON support for standalone versions of RTL8221B and RTL8251 HWMON support has been added for the RTL8221/8251 PHYs integrated together with the MAC inside the RTL8125/8126 chips. This patch extends temperature reading support for standalone variants of the mentioned PHYs. I don't know whether the earlier revisions of the RTL8226 also have a built-in temperature sensor, so they have been skipped for now. Tested on RTL8221B-VB-CG. Signed-off-by: Aleksander Jan Bajkowski Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/realtek/realtek_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 38149958d95b..11ff44c3be5b 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -1475,6 +1475,7 @@ static struct phy_driver realtek_drvs[] = { }, { .match_phy_device = rtl8221b_vb_cg_c22_match_phy_device, .name = "RTL8221B-VB-CG 2.5Gbps PHY (C22)", + .probe = rtl822x_probe, .get_features = rtl822x_get_features, .config_aneg = rtl822x_config_aneg, .config_init = rtl822xb_config_init, @@ -1487,6 +1488,7 @@ static struct phy_driver realtek_drvs[] = { }, { .match_phy_device = rtl8221b_vb_cg_c45_match_phy_device, .name = "RTL8221B-VB-CG 2.5Gbps PHY (C45)", + .probe = rtl822x_probe, .config_init = rtl822xb_config_init, .get_rate_matching = rtl822xb_get_rate_matching, .get_features = rtl822x_c45_get_features, @@ -1497,6 +1499,7 @@ static struct phy_driver realtek_drvs[] = { }, { .match_phy_device = rtl8221b_vn_cg_c22_match_phy_device, .name = "RTL8221B-VM-CG 2.5Gbps PHY (C22)", + .probe = rtl822x_probe, .get_features = rtl822x_get_features, .config_aneg = rtl822x_config_aneg, .config_init = rtl822xb_config_init, @@ -1509,6 +1512,7 @@ static struct phy_driver realtek_drvs[] = { }, { .match_phy_device = rtl8221b_vn_cg_c45_match_phy_device, .name = "RTL8221B-VN-CG 2.5Gbps PHY (C45)", + .probe = rtl822x_probe, .config_init = rtl822xb_config_init, .get_rate_matching = rtl822xb_get_rate_matching, .get_features = rtl822x_c45_get_features, @@ -1519,6 +1523,7 @@ static struct phy_driver realtek_drvs[] = { }, { .match_phy_device = rtl8251b_c45_match_phy_device, .name = "RTL8251B 5Gbps PHY", + .probe = rtl822x_probe, .get_features = rtl822x_get_features, .config_aneg = rtl822x_config_aneg, .read_status = rtl822x_read_status, -- 2.51.0 From 3a0b7fa095212b51ed63892540c4f249991a2d74 Mon Sep 17 00:00:00 2001 From: Liu Ye Date: Thu, 16 Jan 2025 09:30:37 +0800 Subject: [PATCH 15/16] selftests/net/ipsec: Fix Null pointer dereference in rtattr_pack() Address Null pointer dereference / undefined behavior in rtattr_pack (note that size is 0 in the bad case). Flagged by cppcheck as: tools/testing/selftests/net/ipsec.c:230:25: warning: Possible null pointer dereference: payload [nullPointer] memcpy(RTA_DATA(attr), payload, size); ^ tools/testing/selftests/net/ipsec.c:1618:54: note: Calling function 'rtattr_pack', 4th argument 'NULL' value is 0 if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) { ^ tools/testing/selftests/net/ipsec.c:230:25: note: Null pointer dereference memcpy(RTA_DATA(attr), payload, size); ^ Signed-off-by: Liu Ye Link: https://patch.msgid.link/20250116013037.29470-1-liuye@kylinos.cn Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/ipsec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index be4a30a0d02a..9b44a091802c 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -227,7 +227,8 @@ static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz, attr->rta_len = RTA_LENGTH(size); attr->rta_type = rta_type; - memcpy(RTA_DATA(attr), payload, size); + if (payload) + memcpy(RTA_DATA(attr), payload, size); return 0; } -- 2.51.0 From 454d402481d45af79ee7eea7e64bce02bbbe9766 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 16 Jan 2025 14:34:34 +0900 Subject: [PATCH 16/16] net: dropreason: Gather SOCKET_ drop reasons. The following patch adds a new drop reason starting with the SOCKET_ prefix. Let's gather the existing SOCKET_ reasons. Note that the order is not part of uAPI. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250116053441.5758-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index ed864934e20b..f3714cbea50d 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -6,9 +6,10 @@ #define DEFINE_DROP_REASON(FN, FNe) \ FN(NOT_SPECIFIED) \ FN(NO_SOCKET) \ + FN(SOCKET_FILTER) \ + FN(SOCKET_RCVBUFF) \ FN(PKT_TOO_SMALL) \ FN(TCP_CSUM) \ - FN(SOCKET_FILTER) \ FN(UDP_CSUM) \ FN(NETFILTER_DROP) \ FN(OTHERHOST) \ @@ -18,7 +19,6 @@ FN(UNICAST_IN_L2_MULTICAST) \ FN(XFRM_POLICY) \ FN(IP_NOPROTO) \ - FN(SOCKET_RCVBUFF) \ FN(PROTO_MEM) \ FN(TCP_AUTH_HDR) \ FN(TCP_MD5NOTFOUND) \ @@ -138,12 +138,14 @@ enum skb_drop_reason { * 3) no valid child socket during 3WHS process */ SKB_DROP_REASON_NO_SOCKET, + /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */ + SKB_DROP_REASON_SOCKET_FILTER, + /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */ + SKB_DROP_REASON_SOCKET_RCVBUFF, /** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */ SKB_DROP_REASON_PKT_TOO_SMALL, /** @SKB_DROP_REASON_TCP_CSUM: TCP checksum error */ SKB_DROP_REASON_TCP_CSUM, - /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */ - SKB_DROP_REASON_SOCKET_FILTER, /** @SKB_DROP_REASON_UDP_CSUM: UDP checksum error */ SKB_DROP_REASON_UDP_CSUM, /** @SKB_DROP_REASON_NETFILTER_DROP: dropped by netfilter */ @@ -174,8 +176,6 @@ enum skb_drop_reason { SKB_DROP_REASON_XFRM_POLICY, /** @SKB_DROP_REASON_IP_NOPROTO: no support for IP protocol */ SKB_DROP_REASON_IP_NOPROTO, - /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */ - SKB_DROP_REASON_SOCKET_RCVBUFF, /** * @SKB_DROP_REASON_PROTO_MEM: proto memory limitation, such as * udp packet drop out of udp_memory_allocated. -- 2.51.0