From e2f4ac7bab2205d3c4dd9464e6ffd82502177c51 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 14 Mar 2025 21:11:33 +0100 Subject: [PATCH 01/16] mptcp: sockopt: fix getting freebind & transparent When adding a socket option support in MPTCP, both the get and set parts are supposed to be implemented. IP(V6)_FREEBIND and IP(V6)_TRANSPARENT support for the setsockopt part has been added a while ago, but it looks like the get part got forgotten. It should have been present as a way to verify a setting has been set as expected, and not to act differently from TCP or any other socket types. Everything was in place to expose it, just the last step was missing. Only new code is added to cover these specific getsockopt(), that seems safe. Fixes: c9406a23c116 ("mptcp: sockopt: add SOL_IP freebind & transparent options") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250314-net-mptcp-fix-data-stream-corr-sockopt-v1-3-122dbb249db3@kernel.org Signed-off-by: Paolo Abeni --- net/mptcp/sockopt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 4b99eb796855..3caa0a9d3b38 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1419,6 +1419,12 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, switch (optname) { case IP_TOS: return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); + case IP_FREEBIND: + return mptcp_put_int_option(msk, optval, optlen, + inet_test_bit(FREEBIND, sk)); + case IP_TRANSPARENT: + return mptcp_put_int_option(msk, optval, optlen, + inet_test_bit(TRANSPARENT, sk)); case IP_BIND_ADDRESS_NO_PORT: return mptcp_put_int_option(msk, optval, optlen, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); @@ -1439,6 +1445,12 @@ static int mptcp_getsockopt_v6(struct mptcp_sock *msk, int optname, case IPV6_V6ONLY: return mptcp_put_int_option(msk, optval, 
optlen, sk->sk_ipv6only); + case IPV6_TRANSPARENT: + return mptcp_put_int_option(msk, optval, optlen, + inet_test_bit(TRANSPARENT, sk)); + case IPV6_FREEBIND: + return mptcp_put_int_option(msk, optval, optlen, + inet_test_bit(FREEBIND, sk)); } return -EOPNOTSUPP; -- 2.51.0 From 8e6f6e92d3fe7032371417d2036b1c6897d2c2e5 Mon Sep 17 00:00:00 2001 From: Yael Chemla Date: Sun, 16 Mar 2025 10:14:33 +0200 Subject: [PATCH 02/16] net/mlx5e: Ensure each counter group uses its PCAM bit The code was incorrectly relying on PCAM bit of ppcnt_statistical_group for accessing per_lane_error_counters. If ppcnt_statistical_group PCAM bit was not set, we would not read per_lane_error_counters, even when its PCAM bit is set. Given the existing device capabilities, it seems to cause no harm, so this change primarily serves as cleanup. Signed-off-by: Yael Chemla Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Reviewed-by: Jacob Keller Reviewed-by: Kalesh AP Link: https://patch.msgid.link/1742112876-2890-2-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../ethernet/mellanox/mlx5/core/en_stats.c | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 611ec4b6f370..77d34037b92b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -1272,11 +1272,9 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(phy) ethtool_puts(data, "link_down_events_phy"); - if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) - return; - - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) - ethtool_puts(data, pport_phy_statistical_stats_desc[i].format); + if (MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) + ethtool_puts(data, pport_phy_statistical_stats_desc[i].format); if (MLX5_CAP_PCAM_FEATURE(mdev, 
per_lane_error_counters)) for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) @@ -1294,15 +1292,13 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(phy) data, MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, counter_set.phys_layer_cntrs.link_down_events)); - if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) - return; - - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) - mlx5e_ethtool_put_stat( - data, - MLX5E_READ_CTR64_BE( - &priv->stats.pport.phy_statistical_counters, - pport_phy_statistical_stats_desc, i)); + if (MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE( + &priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_stats_desc, i)); if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) -- 2.51.0 From da4fa5d8817dafb1a000f67b3fee5993000e8e06 Mon Sep 17 00:00:00 2001 From: Yael Chemla Date: Sun, 16 Mar 2025 10:14:34 +0200 Subject: [PATCH 03/16] net/mlx5e: Access PHY layer counter group as other counter groups Adjust the way physical layer counters group is accessed to match the generic method used for accessing other PPCNT counter groups. 
Signed-off-by: Yael Chemla Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Reviewed-by: Jacob Keller Link: https://patch.msgid.link/1742112876-2890-3-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../ethernet/mellanox/mlx5/core/en_stats.c | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 77d34037b92b..0cf0c920532f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -1227,6 +1227,13 @@ out: mutex_unlock(&priv->state_lock); } +#define PPORT_PHY_LAYER_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.phys_layer_cntrs.c) +static const struct counter_desc pport_phy_layer_cntrs_stats_desc[] = { + { "link_down_events_phy", PPORT_PHY_LAYER_OFF(link_down_events) } +}; + #define PPORT_PHY_STATISTICAL_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.phys_layer_statistical_cntrs.c##_high) @@ -1243,6 +1250,8 @@ pport_phy_statistical_err_lanes_stats_desc[] = { { "rx_err_lane_3_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane3) }, }; +#define NUM_PPORT_PHY_LAYER_COUNTERS \ + ARRAY_SIZE(pport_phy_layer_cntrs_stats_desc) #define NUM_PPORT_PHY_STATISTICAL_COUNTERS \ ARRAY_SIZE(pport_phy_statistical_stats_desc) #define NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS \ @@ -1253,8 +1262,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(phy) struct mlx5_core_dev *mdev = priv->mdev; int num_stats; - /* "1" for link_down_events special counter */ - num_stats = 1; + num_stats = NUM_PPORT_PHY_LAYER_COUNTERS; num_stats += MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) ? 
NUM_PPORT_PHY_STATISTICAL_COUNTERS : 0; @@ -1270,7 +1278,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(phy) struct mlx5_core_dev *mdev = priv->mdev; int i; - ethtool_puts(data, "link_down_events_phy"); + for (i = 0; i < NUM_PPORT_PHY_LAYER_COUNTERS; i++) + ethtool_puts(data, pport_phy_layer_cntrs_stats_desc[i].format); if (MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) @@ -1287,10 +1296,12 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(phy) struct mlx5_core_dev *mdev = priv->mdev; int i; - /* link_down_events_phy has special handling since it is not stored in __be64 format */ - mlx5e_ethtool_put_stat( - data, MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, - counter_set.phys_layer_cntrs.link_down_events)); + for (i = 0; i < NUM_PPORT_PHY_LAYER_COUNTERS; i++) + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR32_BE(&priv->stats.pport + .phy_counters, + pport_phy_layer_cntrs_stats_desc, i)); if (MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) -- 2.51.0 From 4c737ceb690cd35a8caf909f2998ef79f15cc6a0 Mon Sep 17 00:00:00 2001 From: Yael Chemla Date: Sun, 16 Mar 2025 10:14:35 +0200 Subject: [PATCH 04/16] net/mlx5e: Get counter group size by FW capability Retrieve the number of fields supported by each PPCNT counter group based on the FW capability for this group. 
Signed-off-by: Yael Chemla Signed-off-by: Tariq Toukan Reviewed-by: Jacob Keller Reviewed-by: Kalesh AP Link: https://patch.msgid.link/1742112876-2890-4-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../ethernet/mellanox/mlx5/core/en_stats.c | 58 ++++++++++--------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 0cf0c920532f..a417962acfa9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -1257,6 +1257,13 @@ pport_phy_statistical_err_lanes_stats_desc[] = { #define NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS \ ARRAY_SIZE(pport_phy_statistical_err_lanes_stats_desc) +#define NUM_PPORT_PHY_STATISTICAL_LOOPBACK_COUNTERS(dev) \ + (MLX5_CAP_PCAM_FEATURE(dev, ppcnt_statistical_group) ? \ + NUM_PPORT_PHY_STATISTICAL_COUNTERS : 0) +#define NUM_PPORT_PHY_STATISTICAL_PER_LANE_LOOPBACK_COUNTERS(dev) \ + (MLX5_CAP_PCAM_FEATURE(dev, per_lane_error_counters) ? \ + NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS : 0) + static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(phy) { struct mlx5_core_dev *mdev = priv->mdev; @@ -1264,11 +1271,9 @@ static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(phy) num_stats = NUM_PPORT_PHY_LAYER_COUNTERS; - num_stats += MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) ? - NUM_PPORT_PHY_STATISTICAL_COUNTERS : 0; + num_stats += NUM_PPORT_PHY_STATISTICAL_LOOPBACK_COUNTERS(mdev); - num_stats += MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters) ? 
- NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS : 0; + num_stats += NUM_PPORT_PHY_STATISTICAL_PER_LANE_LOOPBACK_COUNTERS(mdev); return num_stats; } @@ -1281,14 +1286,15 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(phy) for (i = 0; i < NUM_PPORT_PHY_LAYER_COUNTERS; i++) ethtool_puts(data, pport_phy_layer_cntrs_stats_desc[i].format); - if (MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) - ethtool_puts(data, pport_phy_statistical_stats_desc[i].format); + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_LOOPBACK_COUNTERS(mdev); i++) + ethtool_puts(data, pport_phy_statistical_stats_desc[i].format); - if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) - ethtool_puts(data, - pport_phy_statistical_err_lanes_stats_desc[i].format); + for (i = 0; + i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_LOOPBACK_COUNTERS(mdev); + i++) + ethtool_puts(data, + pport_phy_statistical_err_lanes_stats_desc[i] + .format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(phy) @@ -1303,23 +1309,21 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(phy) .phy_counters, pport_phy_layer_cntrs_stats_desc, i)); - if (MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) - mlx5e_ethtool_put_stat( - data, - MLX5E_READ_CTR64_BE( - &priv->stats.pport.phy_statistical_counters, - pport_phy_statistical_stats_desc, i)); + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_LOOPBACK_COUNTERS(mdev); i++) + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE( + &priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_stats_desc, i)); - if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) - mlx5e_ethtool_put_stat( - data, - MLX5E_READ_CTR64_BE( - &priv->stats.pport - .phy_statistical_counters, - pport_phy_statistical_err_lanes_stats_desc, - i)); + for (i = 0; + 
i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_LOOPBACK_COUNTERS(mdev); + i++) + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE( + &priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_err_lanes_stats_desc, i)); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy) -- 2.51.0 From c3b999cad7ec39a5487b20e8b6e737d2ab0c5393 Mon Sep 17 00:00:00 2001 From: Yael Chemla Date: Sun, 16 Mar 2025 10:14:36 +0200 Subject: [PATCH 05/16] net/mlx5e: Expose port reset cycle recovery counter via ethtool Display recovery event of PPCNT recovery counters group. Counts (per link) the number of total successful recovery events of any recovery types during port reset cycle. Signed-off-by: Yael Chemla Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Reviewed-by: Jacob Keller Link: https://patch.msgid.link/1742112876-2890-5-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../ethernet/mellanox/mlx5/counters.rst | 5 +++ .../ethernet/mellanox/mlx5/core/en_stats.c | 44 ++++++++++++++++--- .../ethernet/mellanox/mlx5/core/en_stats.h | 4 ++ 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst index 99d95be4d159..43d72c8b713b 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst @@ -1082,6 +1082,11 @@ like flow control, FEC and more. need to replace the cable/transceiver. - Error + * - `total_success_recovery_phy` + - The number of total successful recovery events of any type during + ports reset cycle. + - Error + * - `rx_out_of_buffer` - Number of times receive queue had no software buffers allocated for the adapter's incoming traffic. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index a417962acfa9..acb00fd7efa4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -1250,12 +1250,22 @@ pport_phy_statistical_err_lanes_stats_desc[] = { { "rx_err_lane_3_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane3) }, }; +#define PPORT_PHY_RECOVERY_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, counter_set.phys_layer_recovery_cntrs.c) +static const struct counter_desc +pport_phy_recovery_cntrs_stats_desc[] = { + { "total_success_recovery_phy", + PPORT_PHY_RECOVERY_OFF(total_successful_recovery_events) } +}; + #define NUM_PPORT_PHY_LAYER_COUNTERS \ ARRAY_SIZE(pport_phy_layer_cntrs_stats_desc) #define NUM_PPORT_PHY_STATISTICAL_COUNTERS \ ARRAY_SIZE(pport_phy_statistical_stats_desc) #define NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS \ ARRAY_SIZE(pport_phy_statistical_err_lanes_stats_desc) +#define NUM_PPORT_PHY_RECOVERY_COUNTERS \ + ARRAY_SIZE(pport_phy_recovery_cntrs_stats_desc) #define NUM_PPORT_PHY_STATISTICAL_LOOPBACK_COUNTERS(dev) \ (MLX5_CAP_PCAM_FEATURE(dev, ppcnt_statistical_group) ? \ @@ -1263,6 +1273,9 @@ pport_phy_statistical_err_lanes_stats_desc[] = { #define NUM_PPORT_PHY_STATISTICAL_PER_LANE_LOOPBACK_COUNTERS(dev) \ (MLX5_CAP_PCAM_FEATURE(dev, per_lane_error_counters) ? \ NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS : 0) +#define NUM_PPORT_PHY_RECOVERY_LOOPBACK_COUNTERS(dev) \ + (MLX5_CAP_PCAM_FEATURE(dev, ppcnt_recovery_counters) ? 
\ + NUM_PPORT_PHY_RECOVERY_COUNTERS : 0) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(phy) { @@ -1275,6 +1288,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(phy) num_stats += NUM_PPORT_PHY_STATISTICAL_PER_LANE_LOOPBACK_COUNTERS(mdev); + num_stats += NUM_PPORT_PHY_RECOVERY_LOOPBACK_COUNTERS(mdev); return num_stats; } @@ -1295,6 +1309,10 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(phy) ethtool_puts(data, pport_phy_statistical_err_lanes_stats_desc[i] .format); + + for (i = 0; i < NUM_PPORT_PHY_RECOVERY_LOOPBACK_COUNTERS(mdev); i++) + ethtool_puts(data, + pport_phy_recovery_cntrs_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(phy) @@ -1324,6 +1342,13 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(phy) MLX5E_READ_CTR64_BE( &priv->stats.pport.phy_statistical_counters, pport_phy_statistical_err_lanes_stats_desc, i)); + + for (i = 0; i < NUM_PPORT_PHY_RECOVERY_LOOPBACK_COUNTERS(mdev); i++) + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR32_BE( + &priv->stats.pport.phy_recovery_counters, + pport_phy_recovery_cntrs_stats_desc, i)); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy) @@ -1339,12 +1364,21 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy) MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); - if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) - return; + if (MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) { + out = pstats->phy_statistical_counters; + MLX5_SET(ppcnt_reg, in, grp, + MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, + 0); + } - out = pstats->phy_statistical_counters; - MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + if (MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_recovery_counters)) { + out = pstats->phy_recovery_counters; + MLX5_SET(ppcnt_reg, in, grp, + 
MLX5_PHYSICAL_LAYER_RECOVERY_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, + 0); + } } void mlx5e_get_link_ext_stats(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 5961c569cfe0..0d87947e348d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -309,6 +309,9 @@ struct mlx5e_vport_stats { #define PPORT_PHY_STATISTICAL_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, (pstats)->phy_statistical_counters, \ counter_set.phys_layer_statistical_cntrs.c##_high) +#define PPORT_PHY_RECOVERY_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, (pstats)->phy_recovery_counters, \ + counter_set.phys_layer_recovery_cntrs.c) #define PPORT_PER_PRIO_GET(pstats, prio, c) \ MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \ counter_set.eth_per_prio_grp_data_layout.c##_high) @@ -324,6 +327,7 @@ struct mlx5e_pport_stats { __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 phy_recovery_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 per_tc_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 per_tc_congest_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; -- 2.51.0 From 4b9235a880f13ae3443465192603b298a603b2c5 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 16 Mar 2025 11:51:11 +0000 Subject: [PATCH 06/16] net: phy: fix genphy_c45_eee_is_active() for disabled EEE Commit 809265fe96fe ("net: phy: c45: remove local advertisement parameter from genphy_c45_eee_is_active") stopped reading the local advertisement from the PHY earlier in this development cycle, which broke "ethtool --set-eee ethX eee off". 
When ethtool is used to set EEE off, genphy_c45_eee_is_active() indicates that EEE was active if the link partner reported an advertisement, which causes phylib to set phydev->enable_tx_lpi on link up, despite our local advertisement in hardware being empty. However, phydev->advertising_eee is preserved while EEE is turned off, which leads to genphy_c45_eee_is_active() incorrectly reporting that EEE is active. Fix it by checking phydev->eee_cfg.eee_enabled, and if clear, immediately indicate that EEE is not active. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1ttmWN-0077Mb-Q6@rmk-PC.armlinux.org.uk Signed-off-by: Paolo Abeni --- drivers/net/phy/phy-c45.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 8a0ffc3174ec..bdd70d424491 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -1479,6 +1479,9 @@ int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *lp) __ETHTOOL_DECLARE_LINK_MODE_MASK(common); int ret; + if (!phydev->eee_cfg.eee_enabled) + return 0; + ret = genphy_c45_read_eee_lpa(phydev, tmp_lp); if (ret) return ret; -- 2.51.0 From bfc17c1658353f22843c7c13e27c2d31950f1887 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sun, 16 Mar 2025 12:39:54 +0000 Subject: [PATCH 07/16] net: phy: realtek: disable PHY-mode EEE Realtek RTL8211F has a "PHY-mode" EEE support which interferes with an IEEE 802.3 compliant implementation. This mode defaults to enabled, and results in the MAC receive path not seeing the link transition to LPI state. Fix this by disabling PHY-mode EEE. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1ttnHW-00785s-Uq@rmk-PC.armlinux.org.uk Signed-off-by: Paolo Abeni --- drivers/net/phy/realtek/realtek_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 7a0b19d66aca..893c82479671 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -33,6 +33,9 @@ #define RTL8211F_PHYCR1 0x18 #define RTL8211F_PHYCR2 0x19 +#define RTL8211F_CLKOUT_EN BIT(0) +#define RTL8211F_PHYCR2_PHY_EEE_ENABLE BIT(5) + #define RTL8211F_INSR 0x1d #define RTL8211F_LEDCR 0x10 @@ -55,8 +58,6 @@ #define RTL8211E_TX_DELAY BIT(12) #define RTL8211E_RX_DELAY BIT(11) -#define RTL8211F_CLKOUT_EN BIT(0) - #define RTL8201F_ISR 0x1e #define RTL8201F_ISR_ANERR BIT(15) #define RTL8201F_ISR_DUPLEX BIT(13) @@ -453,6 +454,12 @@ static int rtl8211f_config_init(struct phy_device *phydev) str_enabled_disabled(val_rxdly)); } + /* Disable PHY-mode EEE so LPI is passed to the MAC */ + ret = phy_modify_paged(phydev, 0xa43, RTL8211F_PHYCR2, + RTL8211F_PHYCR2_PHY_EEE_ENABLE, 0); + if (ret) + return ret; + if (priv->has_phycr2) { ret = phy_modify_paged(phydev, 0xa43, RTL8211F_PHYCR2, RTL8211F_CLKOUT_EN, priv->phycr2); -- 2.51.0 From ca1914a32cdcad26c4b003df743fe4f9e4bb2877 Mon Sep 17 00:00:00 2001 From: Ihor Matushchak Date: Sun, 16 Mar 2025 08:15:51 +0100 Subject: [PATCH 08/16] net: phy: phy_interface_t: Fix RGMII_TXID code comment Fix copy-paste error in the code comment for Interface Mode definitions. The code refers to Internal TX delay, not Internal RX delay. It was likely copied from the line above this one. 
Signed-off-by: Ihor Matushchak Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20250316071551.9794-1-ihor.matushchak@foobox.net Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/phy.h b/include/linux/phy.h index 60d3b8860ea2..bfdbdc538910 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -81,7 +81,7 @@ extern const int phy_basic_ports_array[3]; * @PHY_INTERFACE_MODE_RGMII: Reduced gigabit media-independent interface * @PHY_INTERFACE_MODE_RGMII_ID: RGMII with Internal RX+TX delay * @PHY_INTERFACE_MODE_RGMII_RXID: RGMII with Internal RX delay - * @PHY_INTERFACE_MODE_RGMII_TXID: RGMII with Internal RX delay + * @PHY_INTERFACE_MODE_RGMII_TXID: RGMII with Internal TX delay * @PHY_INTERFACE_MODE_RTBI: Reduced TBI * @PHY_INTERFACE_MODE_SMII: Serial MII * @PHY_INTERFACE_MODE_XGMII: 10 gigabit media-independent interface -- 2.51.0 From 6bb0dcb3d321c14be7ca33b71a149034d6a2cde8 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 20 Mar 2025 23:42:07 +0100 Subject: [PATCH 09/16] net: openvswitch: fix kernel-doc warnings in internal headers Some field descriptions were missing, some were not very accurate. Not touching the uAPI header or .c files for now. Formatting of those comments isn't great in general, but at least they are not missing anything now. 
Before: $ ./scripts/kernel-doc -none -Wall net/openvswitch/*.h 2>&1 | wc -l 16 After: $ ./scripts/kernel-doc -none -Wall net/openvswitch/*.h 2>&1 | wc -l 0 Signed-off-by: Ilya Maximets Acked-by: Eelco Chaudron Reviewed-by: Aaron Conole Link: https://patch.msgid.link/20250320224431.252489-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- net/openvswitch/datapath.h | 20 ++++++++++++++------ net/openvswitch/vport.h | 9 +++++++++ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 9ca6231ea647..384ca77f4e79 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -29,8 +29,8 @@ * datapath. * @n_hit: Number of received packets for which a matching flow was found in * the flow table. - * @n_miss: Number of received packets that had no matching flow in the flow - * table. The sum of @n_hit and @n_miss is the number of packets that have + * @n_missed: Number of received packets that had no matching flow in the flow + * table. The sum of @n_hit and @n_missed is the number of packets that have * been received by the datapath. * @n_lost: Number of received packets that had no matching flow in the flow * table that could not be sent to userspace (normally due to an overflow in @@ -40,6 +40,7 @@ * up per packet. * @n_cache_hit: The number of received packets that had their mask found using * the mask cache. + * @syncp: Synchronization point for 64bit counters. */ struct dp_stats_percpu { u64 n_hit; @@ -74,8 +75,10 @@ struct dp_nlsk_pids { * ovs_mutex and RCU. * @stats_percpu: Per-CPU datapath statistics. * @net: Reference to net namespace. - * @max_headroom: the maximum headroom of all vports in this datapath; it will + * @user_features: Bitmap of enabled %OVS_DP_F_* features. + * @max_headroom: The maximum headroom of all vports in this datapath; it will * be used by all the internal vports in this dp. + * @meter_tbl: Meter table. 
* @upcall_portids: RCU protected 'struct dp_nlsk_pids'. * * Context: See the comment on locking at the top of datapath.c for additional @@ -128,10 +131,13 @@ struct ovs_skb_cb { #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) /** - * struct dp_upcall - metadata to include with a packet to send to userspace + * struct dp_upcall_info - metadata to include with a packet sent to userspace * @cmd: One of %OVS_PACKET_CMD_*. * @userdata: If nonnull, its variable-length value is passed to userspace as * %OVS_PACKET_ATTR_USERDATA. + * @actions: If nonnull, its variable-length value is passed to userspace as + * %OVS_PACKET_ATTR_ACTIONS. + * @actions_len: The length of the @actions. * @portid: Netlink portid to which packet should be sent. If @portid is 0 * then no packet is sent and the packet is accounted in the datapath's @n_lost * counter. @@ -152,6 +158,10 @@ struct dp_upcall_info { * struct ovs_net - Per net-namespace data for ovs. * @dps: List of datapaths to enable dumping them all out. * Protected by genl_mutex. + * @dp_notify_work: A work notifier to handle port unregistering. + * @masks_rebalance: A work to periodically optimize flow table caches. + * @ct_limit_info: A hash table of conntrack zone connection limits. + * @xt_label: Whether connlabels are configured for the network or not. */ struct ovs_net { struct list_head dps; @@ -160,8 +170,6 @@ struct ovs_net { #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) struct ovs_ct_limit_info *ct_limit_info; #endif - - /* Module reference for configuring conntrack. */ bool xt_label; }; diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 3e71ca8ad8a7..9f67b9dd49f9 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -97,6 +97,8 @@ struct vport { * @desired_ifindex: New vport's ifindex. * @dp: New vport's datapath. * @port_no: New vport's port number. + * @upcall_portids: %OVS_VPORT_ATTR_UPCALL_PID attribute from Netlink message, + * %NULL if none was supplied.
*/ struct vport_parms { const char *name; @@ -125,6 +127,8 @@ struct vport_parms { * have any configuration. * @send: Send a packet on the device. * zero for dropped packets or negative for error. + * @owner: Module that implements this vport type. + * @list: List entry in the global list of vport types. */ struct vport_ops { enum ovs_vport_type type; @@ -144,6 +148,7 @@ struct vport_ops { /** * struct vport_upcall_stats_percpu - per-cpu packet upcall statistics for * a given vport. + * @syncp: Synchronization point for 64bit counters. * @n_success: Number of packets that upcall to userspace succeed. * @n_fail: Number of packets that upcall to userspace failed. */ @@ -164,6 +169,8 @@ void ovs_vport_free(struct vport *); * * @vport: vport to access * + * Returns: A void pointer to a private data allocated in the @vport. + * * If a nonzero size was passed in priv_size of vport_alloc() a private data * area was allocated on creation. This allows that area to be accessed and * used for any purpose needed by the vport implementer. @@ -178,6 +185,8 @@ static inline void *vport_priv(const struct vport *vport) * * @priv: Start of private data area. * + * Returns: A reference to a vport structure that contains @priv. + * * It is sometimes useful to translate from a pointer to the private data * area to the vport, such as in the case where the private data pointer is * the result of a hash table lookup. @priv must point to the start of the -- 2.51.0 From d39e08b0893b579ba1fdee5713d011a4585517f7 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Mon, 17 Mar 2025 09:11:47 +0200 Subject: [PATCH 10/16] net: ena: resolve WARN_ON when freeing IRQs When IRQs are freed, a WARN_ON is triggered as the affinity notifier is not released. This results in the below stack trace: [ 484.544586] ? __warn+0x84/0x130 [ 484.544843] ? free_irq+0x5c/0x70 [ 484.545105] ? report_bug+0x18a/0x1a0 [ 484.545390] ? handle_bug+0x53/0x90 [ 484.545664] ? exc_invalid_op+0x14/0x70 [ 484.545959] ? 
asm_exc_invalid_op+0x16/0x20 [ 484.546279] ? free_irq+0x5c/0x70 [ 484.546545] ? free_irq+0x10/0x70 [ 484.546807] ena_free_io_irq+0x5f/0x70 [ena] [ 484.547138] ena_down+0x250/0x3e0 [ena] [ 484.547435] ena_destroy_device+0x118/0x150 [ena] [ 484.547796] __ena_shutoff+0x5a/0xe0 [ena] [ 484.548110] pci_device_remove+0x3b/0xb0 [ 484.548412] device_release_driver_internal+0x193/0x200 [ 484.548804] driver_detach+0x44/0x90 [ 484.549084] bus_remove_driver+0x69/0xf0 [ 484.549386] pci_unregister_driver+0x2a/0xb0 [ 484.549717] ena_cleanup+0xc/0x130 [ena] [ 484.550021] __do_sys_delete_module.constprop.0+0x176/0x310 [ 484.550438] ? syscall_trace_enter+0xfb/0x1c0 [ 484.550782] do_syscall_64+0x5b/0x170 [ 484.551067] entry_SYSCALL_64_after_hwframe+0x76/0x7e Adding a call to `netif_napi_set_irq` with -1 as the IRQ index, which frees the notifier. Fixes: de340d8206bf ("net: ena: use napi's aRFS rmap notifers") Signed-off-by: David Arinzon Reviewed-by: Simon Horman Reviewed-by: Ahmed Zaki Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250317071147.1105-1-darinzon@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 6aab85a7c60a..70fa3adb4934 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1716,8 +1716,12 @@ static void ena_free_io_irq(struct ena_adapter *adapter) int i; for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) { + struct ena_napi *ena_napi; + irq = &adapter->irq_tbl[i]; irq_set_affinity_hint(irq->vector, NULL); + ena_napi = irq->data; + netif_napi_set_irq(&ena_napi->napi, -1); free_irq(irq->vector, irq->data); } } -- 2.51.0 From 8fa649fd7d3009769c7289d0c31c319b72bc42c4 Mon Sep 17 00:00:00 2001 From: Dimitri Fedrau Date: Mon, 17 Mar 2025 08:48:34 +0100 Subject: [PATCH 11/16] net: phy: dp83822: 
fix transmit amplitude if CONFIG_OF_MDIO not defined When CONFIG_OF_MDIO is not defined the index for selecting the transmit amplitude voltage for 100BASE-TX is set to 0, but it should be -1, if there is no need to modify the transmit amplitude voltage. Move initialization of the index from dp83822_of_init to dp8382x_probe. Fixes: 4f3735e82d8a ("net: phy: dp83822: Add support for changing the transmit amplitude voltage") Reviewed-by: Gerhard Engleder Signed-off-by: Dimitri Fedrau Link: https://patch.msgid.link/20250317-dp83822-fix-transceiver-mdio-v2-1-fb09454099a4@liebherr.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83822.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 3662f3905d5a..14f361549638 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -833,7 +833,6 @@ static int dp83822_of_init(struct phy_device *phydev) dp83822->set_gpio2_clk_out = true; } - dp83822->tx_amplitude_100base_tx_index = -1; ret = phy_get_tx_amplitude_gain(phydev, dev, ETHTOOL_LINK_MODE_100baseT_Full_BIT, &val); @@ -931,6 +930,7 @@ static int dp8382x_probe(struct phy_device *phydev) if (!dp83822) return -ENOMEM; + dp83822->tx_amplitude_100base_tx_index = -1; phydev->priv = dp83822; return 0; -- 2.51.0 From 1937a0be28c01a13e18912602b8eff08d7db77cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Mar 2025 08:53:13 +0000 Subject: [PATCH 12/16] tcp: move icsk_clean_acked to a better location As a followup of my presentation in Zagreb for netdev 0x19: icsk_clean_acked is only used by TCP when/if CONFIG_TLS_DEVICE is enabled from tcp_ack(). Rename it to tcp_clean_acked, move it to tcp_sock structure in the tcp_sock_read_rx for better cache locality in TCP fast path. Define this field only when CONFIG_TLS_DEVICE is enabled saving 8 bytes on configs not using it. 
Signed-off-by: Eric Dumazet Reviewed-by: Neal Cardwell Reviewed-by: Sabrina Dubroca Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250317085313.2023214-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- Documentation/networking/net_cachelines/tcp_sock.rst | 1 + include/linux/tcp.h | 3 +++ include/net/inet_connection_sock.h | 2 -- include/net/tcp.h | 4 ++-- net/ipv4/tcp.c | 5 +++++ net/ipv4/tcp_input.c | 12 ++++++------ net/tls/tls_device.c | 8 ++++---- 7 files changed, 21 insertions(+), 14 deletions(-) diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst index 1f79765072b1..bc9b2131bf7a 100644 --- a/Documentation/networking/net_cachelines/tcp_sock.rst +++ b/Documentation/networking/net_cachelines/tcp_sock.rst @@ -27,6 +27,7 @@ u32 dsack_dups u32 snd_una read_mostly read_write tcp_wnd_end,tcp_urg_mode,tcp_minshall_check,tcp_cwnd_validate(tx);tcp_ack,tcp_may_update_window,tcp_clean_rtx_queue(write),tcp_ack_tstamp(rx) u32 snd_sml read_write tcp_minshall_check,tcp_minshall_update u32 rcv_tstamp read_mostly tcp_ack +void * tcp_clean_acked read_mostly tcp_ack u32 lsndtime read_write tcp_slow_start_after_idle_check,tcp_event_data_sent u32 last_oow_ack_time u32 compressed_ack_rcv_nxt diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 159b2c59eb62..1669d95bb0f9 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -244,6 +244,9 @@ struct tcp_sock { struct minmax rtt_min; /* OOO segments go in this rbtree. Socket lock must be held. 
*/ struct rb_root out_of_order_queue; +#if defined(CONFIG_TLS_DEVICE) + void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq); +#endif u32 snd_ssthresh; /* Slow start size threshold */ u8 recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */ __cacheline_group_end(tcp_sock_read_rx); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index f736d3097e43..e8ed52fc603f 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -66,7 +66,6 @@ struct inet_connection_sock_af_ops { * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ulp_ops Pluggable ULP control hook * @icsk_ulp_data ULP private data - * @icsk_clean_acked Clean acked data hook * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event @@ -97,7 +96,6 @@ struct inet_connection_sock { const struct inet_connection_sock_af_ops *icsk_af_ops; const struct tcp_ulp_ops *icsk_ulp_ops; void __rcu *icsk_ulp_data; - void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq); unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:5, icsk_ca_initialized:1, diff --git a/include/net/tcp.h b/include/net/tcp.h index d08fbf90495d..f8efe56bbccb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2815,9 +2815,9 @@ extern struct static_key_false tcp_have_smc; #endif #if IS_ENABLED(CONFIG_TLS_DEVICE) -void clean_acked_data_enable(struct inet_connection_sock *icsk, +void clean_acked_data_enable(struct tcp_sock *tp, void (*cad)(struct sock *sk, u32 ack_seq)); -void clean_acked_data_disable(struct inet_connection_sock *icsk); +void clean_acked_data_disable(struct tcp_sock *tp); void clean_acked_data_flush(void); #endif diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 989c3c3d8e75..fde56d28f586 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -5026,7 +5026,12 @@ static void __init tcp_struct_check(void) 
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rtt_min); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, out_of_order_queue); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_ssthresh); +#if IS_ENABLED(CONFIG_TLS_DEVICE) + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tcp_clean_acked); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 77); +#else CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 69); +#endif /* TX read-write hotpath cache lines */ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, segs_out); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 72382ee4456d..a35018e2d0ba 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -119,18 +119,18 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE; #if IS_ENABLED(CONFIG_TLS_DEVICE) static DEFINE_STATIC_KEY_DEFERRED_FALSE(clean_acked_data_enabled, HZ); -void clean_acked_data_enable(struct inet_connection_sock *icsk, +void clean_acked_data_enable(struct tcp_sock *tp, void (*cad)(struct sock *sk, u32 ack_seq)) { - icsk->icsk_clean_acked = cad; + tp->tcp_clean_acked = cad; static_branch_deferred_inc(&clean_acked_data_enabled); } EXPORT_SYMBOL_GPL(clean_acked_data_enable); -void clean_acked_data_disable(struct inet_connection_sock *icsk) +void clean_acked_data_disable(struct tcp_sock *tp) { static_branch_slow_dec_deferred(&clean_acked_data_enabled); - icsk->icsk_clean_acked = NULL; + tp->tcp_clean_acked = NULL; } EXPORT_SYMBOL_GPL(clean_acked_data_disable); @@ -3987,8 +3987,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) #if IS_ENABLED(CONFIG_TLS_DEVICE) if (static_branch_unlikely(&clean_acked_data_enabled.key)) - if (icsk->icsk_clean_acked) - icsk->icsk_clean_acked(sk, ack); + if (tp->tcp_clean_acked) + tp->tcp_clean_acked(sk, ack); #endif } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index e50b6e71df13..f672a62a9a52 100644 --- 
a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -157,7 +157,7 @@ static void delete_all_records(struct tls_offload_context_tx *offload_ctx) offload_ctx->retransmit_hint = NULL; } -static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq) +static void tls_tcp_clean_acked(struct sock *sk, u32 acked_seq) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_record_info *info, *temp; @@ -204,7 +204,7 @@ void tls_device_sk_destruct(struct sock *sk) destroy_record(ctx->open_record); delete_all_records(ctx); crypto_free_aead(ctx->aead_send); - clean_acked_data_disable(inet_csk(sk)); + clean_acked_data_disable(tcp_sk(sk)); } tls_device_queue_ctx_destruction(tls_ctx); @@ -1126,7 +1126,7 @@ int tls_set_device_offload(struct sock *sk) start_marker_record->num_frags = 0; list_add_tail(&start_marker_record->list, &offload_ctx->records_list); - clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked); + clean_acked_data_enable(tcp_sk(sk), &tls_tcp_clean_acked); ctx->push_pending_record = tls_device_push_pending_record; /* TLS offload is greatly simplified if we don't send @@ -1172,7 +1172,7 @@ int tls_set_device_offload(struct sock *sk) release_lock: up_read(&device_offload_lock); - clean_acked_data_disable(inet_csk(sk)); + clean_acked_data_disable(tcp_sk(sk)); crypto_free_aead(offload_ctx->aead_send); free_offload_ctx: kfree(offload_ctx); -- 2.51.0 From fd88253605a49034c39b61e1092b52c9f53254e5 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 17 Mar 2025 10:27:17 +0100 Subject: [PATCH 13/16] MAINTAINERS: adjust the file entry in INTEL PMC CORE DRIVER MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Commit 7e2f7e25f6ff ("arch: x86: add IPC mailbox accessor function and add SoC register access") adds a new file entry referring to the non-existent file linux/platform_data/x86/intel_pmc_ipc.h in section INTEL PMC CORE DRIVER rather than referring to the file include/linux/platform_data/x86/intel_pmc_ipc.h added 
with this commit. Note that it was missing 'include' in the beginning. Adjust the file reference to the intended file. Signed-off-by: Lukas Bulwahn Acked-by: Ilpo Järvinen Link: https://patch.msgid.link/20250317092717.322862-1-lukas.bulwahn@redhat.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5959513a7359..c3fce4416723 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11873,7 +11873,7 @@ L: platform-driver-x86@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-platform-intel-pmc F: drivers/platform/x86/intel/pmc/ -F: linux/platform_data/x86/intel_pmc_ipc.h +F: include/linux/platform_data/x86/intel_pmc_ipc.h INTEL PMIC GPIO DRIVERS M: Andy Shevchenko -- 2.51.0 From 7151062c297cc500d2a3b3723c83ff15f65332b4 Mon Sep 17 00:00:00 2001 From: Peter Seiderer Date: Mon, 17 Mar 2025 10:04:00 +0100 Subject: [PATCH 14/16] net: pktgen: add strict buffer parsing index check Add strict buffer parsing index check to avoid the following Smatch warning: net/core/pktgen.c:877 get_imix_entries() warn: check that incremented offset 'i' is capped Checking the buffer index i after every get_user/i++ step and returning with error code immediately avoids the current indirect (but correct) error handling. 
Reported-by: Dan Carpenter Closes: https://lore.kernel.org/netdev/36cf3ee2-38b1-47e5-a42a-363efeb0ace3@stanley.mountain/ Signed-off-by: Peter Seiderer Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250317090401.1240704-1-ps.report@gmx.net Signed-off-by: Jakub Kicinski --- net/core/pktgen.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index e850598db3e7..fe7fdefab994 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -856,6 +856,9 @@ static ssize_t get_imix_entries(const char __user *buffer, if (pkt_dev->n_imix_entries >= MAX_IMIX_ENTRIES) return -E2BIG; + if (i >= maxlen) + return -EINVAL; + max = min(10, maxlen - i); len = num_arg(&buffer[i], max, &size); if (len < 0) @@ -869,6 +872,8 @@ static ssize_t get_imix_entries(const char __user *buffer, if (c != ',') return -EINVAL; i++; + if (i >= maxlen) + return -EINVAL; if (size < 14 + 20 + 8) size = 14 + 20 + 8; @@ -911,6 +916,9 @@ static ssize_t get_labels(const char __user *buffer, if (n >= MAX_MPLS_LABELS) return -E2BIG; + if (i >= maxlen) + return -EINVAL; + max = min(8, maxlen - i); len = hex32_arg(&buffer[i], max, &tmp); if (len < 0) -- 2.51.0 From 3099f9e156b3bd37b3825fc527dd018bb76957f7 Mon Sep 17 00:00:00 2001 From: Peter Seiderer Date: Mon, 17 Mar 2025 10:04:01 +0100 Subject: [PATCH 15/16] selftest: net: update proc_net_pktgen (add more imix_weights test cases) Add more imix_weights test cases (for incomplete input). 
Signed-off-by: Peter Seiderer Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250317090401.1240704-2-ps.report@gmx.net Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/proc_net_pktgen.c | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tools/testing/selftests/net/proc_net_pktgen.c b/tools/testing/selftests/net/proc_net_pktgen.c index 462805ac1614..69444fb29577 100644 --- a/tools/testing/selftests/net/proc_net_pktgen.c +++ b/tools/testing/selftests/net/proc_net_pktgen.c @@ -39,6 +39,10 @@ static const char dev_cmd_pkt_size_0[] = "pkt_size 300"; static const char dev_cmd_imix_weights_0[] = "imix_weights 0,7 576,4 1500,1"; static const char dev_cmd_imix_weights_1[] = "imix_weights 101,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20"; static const char dev_cmd_imix_weights_2[] = "imix_weights 100,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20 121,21"; +static const char dev_cmd_imix_weights_3[] = "imix_weights"; +static const char dev_cmd_imix_weights_4[] = "imix_weights "; +static const char dev_cmd_imix_weights_5[] = "imix_weights 0"; +static const char dev_cmd_imix_weights_6[] = "imix_weights 0,"; static const char dev_cmd_debug_0[] = "debug 1"; static const char dev_cmd_debug_1[] = "debug 0"; static const char dev_cmd_frags_0[] = "frags 100"; @@ -284,6 +288,46 @@ TEST_F(proc_net_pktgen, dev_cmd_imix_weights) { len = write(self->dev_fd, dev_cmd_imix_weights_2, sizeof(dev_cmd_imix_weights_2)); EXPECT_EQ(len, -1); EXPECT_EQ(errno, E2BIG); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); 
+ + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); } TEST_F(proc_net_pktgen, dev_cmd_debug) { -- 2.51.0 From 66034f78a5583bc10c195647629a137e8ed02208 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 17 Mar 2025 23:01:07 -0700 Subject: [PATCH 16/16] tcp/dccp: Remove inet_connection_sock_af_ops.addr2sockaddr(). inet_connection_sock_af_ops.addr2sockaddr() hasn't been used at all in the git era. $ git grep addr2sockaddr $(git rev-list HEAD | tail -n 1) Let's remove it. Note that there was a 4 bytes hole after sockaddr_len and now it's 6 bytes, so the binary layout is not changed. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250318060112.3729-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/inet6_connection_sock.h | 2 -- include/net/inet_connection_sock.h | 4 ---- net/dccp/ipv4.c | 2 -- net/dccp/ipv6.c | 4 ---- net/ipv4/inet_connection_sock.c | 11 ----------- net/ipv4/tcp_ipv4.c | 2 -- net/ipv6/inet6_connection_sock.c | 14 -------------- net/ipv6/tcp_ipv6.c | 4 ---- 8 files changed, 43 deletions(-) diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index 025bd8d3c769..745891d2e113 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -21,8 +21,6 @@ struct sockaddr; struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, const struct request_sock *req, u8 proto); -void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); - int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl); struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index e8ed52fc603f..09a9d333fa42 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -44,12 +44,10 @@ struct inet_connection_sock_af_ops { struct request_sock *req_unhash, bool *own_req); u16 net_header_len; - u16 sockaddr_len; int (*setsockopt)(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); - void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); void (*mtu_reduced)(struct sock *sk); }; @@ -315,8 +313,6 @@ static inline __poll_t inet_csk_listen_poll(const struct sock *sk) int inet_csk_listen_start(struct sock *sk); void inet_csk_listen_stop(struct sock *sk); -void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); - /* update the fast 
reuse flag when adding a socket */ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, struct sock *sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index bfa529a54aca..2045ddac0fe9 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -934,8 +934,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { .net_header_len = sizeof(struct iphdr), .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, - .addr2sockaddr = inet_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in), }; static int dccp_v4_init_sock(struct sock *sk) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 39ae9d89d7d4..e24dbffabfc1 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -988,8 +988,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { .net_header_len = sizeof(struct ipv6hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = inet6_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in6), }; /* @@ -1004,8 +1002,6 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { .net_header_len = sizeof(struct iphdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = inet6_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in6), }; static void dccp_v6_sk_destruct(struct sock *sk) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e93c66034077..dd5cf8914a28 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1553,17 +1553,6 @@ skip_child_forget: } EXPORT_SYMBOL_GPL(inet_csk_listen_stop); -void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) -{ - struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; - const struct inet_sock *inet = inet_sk(sk); - - sin->sin_family = AF_INET; - sin->sin_addr.s_addr = inet->inet_daddr; - sin->sin_port = inet->inet_dport; -} -EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); - static struct dst_entry *inet_csk_rebuild_route(struct 
sock *sk, struct flowi *fl) { const struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4fa4fbb0ad12..1cd0938d47e0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2477,8 +2477,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .net_header_len = sizeof(struct iphdr), .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, - .addr2sockaddr = inet_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in), .mtu_reduced = tcp_v4_mtu_reduced, }; EXPORT_IPV6_MOD(ipv4_specific); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 80043e46117c..dbcf556a35bb 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -56,20 +56,6 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, } EXPORT_SYMBOL(inet6_csk_route_req); -void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) -{ - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr; - - sin6->sin6_family = AF_INET6; - sin6->sin6_addr = sk->sk_v6_daddr; - sin6->sin6_port = inet_sk(sk)->inet_dport; - /* We do not store received flowlabel for TCP */ - sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, - sk->sk_bound_dev_if); -} -EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); - static inline struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e182ee0a2330..c134cf1a603a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2068,8 +2068,6 @@ const struct inet_connection_sock_af_ops ipv6_specific = { .net_header_len = sizeof(struct ipv6hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = inet6_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in6), .mtu_reduced = tcp_v6_mtu_reduced, }; @@ -2102,8 +2100,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .net_header_len = 
sizeof(struct iphdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, - .addr2sockaddr = inet6_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in6), .mtu_reduced = tcp_v4_mtu_reduced, }; -- 2.51.0