From 3bd9b9abdf1563a22041b7255baea6d449902f1a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 29 Oct 2024 15:58:47 -0600 Subject: [PATCH 01/16] net: ethtool: Avoid thousands of -Wflex-array-member-not-at-end warnings -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. Change the type of the middle struct member currently causing trouble from `struct ethtool_link_settings` to `struct ethtool_link_settings_hdr`. Additionally, update the type of some variables in various functions that don't access the flexible-array member, changing them to the newly created `struct ethtool_link_settings_hdr`. These changes are needed because the type of the conflicting middle members changed. So, those instances that expect the type to be `struct ethtool_link_settings` should be adjusted to the newly created type `struct ethtool_link_settings_hdr`. Also, adjust variable declarations to follow the reverse xmas tree convention. Fix 3338 of the following -Wflex-array-member-not-at-end warnings: include/linux/ethtool.h:214:38: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: Gustavo A. R. 
Silva Link: https://patch.msgid.link/0bc2809fe2a6c11dd4c8a9a10d9bd65cccdb559b.1730238285.git.gustavoars@kernel.org Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 6 +++--- .../net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 4 ++-- .../ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 2 +- drivers/net/ethernet/cisco/enic/enic_ethtool.c | 2 +- .../net/ethernet/qlogic/qede/qede_ethtool.c | 4 ++-- include/linux/ethtool.h | 2 +- net/ethtool/ioctl.c | 2 +- net/ethtool/linkinfo.c | 8 ++++---- net/ethtool/linkmodes.c | 18 +++++++++++------- 9 files changed, 26 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index f71cc8188b4e..e0ebe69110bf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2781,7 +2781,7 @@ u32 bnxt_fw_to_ethtool_speed(u16 fw_link_speed) static void bnxt_get_default_speeds(struct ethtool_link_ksettings *lk_ksettings, struct bnxt_link_info *link_info) { - struct ethtool_link_settings *base = &lk_ksettings->base; + struct ethtool_link_settings_hdr *base = &lk_ksettings->base; if (link_info->link_state == BNXT_LINK_STATE_UP) { base->speed = bnxt_fw_to_ethtool_speed(link_info->link_speed); @@ -2800,7 +2800,7 @@ static void bnxt_get_default_speeds(struct ethtool_link_ksettings *lk_ksettings, static int bnxt_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *lk_ksettings) { - struct ethtool_link_settings *base = &lk_ksettings->base; + struct ethtool_link_settings_hdr *base = &lk_ksettings->base; enum ethtool_link_mode_bit_indices link_mode; struct bnxt *bp = netdev_priv(dev); struct bnxt_link_info *link_info; @@ -3023,9 +3023,9 @@ u16 bnxt_get_fw_auto_link_speeds(const unsigned long *mode) static int bnxt_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *lk_ksettings) { + const struct ethtool_link_settings_hdr *base = 
&lk_ksettings->base; struct bnxt *bp = netdev_priv(dev); struct bnxt_link_info *link_info = &bp->link_info; - const struct ethtool_link_settings *base = &lk_ksettings->base; bool set_pause = false; u32 speed, lanes = 0; int rc = 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 7f3f5afa864f..45d28a65347e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -662,8 +662,8 @@ static unsigned int lmm_to_fw_caps(const unsigned long *link_mode_mask) static int get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings) { + struct ethtool_link_settings_hdr *base = &link_ksettings->base; struct port_info *pi = netdev_priv(dev); - struct ethtool_link_settings *base = &link_ksettings->base; /* For the nonce, the Firmware doesn't send up Port State changes * when the Virtual Interface attached to the Port is down. So @@ -717,9 +717,9 @@ static int get_link_ksettings(struct net_device *dev, static int set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *link_ksettings) { + const struct ethtool_link_settings_hdr *base = &link_ksettings->base; struct port_info *pi = netdev_priv(dev); struct link_config *lc = &pi->link_cfg; - const struct ethtool_link_settings *base = &link_ksettings->base; struct link_config old_lc; unsigned int fw_caps; int ret = 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 2fbe0f059a0b..61d08547e3f9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -1436,8 +1436,8 @@ static void fw_caps_to_lmm(enum fw_port_type port_type, static int cxgb4vf_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings) { + struct ethtool_link_settings_hdr *base = &link_ksettings->base; struct 
port_info *pi = netdev_priv(dev); - struct ethtool_link_settings *base = &link_ksettings->base; /* For the nonce, the Firmware doesn't send up Port State changes * when the Virtual Interface attached to the Port is down. So diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index f7986f2b6a17..4fe85780a950 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -129,8 +129,8 @@ static void enic_intr_coal_set_rx(struct enic *enic, u32 timer) static int enic_get_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *ecmd) { + struct ethtool_link_settings_hdr *base = &ecmd->base; struct enic *enic = netdev_priv(netdev); - struct ethtool_link_settings *base = &ecmd->base; ethtool_link_ksettings_add_link_mode(ecmd, supported, 10000baseT_Full); diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index e50e1df0a433..c553da16d4b1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -504,7 +504,7 @@ static int qede_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { typeof(cmd->link_modes) *link_modes = &cmd->link_modes; - struct ethtool_link_settings *base = &cmd->base; + struct ethtool_link_settings_hdr *base = &cmd->base; struct qede_dev *edev = netdev_priv(dev); struct qed_link_output current_link; @@ -537,7 +537,7 @@ static int qede_get_link_ksettings(struct net_device *dev, static int qede_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { - const struct ethtool_link_settings *base = &cmd->base; + const struct ethtool_link_settings_hdr *base = &cmd->base; const struct ethtool_forced_speed_map *map; struct qede_dev *edev = netdev_priv(dev); struct qed_link_output current_link; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 
12f6dc567598..1199e308c8dd 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -211,7 +211,7 @@ void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id); * fields, but they are allowed to overwrite them (will be ignored). */ struct ethtool_link_ksettings { - struct ethtool_link_settings base; + struct ethtool_link_settings_hdr base; struct { __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 5cc131cdb1bc..7da94e26ced6 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -425,7 +425,7 @@ convert_link_ksettings_to_legacy_settings( /* layout of the struct passed from/to userland */ struct ethtool_link_usettings { - struct ethtool_link_settings base; + struct ethtool_link_settings_hdr base; struct { __u32 supported[__ETHTOOL_LINK_MODE_MASK_NU32]; __u32 advertising[__ETHTOOL_LINK_MODE_MASK_NU32]; diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c index 30b8ce275159..2d5bc57160be 100644 --- a/net/ethtool/linkinfo.c +++ b/net/ethtool/linkinfo.c @@ -8,9 +8,9 @@ struct linkinfo_req_info { }; struct linkinfo_reply_data { - struct ethnl_reply_data base; - struct ethtool_link_ksettings ksettings; - struct ethtool_link_settings *lsettings; + struct ethnl_reply_data base; + struct ethtool_link_ksettings ksettings; + struct ethtool_link_settings_hdr *lsettings; }; #define LINKINFO_REPDATA(__reply_base) \ @@ -98,7 +98,7 @@ static int ethnl_set_linkinfo(struct ethnl_req_info *req_info, struct genl_info *info) { struct ethtool_link_ksettings ksettings = {}; - struct ethtool_link_settings *lsettings; + struct ethtool_link_settings_hdr *lsettings; struct net_device *dev = req_info->dev; struct nlattr **tb = info->attrs; bool mod = false; diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index 259cd9ef1f2a..17e49cf89f03 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -11,10 +11,10 @@ struct 
linkmodes_req_info { }; struct linkmodes_reply_data { - struct ethnl_reply_data base; - struct ethtool_link_ksettings ksettings; - struct ethtool_link_settings *lsettings; - bool peer_empty; + struct ethnl_reply_data base; + struct ethtool_link_ksettings ksettings; + struct ethtool_link_settings_hdr *lsettings; + bool peer_empty; }; #define LINKMODES_REPDATA(__reply_base) \ @@ -62,10 +62,12 @@ static int linkmodes_reply_size(const struct ethnl_req_info *req_base, { const struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base); const struct ethtool_link_ksettings *ksettings = &data->ksettings; - const struct ethtool_link_settings *lsettings = &ksettings->base; bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const struct ethtool_link_settings_hdr *lsettings; int len, ret; + lsettings = &ksettings->base; + len = nla_total_size(sizeof(u8)) /* LINKMODES_AUTONEG */ + nla_total_size(sizeof(u32)) /* LINKMODES_SPEED */ + nla_total_size(sizeof(u32)) /* LINKMODES_LANES */ @@ -103,10 +105,12 @@ static int linkmodes_fill_reply(struct sk_buff *skb, { const struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base); const struct ethtool_link_ksettings *ksettings = &data->ksettings; - const struct ethtool_link_settings *lsettings = &ksettings->base; bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const struct ethtool_link_settings_hdr *lsettings; int ret; + lsettings = &ksettings->base; + if (nla_put_u8(skb, ETHTOOL_A_LINKMODES_AUTONEG, lsettings->autoneg)) return -EMSGSIZE; @@ -237,7 +241,7 @@ static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb, struct ethtool_link_ksettings *ksettings, bool *mod, const struct net_device *dev) { - struct ethtool_link_settings *lsettings = &ksettings->base; + struct ethtool_link_settings_hdr *lsettings = &ksettings->base; bool req_speed, req_lanes, req_duplex; const struct nlattr *master_slave_cfg, *lanes_cfg; int ret; -- 2.51.0 From 
1441df3a37eced275a9c096f766dcab6faee54ee Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Tue, 29 Oct 2024 16:46:41 -0700 Subject: [PATCH 02/16] net: phy: use ethtool string helpers These are the preferred way to copy ethtool strings. Avoids incrementing pointers all over the place. Signed-off-by: Rosen Penev Link: https://patch.msgid.link/20241029234641.11448-1-rosenp@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/adin.c | 6 ++---- drivers/net/phy/icplus.c | 3 +-- drivers/net/phy/marvell.c | 12 ++++-------- drivers/net/phy/micrel.c | 6 ++---- drivers/net/phy/mscc/mscc_main.c | 3 +-- drivers/net/phy/nxp-c45-tja11xx.c | 6 ++---- drivers/net/phy/nxp-cbtx.c | 2 +- drivers/net/phy/qcom/qca83xx.c | 6 ++---- 8 files changed, 15 insertions(+), 29 deletions(-) diff --git a/drivers/net/phy/adin.c b/drivers/net/phy/adin.c index 2e1a46e121d9..a2a862bae2ed 100644 --- a/drivers/net/phy/adin.c +++ b/drivers/net/phy/adin.c @@ -801,10 +801,8 @@ static void adin_get_strings(struct phy_device *phydev, u8 *data) { int i; - for (i = 0; i < ARRAY_SIZE(adin_hw_stats); i++) { - strscpy(&data[i * ETH_GSTRING_LEN], - adin_hw_stats[i].string, ETH_GSTRING_LEN); - } + for (i = 0; i < ARRAY_SIZE(adin_hw_stats); i++) + ethtool_puts(&data, adin_hw_stats[i].string); } static int adin_read_mmd_stat_regs(struct phy_device *phydev, diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c index a00a667454a9..ee438b71a0b4 100644 --- a/drivers/net/phy/icplus.c +++ b/drivers/net/phy/icplus.c @@ -540,8 +540,7 @@ static void ip101g_get_strings(struct phy_device *phydev, u8 *data) int i; for (i = 0; i < ARRAY_SIZE(ip101g_hw_stats); i++) - strscpy(data + i * ETH_GSTRING_LEN, - ip101g_hw_stats[i].name, ETH_GSTRING_LEN); + ethtool_puts(&data, ip101g_hw_stats[i].name); } static u64 ip101g_get_stat(struct phy_device *phydev, int i) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 28aec37acd2c..cd50cd6a7f75 100644 --- a/drivers/net/phy/marvell.c +++ 
b/drivers/net/phy/marvell.c @@ -2020,10 +2020,8 @@ static void marvell_get_strings(struct phy_device *phydev, u8 *data) int count = marvell_get_sset_count(phydev); int i; - for (i = 0; i < count; i++) { - strscpy(data + i * ETH_GSTRING_LEN, - marvell_hw_stats[i].string, ETH_GSTRING_LEN); - } + for (i = 0; i < count; i++) + ethtool_puts(&data, marvell_hw_stats[i].string); } static void marvell_get_strings_simple(struct phy_device *phydev, u8 *data) @@ -2031,10 +2029,8 @@ static void marvell_get_strings_simple(struct phy_device *phydev, u8 *data) int count = marvell_get_sset_count_simple(phydev); int i; - for (i = 0; i < count; i++) { - strscpy(data + i * ETH_GSTRING_LEN, - marvell_hw_stats_simple[i].string, ETH_GSTRING_LEN); - } + for (i = 0; i < count; i++) + ethtool_puts(&data, marvell_hw_stats_simple[i].string); } static u64 marvell_get_stat(struct phy_device *phydev, int i) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 65b0a3115e14..43c82a87bc3a 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2018,10 +2018,8 @@ static void kszphy_get_strings(struct phy_device *phydev, u8 *data) { int i; - for (i = 0; i < ARRAY_SIZE(kszphy_hw_stats); i++) { - strscpy(data + i * ETH_GSTRING_LEN, - kszphy_hw_stats[i].string, ETH_GSTRING_LEN); - } + for (i = 0; i < ARRAY_SIZE(kszphy_hw_stats); i++) + ethtool_puts(&data, kszphy_hw_stats[i].string); } static u64 kszphy_get_stat(struct phy_device *phydev, int i) diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index 6f74ce0ab1aa..bee381200ab8 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -139,8 +139,7 @@ static void vsc85xx_get_strings(struct phy_device *phydev, u8 *data) return; for (i = 0; i < priv->nstats; i++) - strscpy(data + i * ETH_GSTRING_LEN, priv->hw_stats[i].string, - ETH_GSTRING_LEN); + ethtool_puts(&data, priv->hw_stats[i].string); } static u64 vsc85xx_get_stat(struct phy_device *phydev, int i) diff 
--git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 7e328c2a29a4..ade544bc007d 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -1140,13 +1140,11 @@ static void nxp_c45_get_strings(struct phy_device *phydev, u8 *data) for (i = 0; i < count; i++) { if (i < ARRAY_SIZE(common_hw_stats)) { - strscpy(data + i * ETH_GSTRING_LEN, - common_hw_stats[i].name, ETH_GSTRING_LEN); + ethtool_puts(&data, common_hw_stats[i].name); continue; } idx = i - ARRAY_SIZE(common_hw_stats); - strscpy(data + i * ETH_GSTRING_LEN, - phy_data->stats[idx].name, ETH_GSTRING_LEN); + ethtool_puts(&data, phy_data->stats[idx].name); } } diff --git a/drivers/net/phy/nxp-cbtx.c b/drivers/net/phy/nxp-cbtx.c index 145703f0a406..3d25491043a3 100644 --- a/drivers/net/phy/nxp-cbtx.c +++ b/drivers/net/phy/nxp-cbtx.c @@ -182,7 +182,7 @@ static int cbtx_get_sset_count(struct phy_device *phydev) static void cbtx_get_strings(struct phy_device *phydev, u8 *data) { - strncpy(data, "100btx_rx_err", ETH_GSTRING_LEN); + ethtool_puts(&data, "100btx_rx_err"); } static void cbtx_get_stats(struct phy_device *phydev, diff --git a/drivers/net/phy/qcom/qca83xx.c b/drivers/net/phy/qcom/qca83xx.c index a05d0df6fa16..7a5039920b9f 100644 --- a/drivers/net/phy/qcom/qca83xx.c +++ b/drivers/net/phy/qcom/qca83xx.c @@ -42,10 +42,8 @@ static void qca83xx_get_strings(struct phy_device *phydev, u8 *data) { int i; - for (i = 0; i < ARRAY_SIZE(qca83xx_hw_stats); i++) { - strscpy(data + i * ETH_GSTRING_LEN, - qca83xx_hw_stats[i].string, ETH_GSTRING_LEN); - } + for (i = 0; i < ARRAY_SIZE(qca83xx_hw_stats); i++) + ethtool_puts(&data, qca83xx_hw_stats[i].string); } static u64 qca83xx_get_stat(struct phy_device *phydev, int i) -- 2.51.0 From 9b4b2e02c1e19e6a983cf1b3f082315239d38cb0 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Tue, 29 Oct 2024 16:32:29 -0700 Subject: [PATCH 03/16] net: bnxt: use ethtool string helpers Avoids having to use manual pointer manipulation. 
Signed-off-by: Rosen Penev Reviewed-by: Michael Chan Link: https://patch.msgid.link/20241029233229.9385-1-rosenp@gmail.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 115 ++++++++---------- 1 file changed, 54 insertions(+), 61 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index e0ebe69110bf..6ef06579df53 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -705,112 +705,105 @@ skip_ring_stats: static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf) { struct bnxt *bp = netdev_priv(dev); - static const char * const *str; u32 i, j, num_str; + const char *str; switch (stringset) { case ETH_SS_STATS: for (i = 0; i < bp->cp_nr_rings; i++) { - if (is_rx_ring(bp, i)) { - num_str = NUM_RING_RX_HW_STATS; - for (j = 0; j < num_str; j++) { - sprintf(buf, "[%d]: %s", i, - bnxt_ring_rx_stats_str[j]); - buf += ETH_GSTRING_LEN; + if (is_rx_ring(bp, i)) + for (j = 0; j < NUM_RING_RX_HW_STATS; j++) { + str = bnxt_ring_rx_stats_str[j]; + ethtool_sprintf(&buf, "[%d]: %s", i, + str); } - } - if (is_tx_ring(bp, i)) { - num_str = NUM_RING_TX_HW_STATS; - for (j = 0; j < num_str; j++) { - sprintf(buf, "[%d]: %s", i, - bnxt_ring_tx_stats_str[j]); - buf += ETH_GSTRING_LEN; + if (is_tx_ring(bp, i)) + for (j = 0; j < NUM_RING_TX_HW_STATS; j++) { + str = bnxt_ring_tx_stats_str[j]; + ethtool_sprintf(&buf, "[%d]: %s", i, + str); } - } num_str = bnxt_get_num_tpa_ring_stats(bp); if (!num_str || !is_rx_ring(bp, i)) goto skip_tpa_stats; if (bp->max_tpa_v2) - str = bnxt_ring_tpa2_stats_str; + for (j = 0; j < num_str; j++) { + str = bnxt_ring_tpa2_stats_str[j]; + ethtool_sprintf(&buf, "[%d]: %s", i, + str); + } else - str = bnxt_ring_tpa_stats_str; - - for (j = 0; j < num_str; j++) { - sprintf(buf, "[%d]: %s", i, str[j]); - buf += ETH_GSTRING_LEN; - } -skip_tpa_stats: - if (is_rx_ring(bp, i)) { - 
num_str = NUM_RING_RX_SW_STATS; for (j = 0; j < num_str; j++) { - sprintf(buf, "[%d]: %s", i, - bnxt_rx_sw_stats_str[j]); - buf += ETH_GSTRING_LEN; + str = bnxt_ring_tpa_stats_str[j]; + ethtool_sprintf(&buf, "[%d]: %s", i, + str); } +skip_tpa_stats: + if (is_rx_ring(bp, i)) + for (j = 0; j < NUM_RING_RX_SW_STATS; j++) { + str = bnxt_rx_sw_stats_str[j]; + ethtool_sprintf(&buf, "[%d]: %s", i, + str); + } + for (j = 0; j < NUM_RING_CMN_SW_STATS; j++) { + str = bnxt_cmn_sw_stats_str[j]; + ethtool_sprintf(&buf, "[%d]: %s", i, str); } - num_str = NUM_RING_CMN_SW_STATS; - for (j = 0; j < num_str; j++) { - sprintf(buf, "[%d]: %s", i, - bnxt_cmn_sw_stats_str[j]); - buf += ETH_GSTRING_LEN; - } - } - for (i = 0; i < BNXT_NUM_RING_ERR_STATS; i++) { - strscpy(buf, bnxt_ring_err_stats_arr[i], ETH_GSTRING_LEN); - buf += ETH_GSTRING_LEN; } + for (i = 0; i < BNXT_NUM_RING_ERR_STATS; i++) + ethtool_puts(&buf, bnxt_ring_err_stats_arr[i]); - if (bp->flags & BNXT_FLAG_PORT_STATS) { + if (bp->flags & BNXT_FLAG_PORT_STATS) for (i = 0; i < BNXT_NUM_PORT_STATS; i++) { - strcpy(buf, bnxt_port_stats_arr[i].string); - buf += ETH_GSTRING_LEN; + str = bnxt_port_stats_arr[i].string; + ethtool_puts(&buf, str); } - } + if (bp->flags & BNXT_FLAG_PORT_STATS_EXT) { u32 len; len = min_t(u32, bp->fw_rx_stats_ext_size, ARRAY_SIZE(bnxt_port_stats_ext_arr)); for (i = 0; i < len; i++) { - strcpy(buf, bnxt_port_stats_ext_arr[i].string); - buf += ETH_GSTRING_LEN; + str = bnxt_port_stats_ext_arr[i].string; + ethtool_puts(&buf, str); } + len = min_t(u32, bp->fw_tx_stats_ext_size, ARRAY_SIZE(bnxt_tx_port_stats_ext_arr)); for (i = 0; i < len; i++) { - strcpy(buf, - bnxt_tx_port_stats_ext_arr[i].string); - buf += ETH_GSTRING_LEN; + str = bnxt_tx_port_stats_ext_arr[i].string; + ethtool_puts(&buf, str); } + if (bp->pri2cos_valid) { for (i = 0; i < 8; i++) { - strcpy(buf, - bnxt_rx_bytes_pri_arr[i].string); - buf += ETH_GSTRING_LEN; + str = bnxt_rx_bytes_pri_arr[i].string; + ethtool_puts(&buf, str); } + for (i = 0; 
i < 8; i++) { - strcpy(buf, - bnxt_rx_pkts_pri_arr[i].string); - buf += ETH_GSTRING_LEN; + str = bnxt_rx_pkts_pri_arr[i].string; + ethtool_puts(&buf, str); } + for (i = 0; i < 8; i++) { - strcpy(buf, - bnxt_tx_bytes_pri_arr[i].string); - buf += ETH_GSTRING_LEN; + str = bnxt_tx_bytes_pri_arr[i].string; + ethtool_puts(&buf, str); } + for (i = 0; i < 8; i++) { - strcpy(buf, - bnxt_tx_pkts_pri_arr[i].string); - buf += ETH_GSTRING_LEN; + str = bnxt_tx_pkts_pri_arr[i].string; + ethtool_puts(&buf, str); } } } break; case ETH_SS_TEST: if (bp->num_tests) - memcpy(buf, bp->test_info->string, - bp->num_tests * ETH_GSTRING_LEN); + for (i = 0; i < bp->num_tests; i++) + ethtool_puts(&buf, bp->test_info->string[i]); break; default: netdev_err(bp->dev, "bnxt_get_strings invalid request %x\n", -- 2.51.0 From 3affa310de523d63e52ea8e2efb3c476df29e414 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 29 Oct 2024 13:17:09 +0100 Subject: [PATCH 04/16] net: airoha: Read completion queue data in airoha_qdma_tx_napi_poll() In order to avoid any possible race, read completion queue head and pending entry in airoha_qdma_tx_napi_poll routine instead of doing it in airoha_irq_handler. Remove unused airoha_tx_irq_queue fields. This is a preliminary patch to add Qdisc offload for airoha_eth driver.
Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20241029-airoha-en7581-tx-napi-work-v1-1-96ad1686b946@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/airoha_eth.c | 31 +++++++++------------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mediatek/airoha_eth.c b/drivers/net/ethernet/mediatek/airoha_eth.c index f463a505f5ba..6cd8901ed38f 100644 --- a/drivers/net/ethernet/mediatek/airoha_eth.c +++ b/drivers/net/ethernet/mediatek/airoha_eth.c @@ -752,11 +752,9 @@ struct airoha_tx_irq_queue { struct airoha_qdma *qdma; struct napi_struct napi; - u32 *q; int size; - int queued; - u16 head; + u32 *q; }; struct airoha_hw_stats { @@ -1656,25 +1654,31 @@ static int airoha_qdma_init_rx(struct airoha_qdma *qdma) static int airoha_qdma_tx_napi_poll(struct napi_struct *napi, int budget) { struct airoha_tx_irq_queue *irq_q; + int id, done = 0, irq_queued; struct airoha_qdma *qdma; struct airoha_eth *eth; - int id, done = 0; + u32 status, head; irq_q = container_of(napi, struct airoha_tx_irq_queue, napi); qdma = irq_q->qdma; id = irq_q - &qdma->q_tx_irq[0]; eth = qdma->eth; - while (irq_q->queued > 0 && done < budget) { - u32 qid, last, val = irq_q->q[irq_q->head]; + status = airoha_qdma_rr(qdma, REG_IRQ_STATUS(id)); + head = FIELD_GET(IRQ_HEAD_IDX_MASK, status); + head = head % irq_q->size; + irq_queued = FIELD_GET(IRQ_ENTRY_LEN_MASK, status); + + while (irq_queued > 0 && done < budget) { + u32 qid, last, val = irq_q->q[head]; struct airoha_queue *q; if (val == 0xff) break; - irq_q->q[irq_q->head] = 0xff; /* mark as done */ - irq_q->head = (irq_q->head + 1) % irq_q->size; - irq_q->queued--; + irq_q->q[head] = 0xff; /* mark as done */ + head = (head + 1) % irq_q->size; + irq_queued--; done++; last = FIELD_GET(IRQ_DESC_IDX_MASK, val); @@ -2026,20 +2030,11 @@ static irqreturn_t airoha_irq_handler(int irq, void *dev_instance) if (intr[0] & INT_TX_MASK) { for (i = 0; i < ARRAY_SIZE(qdma->q_tx_irq); i++) { - 
struct airoha_tx_irq_queue *irq_q = &qdma->q_tx_irq[i]; - u32 status, head; - if (!(intr[0] & TX_DONE_INT_MASK(i))) continue; airoha_qdma_irq_disable(qdma, QDMA_INT_REG_IDX0, TX_DONE_INT_MASK(i)); - - status = airoha_qdma_rr(qdma, REG_IRQ_STATUS(i)); - head = FIELD_GET(IRQ_HEAD_IDX_MASK, status); - irq_q->head = head % irq_q->size; - irq_q->queued = FIELD_GET(IRQ_ENTRY_LEN_MASK, status); - napi_schedule(&qdma->q_tx_irq[i].napi); } } -- 2.51.0 From 0c729f53b8c33b9e5eadc2d5e673759e3510501e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 29 Oct 2024 13:17:10 +0100 Subject: [PATCH 05/16] net: airoha: Simplify Tx napi logic Simplify Tx napi logic relying just on the packet index provided by completion queue indicating the completed packet that can be removed from the Tx DMA ring. This is a preliminary patch to add Qdisc offload for airoha_eth driver. Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20241029-airoha-en7581-tx-napi-work-v1-2-96ad1686b946@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/airoha_eth.c | 73 ++++++++++++---------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/mediatek/airoha_eth.c b/drivers/net/ethernet/mediatek/airoha_eth.c index 6cd8901ed38f..6c683a12d5aa 100644 --- a/drivers/net/ethernet/mediatek/airoha_eth.c +++ b/drivers/net/ethernet/mediatek/airoha_eth.c @@ -1670,8 +1670,12 @@ static int airoha_qdma_tx_napi_poll(struct napi_struct *napi, int budget) irq_queued = FIELD_GET(IRQ_ENTRY_LEN_MASK, status); while (irq_queued > 0 && done < budget) { - u32 qid, last, val = irq_q->q[head]; + u32 qid, val = irq_q->q[head]; + struct airoha_qdma_desc *desc; + struct airoha_queue_entry *e; struct airoha_queue *q; + u32 index, desc_ctrl; + struct sk_buff *skb; if (val == 0xff) break; @@ -1681,9 +1685,7 @@ static int airoha_qdma_tx_napi_poll(struct napi_struct *napi, int budget) irq_queued--; done++; - last = FIELD_GET(IRQ_DESC_IDX_MASK, val); qid = 
FIELD_GET(IRQ_RING_IDX_MASK, val); - if (qid >= ARRAY_SIZE(qdma->q_tx)) continue; @@ -1691,46 +1693,53 @@ static int airoha_qdma_tx_napi_poll(struct napi_struct *napi, int budget) if (!q->ndesc) continue; + index = FIELD_GET(IRQ_DESC_IDX_MASK, val); + if (index >= q->ndesc) + continue; + spin_lock_bh(&q->lock); - while (q->queued > 0) { - struct airoha_qdma_desc *desc = &q->desc[q->tail]; - struct airoha_queue_entry *e = &q->entry[q->tail]; - u32 desc_ctrl = le32_to_cpu(desc->ctrl); - struct sk_buff *skb = e->skb; - u16 index = q->tail; + if (!q->queued) + goto unlock; - if (!(desc_ctrl & QDMA_DESC_DONE_MASK) && - !(desc_ctrl & QDMA_DESC_DROP_MASK)) - break; + desc = &q->desc[index]; + desc_ctrl = le32_to_cpu(desc->ctrl); - q->tail = (q->tail + 1) % q->ndesc; - q->queued--; + if (!(desc_ctrl & QDMA_DESC_DONE_MASK) && + !(desc_ctrl & QDMA_DESC_DROP_MASK)) + goto unlock; - dma_unmap_single(eth->dev, e->dma_addr, e->dma_len, - DMA_TO_DEVICE); + e = &q->entry[index]; + skb = e->skb; - WRITE_ONCE(desc->msg0, 0); - WRITE_ONCE(desc->msg1, 0); + dma_unmap_single(eth->dev, e->dma_addr, e->dma_len, + DMA_TO_DEVICE); + memset(e, 0, sizeof(*e)); + WRITE_ONCE(desc->msg0, 0); + WRITE_ONCE(desc->msg1, 0); + q->queued--; - if (skb) { - u16 queue = skb_get_queue_mapping(skb); - struct netdev_queue *txq; + /* completion ring can report out-of-order indexes if hw QoS + * is enabled and packets with different priority are queued + * to same DMA ring. 
Take into account possible out-of-order + * reports incrementing DMA ring tail pointer + */ + while (q->tail != q->head && !q->entry[q->tail].dma_addr) + q->tail = (q->tail + 1) % q->ndesc; - txq = netdev_get_tx_queue(skb->dev, queue); - netdev_tx_completed_queue(txq, 1, skb->len); - if (netif_tx_queue_stopped(txq) && - q->ndesc - q->queued >= q->free_thr) - netif_tx_wake_queue(txq); + if (skb) { + u16 queue = skb_get_queue_mapping(skb); + struct netdev_queue *txq; - dev_kfree_skb_any(skb); - e->skb = NULL; - } + txq = netdev_get_tx_queue(skb->dev, queue); + netdev_tx_completed_queue(txq, 1, skb->len); + if (netif_tx_queue_stopped(txq) && + q->ndesc - q->queued >= q->free_thr) + netif_tx_wake_queue(txq); - if (index == last) - break; + dev_kfree_skb_any(skb); } - +unlock: spin_unlock_bh(&q->lock); } -- 2.51.0 From d051cd72dcb769c842494b1dbe29067aba45474f Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 29 Oct 2024 02:00:28 -0700 Subject: [PATCH 06/16] net: netconsole: selftests: Change the IP subnet Use a less populated IP range to run the tests, as suggested by Petr in Link: https://lore.kernel.org/netdev/87ikvukv3s.fsf@nvidia.com/. 
Suggested-by: Petr Machata Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20241029090030.1793551-2-leitao@debian.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/netcons_basic.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index 06021b2059b7..8d28e5189e91 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -20,9 +20,9 @@ SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") # Simple script to test dynamic targets in netconsole SRCIF="" # to be populated later -SRCIP=192.168.1.1 +SRCIP=192.0.2.1 DSTIF="" # to be populated later -DSTIP=192.168.1.2 +DSTIP=192.0.2.2 PORT="6666" MSG="netconsole selftest" -- 2.51.0 From afa4ceb0fb648655c9f04921ccc801feb034109c Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 29 Oct 2024 02:00:29 -0700 Subject: [PATCH 07/16] net: netconsole: selftests: Add userdata validation Extend netcons_basic selftest to verify the userdata functionality by: 1. Creating a test key in the userdata configfs directory 2. Writing a known value to the key 3. Validating the key-value pair appears in the captured network output This ensures the userdata feature is properly tested during selftests. 
Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20241029090030.1793551-3-leitao@debian.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/netcons_basic.sh | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index 8d28e5189e91..182eb1a97e59 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -26,10 +26,13 @@ DSTIP=192.0.2.2 PORT="6666" MSG="netconsole selftest" +USERDATA_KEY="key" +USERDATA_VALUE="value" TARGET=$(mktemp -u netcons_XXXXX) DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk) NETCONS_CONFIGFS="/sys/kernel/config/netconsole" NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" +KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}" # NAMESPACE will be populated by setup_ns with a random value NAMESPACE="" @@ -122,6 +125,8 @@ function cleanup() { # delete netconsole dynamic reconfiguration echo 0 > "${NETCONS_PATH}"/enabled + # Remove key + rmdir "${KEY_PATH}" # Remove the configfs entry rmdir "${NETCONS_PATH}" @@ -136,6 +141,18 @@ function cleanup() { echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk } +function set_user_data() { + if [[ ! -d "${NETCONS_PATH}""/userdata" ]] + then + echo "Userdata path not available in ${NETCONS_PATH}/userdata" + exit "${ksft_skip}" + fi + + mkdir -p "${KEY_PATH}" + VALUE_PATH="${KEY_PATH}""/value" + echo "${USERDATA_VALUE}" > "${VALUE_PATH}" +} + function listen_port_and_save_to() { local OUTPUT=${1} # Just wait for 2 seconds @@ -146,6 +163,10 @@ function listen_port_and_save_to() { function validate_result() { local TMPFILENAME="$1" + # TMPFILENAME will contain something like: + # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM + # key=value + # Check if the file exists if [ ! -f "$TMPFILENAME" ]; then echo "FAIL: File was not generated." 
>&2 @@ -158,6 +179,12 @@ function validate_result() { exit "${ksft_fail}" fi + if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then + echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi + # Delete the file once it is validated, otherwise keep it # for debugging purposes rm "${TMPFILENAME}" @@ -220,6 +247,8 @@ trap cleanup EXIT set_network # Create a dynamic target for netconsole create_dynamic_target +# Set userdata "key" with the "value" value +set_user_data # Listed for netconsole port inside the namespace and destination interface listen_port_and_save_to "${OUTPUT_FILE}" & # Wait for socat to start and listen to the port. -- 2.51.0 From 6b2d11e2d8fc130df4708be0b6b53fd3e6b54cf6 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Wed, 30 Oct 2024 04:22:33 +0000 Subject: [PATCH 08/16] net/tcp: Add missing lockdep annotations for TCP-AO hlist traversals Under CONFIG_PROVE_RCU_LIST + CONFIG_RCU_EXPERT hlist_for_each_entry_rcu() provides very helpful splats, which help to find possible issues. I missed CONFIG_RCU_EXPERT=y in my testing config the same as described in a3e4bf7f9675 ("configs/debug: make sure PROVE_RCU_LIST=y takes effect"). The fix itself is trivial: add the very same lockdep annotations as were used to dereference ao_info from the socket. 
Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20241028152645.35a8be66@kernel.org/ Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://patch.msgid.link/20241030-tcp-ao-hlist-lockdep-annotate-v1-1-bf641a64d7c6@gmail.com Signed-off-by: Jakub Kicinski --- include/net/tcp_ao.h | 3 ++- net/ipv4/tcp_ao.c | 42 +++++++++++++++++++++++------------------- net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 4 ++-- 4 files changed, 29 insertions(+), 23 deletions(-) diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 1d46460d0fef..df655ce6987d 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -183,7 +183,8 @@ int tcp_ao_hash_skb(unsigned short int family, const u8 *tkey, int hash_offset, u32 sne); int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, sockptr_t optval, int optlen); -struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, +struct tcp_ao_key *tcp_ao_established_key(const struct sock *sk, + struct tcp_ao_info *ao, int sndid, int rcvid); int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk, struct request_sock *req, struct sk_buff *skb, diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index db6516092daf..bbb8d5f0eae7 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -109,12 +109,13 @@ bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code) * it's known that the keys in ao_info are matching peer's * family/address/VRF/etc. 
*/ -struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, +struct tcp_ao_key *tcp_ao_established_key(const struct sock *sk, + struct tcp_ao_info *ao, int sndid, int rcvid) { struct tcp_ao_key *key; - hlist_for_each_entry_rcu(key, &ao->head, node) { + hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) { if ((sndid >= 0 && key->sndid != sndid) || (rcvid >= 0 && key->rcvid != rcvid)) continue; @@ -205,7 +206,7 @@ static struct tcp_ao_key *__tcp_ao_do_lookup(const struct sock *sk, int l3index, if (!ao) return NULL; - hlist_for_each_entry_rcu(key, &ao->head, node) { + hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) { u8 prefixlen = min(prefix, key->prefixlen); if (!tcp_ao_key_cmp(key, l3index, addr, prefixlen, @@ -793,7 +794,7 @@ int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, if (!ao_info) return -ENOENT; - *key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); + *key = tcp_ao_established_key(sk, ao_info, aoh->rnext_keyid, -1); if (!*key) return -ENOENT; *traffic_key = snd_other_key(*key); @@ -979,7 +980,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, */ key = READ_ONCE(info->rnext_key); if (key->rcvid != aoh->keyid) { - key = tcp_ao_established_key(info, -1, aoh->keyid); + key = tcp_ao_established_key(sk, info, -1, aoh->keyid); if (!key) goto key_not_found; } @@ -1003,7 +1004,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, aoh->rnext_keyid, tcp_ao_hdr_maclen(aoh)); /* If the key is not found we do nothing. 
*/ - key = tcp_ao_established_key(info, aoh->rnext_keyid, -1); + key = tcp_ao_established_key(sk, info, aoh->rnext_keyid, -1); if (key) /* pairs with tcp_ao_del_cmd */ WRITE_ONCE(info->current_key, key); @@ -1163,7 +1164,7 @@ void tcp_ao_established(struct sock *sk) if (!ao) return; - hlist_for_each_entry_rcu(key, &ao->head, node) + hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) tcp_ao_cache_traffic_keys(sk, ao, key); } @@ -1180,7 +1181,7 @@ void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb) WRITE_ONCE(ao->risn, tcp_hdr(skb)->seq); ao->rcv_sne = 0; - hlist_for_each_entry_rcu(key, &ao->head, node) + hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) tcp_ao_cache_traffic_keys(sk, ao, key); } @@ -1256,14 +1257,14 @@ int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk, key_head = rcu_dereference(hlist_first_rcu(&new_ao->head)); first_key = hlist_entry_safe(key_head, struct tcp_ao_key, node); - key = tcp_ao_established_key(new_ao, tcp_rsk(req)->ao_keyid, -1); + key = tcp_ao_established_key(req_to_sk(req), new_ao, tcp_rsk(req)->ao_keyid, -1); if (key) new_ao->current_key = key; else new_ao->current_key = first_key; /* set rnext_key */ - key = tcp_ao_established_key(new_ao, -1, tcp_rsk(req)->ao_rcv_next); + key = tcp_ao_established_key(req_to_sk(req), new_ao, -1, tcp_rsk(req)->ao_rcv_next); if (key) new_ao->rnext_key = key; else @@ -1857,12 +1858,12 @@ static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family, * if there's any. 
*/ if (cmd.set_current) { - new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1); + new_current = tcp_ao_established_key(sk, ao_info, cmd.current_key, -1); if (!new_current) return -ENOENT; } if (cmd.set_rnext) { - new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext); + new_rnext = tcp_ao_established_key(sk, ao_info, -1, cmd.rnext); if (!new_rnext) return -ENOENT; } @@ -1902,7 +1903,8 @@ static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family, * "It is presumed that an MKT affecting a particular * connection cannot be destroyed during an active connection" */ - hlist_for_each_entry_rcu(key, &ao_info->head, node) { + hlist_for_each_entry_rcu(key, &ao_info->head, node, + lockdep_sock_is_held(sk)) { if (cmd.sndid != key->sndid || cmd.rcvid != key->rcvid) continue; @@ -2000,14 +2002,14 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, * if there's any. */ if (cmd.set_current) { - new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1); + new_current = tcp_ao_established_key(sk, ao_info, cmd.current_key, -1); if (!new_current) { err = -ENOENT; goto out; } } if (cmd.set_rnext) { - new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext); + new_rnext = tcp_ao_established_key(sk, ao_info, -1, cmd.rnext); if (!new_rnext) { err = -ENOENT; goto out; @@ -2101,7 +2103,8 @@ int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen) * The layout of the fields in the user and kernel structures is expected to * be the same (including in the 32bit vs 64bit case). 
*/ -static int tcp_ao_copy_mkts_to_user(struct tcp_ao_info *ao_info, +static int tcp_ao_copy_mkts_to_user(const struct sock *sk, + struct tcp_ao_info *ao_info, sockptr_t optval, sockptr_t optlen) { struct tcp_ao_getsockopt opt_in, opt_out; @@ -2229,7 +2232,8 @@ static int tcp_ao_copy_mkts_to_user(struct tcp_ao_info *ao_info, /* May change in RX, while we're dumping, pre-fetch it */ current_key = READ_ONCE(ao_info->current_key); - hlist_for_each_entry_rcu(key, &ao_info->head, node) { + hlist_for_each_entry_rcu(key, &ao_info->head, node, + lockdep_sock_is_held(sk)) { if (opt_in.get_all) goto match; @@ -2309,7 +2313,7 @@ int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen) if (!ao_info) return -ENOENT; - return tcp_ao_copy_mkts_to_user(ao_info, optval, optlen); + return tcp_ao_copy_mkts_to_user(sk, ao_info, optval, optlen); } int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen) @@ -2396,7 +2400,7 @@ int tcp_ao_set_repair(struct sock *sk, sockptr_t optval, unsigned int optlen) WRITE_ONCE(ao->snd_sne, cmd.snd_sne); WRITE_ONCE(ao->rcv_sne, cmd.rcv_sne); - hlist_for_each_entry_rcu(key, &ao->head, node) + hlist_for_each_entry_rcu(key, &ao->head, node, lockdep_sock_is_held(sk)) tcp_ao_cache_traffic_keys(sk, ao, key); return 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9d3dd101ea71..a38c8b1f44db 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1053,7 +1053,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) } if (aoh) - key.ao_key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); + key.ao_key = tcp_ao_established_key(sk, ao_info, + aoh->rnext_keyid, -1); } } if (key.ao_key) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 597920061a3a..c748eeae1453 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1172,8 +1172,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) goto 
out; if (aoh) - key.ao_key = tcp_ao_established_key(ao_info, - aoh->rnext_keyid, -1); + key.ao_key = tcp_ao_established_key(sk, ao_info, + aoh->rnext_keyid, -1); } } if (key.ao_key) { -- 2.51.0 From a865276872ec4f129f8a582634be82dcc275dc2a Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Wed, 30 Oct 2024 18:23:25 -0600 Subject: [PATCH 09/16] dim: make dim_calc_stats() inputs const pointers Make the start and end arguments to dim_calc_stats() const pointers to clarify that the function does not modify their values. Signed-off-by: Caleb Sander Mateos Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Reviewed-by: Arthur Kiyanovski Link: https://patch.msgid.link/20241031002326.3426181-1-csander@purestorage.com Signed-off-by: Jakub Kicinski --- include/linux/dim.h | 3 ++- lib/dim/dim.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/dim.h b/include/linux/dim.h index 1b581ff25a15..84579a50ae7f 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -351,7 +351,8 @@ void dim_park_tired(struct dim *dim); * Takes into consideration counter wrap-around. * Returned boolean indicates whether curr_stats are reliable. 
*/ -bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end, +bool dim_calc_stats(const struct dim_sample *start, + const struct dim_sample *end, struct dim_stats *curr_stats); /** diff --git a/lib/dim/dim.c b/lib/dim/dim.c index 83b65ac74d73..97c3d084ebf0 100644 --- a/lib/dim/dim.c +++ b/lib/dim/dim.c @@ -54,7 +54,8 @@ void dim_park_tired(struct dim *dim) } EXPORT_SYMBOL(dim_park_tired); -bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end, +bool dim_calc_stats(const struct dim_sample *start, + const struct dim_sample *end, struct dim_stats *curr_stats) { /* u32 holds up to 71 minutes, should be enough */ -- 2.51.0 From 61bf0009a7657d394d942c8ee961b9ea5f2168fe Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Wed, 30 Oct 2024 18:23:26 -0600 Subject: [PATCH 10/16] dim: pass dim_sample to net_dim() by reference net_dim() is currently passed a struct dim_sample argument by value. struct dim_sample is 24 bytes. Since this is greater than 16 bytes, x86-64 passes it on the stack. All callers have already initialized dim_sample on the stack, so passing it by value requires pushing a duplicated copy to the stack. Either writing to the stack and immediately reading it, or perhaps dereferencing addresses relative to the stack pointer in a chain of push instructions, seems to perform quite poorly.
In a heavy TCP workload, mlx5e_handle_rx_dim() consumes 3% of CPU time, 94% of which is attributed to the first push instruction to copy dim_sample on the stack for the call to net_dim(): // Call ktime_get() 0.26 |4ead2: call 4ead7 // Pass the address of struct dim in %rdi |4ead7: lea 0x3d0(%rbx),%rdi // Set dim_sample.pkt_ctr |4eade: mov %r13d,0x8(%rsp) // Set dim_sample.byte_ctr |4eae3: mov %r12d,0xc(%rsp) // Set dim_sample.event_ctr 0.15 |4eae8: mov %bp,0x10(%rsp) // Duplicate dim_sample on the stack 94.16 |4eaed: push 0x10(%rsp) 2.79 |4eaf1: push 0x10(%rsp) 0.07 |4eaf5: push %rax // Call net_dim() 0.21 |4eaf6: call 4eafb To allow the caller to reuse the struct dim_sample already on the stack, pass the struct dim_sample by reference to net_dim(). Signed-off-by: Caleb Sander Mateos Reviewed-by: Vladimir Oltean Reviewed-by: Shannon Nelson Reviewed-by: Florian Fainelli Reviewed-by: Arthur Kiyanovski Reviewed-by: Louis Peens Link: https://patch.msgid.link/20241031002326.3426181-2-csander@purestorage.com Signed-off-by: Jakub Kicinski --- Documentation/networking/net_dim.rst | 2 +- drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +- drivers/net/ethernet/broadcom/bcmsysport.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 +- drivers/net/ethernet/freescale/enetc/enetc.c | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_txrx.c | 4 ++-- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 4 ++-- drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 2 +- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 4 ++-- drivers/net/ethernet/netronome/nfp/nfd3/dp.c | 4 ++-- drivers/net/ethernet/netronome/nfp/nfdk/dp.c | 4 ++-- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 2 +- drivers/net/virtio_net.c | 2 +- drivers/soc/fsl/dpio/dpio-service.c | 2 +- include/linux/dim.h | 2 +- lib/dim/net_dim.c | 10 +++++----- 19 files 
changed, 31 insertions(+), 31 deletions(-) diff --git a/Documentation/networking/net_dim.rst b/Documentation/networking/net_dim.rst index 8908fd7b0a8d..4377998e6826 100644 --- a/Documentation/networking/net_dim.rst +++ b/Documentation/networking/net_dim.rst @@ -156,7 +156,7 @@ usage is not complete but it should make the outline of the usage clear. my_entity->bytes, &dim_sample); /* Call net DIM */ - net_dim(&my_entity->dim, dim_sample); + net_dim(&my_entity->dim, &dim_sample); ... } diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 96df20854eb9..63c8a2328142 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1383,7 +1383,7 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi) rx_ring->rx_stats.bytes, &dim_sample); - net_dim(&ena_napi->dim, dim_sample); + net_dim(&ena_napi->dim, &dim_sample); rx_ring->per_napi_packets = 0; } diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index caff6e87a488..031e9e0cca53 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1029,7 +1029,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget) if (priv->dim.use_dim) { dim_update_sample(priv->dim.event_ctr, priv->dim.packets, priv->dim.bytes, &dim_sample); - net_dim(&priv->dim.dim, dim_sample); + net_dim(&priv->dim.dim, &dim_sample); } return work_done; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6dd6541d8619..ca42b81133d7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3102,7 +3102,7 @@ static int bnxt_poll(struct napi_struct *napi, int budget) cpr->rx_packets, cpr->rx_bytes, &dim_sample); - net_dim(&cpr->dim, dim_sample); + net_dim(&cpr->dim, &dim_sample); } return work_done; } @@ -3233,7 +3233,7 
@@ poll_done: cpr_rx->rx_packets, cpr_rx->rx_bytes, &dim_sample); - net_dim(&cpr->dim, dim_sample); + net_dim(&cpr->dim, &dim_sample); } return work_done; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 10966ab15373..53a949eb9180 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2405,7 +2405,7 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget) if (ring->dim.use_dim) { dim_update_sample(ring->dim.event_ctr, ring->dim.packets, ring->dim.bytes, &dim_sample); - net_dim(&ring->dim.dim, dim_sample); + net_dim(&ring->dim.dim, &dim_sample); } return work_done; diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index c09370eab319..05dedea6185a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -718,7 +718,7 @@ static void enetc_rx_net_dim(struct enetc_int_vector *v) v->rx_ring.stats.packets, v->rx_ring.stats.bytes, &dim_sample); - net_dim(&v->rx_dim, dim_sample); + net_dim(&v->rx_dim, &dim_sample); } static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index b09f0cca34dc..fbfd3ee5648f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -4478,7 +4478,7 @@ static void hns3_update_rx_int_coalesce(struct hns3_enet_tqp_vector *tqp_vector) dim_update_sample(tqp_vector->event_cnt, rx_group->total_packets, rx_group->total_bytes, &sample); - net_dim(&rx_group->dim, sample); + net_dim(&rx_group->dim, &sample); } static void hns3_update_tx_int_coalesce(struct hns3_enet_tqp_vector *tqp_vector) @@ -4491,7 +4491,7 @@ static void hns3_update_tx_int_coalesce(struct hns3_enet_tqp_vector *tqp_vector) dim_update_sample(tqp_vector->event_cnt, 
tx_group->total_packets, tx_group->total_bytes, &sample); - net_dim(&tx_group->dim, sample); + net_dim(&tx_group->dim, &sample); } static int hns3_nic_common_poll(struct napi_struct *napi, int budget) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 8208055d6e7f..5d2d7736fd5f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1352,14 +1352,14 @@ static void ice_net_dim(struct ice_q_vector *q_vector) struct dim_sample dim_sample; __ice_update_sample(q_vector, tx, &dim_sample, true); - net_dim(&tx->dim, dim_sample); + net_dim(&tx->dim, &dim_sample); } if (ITR_IS_DYNAMIC(rx)) { struct dim_sample dim_sample; __ice_update_sample(q_vector, rx, &dim_sample, false); - net_dim(&rx->dim, dim_sample); + net_dim(&rx->dim, &dim_sample); } } diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index d4e6f0e10487..da2a5becf62f 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3679,7 +3679,7 @@ static void idpf_net_dim(struct idpf_q_vector *q_vector) idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->tx_dim, packets, bytes); - net_dim(&q_vector->tx_dim, dim_sample); + net_dim(&q_vector->tx_dim, &dim_sample); check_rx_itr: if (!IDPF_ITR_IS_DYNAMIC(q_vector->rx_intr_mode)) @@ -3698,7 +3698,7 @@ check_rx_itr: idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->rx_dim, packets, bytes); - net_dim(&q_vector->rx_dim, dim_sample); + net_dim(&q_vector->rx_dim, &dim_sample); } /** diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 933e18ba2fb2..7aaf32e9aa95 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -527,7 +527,7 @@ static void otx2_adjust_adaptive_coalese(struct otx2_nic *pfvf, struct otx2_cq_p 
rx_frames + tx_frames, rx_bytes + tx_bytes, &dim_sample); - net_dim(&cq_poll->dim, dim_sample); + net_dim(&cq_poll->dim, &dim_sample); } int otx2_napi_handler(struct napi_struct *napi, int budget) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index f01ceee5f02d..53485142938c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2227,7 +2227,7 @@ rx_done: eth->rx_bytes += bytes; dim_update_sample(eth->rx_events, eth->rx_packets, eth->rx_bytes, &dim_sample); - net_dim(&eth->rx_dim, dim_sample); + net_dim(&eth->rx_dim, &dim_sample); if (xdp_flush) xdp_do_flush(); @@ -2377,7 +2377,7 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget) dim_update_sample(eth->tx_events, eth->tx_packets, eth->tx_bytes, &dim_sample); - net_dim(&eth->tx_dim, dim_sample); + net_dim(&eth->tx_dim, &dim_sample); if (mtk_queue_stopped(eth) && (atomic_read(&ring->free_count) > ring->thresh)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 5873fde65c2e..417098f0b2bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -55,7 +55,7 @@ static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) return; dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); - net_dim(sq->dim, dim_sample); + net_dim(sq->dim, &dim_sample); } static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) @@ -67,7 +67,7 @@ static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) return; dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); - net_dim(rq->dim, dim_sample); + net_dim(rq->dim, &dim_sample); } void mlx5e_trigger_irq(struct mlx5e_icosq *sq) diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c index d215efc6cad0..f1c6c47564b1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c @@ -1179,7 +1179,7 @@ int nfp_nfd3_poll(struct napi_struct *napi, int budget) } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); - net_dim(&r_vec->rx_dim, dim_sample); + net_dim(&r_vec->rx_dim, &dim_sample); } if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) { @@ -1194,7 +1194,7 @@ int nfp_nfd3_poll(struct napi_struct *napi, int budget) } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); - net_dim(&r_vec->tx_dim, dim_sample); + net_dim(&r_vec->tx_dim, &dim_sample); } return pkts_polled; diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c index dae5af7d1845..ebeb6ab4465c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -1289,7 +1289,7 @@ int nfp_nfdk_poll(struct napi_struct *napi, int budget) } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); - net_dim(&r_vec->rx_dim, dim_sample); + net_dim(&r_vec->rx_dim, &dim_sample); } if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) { @@ -1304,7 +1304,7 @@ int nfp_nfdk_poll(struct napi_struct *napi, int budget) } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); - net_dim(&r_vec->tx_dim, dim_sample); + net_dim(&r_vec->tx_dim, &dim_sample); } return pkts_polled; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 0eeda7e502db..2ac59564ded1 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -928,7 +928,7 @@ static void ionic_dim_update(struct ionic_qcq *qcq, int napi_mode) dim_update_sample(qcq->cq.bound_intr->rearm_count, pkts, bytes, 
&dim_sample); - net_dim(&qcq->dim, dim_sample); + net_dim(&qcq->dim, &dim_sample); } int ionic_tx_napi(struct napi_struct *napi, int budget) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 792e9eadbfc3..869586c17ffd 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2804,7 +2804,7 @@ static void virtnet_rx_dim_update(struct virtnet_info *vi, struct receive_queue u64_stats_read(&rq->stats.bytes), &cur_sample); - net_dim(&rq->dim, cur_sample); + net_dim(&rq->dim, &cur_sample); rq->packets_in_napi = 0; } diff --git a/drivers/soc/fsl/dpio/dpio-service.c b/drivers/soc/fsl/dpio/dpio-service.c index b811446e0fa5..0b60ed16297c 100644 --- a/drivers/soc/fsl/dpio/dpio-service.c +++ b/drivers/soc/fsl/dpio/dpio-service.c @@ -891,7 +891,7 @@ void dpaa2_io_update_net_dim(struct dpaa2_io *d, __u64 frames, __u64 bytes) d->frames += frames; dim_update_sample(d->event_ctr, d->frames, d->bytes, &dim_sample); - net_dim(&d->rx_dim, dim_sample); + net_dim(&d->rx_dim, &dim_sample); spin_unlock(&d->dim_lock); } diff --git a/include/linux/dim.h b/include/linux/dim.h index 84579a50ae7f..06543fd40fcc 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -425,7 +425,7 @@ struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode); * This is the main logic of the algorithm, where data is processed in order * to decide on next required action. 
*/ -void net_dim(struct dim *dim, struct dim_sample end_sample); +void net_dim(struct dim *dim, const struct dim_sample *end_sample); /* RDMA DIM */ diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c index d7e7028e9b19..d6aa09a979b3 100644 --- a/lib/dim/net_dim.c +++ b/lib/dim/net_dim.c @@ -347,7 +347,7 @@ static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim) return dim->profile_ix != prev_ix; } -void net_dim(struct dim *dim, struct dim_sample end_sample) +void net_dim(struct dim *dim, const struct dim_sample *end_sample) { struct dim_stats curr_stats; u16 nevents; @@ -355,11 +355,11 @@ void net_dim(struct dim *dim, struct dim_sample end_sample) switch (dim->state) { case DIM_MEASURE_IN_PROGRESS: nevents = BIT_GAP(BITS_PER_TYPE(u16), - end_sample.event_ctr, + end_sample->event_ctr, dim->start_sample.event_ctr); if (nevents < DIM_NEVENTS) break; - if (!dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats)) + if (!dim_calc_stats(&dim->start_sample, end_sample, &curr_stats)) break; if (net_dim_decision(&curr_stats, dim)) { dim->state = DIM_APPLY_NEW_PROFILE; @@ -368,8 +368,8 @@ void net_dim(struct dim *dim, struct dim_sample end_sample) } fallthrough; case DIM_START_MEASURE: - dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr, - end_sample.byte_ctr, &dim->start_sample); + dim_update_sample(end_sample->event_ctr, end_sample->pkt_ctr, + end_sample->byte_ctr, &dim->start_sample); dim->state = DIM_MEASURE_IN_PROGRESS; break; case DIM_APPLY_NEW_PROFILE: -- 2.51.0 From 2e570cd187e3b5c8e56627523e0c12e2ffc4745f Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 31 Oct 2024 15:28:18 +0100 Subject: [PATCH 11/16] net: dsa: mt7530: Add TBF qdisc offload support MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Introduce port_setup_tc callback in mt7530 dsa driver in order to enable dsa ports rate shaping via hw Token Bucket Filter (TBF) for hw switched traffic. 
Tested-by: Arınç ÜNAL Signed-off-by: Lorenzo Bianconi Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241031-mt7530-tc-offload-v2-1-cb242ad954a0@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 49 ++++++++++++++++++++++++++++++++++++++++ drivers/net/dsa/mt7530.h | 12 ++++++++++ 2 files changed, 61 insertions(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index d84ee1b419a6..086b8b3d5b40 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "mt7530.h" @@ -3146,6 +3147,53 @@ mt753x_conduit_state_change(struct dsa_switch *ds, mt7530_rmw(priv, MT753X_MFC, MT7530_CPU_EN | MT7530_CPU_PORT_MASK, val); } +static int mt753x_tc_setup_qdisc_tbf(struct dsa_switch *ds, int port, + struct tc_tbf_qopt_offload *qopt) +{ + struct tc_tbf_qopt_offload_replace_params *p = &qopt->replace_params; + struct mt7530_priv *priv = ds->priv; + u32 rate = 0; + + switch (qopt->command) { + case TC_TBF_REPLACE: + rate = div_u64(p->rate.rate_bytes_ps, 1000) << 3; /* kbps */ + fallthrough; + case TC_TBF_DESTROY: { + u32 val, tick; + + mt7530_rmw(priv, MT753X_GERLCR, EGR_BC_MASK, + EGR_BC_CRC_IPG_PREAMBLE); + + /* if rate is greater than 10Mbps tick is 1/32 ms, + * 1ms otherwise + */ + tick = rate > 10000 ? 
2 : 7; + val = FIELD_PREP(ERLCR_CIR_MASK, (rate >> 5)) | + FIELD_PREP(ERLCR_EN_MASK, !!rate) | + FIELD_PREP(ERLCR_EXP_MASK, tick) | + ERLCR_TBF_MODE_MASK | + FIELD_PREP(ERLCR_MANT_MASK, 0xf); + mt7530_write(priv, MT753X_ERLCR_P(port), val); + break; + } + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int mt753x_setup_tc(struct dsa_switch *ds, int port, + enum tc_setup_type type, void *type_data) +{ + switch (type) { + case TC_SETUP_QDISC_TBF: + return mt753x_tc_setup_qdisc_tbf(ds, port, type_data); + default: + return -EOPNOTSUPP; + } +} + static int mt7988_setup(struct dsa_switch *ds) { struct mt7530_priv *priv = ds->priv; @@ -3193,6 +3241,7 @@ const struct dsa_switch_ops mt7530_switch_ops = { .get_mac_eee = mt753x_get_mac_eee, .set_mac_eee = mt753x_set_mac_eee, .conduit_state_change = mt753x_conduit_state_change, + .port_setup_tc = mt753x_setup_tc, }; EXPORT_SYMBOL_GPL(mt7530_switch_ops); diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 6ad33a9f6b1d..448200689f49 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -248,6 +248,18 @@ enum mt7530_vlan_egress_attr { #define AGE_UNIT_MAX 0xfff #define AGE_UNIT(x) (AGE_UNIT_MASK & (x)) +#define MT753X_ERLCR_P(x) (0x1040 + ((x) * 0x100)) +#define ERLCR_CIR_MASK GENMASK(31, 16) +#define ERLCR_EN_MASK BIT(15) +#define ERLCR_EXP_MASK GENMASK(11, 8) +#define ERLCR_TBF_MODE_MASK BIT(7) +#define ERLCR_MANT_MASK GENMASK(6, 0) + +#define MT753X_GERLCR 0x10e0 +#define EGR_BC_MASK GENMASK(7, 0) +#define EGR_BC_CRC 0x4 /* crc */ +#define EGR_BC_CRC_IPG_PREAMBLE 0x18 /* crc + ipg + preamble */ + /* Register for port STP state control */ #define MT7530_SSP_P(x) (0x2000 + ((x) * 0x100)) #define FID_PST(fid, state) (((state) & 0x3) << ((fid) * 2)) -- 2.51.0 From 5c87206cdb537f67c51f3f9a229258dce77d9a23 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 31 Oct 2024 13:50:42 +0000 Subject: [PATCH 12/16] ptp: fc3: remove redundant check on variable ret The check on ret has 
already been performed a few statements earlier and ret has not been re-assigned and so the re-checking is redundant. Clean up the code by removing the redundant check. Signed-off-by: Colin Ian King Link: https://patch.msgid.link/20241031135042.3250614-1-colin.i.king@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_fc3.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/ptp/ptp_fc3.c b/drivers/ptp/ptp_fc3.c index e14e149b746e..879b82f03535 100644 --- a/drivers/ptp/ptp_fc3.c +++ b/drivers/ptp/ptp_fc3.c @@ -986,11 +986,6 @@ static int idtfc3_probe(struct platform_device *pdev) mutex_unlock(idtfc3->lock); - if (err) { - ptp_clock_unregister(idtfc3->ptp_clock); - return err; - } - platform_set_drvdata(pdev, idtfc3); return 0; -- 2.51.0 From d847548c7ef44ac01c1c102ed19744c8e26ada9b Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Fri, 1 Nov 2024 16:13:31 -0500 Subject: [PATCH 13/16] dt-bindings: net: snps,dwmac: Fix "snps,kbbe" type The driver and description indicate "snps,kbbe" is a boolean, not an uint32. Signed-off-by: Rob Herring (Arm) Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20241101211331.24605-2-robh@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/snps,dwmac.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 4e2ba1bf788c..f48a0f44cf2d 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -560,7 +560,7 @@ properties: max read outstanding req. limit snps,kbbe: - $ref: /schemas/types.yaml#/definitions/uint32 + $ref: /schemas/types.yaml#/definitions/flag description: do not cross 1KiB boundary. 
-- 2.51.0 From 8a6631f1cece09047fa44608d21d520ca65ce7d8 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 30 Oct 2024 09:52:24 +0100 Subject: [PATCH 14/16] net: macb: avoid redundant lookup for "mdio" child node in MDIO setup Pass the "mdio" child node directly to `macb_mdiobus_register` to avoid performing the node lookup twice. Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241030085224.2632426-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index ebe886b98891..daa416fb1724 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -915,20 +915,15 @@ static int macb_mii_probe(struct net_device *dev) return 0; } -static int macb_mdiobus_register(struct macb *bp) +static int macb_mdiobus_register(struct macb *bp, struct device_node *mdio_np) { struct device_node *child, *np = bp->pdev->dev.of_node; /* If we have a child named mdio, probe it instead of looking for PHYs * directly under the MAC node */ - child = of_get_child_by_name(np, "mdio"); - if (child) { - int ret = of_mdiobus_register(bp->mii_bus, child); - - of_node_put(child); - return ret; - } + if (mdio_np) + return of_mdiobus_register(bp->mii_bus, mdio_np); /* Only create the PHY from the device tree if at least one PHY is * described. Otherwise scan the entire MDIO bus. We do this to support @@ -950,17 +945,15 @@ static int macb_mdiobus_register(struct macb *bp) static int macb_mii_init(struct macb *bp) { - struct device_node *child, *np = bp->pdev->dev.of_node; + struct device_node *mdio_np, *np = bp->pdev->dev.of_node; int err = -ENXIO; /* With fixed-link, we don't need to register the MDIO bus, * except if we have a child named "mdio" in the device tree. 
* In that case, some devices may be attached to the MACB's MDIO bus. */ - child = of_get_child_by_name(np, "mdio"); - if (child) - of_node_put(child); - else if (of_phy_is_fixed_link(np)) + mdio_np = of_get_child_by_name(np, "mdio"); + if (!mdio_np && of_phy_is_fixed_link(np)) return macb_mii_probe(bp->dev); /* Enable management port */ @@ -984,7 +977,7 @@ static int macb_mii_init(struct macb *bp) dev_set_drvdata(&bp->dev->dev, bp->mii_bus); - err = macb_mdiobus_register(bp); + err = macb_mdiobus_register(bp, mdio_np); if (err) goto err_out_free_mdiobus; @@ -999,6 +992,8 @@ err_out_unregister_bus: err_out_free_mdiobus: mdiobus_free(bp->mii_bus); err_out: + of_node_put(mdio_np); + return err; } -- 2.51.0 From 0c30d6eedd1ec0c1382bcab9576d26413cd278a3 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 30 Oct 2024 13:43:11 +0100 Subject: [PATCH 15/16] ipvlan: Prepare ipvlan_process_v4_outbound() to future .flowi4_tos conversion. Use ip4h_dscp() to get the DSCP from the IPv4 header, then convert the dscp_t value to __u8 with inet_dscp_to_dsfield(). Then, when we'll convert .flowi4_tos to dscp_t, we'll just have to drop the inet_dscp_to_dsfield() call. 
Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/f48335504a05b3587e0081a9b4511e0761571ca5.1730292157.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ipvlan/ipvlan_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index b1afcb8740de..fd591ddb3884 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -3,6 +3,7 @@ */ #include <net/inet_dscp.h> +#include <net/ip.h> #include "ipvlan.h" @@ -422,7 +423,7 @@ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) int err, ret = NET_XMIT_DROP; struct flowi4 fl4 = { .flowi4_oif = dev->ifindex, - .flowi4_tos = ip4h->tos & INET_DSCP_MASK, + .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)), .flowi4_flags = FLOWI_FLAG_ANYSRC, .flowi4_mark = skb->mark, .daddr = ip4h->daddr, -- 2.51.0 From 937677f481259b5291001ef7c68242d366e23b64 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 30 Oct 2024 14:27:19 +0100 Subject: [PATCH 16/16] vrf: Prepare vrf_process_v4_outbound() to future .flowi4_tos conversion. Use ip4h_dscp() to get the DSCP from the IPv4 header, then convert the dscp_t value to __u8 with inet_dscp_to_dsfield(). Then, when we convert .flowi4_tos to dscp_t, we'll just have to drop the inet_dscp_to_dsfield() call.
Signed-off-by: Guillaume Nault Reviewed-by: David Ahern Link: https://patch.msgid.link/6be084229008dcfa7a4e2758befccfd2217a331e.1730294788.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 4087f72f0d2b..67d25f4f94ef 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -521,7 +521,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, /* needed to match OIF rule */ fl4.flowi4_l3mdev = vrf_dev->ifindex; fl4.flowi4_iif = LOOPBACK_IFINDEX; - fl4.flowi4_tos = ip4h->tos & INET_DSCP_MASK; + fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)); fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; fl4.flowi4_proto = ip4h->protocol; fl4.daddr = ip4h->daddr; -- 2.51.0