From 1725f0eb37d621ce48303ccc14748fb66d618c9e Mon Sep 17 00:00:00 2001 From: David Wu Date: Wed, 19 Mar 2025 21:44:06 +0000 Subject: [PATCH 01/16] net: stmmac: dwmac-rk: Add GMAC support for RK3528 Rockchip RK3528 has two Ethernet controllers based on Synopsys DWC Ethernet QoS IP. Add initial support for the RK3528 GMAC variant. Signed-off-by: David Wu Signed-off-by: Jonas Karlman Link: https://patch.msgid.link/20250319214415.3086027-3-jonas@kwiboo.se Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-rk.c | 132 ++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index c404017bc1da..dfb4668db4ee 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -828,6 +828,137 @@ static const struct rk_gmac_ops rk3399_ops = { .set_rmii_speed = rk3399_set_rmii_speed, }; +#define RK3528_VO_GRF_GMAC_CON 0x0018 +#define RK3528_VO_GRF_MACPHY_CON0 0x001c +#define RK3528_VO_GRF_MACPHY_CON1 0x0020 +#define RK3528_VPU_GRF_GMAC_CON5 0x0018 +#define RK3528_VPU_GRF_GMAC_CON6 0x001c + +#define RK3528_GMAC_RXCLK_DLY_ENABLE GRF_BIT(15) +#define RK3528_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(15) +#define RK3528_GMAC_TXCLK_DLY_ENABLE GRF_BIT(14) +#define RK3528_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(14) + +#define RK3528_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0xFF, 8) +#define RK3528_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0xFF, 0) + +#define RK3528_GMAC0_PHY_INTF_SEL_RMII GRF_BIT(1) +#define RK3528_GMAC1_PHY_INTF_SEL_RGMII GRF_CLR_BIT(8) +#define RK3528_GMAC1_PHY_INTF_SEL_RMII GRF_BIT(8) + +#define RK3528_GMAC1_CLK_SELECT_CRU GRF_CLR_BIT(12) +#define RK3528_GMAC1_CLK_SELECT_IO GRF_BIT(12) + +#define RK3528_GMAC0_CLK_RMII_DIV2 GRF_BIT(3) +#define RK3528_GMAC0_CLK_RMII_DIV20 GRF_CLR_BIT(3) +#define RK3528_GMAC1_CLK_RMII_DIV2 GRF_BIT(10) +#define RK3528_GMAC1_CLK_RMII_DIV20 GRF_CLR_BIT(10) + +#define RK3528_GMAC1_CLK_RGMII_DIV1 (GRF_CLR_BIT(11) | GRF_CLR_BIT(10)) +#define RK3528_GMAC1_CLK_RGMII_DIV5 (GRF_BIT(11) | GRF_BIT(10)) +#define RK3528_GMAC1_CLK_RGMII_DIV50 (GRF_BIT(11) | GRF_CLR_BIT(10)) + +#define RK3528_GMAC0_CLK_RMII_GATE GRF_BIT(2) +#define RK3528_GMAC0_CLK_RMII_NOGATE GRF_CLR_BIT(2) +#define RK3528_GMAC1_CLK_RMII_GATE GRF_BIT(9) +#define RK3528_GMAC1_CLK_RMII_NOGATE GRF_CLR_BIT(9) + +static void rk3528_set_to_rgmii(struct rk_priv_data *bsp_priv, + int tx_delay, int rx_delay) +{ + regmap_write(bsp_priv->grf, RK3528_VPU_GRF_GMAC_CON5, + RK3528_GMAC1_PHY_INTF_SEL_RGMII); + + regmap_write(bsp_priv->grf, RK3528_VPU_GRF_GMAC_CON5, + DELAY_ENABLE(RK3528, tx_delay, rx_delay)); + + regmap_write(bsp_priv->grf, RK3528_VPU_GRF_GMAC_CON6, + RK3528_GMAC_CLK_RX_DL_CFG(rx_delay) | + RK3528_GMAC_CLK_TX_DL_CFG(tx_delay)); +} + +static void rk3528_set_to_rmii(struct rk_priv_data *bsp_priv) +{ + if (bsp_priv->id == 1) + regmap_write(bsp_priv->grf, RK3528_VPU_GRF_GMAC_CON5, + RK3528_GMAC1_PHY_INTF_SEL_RMII); + else + regmap_write(bsp_priv->grf, RK3528_VO_GRF_GMAC_CON, + RK3528_GMAC0_PHY_INTF_SEL_RMII | + RK3528_GMAC0_CLK_RMII_DIV2); +} + +static void rk3528_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (speed == 10) + regmap_write(bsp_priv->grf, RK3528_VPU_GRF_GMAC_CON5, + RK3528_GMAC1_CLK_RGMII_DIV50); + else if (speed == 100) + regmap_write(bsp_priv->grf, RK3528_VPU_GRF_GMAC_CON5, + RK3528_GMAC1_CLK_RGMII_DIV5); + else if (speed == 1000) + regmap_write(bsp_priv->grf, 
RK3528_VPU_GRF_GMAC_CON5, + RK3528_GMAC1_CLK_RGMII_DIV1); + else + dev_err(dev, "unknown speed value for RGMII! speed=%d", speed); +} + +static void rk3528_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + unsigned int reg, val; + + if (speed == 10) + val = bsp_priv->id == 1 ? RK3528_GMAC1_CLK_RMII_DIV20 : + RK3528_GMAC0_CLK_RMII_DIV20; + else if (speed == 100) + val = bsp_priv->id == 1 ? RK3528_GMAC1_CLK_RMII_DIV2 : + RK3528_GMAC0_CLK_RMII_DIV2; + else { + dev_err(dev, "unknown speed value for RMII! speed=%d", speed); + return; + } + + reg = bsp_priv->id == 1 ? RK3528_VPU_GRF_GMAC_CON5 : + RK3528_VO_GRF_GMAC_CON; + + regmap_write(bsp_priv->grf, reg, val); +} + +static void rk3528_set_clock_selection(struct rk_priv_data *bsp_priv, + bool input, bool enable) +{ + unsigned int val; + + if (bsp_priv->id == 1) { + val = input ? RK3528_GMAC1_CLK_SELECT_IO : + RK3528_GMAC1_CLK_SELECT_CRU; + val |= enable ? RK3528_GMAC1_CLK_RMII_NOGATE : + RK3528_GMAC1_CLK_RMII_GATE; + regmap_write(bsp_priv->grf, RK3528_VPU_GRF_GMAC_CON5, val); + } else { + val = enable ? RK3528_GMAC0_CLK_RMII_NOGATE : + RK3528_GMAC0_CLK_RMII_GATE; + regmap_write(bsp_priv->grf, RK3528_VO_GRF_GMAC_CON, val); + } +} + +static const struct rk_gmac_ops rk3528_ops = { + .set_to_rgmii = rk3528_set_to_rgmii, + .set_to_rmii = rk3528_set_to_rmii, + .set_rgmii_speed = rk3528_set_rgmii_speed, + .set_rmii_speed = rk3528_set_rmii_speed, + .set_clock_selection = rk3528_set_clock_selection, + .regs_valid = true, + .regs = { + 0xffbd0000, /* gmac0 */ + 0xffbe0000, /* gmac1 */ + 0x0, /* sentinel */ + }, +}; + #define RK3568_GRF_GMAC0_CON0 0x0380 #define RK3568_GRF_GMAC0_CON1 0x0384 #define RK3568_GRF_GMAC1_CON0 0x0388 @@ -1819,6 +1950,7 @@ static const struct of_device_id rk_gmac_dwmac_match[] = { { .compatible = "rockchip,rk3366-gmac", .data = &rk3366_ops }, { .compatible = "rockchip,rk3368-gmac", .data = &rk3368_ops }, { .compatible = "rockchip,rk3399-gmac", .data = &rk3399_ops }, + { .compatible = "rockchip,rk3528-gmac", .data = &rk3528_ops }, { .compatible = "rockchip,rk3568-gmac", .data = &rk3568_ops }, { .compatible = "rockchip,rk3576-gmac", .data = &rk3576_ops }, { .compatible = "rockchip,rk3588-gmac", .data = &rk3588_ops }, -- 2.50.1 From 0bed91f2b183bc38c216299ce035b44210148785 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Wed, 19 Mar 2025 21:44:07 +0000 Subject: [PATCH 02/16] net: stmmac: dwmac-rk: Move integrated_phy_powerup/down functions Rockchip RK3528 (and RV1106) has a different integrated PHY compared to the integrated PHY on RK3228/RK3328. Current powerup/down operation is not compatible with the integrated PHY found in these SoCs. Move the rk_gmac_integrated_phy_powerup/down functions to top of the file to prepare for them to be called directly by a GMAC variant specific powerup/down operation. Signed-off-by: Jonas Karlman Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250319214415.3086027-4-jonas@kwiboo.se Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-rk.c | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index dfb4668db4ee..0321befed0d3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -92,6 +92,50 @@ struct rk_priv_data { (((tx) ? soc##_GMAC_TXCLK_DLY_ENABLE : soc##_GMAC_TXCLK_DLY_DISABLE) | \ ((rx) ? 
soc##_GMAC_RXCLK_DLY_ENABLE : soc##_GMAC_RXCLK_DLY_DISABLE)) +#define RK_GRF_MACPHY_CON0 0xb00 +#define RK_GRF_MACPHY_CON1 0xb04 +#define RK_GRF_MACPHY_CON2 0xb08 +#define RK_GRF_MACPHY_CON3 0xb0c + +#define RK_MACPHY_ENABLE GRF_BIT(0) +#define RK_MACPHY_DISABLE GRF_CLR_BIT(0) +#define RK_MACPHY_CFG_CLK_50M GRF_BIT(14) +#define RK_GMAC2PHY_RMII_MODE (GRF_BIT(6) | GRF_CLR_BIT(7)) +#define RK_GRF_CON2_MACPHY_ID HIWORD_UPDATE(0x1234, 0xffff, 0) +#define RK_GRF_CON3_MACPHY_ID HIWORD_UPDATE(0x35, 0x3f, 0) + +static void rk_gmac_integrated_phy_powerup(struct rk_priv_data *priv) +{ + if (priv->ops->integrated_phy_powerup) + priv->ops->integrated_phy_powerup(priv); + + regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_CFG_CLK_50M); + regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_GMAC2PHY_RMII_MODE); + + regmap_write(priv->grf, RK_GRF_MACPHY_CON2, RK_GRF_CON2_MACPHY_ID); + regmap_write(priv->grf, RK_GRF_MACPHY_CON3, RK_GRF_CON3_MACPHY_ID); + + if (priv->phy_reset) { + /* PHY needs to be disabled before trying to reset it */ + regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_DISABLE); + if (priv->phy_reset) + reset_control_assert(priv->phy_reset); + usleep_range(10, 20); + if (priv->phy_reset) + reset_control_deassert(priv->phy_reset); + usleep_range(10, 20); + regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_ENABLE); + msleep(30); + } +} + +static void rk_gmac_integrated_phy_powerdown(struct rk_priv_data *priv) +{ + regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_DISABLE); + if (priv->phy_reset) + reset_control_assert(priv->phy_reset); +} + #define PX30_GRF_GMAC_CON1 0x0904 /* PX30_GRF_GMAC_CON1 */ @@ -1463,50 +1507,6 @@ static const struct rk_gmac_ops rv1126_ops = { .set_rmii_speed = rv1126_set_rmii_speed, }; -#define RK_GRF_MACPHY_CON0 0xb00 -#define RK_GRF_MACPHY_CON1 0xb04 -#define RK_GRF_MACPHY_CON2 0xb08 -#define RK_GRF_MACPHY_CON3 0xb0c - -#define RK_MACPHY_ENABLE GRF_BIT(0) -#define RK_MACPHY_DISABLE GRF_CLR_BIT(0) -#define RK_MACPHY_CFG_CLK_50M GRF_BIT(14) -#define RK_GMAC2PHY_RMII_MODE (GRF_BIT(6) | GRF_CLR_BIT(7)) -#define RK_GRF_CON2_MACPHY_ID HIWORD_UPDATE(0x1234, 0xffff, 0) -#define RK_GRF_CON3_MACPHY_ID HIWORD_UPDATE(0x35, 0x3f, 0) - -static void rk_gmac_integrated_phy_powerup(struct rk_priv_data *priv) -{ - if (priv->ops->integrated_phy_powerup) - priv->ops->integrated_phy_powerup(priv); - - regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_CFG_CLK_50M); - regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_GMAC2PHY_RMII_MODE); - - regmap_write(priv->grf, RK_GRF_MACPHY_CON2, RK_GRF_CON2_MACPHY_ID); - regmap_write(priv->grf, RK_GRF_MACPHY_CON3, RK_GRF_CON3_MACPHY_ID); - - if (priv->phy_reset) { - /* PHY needs to be disabled before trying to reset it */ - regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_DISABLE); - if (priv->phy_reset) - reset_control_assert(priv->phy_reset); - usleep_range(10, 20); - if (priv->phy_reset) - reset_control_deassert(priv->phy_reset); - usleep_range(10, 20); - regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_ENABLE); - msleep(30); - } -} - -static void rk_gmac_integrated_phy_powerdown(struct rk_priv_data *priv) -{ - regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_DISABLE); - if (priv->phy_reset) - reset_control_assert(priv->phy_reset); -} - static int rk_gmac_clk_init(struct plat_stmmacenet_data *plat) { struct rk_priv_data *bsp_priv = plat->bsp_priv; -- 2.50.1 From 32c7bc0747bbd8ee4ee32026d36a24be56117d96 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Wed, 19 Mar 2025 21:44:08 +0000 Subject: [PATCH 03/16] net: 
stmmac: dwmac-rk: Add integrated_phy_powerdown operation Rockchip RK3528 (and RV1106) has a different integrated PHY compared to the integrated PHY on RK3228/RK3328. Current powerup/down operation is not compatible with the integrated PHY found in these newer SoCs. Add a new integrated_phy_powerdown operation and change the call chain for integrated_phy_powerup to prepare support for the integrated PHY found in these newer SoCs. Signed-off-by: Jonas Karlman Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250319214415.3086027-5-jonas@kwiboo.se Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-rk.c | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 0321befed0d3..3673abd65302 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -33,6 +33,7 @@ struct rk_gmac_ops { void (*set_clock_selection)(struct rk_priv_data *bsp_priv, bool input, bool enable); void (*integrated_phy_powerup)(struct rk_priv_data *bsp_priv); + void (*integrated_phy_powerdown)(struct rk_priv_data *bsp_priv); bool php_grf_required; bool regs_valid; u32 regs[]; @@ -104,11 +105,8 @@ struct rk_priv_data { #define RK_GRF_CON2_MACPHY_ID HIWORD_UPDATE(0x1234, 0xffff, 0) #define RK_GRF_CON3_MACPHY_ID HIWORD_UPDATE(0x35, 0x3f, 0) -static void rk_gmac_integrated_phy_powerup(struct rk_priv_data *priv) +static void rk_gmac_integrated_ephy_powerup(struct rk_priv_data *priv) { - if (priv->ops->integrated_phy_powerup) - priv->ops->integrated_phy_powerup(priv); - regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_CFG_CLK_50M); regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_GMAC2PHY_RMII_MODE); @@ -129,7 +127,7 @@ static void rk_gmac_integrated_phy_powerup(struct rk_priv_data *priv) } } -static void rk_gmac_integrated_phy_powerdown(struct rk_priv_data *priv) +static void rk_gmac_integrated_ephy_powerdown(struct rk_priv_data *priv) { regmap_write(priv->grf, RK_GRF_MACPHY_CON0, RK_MACPHY_DISABLE); if (priv->phy_reset) @@ -368,6 +366,8 @@ static void rk3228_integrated_phy_powerup(struct rk_priv_data *priv) { regmap_write(priv->grf, RK3228_GRF_CON_MUX, RK3228_GRF_CON_MUX_GMAC_INTEGRATED_PHY); + + rk_gmac_integrated_ephy_powerup(priv); } static const struct rk_gmac_ops rk3228_ops = { @@ -375,7 +375,8 @@ static const struct rk_gmac_ops rk3228_ops = { .set_to_rmii = rk3228_set_to_rmii, .set_rgmii_speed = rk3228_set_rgmii_speed, .set_rmii_speed = rk3228_set_rmii_speed, - .integrated_phy_powerup = rk3228_integrated_phy_powerup, + .integrated_phy_powerup = rk3228_integrated_phy_powerup, + .integrated_phy_powerdown = rk_gmac_integrated_ephy_powerdown, }; #define RK3288_GRF_SOC_CON1 0x0248 @@ -601,6 +602,8 @@ static void rk3328_integrated_phy_powerup(struct rk_priv_data *priv) { regmap_write(priv->grf, RK3328_GRF_MACPHY_CON1, RK3328_MACPHY_RMII_MODE); + + rk_gmac_integrated_ephy_powerup(priv); } static const struct rk_gmac_ops rk3328_ops = { @@ -608,7 +611,8 @@ static const struct rk_gmac_ops rk3328_ops = { .set_to_rmii = rk3328_set_to_rmii, .set_rgmii_speed = rk3328_set_rgmii_speed, .set_rmii_speed = rk3328_set_rmii_speed, - .integrated_phy_powerup = rk3328_integrated_phy_powerup, + .integrated_phy_powerup = rk3328_integrated_phy_powerup, + .integrated_phy_powerdown = rk_gmac_integrated_ephy_powerdown, }; #define RK3366_GRF_SOC_CON6 0x0418 @@ -1802,16 +1806,16 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) 
pm_runtime_get_sync(dev); - if (bsp_priv->integrated_phy) - rk_gmac_integrated_phy_powerup(bsp_priv); + if (bsp_priv->integrated_phy && bsp_priv->ops->integrated_phy_powerup) + bsp_priv->ops->integrated_phy_powerup(bsp_priv); return 0; } static void rk_gmac_powerdown(struct rk_priv_data *gmac) { - if (gmac->integrated_phy) - rk_gmac_integrated_phy_powerdown(gmac); + if (gmac->integrated_phy && gmac->ops->integrated_phy_powerdown) + gmac->ops->integrated_phy_powerdown(gmac); pm_runtime_put_sync(&gmac->pdev->dev); -- 2.50.1 From 83e7b35c7879497b51fd3fcd3a17b0b07f89e81b Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Wed, 19 Mar 2025 21:44:09 +0000 Subject: [PATCH 04/16] net: stmmac: dwmac-rk: Add initial support for RK3528 integrated PHY Rockchip RK3528 (and RV1106) has a different integrated PHY compared to the integrated PHY on RK3228/RK3328. Current powerup/down operation is not compatible with the integrated PHY found in these newer SoCs. Add operations to powerup/down the integrated PHY found in RK3528. Use helpers that can be used by other GMAC variants in the future. Signed-off-by: Jonas Karlman Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250319214415.3086027-6-jonas@kwiboo.se Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-rk.c | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 3673abd65302..700858ff6f7c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -134,6 +134,35 @@ static void rk_gmac_integrated_ephy_powerdown(struct rk_priv_data *priv) reset_control_assert(priv->phy_reset); } +#define RK_FEPHY_SHUTDOWN GRF_BIT(1) +#define RK_FEPHY_POWERUP GRF_CLR_BIT(1) +#define RK_FEPHY_INTERNAL_RMII_SEL GRF_BIT(6) +#define RK_FEPHY_24M_CLK_SEL (GRF_BIT(8) | GRF_BIT(9)) +#define RK_FEPHY_PHY_ID GRF_BIT(11) + +static void rk_gmac_integrated_fephy_powerup(struct rk_priv_data *priv, + unsigned int reg) +{ + reset_control_assert(priv->phy_reset); + usleep_range(20, 30); + + regmap_write(priv->grf, reg, + RK_FEPHY_POWERUP | + RK_FEPHY_INTERNAL_RMII_SEL | + RK_FEPHY_24M_CLK_SEL | + RK_FEPHY_PHY_ID); + usleep_range(10000, 12000); + + reset_control_deassert(priv->phy_reset); + usleep_range(50000, 60000); +} + +static void rk_gmac_integrated_fephy_powerdown(struct rk_priv_data *priv, + unsigned int reg) +{ + regmap_write(priv->grf, reg, RK_FEPHY_SHUTDOWN); +} + #define PX30_GRF_GMAC_CON1 0x0904 /* PX30_GRF_GMAC_CON1 */ @@ -993,12 +1022,24 @@ static void rk3528_set_clock_selection(struct rk_priv_data *bsp_priv, } } +static void rk3528_integrated_phy_powerup(struct rk_priv_data *bsp_priv) +{ + rk_gmac_integrated_fephy_powerup(bsp_priv, RK3528_VO_GRF_MACPHY_CON0); +} + +static void rk3528_integrated_phy_powerdown(struct rk_priv_data *bsp_priv) +{ + rk_gmac_integrated_fephy_powerdown(bsp_priv, RK3528_VO_GRF_MACPHY_CON0); +} + static const struct rk_gmac_ops rk3528_ops = { .set_to_rgmii = rk3528_set_to_rgmii, .set_to_rmii = rk3528_set_to_rmii, .set_rgmii_speed = rk3528_set_rgmii_speed, .set_rmii_speed = rk3528_set_rmii_speed, .set_clock_selection = rk3528_set_clock_selection, + .integrated_phy_powerup = rk3528_integrated_phy_powerup, + .integrated_phy_powerdown = rk3528_integrated_phy_powerdown, .regs_valid = true, .regs = { 0xffbd0000, /* gmac0 */ -- 2.50.1 From f3483c8e1da62993fe0f57af23b925de7661adaa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Mar 2025 17:13:09 
+0000 Subject: [PATCH 05/16] net: rfs: hash function change RFS is using two kinds of hash tables. First one is controlled by /proc/sys/net/core/rps_sock_flow_entries = 2^N and using the N low order bits of the l4 hash is good enough. Then each RX queue has its own hash table, controlled by /sys/class/net/eth1/queues/rx-$q/rps_flow_cnt = 2^X Current hash function, using the X low order bits is suboptimal, because RSS is usually using Func(hash) = (hash % power_of_two); For example, with 32 RX queues, 6 low order bits have no entropy for a given queue. Switch this hash function to hash_32(hash, log) to increase chances to use all possible slots and reduce collisions. Signed-off-by: Eric Dumazet Cc: Tom Herbert Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250321171309.634100-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/rps.h | 2 +- net/core/dev.c | 13 +++++++++---- net/core/net-sysfs.c | 4 ++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/net/rps.h b/include/net/rps.h index a93401d23d66..e358e9711f27 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -39,7 +39,7 @@ struct rps_dev_flow { * The rps_dev_flow_table structure contains a table of flow mappings. */ struct rps_dev_flow_table { - unsigned int mask; + u8 log; struct rcu_head rcu; struct rps_dev_flow flows[]; }; diff --git a/net/core/dev.c b/net/core/dev.c index bcf81c3ff6a3..b6b1c7898281 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4751,6 +4751,11 @@ EXPORT_SYMBOL(rps_needed); struct static_key_false rfs_needed __read_mostly; EXPORT_SYMBOL(rfs_needed); +static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table) +{ + return hash_32(hash, flow_table->log); +} + static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *rflow, u16 next_cpu) @@ -4777,7 +4782,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, flow_table = rcu_dereference(rxqueue->rps_flow_table); if (!flow_table) goto out; - flow_id = skb_get_hash(skb) & flow_table->mask; + flow_id = rfs_slot(skb_get_hash(skb), flow_table); rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, rxq_index, flow_id); if (rc < 0) @@ -4856,7 +4861,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, /* OK, now we know there is a match, * we can look at the local (per receive queue) flow table */ - rflow = &flow_table->flows[hash & flow_table->mask]; + rflow = &flow_table->flows[rfs_slot(hash, flow_table)]; tcpu = rflow->cpu; /* @@ -4923,13 +4928,13 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, rcu_read_lock(); flow_table = rcu_dereference(rxqueue->rps_flow_table); - if (flow_table && flow_id <= flow_table->mask) { + if (flow_table && flow_id < (1UL << flow_table->log)) { rflow = &flow_table->flows[flow_id]; cpu = READ_ONCE(rflow->cpu); if (READ_ONCE(rflow->filter) == filter_id && cpu < nr_cpu_ids && ((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) - READ_ONCE(rflow->last_qtail)) < - (int)(10 * flow_table->mask))) + (int)(10 << flow_table->log))) expire = false; } rcu_read_unlock(); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index abaa1c919b98..b6fbe629ccee 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1056,7 +1056,7 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, rcu_read_lock(); flow_table = rcu_dereference(queue->rps_flow_table); if (flow_table) - val = (unsigned long)flow_table->mask + 1; + val = 1UL << flow_table->log; 
rcu_read_unlock(); return sysfs_emit(buf, "%lu\n", val); @@ -1109,7 +1109,7 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, if (!table) return -ENOMEM; - table->mask = mask; + table->log = ilog2(mask) + 1; for (count = 0; count <= mask; count++) table->flows[count].cpu = RPS_NO_CPU; } else { -- 2.50.1 From 5e8df79497ce522170b5964d92c7f7df28dc1fa7 Mon Sep 17 00:00:00 2001 From: Johan Korsnes Date: Sun, 23 Mar 2025 20:04:50 +0100 Subject: [PATCH 06/16] net: au1000_eth: Mark au1000_ReleaseDB() static This fixes the following build warning: ``` drivers/net/ethernet/amd/au1000_eth.c:574:6: warning: no previous prototype for 'au1000_ReleaseDB' [-Wmissing-prototypes] 574 | void au1000_ReleaseDB(struct au1000_private *aup, struct db_dest *pDB) | ^~~~~~~~~~~~~~~~ ``` Signed-off-by: Johan Korsnes Cc: Andrew Lunn Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250323190450.111241-1-johan.korsnes@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/au1000_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 0671a066913b..9d35ac348ebe 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -571,7 +571,7 @@ static struct db_dest *au1000_GetFreeDB(struct au1000_private *aup) return pDB; } -void au1000_ReleaseDB(struct au1000_private *aup, struct db_dest *pDB) +static void au1000_ReleaseDB(struct au1000_private *aup, struct db_dest *pDB) { struct db_dest *pDBfree = aup->pDBfree; if (pDBfree) -- 2.50.1 From a8b4ea7857ff870d0ba266561210832833869415 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 24 Mar 2025 13:52:22 +0100 Subject: [PATCH 07/16] dt-bindings: net: qcom,ipa: Correct indentation and style in DTS example DTS example in the bindings should be indented with 2- or 4-spaces and aligned with opening '- |', so correct any differences like 3-spaces or mixtures 2- and 4-spaces in one binding. No functional changes here, but saves some comments during reviews of new patches built on existing code. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rob Herring (Arm) Reviewed-by: Alex Elder Link: https://patch.msgid.link/20250324125222.82057-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/qcom,ipa.yaml | 128 +++++++++--------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index 1a46d80a66e8..b4a79912d473 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -210,70 +210,70 @@ additionalProperties: false examples: - | - #include - #include - #include - - smp2p-mpss { - compatible = "qcom,smp2p"; - interrupts = ; - mboxes = <&apss_shared 6>; - qcom,smem = <94>, <432>; - qcom,local-pid = <0>; - qcom,remote-pid = <5>; - - ipa_smp2p_out: ipa-ap-to-modem { - qcom,entry-name = "ipa"; - #qcom,smem-state-cells = <1>; - }; - - ipa_smp2p_in: ipa-modem-to-ap { - qcom,entry-name = "ipa"; - interrupt-controller; - #interrupt-cells = <2>; - }; + #include + #include + #include + + smp2p-mpss { + compatible = "qcom,smp2p"; + interrupts = ; + mboxes = <&apss_shared 6>; + qcom,smem = <94>, <432>; + qcom,local-pid = <0>; + qcom,remote-pid = <5>; + + ipa_smp2p_out: ipa-ap-to-modem { + qcom,entry-name = "ipa"; + #qcom,smem-state-cells = <1>; }; - ipa@1e40000 { - compatible = "qcom,sc7180-ipa"; - - qcom,gsi-loader = "self"; - memory-region = <&ipa_fw_mem>; - firmware-name = "qcom/sc7180-trogdor/modem/modem.mbn"; - - iommus = <&apps_smmu 0x440 0x0>, - <&apps_smmu 0x442 0x0>; - reg = <0x1e40000 0x7000>, - <0x1e47000 0x2000>, - <0x1e04000 0x2c000>; - reg-names = "ipa-reg", - "ipa-shared", - "gsi"; - - interrupts-extended = <&intc GIC_SPI 311 IRQ_TYPE_EDGE_RISING>, - <&intc GIC_SPI 432 IRQ_TYPE_LEVEL_HIGH>, - <&ipa_smp2p_in 0 IRQ_TYPE_EDGE_RISING>, - <&ipa_smp2p_in 1 IRQ_TYPE_EDGE_RISING>; - interrupt-names = "ipa", - "gsi", - "ipa-clock-query", - "ipa-setup-ready"; - - clocks = <&rpmhcc RPMH_IPA_CLK>; - clock-names = "core"; - - interconnects = - <&aggre2_noc MASTER_IPA 0 &mc_virt SLAVE_EBI1 0>, - <&aggre2_noc MASTER_IPA 0 &system_noc SLAVE_IMEM 0>, - <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_IPA_CFG 0>; - interconnect-names = "memory", - "imem", - "config"; - - qcom,qmp = <&aoss_qmp>; - - qcom,smem-states = <&ipa_smp2p_out 0>, - <&ipa_smp2p_out 1>; - qcom,smem-state-names = "ipa-clock-enabled-valid", - "ipa-clock-enabled"; + ipa_smp2p_in: ipa-modem-to-ap { + qcom,entry-name = "ipa"; + interrupt-controller; + #interrupt-cells = <2>; }; + }; + + ipa@1e40000 { + compatible = "qcom,sc7180-ipa"; + + qcom,gsi-loader = "self"; + memory-region = <&ipa_fw_mem>; + firmware-name = "qcom/sc7180-trogdor/modem/modem.mbn"; + + iommus = <&apps_smmu 0x440 0x0>, + <&apps_smmu 0x442 0x0>; + reg = <0x1e40000 0x7000>, + <0x1e47000 0x2000>, + <0x1e04000 0x2c000>; + reg-names = "ipa-reg", + "ipa-shared", + "gsi"; + + interrupts-extended = <&intc GIC_SPI 311 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 432 IRQ_TYPE_LEVEL_HIGH>, + <&ipa_smp2p_in 0 IRQ_TYPE_EDGE_RISING>, + <&ipa_smp2p_in 1 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "ipa", + "gsi", + "ipa-clock-query", + "ipa-setup-ready"; + + clocks = <&rpmhcc RPMH_IPA_CLK>; + clock-names = "core"; + + interconnects = + <&aggre2_noc MASTER_IPA 0 &mc_virt SLAVE_EBI1 0>, + <&aggre2_noc MASTER_IPA 0 &system_noc SLAVE_IMEM 0>, + <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_IPA_CFG 0>; + interconnect-names = "memory", + "imem", + "config"; + + 
qcom,qmp = <&aoss_qmp>; + + qcom,smem-states = <&ipa_smp2p_out 0>, + <&ipa_smp2p_out 1>; + qcom,smem-state-names = "ipa-clock-enabled-valid", + "ipa-clock-enabled"; + }; -- 2.50.1 From 7662abf4db945e0c78ac56c58b198ad1504b554e Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Sat, 22 Mar 2025 08:57:44 +0100 Subject: [PATCH 08/16] net: phy: sfp: Add support for SMBus module access The SFP module's eeprom and internals are accessible through an i2c bus. It is possible that the SFP might be connected to an SMBus-only controller, such as the one found in some PHY devices in the VSC85xx family. Introduce a set of sfp read/write ops that are going to be used if the i2c bus is only capable of doing smbus byte accesses. As Single-byte SMBus transaction go against SFF-8472 and breaks the atomicity for diagnostics data access, hwmon is disabled in the case of SMBus access. Moreover, as this may cause other instabilities, print a warning at probe time to indicate that the setup may be unreliable because of the hardware design. As hwmon may be disabled for both broken EEPROM and smbus, the warnings are udpated accordingly. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250322075745.120831-2-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 82 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 8 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index c88217af44a1..347c1e0e94d9 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -234,6 +234,7 @@ struct sfp { enum mdio_i2c_proto mdio_protocol; struct phy_device *mod_phy; const struct sff_data *type; + size_t i2c_max_block_size; size_t i2c_block_size; u32 max_power_mW; @@ -691,14 +692,71 @@ static int sfp_i2c_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf, return ret == ARRAY_SIZE(msgs) ? len : 0; } -static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c) +static int sfp_smbus_byte_read(struct sfp *sfp, bool a2, u8 dev_addr, + void *buf, size_t len) { - if (!i2c_check_functionality(i2c, I2C_FUNC_I2C)) - return -EINVAL; + union i2c_smbus_data smbus_data; + u8 bus_addr = a2 ? 0x51 : 0x50; + u8 *data = buf; + int ret; + + while (len) { + ret = i2c_smbus_xfer(sfp->i2c, bus_addr, 0, + I2C_SMBUS_READ, dev_addr, + I2C_SMBUS_BYTE_DATA, &smbus_data); + if (ret < 0) + return ret; + + *data = smbus_data.byte; + + len--; + data++; + dev_addr++; + } + + return data - (u8 *)buf; +} + +static int sfp_smbus_byte_write(struct sfp *sfp, bool a2, u8 dev_addr, + void *buf, size_t len) +{ + union i2c_smbus_data smbus_data; + u8 bus_addr = a2 ? 
0x51 : 0x50; + u8 *data = buf; + int ret; + while (len) { + smbus_data.byte = *data; + ret = i2c_smbus_xfer(sfp->i2c, bus_addr, 0, + I2C_SMBUS_WRITE, dev_addr, + I2C_SMBUS_BYTE_DATA, &smbus_data); + if (ret) + return ret; + + len--; + data++; + dev_addr++; + } + + return 0; +} + +static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c) +{ sfp->i2c = i2c; - sfp->read = sfp_i2c_read; - sfp->write = sfp_i2c_write; + + if (i2c_check_functionality(i2c, I2C_FUNC_I2C)) { + sfp->read = sfp_i2c_read; + sfp->write = sfp_i2c_write; + sfp->i2c_max_block_size = SFP_EEPROM_BLOCK_SIZE; + } else if (i2c_check_functionality(i2c, I2C_FUNC_SMBUS_BYTE_DATA)) { + sfp->read = sfp_smbus_byte_read; + sfp->write = sfp_smbus_byte_write; + sfp->i2c_max_block_size = 1; + } else { + sfp->i2c = NULL; + return -EINVAL; + } return 0; } @@ -1594,7 +1652,7 @@ static void sfp_hwmon_probe(struct work_struct *work) */ if (sfp->i2c_block_size < 2) { dev_info(sfp->dev, - "skipping hwmon device registration due to broken EEPROM\n"); + "skipping hwmon device registration\n"); dev_info(sfp->dev, "diagnostic EEPROM area cannot be read atomically to guarantee data coherency\n"); return; @@ -2201,7 +2259,7 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) u8 check; int ret; - sfp->i2c_block_size = SFP_EEPROM_BLOCK_SIZE; + sfp->i2c_block_size = sfp->i2c_max_block_size; ret = sfp_read(sfp, false, 0, &id.base, sizeof(id.base)); if (ret < 0) { @@ -2941,7 +2999,6 @@ static struct sfp *sfp_alloc(struct device *dev) return ERR_PTR(-ENOMEM); sfp->dev = dev; - sfp->i2c_block_size = SFP_EEPROM_BLOCK_SIZE; mutex_init(&sfp->sm_mutex); mutex_init(&sfp->st_mutex); @@ -3115,6 +3172,15 @@ static int sfp_probe(struct platform_device *pdev) if (!sfp->sfp_bus) return -ENOMEM; + if (sfp->i2c_max_block_size < 2) + dev_warn(sfp->dev, + "Please note:\n" + "This SFP cage is accessed via an SMBus only capable of single byte\n" + "transactions. Some features are disabled, other may be unreliable or\n" + "sporadically fail. Use with caution. There is nothing that the kernel\n" + "or community can do to fix it, the kernel will try best efforts. Please\n" + "verify any problems on hardware that supports multi-byte I2C transactions.\n"); + sfp_debugfs_init(sfp); return 0; -- 2.50.1 From d4bd3aca33c2c0d07f76e1482dd5d90525199cec Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Sat, 22 Mar 2025 08:57:45 +0100 Subject: [PATCH 09/16] net: mdio: mdio-i2c: Add support for single-byte SMBus operations PHYs that are within copper SFP modules have their MDIO bus accessible through address 0x56 (usually) on the i2c bus. The MDIO-I2C bridge is desgned for 16 bits accesses, but we can also perform 8bits accesses by reading/writing the high and low bytes sequentially. This commit adds support for this type of accesses, thus supporting smbus controllers such as the one in the VSC8552. This was only tested on Copper SFP modules that embed a Marvell 88e1111 PHY. 
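As a rough illustration of the access pattern described above (a minimal sketch for this note only, not taken from the patch; the helper name is made up and the authoritative implementation is the one added in the diff below), a 16-bit MII register value can be assembled from two consecutive SMBus byte-data transfers, high byte first:

    #include <linux/i2c.h>

    /* Sketch: read one 16-bit MII register via two SMBus byte reads,
     * high byte first, then low byte, as described above.
     */
    static int mii_smbus_read16_sketch(struct i2c_adapter *i2c, u8 bus_addr,
                                       u8 reg)
    {
            union i2c_smbus_data d;
            int ret, val;

            ret = i2c_smbus_xfer(i2c, bus_addr, 0, I2C_SMBUS_READ, reg,
                                 I2C_SMBUS_BYTE_DATA, &d);
            if (ret < 0)
                    return ret;
            val = (d.byte & 0xff) << 8;             /* high byte */

            ret = i2c_smbus_xfer(i2c, bus_addr, 0, I2C_SMBUS_READ, reg,
                                 I2C_SMBUS_BYTE_DATA, &d);
            if (ret < 0)
                    return ret;

            return val | (d.byte & 0xff);           /* low byte */
    }
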
Tested-by: Sean Anderson Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250322075745.120831-3-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-i2c.c | 79 ++++++++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/drivers/net/mdio/mdio-i2c.c b/drivers/net/mdio/mdio-i2c.c index da2001ea1f99..53e96bfab542 100644 --- a/drivers/net/mdio/mdio-i2c.c +++ b/drivers/net/mdio/mdio-i2c.c @@ -106,6 +106,62 @@ static int i2c_mii_write_default_c22(struct mii_bus *bus, int phy_id, int reg, return i2c_mii_write_default_c45(bus, phy_id, -1, reg, val); } +static int smbus_byte_mii_read_default_c22(struct mii_bus *bus, int phy_id, + int reg) +{ + struct i2c_adapter *i2c = bus->priv; + union i2c_smbus_data smbus_data; + int val = 0, ret; + + if (!i2c_mii_valid_phy_id(phy_id)) + return 0; + + ret = i2c_smbus_xfer(i2c, i2c_mii_phy_addr(phy_id), 0, + I2C_SMBUS_READ, reg, + I2C_SMBUS_BYTE_DATA, &smbus_data); + if (ret < 0) + return ret; + + val = (smbus_data.byte & 0xff) << 8; + + ret = i2c_smbus_xfer(i2c, i2c_mii_phy_addr(phy_id), 0, + I2C_SMBUS_READ, reg, + I2C_SMBUS_BYTE_DATA, &smbus_data); + if (ret < 0) + return ret; + + val |= smbus_data.byte & 0xff; + + return val; +} + +static int smbus_byte_mii_write_default_c22(struct mii_bus *bus, int phy_id, + int reg, u16 val) +{ + struct i2c_adapter *i2c = bus->priv; + union i2c_smbus_data smbus_data; + int ret; + + if (!i2c_mii_valid_phy_id(phy_id)) + return 0; + + smbus_data.byte = (val & 0xff00) >> 8; + + ret = i2c_smbus_xfer(i2c, i2c_mii_phy_addr(phy_id), 0, + I2C_SMBUS_WRITE, reg, + I2C_SMBUS_BYTE_DATA, &smbus_data); + if (ret < 0) + return ret; + + smbus_data.byte = val & 0xff; + + ret = i2c_smbus_xfer(i2c, i2c_mii_phy_addr(phy_id), 0, + I2C_SMBUS_WRITE, reg, + I2C_SMBUS_BYTE_DATA, &smbus_data); + + return ret < 0 ? ret : 0; +} + /* RollBall SFPs do not access internal PHY via I2C address 0x56, but * instead via address 0x51, when SFP page is set to 0x03 and password to * 0xffffffff. 
@@ -378,13 +434,26 @@ static int i2c_mii_init_rollball(struct i2c_adapter *i2c) return 0; } +static bool mdio_i2c_check_functionality(struct i2c_adapter *i2c, + enum mdio_i2c_proto protocol) +{ + if (i2c_check_functionality(i2c, I2C_FUNC_I2C)) + return true; + + if (i2c_check_functionality(i2c, I2C_FUNC_SMBUS_BYTE_DATA) && + protocol == MDIO_I2C_MARVELL_C22) + return true; + + return false; +} + struct mii_bus *mdio_i2c_alloc(struct device *parent, struct i2c_adapter *i2c, enum mdio_i2c_proto protocol) { struct mii_bus *mii; int ret; - if (!i2c_check_functionality(i2c, I2C_FUNC_I2C)) + if (!mdio_i2c_check_functionality(i2c, protocol)) return ERR_PTR(-EINVAL); mii = mdiobus_alloc(); @@ -395,6 +464,14 @@ struct mii_bus *mdio_i2c_alloc(struct device *parent, struct i2c_adapter *i2c, mii->parent = parent; mii->priv = i2c; + /* Only use SMBus if we have no other choice */ + if (i2c_check_functionality(i2c, I2C_FUNC_SMBUS_BYTE_DATA) && + !i2c_check_functionality(i2c, I2C_FUNC_I2C)) { + mii->read = smbus_byte_mii_read_default_c22; + mii->write = smbus_byte_mii_write_default_c22; + return mii; + } + switch (protocol) { case MDIO_I2C_ROLLBALL: ret = i2c_mii_init_rollball(i2c); -- 2.50.1 From 1f6154227b49c3d3f306f624858e695bfee50aae Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 25 Mar 2025 09:14:27 -0700 Subject: [PATCH 10/16] Revert "udp_tunnel: GRO optimizations" Revert "udp_tunnel: use static call for GRO hooks when possible" This reverts commit 311b36574ceaccfa3f91b74054a09cd4bb877702. Revert "udp_tunnel: create a fastpath GRO lookup." This reverts commit 8d4880db378350f8ed8969feea13bdc164564fc1. There are multiple small issues with the series. In the interest of unblocking the merge window let's opt for a revert. Link: https://lore.kernel.org/cover.1742557254.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/udp.h | 16 ---- include/net/netns/ipv4.h | 11 --- include/net/udp.h | 1 - include/net/udp_tunnel.h | 22 ----- net/ipv4/udp.c | 13 +-- net/ipv4/udp_offload.c | 167 +------------------------------------ net/ipv4/udp_tunnel_core.c | 14 ---- net/ipv6/udp.c | 2 - net/ipv6/udp_offload.c | 5 -- 9 files changed, 2 insertions(+), 249 deletions(-) diff --git a/include/linux/udp.h b/include/linux/udp.h index 895240177f4f..0807e21cfec9 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -101,13 +101,6 @@ struct udp_sock { /* Cache friendly copy of sk->sk_peek_off >= 0 */ bool peeking_with_offset; - - /* - * Accounting for the tunnel GRO fastpath. - * Unprotected by compilers guard, as it uses space available in - * the last UDP socket cacheline. - */ - struct hlist_node tunnel_list; }; #define udp_test_bit(nr, sk) \ @@ -226,13 +219,4 @@ static inline void udp_allow_gso(struct sock *sk) #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE) -static inline struct sock *udp_tunnel_sk(const struct net *net, bool is_ipv6) -{ -#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) - return rcu_dereference(net->ipv4.udp_tunnel_gro[is_ipv6].sk); -#else - return NULL; -#endif -} - #endif /* _LINUX_UDP_H */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 6373e3f17da8..650b2dc9199f 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -47,11 +47,6 @@ struct sysctl_fib_multipath_hash_seed { }; #endif -struct udp_tunnel_gro { - struct sock __rcu *sk; - struct hlist_head list; -}; - struct netns_ipv4 { /* Cacheline organization can be found documented in * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst. 
@@ -90,11 +85,6 @@ struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; struct udp_table *udp_table; -#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) - /* Not in a pernet subsys because need to be available at GRO stage */ - struct udp_tunnel_gro udp_tunnel_gro[2]; -#endif - #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; struct ctl_table_header *frags_hdr; @@ -287,5 +277,4 @@ struct netns_ipv4 { struct hlist_head *inet_addr_lst; struct delayed_work addr_chk_work; }; - #endif diff --git a/include/net/udp.h b/include/net/udp.h index a772510b2aa5..6e89520e100d 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -290,7 +290,6 @@ static inline void udp_lib_init_sock(struct sock *sk) struct udp_sock *up = udp_sk(sk); skb_queue_head_init(&up->reader_queue); - INIT_HLIST_NODE(&up->tunnel_list); up->forward_threshold = sk->sk_rcvbuf >> 2; set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); } diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index a7b230867eb1..a93dc51f6323 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -203,28 +203,6 @@ static inline void udp_tunnel_encap_enable(struct sock *sk) udp_encap_enable(); } -#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) -void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add); -void udp_tunnel_update_gro_rcv(struct sock *sk, bool add); -#else -static inline void udp_tunnel_update_gro_lookup(struct net *net, - struct sock *sk, bool add) {} -static inline void udp_tunnel_update_gro_rcv(struct sock *sk, bool add) {} -#endif - -static inline void udp_tunnel_cleanup_gro(struct sock *sk) -{ - struct udp_sock *up = udp_sk(sk); - struct net *net = sock_net(sk); - - udp_tunnel_update_gro_rcv(sk, false); - - if (!up->tunnel_list.pprev) - return; - - udp_tunnel_update_gro_lookup(net, sk, false); -} - #define UDP_TUNNEL_NIC_MAX_TABLES 4 enum udp_tunnel_nic_info_flags { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index db606f7e4163..d0bffcfa56d8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2891,10 +2891,8 @@ void udp_destroy_sock(struct sock *sk) if (encap_destroy) encap_destroy(sk); } - if (udp_test_bit(ENCAP_ENABLED, sk)) { + if (udp_test_bit(ENCAP_ENABLED, sk)) static_branch_dec(&udp_encap_needed_key); - udp_tunnel_cleanup_gro(sk); - } } } @@ -3806,15 +3804,6 @@ fallback: static int __net_init udp_pernet_init(struct net *net) { -#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) - int i; - - /* No tunnel is configured */ - for (i = 0; i < ARRAY_SIZE(net->ipv4.udp_tunnel_gro); ++i) { - INIT_HLIST_HEAD(&net->ipv4.udp_tunnel_gro[i].list); - RCU_INIT_POINTER(net->ipv4.udp_tunnel_gro[i].sk, NULL); - } -#endif udp_sysctl_init(net); udp_set_table(net); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 088aa8cb8ac0..2c0725583be3 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -12,164 +12,6 @@ #include #include #include -#include - -#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) - -/* - * Dummy GRO tunnel callback, exists mainly to avoid dangling/NULL - * values for the udp tunnel static call. 
- */ -static struct sk_buff *dummy_gro_rcv(struct sock *sk, - struct list_head *head, - struct sk_buff *skb) -{ - NAPI_GRO_CB(skb)->flush = 1; - return NULL; -} - -typedef struct sk_buff *(*udp_tunnel_gro_rcv_t)(struct sock *sk, - struct list_head *head, - struct sk_buff *skb); - -struct udp_tunnel_type_entry { - udp_tunnel_gro_rcv_t gro_receive; - refcount_t count; -}; - -#define UDP_MAX_TUNNEL_TYPES (IS_ENABLED(CONFIG_GENEVE) + \ - IS_ENABLED(CONFIG_VXLAN) * 2 + \ - IS_ENABLED(CONFIG_NET_FOU) * 2) - -DEFINE_STATIC_CALL(udp_tunnel_gro_rcv, dummy_gro_rcv); -static DEFINE_STATIC_KEY_FALSE(udp_tunnel_static_call); -static struct mutex udp_tunnel_gro_type_lock; -static struct udp_tunnel_type_entry udp_tunnel_gro_types[UDP_MAX_TUNNEL_TYPES]; -static unsigned int udp_tunnel_gro_type_nr; -static DEFINE_SPINLOCK(udp_tunnel_gro_lock); - -void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add) -{ - bool is_ipv6 = sk->sk_family == AF_INET6; - struct udp_sock *tup, *up = udp_sk(sk); - struct udp_tunnel_gro *udp_tunnel_gro; - - spin_lock(&udp_tunnel_gro_lock); - udp_tunnel_gro = &net->ipv4.udp_tunnel_gro[is_ipv6]; - if (add) - hlist_add_head(&up->tunnel_list, &udp_tunnel_gro->list); - else - hlist_del_init(&up->tunnel_list); - - if (udp_tunnel_gro->list.first && - !udp_tunnel_gro->list.first->next) { - tup = hlist_entry(udp_tunnel_gro->list.first, struct udp_sock, - tunnel_list); - - rcu_assign_pointer(udp_tunnel_gro->sk, (struct sock *)tup); - } else { - RCU_INIT_POINTER(udp_tunnel_gro->sk, NULL); - } - - spin_unlock(&udp_tunnel_gro_lock); -} -EXPORT_SYMBOL_GPL(udp_tunnel_update_gro_lookup); - -void udp_tunnel_update_gro_rcv(struct sock *sk, bool add) -{ - struct udp_tunnel_type_entry *cur = NULL; - struct udp_sock *up = udp_sk(sk); - int i, old_gro_type_nr; - - if (!up->gro_receive) - return; - - mutex_lock(&udp_tunnel_gro_type_lock); - for (i = 0; i < udp_tunnel_gro_type_nr; i++) - if (udp_tunnel_gro_types[i].gro_receive == up->gro_receive) - cur = &udp_tunnel_gro_types[i]; - - old_gro_type_nr = udp_tunnel_gro_type_nr; - if (add) { - /* - * Update the matching entry, if found, or add a new one - * if needed - */ - if (cur) { - refcount_inc(&cur->count); - goto out; - } - - if (unlikely(udp_tunnel_gro_type_nr == UDP_MAX_TUNNEL_TYPES)) { - pr_err_once("Too many UDP tunnel types, please increase UDP_MAX_TUNNEL_TYPES\n"); - /* Ensure static call will never be enabled */ - udp_tunnel_gro_type_nr = UDP_MAX_TUNNEL_TYPES + 2; - goto out; - } - - cur = &udp_tunnel_gro_types[udp_tunnel_gro_type_nr++]; - refcount_set(&cur->count, 1); - cur->gro_receive = up->gro_receive; - } else { - /* - * The stack cleanups only successfully added tunnel, the - * lookup on removal should never fail. 
- */ - if (WARN_ON_ONCE(!cur)) - goto out; - - if (!refcount_dec_and_test(&cur->count)) - goto out; - - /* avoid gaps, so that the enable tunnel has always id 0 */ - *cur = udp_tunnel_gro_types[--udp_tunnel_gro_type_nr]; - } - - if (udp_tunnel_gro_type_nr == 1) { - static_call_update(udp_tunnel_gro_rcv, - udp_tunnel_gro_types[0].gro_receive); - static_branch_enable(&udp_tunnel_static_call); - } else if (old_gro_type_nr == 1) { - static_branch_disable(&udp_tunnel_static_call); - static_call_update(udp_tunnel_gro_rcv, dummy_gro_rcv); - } - -out: - mutex_unlock(&udp_tunnel_gro_type_lock); -} -EXPORT_SYMBOL_GPL(udp_tunnel_update_gro_rcv); - -static void udp_tunnel_gro_init(void) -{ - mutex_init(&udp_tunnel_gro_type_lock); -} - -static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk, - struct list_head *head, - struct sk_buff *skb) -{ - if (static_branch_likely(&udp_tunnel_static_call)) { - if (unlikely(gro_recursion_inc_test(skb))) { - NAPI_GRO_CB(skb)->flush |= 1; - return NULL; - } - return static_call(udp_tunnel_gro_rcv)(sk, head, skb); - } - return call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); -} - -#else - -static void udp_tunnel_gro_init(void) {} - -static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk, - struct list_head *head, - struct sk_buff *skb) -{ - return call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); -} - -#endif static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, @@ -780,7 +622,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); - pp = udp_tunnel_gro_rcv(sk, head, skb); + pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); out: skb_gro_flush_final(skb, pp, flush); @@ -793,13 +635,8 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport, { const struct iphdr *iph = skb_gro_network_header(skb); struct net *net = dev_net_rcu(skb->dev); - struct sock *sk; int iif, sdif; - sk = udp_tunnel_sk(net, false); - if (sk && dport == htons(sk->sk_num)) - return sk; - inet_get_iif_sdif(skb, &iif, &sdif); return __udp4_lib_lookup(net, iph->saddr, sport, @@ -930,7 +767,5 @@ int __init udpv4_offload_init(void) .gro_complete = udp4_gro_complete, }, }; - - udp_tunnel_gro_init(); return inet_add_offload(&net_hotdata.udpv4_offload, IPPROTO_UDP); } diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index c49fceea8313..619a53eb672d 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -58,15 +58,6 @@ error: } EXPORT_SYMBOL(udp_sock_create4); -static bool sk_saddr_any(struct sock *sk) -{ -#if IS_ENABLED(CONFIG_IPV6) - return ipv6_addr_any(&sk->sk_v6_rcv_saddr); -#else - return !sk->sk_rcv_saddr; -#endif -} - void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *cfg) { @@ -89,11 +80,6 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, udp_sk(sk)->gro_complete = cfg->gro_complete; udp_tunnel_encap_enable(sk); - - udp_tunnel_update_gro_rcv(sock->sk, true); - - if (!sk->sk_dport && !sk->sk_bound_dev_if && sk_saddr_any(sock->sk)) - udp_tunnel_update_gro_lookup(net, sock->sk, true); } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7317f8e053f1..024458ef163c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include @@ 
-1826,7 +1825,6 @@ void udpv6_destroy_sock(struct sock *sk) if (udp_test_bit(ENCAP_ENABLED, sk)) { static_branch_dec(&udpv6_encap_needed_key); udp_encap_disable(); - udp_tunnel_cleanup_gro(sk); } } } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index d8445ac1b2e4..404212dfc99a 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -118,13 +118,8 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport, { const struct ipv6hdr *iph = skb_gro_network_header(skb); struct net *net = dev_net_rcu(skb->dev); - struct sock *sk; int iif, sdif; - sk = udp_tunnel_sk(net, true); - if (sk && dport == htons(sk->sk_num)) - return sk; - inet6_get_iif_sdif(skb, &iif, &sdif); return __udp6_lib_lookup(net, &iph->saddr, sport, -- 2.50.1 From b2d1e4c2cb8a27693acd1add5ef7295a976a63dd Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Mon, 24 Mar 2025 16:08:54 +0800 Subject: [PATCH 11/16] octeontx2-af: mcs: Remove redundant 'flush_workqueue()' calls 'destroy_workqueue()' already drains the queue before destroying it, so there is no need to flush it explicitly. Remove the redundant 'flush_workqueue()' calls. This was generated with coccinelle: @@ expression E; @@ - flush_workqueue(E); destroy_workqueue(E); Signed-off-by: Chen Ni Reviewed-by: Geetha sowjanya Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250324080854.408188-1-nichen@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c index d39d86e694cc..655dd4726d36 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c @@ -925,7 +925,6 @@ void rvu_mcs_exit(struct rvu *rvu) if (!rvu->mcs_intr_wq) return; - flush_workqueue(rvu->mcs_intr_wq); destroy_workqueue(rvu->mcs_intr_wq); rvu->mcs_intr_wq = NULL; } -- 2.50.1 From 976c2696b71da376d42e63ca3802eb2aafc164eb Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 21 Mar 2025 15:48:32 +0900 Subject: [PATCH 12/16] virtio_net: Split struct virtio_net_rss_config struct virtio_net_rss_config was less useful in actual code because of a flexible array placed in the middle. Add new structures that split it into two to avoid having a flexible array in the middle. Suggested-by: Jason Wang Signed-off-by: Akihiko Odaki Acked-by: Jason Wang Reviewed-by: Xuan Zhuo Acked-by: Michael S. 
Tsirkin Tested-by: Lei Yang Link: https://patch.msgid.link/20250321-virtio-v2-1-33afb8f4640b@daynix.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/virtio_net.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index ac9174717ef1..963540deae66 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -327,6 +327,19 @@ struct virtio_net_rss_config { __u8 hash_key_data[/* hash_key_length */]; }; +struct virtio_net_rss_config_hdr { + __le32 hash_types; + __le16 indirection_table_mask; + __le16 unclassified_queue; + __le16 indirection_table[/* 1 + indirection_table_mask */]; +}; + +struct virtio_net_rss_config_trailer { + __le16 max_tx_vq; + __u8 hash_key_length; + __u8 hash_key_data[/* hash_key_length */]; +}; + #define VIRTIO_NET_CTRL_MQ_RSS_CONFIG 1 /* -- 2.50.1 From 97841341e302eac13d54eb5e968570b5626196a7 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 21 Mar 2025 15:48:33 +0900 Subject: [PATCH 13/16] virtio_net: Fix endian with virtio_net_ctrl_rss Mark the fields of struct virtio_net_ctrl_rss as little endian as they are in struct virtio_net_rss_config, which it follows. Fixes: c7114b1249fa ("drivers/net/virtio_net: Added basic RSS support.") Signed-off-by: Akihiko Odaki Acked-by: Jason Wang Reviewed-by: Xuan Zhuo Acked-by: Michael S. Tsirkin Tested-by: Lei Yang Link: https://patch.msgid.link/20250321-virtio-v2-2-33afb8f4640b@daynix.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 34cec2b11b74..65e1ef77a1a9 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -368,15 +368,15 @@ struct receive_queue { */ #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 struct virtio_net_ctrl_rss { - u32 hash_types; - u16 indirection_table_mask; - u16 unclassified_queue; - u16 hash_cfg_reserved; /* for HASH_CONFIG (see virtio_net_hash_config for details) */ - u16 max_tx_vq; + __le32 hash_types; + __le16 indirection_table_mask; + __le16 unclassified_queue; + __le16 hash_cfg_reserved; /* for HASH_CONFIG (see virtio_net_hash_config for details) */ + __le16 max_tx_vq; u8 hash_key_length; u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; - u16 *indirection_table; + __le16 *indirection_table; }; /* Control VQ buffers: protected by the rtnl lock */ @@ -3638,9 +3638,9 @@ static void virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pair for (; i < vi->rss_indir_table_size; ++i) { indir_val = ethtool_rxfh_indir_default(i, queue_pairs); - vi->rss.indirection_table[i] = indir_val; + vi->rss.indirection_table[i] = cpu_to_le16(indir_val); } - vi->rss.max_tx_vq = queue_pairs; + vi->rss.max_tx_vq = cpu_to_le16(queue_pairs); } static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) @@ -4159,10 +4159,10 @@ err: static void virtnet_init_default_rss(struct virtnet_info *vi) { - vi->rss.hash_types = vi->rss_hash_types_supported; + vi->rss.hash_types = cpu_to_le32(vi->rss_hash_types_supported); vi->rss_hash_types_saved = vi->rss_hash_types_supported; vi->rss.indirection_table_mask = vi->rss_indir_table_size - ? vi->rss_indir_table_size - 1 : 0; + ? 
cpu_to_le16(vi->rss_indir_table_size - 1) : 0; vi->rss.unclassified_queue = 0; virtnet_rss_update_by_qpairs(vi, vi->curr_queue_pairs); @@ -4280,7 +4280,7 @@ static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc * if (new_hashtypes != vi->rss_hash_types_saved) { vi->rss_hash_types_saved = new_hashtypes; - vi->rss.hash_types = vi->rss_hash_types_saved; + vi->rss.hash_types = cpu_to_le32(vi->rss_hash_types_saved); if (vi->dev->features & NETIF_F_RXHASH) return virtnet_commit_rss_command(vi); } @@ -5460,7 +5460,7 @@ static int virtnet_get_rxfh(struct net_device *dev, if (rxfh->indir) { for (i = 0; i < vi->rss_indir_table_size; ++i) - rxfh->indir[i] = vi->rss.indirection_table[i]; + rxfh->indir[i] = le16_to_cpu(vi->rss.indirection_table[i]); } if (rxfh->key) @@ -5488,7 +5488,7 @@ static int virtnet_set_rxfh(struct net_device *dev, return -EOPNOTSUPP; for (i = 0; i < vi->rss_indir_table_size; ++i) - vi->rss.indirection_table[i] = rxfh->indir[i]; + vi->rss.indirection_table[i] = cpu_to_le16(rxfh->indir[i]); update = true; } @@ -6109,9 +6109,9 @@ static int virtnet_set_features(struct net_device *dev, if ((dev->features ^ features) & NETIF_F_RXHASH) { if (features & NETIF_F_RXHASH) - vi->rss.hash_types = vi->rss_hash_types_saved; + vi->rss.hash_types = cpu_to_le32(vi->rss_hash_types_saved); else - vi->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; + vi->rss.hash_types = cpu_to_le32(VIRTIO_NET_HASH_REPORT_NONE); if (!virtnet_commit_rss_command(vi)) return -EINVAL; -- 2.50.1 From ed3100e90d0d120a045a551b85eb43cf2527e885 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 21 Mar 2025 15:48:34 +0900 Subject: [PATCH 14/16] virtio_net: Use new RSS config structs The new RSS configuration structures allow easily constructing data for VIRTIO_NET_CTRL_MQ_RSS_CONFIG as they strictly follow the order of data for the command. Signed-off-by: Akihiko Odaki Acked-by: Jason Wang Reviewed-by: Xuan Zhuo Acked-by: Michael S. Tsirkin Tested-by: Lei Yang Link: https://patch.msgid.link/20250321-virtio-v2-3-33afb8f4640b@daynix.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 117 ++++++++++++++------------------------- 1 file changed, 43 insertions(+), 74 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 65e1ef77a1a9..e46241c6fdab 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -360,24 +360,7 @@ struct receive_queue { struct xdp_buff **xsk_buffs; }; -/* This structure can contain rss message with maximum settings for indirection table and keysize - * Note, that default structure that describes RSS configuration virtio_net_rss_config - * contains same info but can't handle table values. - * In any case, structure would be passed to virtio hw through sg_buf split by parts - * because table sizes may be differ according to the device configuration. 
- */ #define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 -struct virtio_net_ctrl_rss { - __le32 hash_types; - __le16 indirection_table_mask; - __le16 unclassified_queue; - __le16 hash_cfg_reserved; /* for HASH_CONFIG (see virtio_net_hash_config for details) */ - __le16 max_tx_vq; - u8 hash_key_length; - u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; - - __le16 *indirection_table; -}; /* Control VQ buffers: protected by the rtnl lock */ struct control_buf { @@ -421,7 +404,9 @@ struct virtnet_info { u16 rss_indir_table_size; u32 rss_hash_types_supported; u32 rss_hash_types_saved; - struct virtio_net_ctrl_rss rss; + struct virtio_net_rss_config_hdr *rss_hdr; + struct virtio_net_rss_config_trailer rss_trailer; + u8 rss_hash_key_data[VIRTIO_NET_RSS_MAX_KEY_SIZE]; /* Has control virtqueue */ bool has_cvq; @@ -523,23 +508,16 @@ enum virtnet_xmit_type { VIRTNET_XMIT_TYPE_XSK, }; -static int rss_indirection_table_alloc(struct virtio_net_ctrl_rss *rss, u16 indir_table_size) +static size_t virtnet_rss_hdr_size(const struct virtnet_info *vi) { - if (!indir_table_size) { - rss->indirection_table = NULL; - return 0; - } + u16 indir_table_size = vi->has_rss ? vi->rss_indir_table_size : 1; - rss->indirection_table = kmalloc_array(indir_table_size, sizeof(u16), GFP_KERNEL); - if (!rss->indirection_table) - return -ENOMEM; - - return 0; + return struct_size(vi->rss_hdr, indirection_table, indir_table_size); } -static void rss_indirection_table_free(struct virtio_net_ctrl_rss *rss) +static size_t virtnet_rss_trailer_size(const struct virtnet_info *vi) { - kfree(rss->indirection_table); + return struct_size(&vi->rss_trailer, hash_key_data, vi->rss_key_size); } /* We use the last two bits of the pointer to distinguish the xmit type. */ @@ -3638,15 +3616,16 @@ static void virtnet_rss_update_by_qpairs(struct virtnet_info *vi, u16 queue_pair for (; i < vi->rss_indir_table_size; ++i) { indir_val = ethtool_rxfh_indir_default(i, queue_pairs); - vi->rss.indirection_table[i] = cpu_to_le16(indir_val); + vi->rss_hdr->indirection_table[i] = cpu_to_le16(indir_val); } - vi->rss.max_tx_vq = cpu_to_le16(queue_pairs); + vi->rss_trailer.max_tx_vq = cpu_to_le16(queue_pairs); } static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) { struct virtio_net_ctrl_mq *mq __free(kfree) = NULL; - struct virtio_net_ctrl_rss old_rss; + struct virtio_net_rss_config_hdr *old_rss_hdr; + struct virtio_net_rss_config_trailer old_rss_trailer; struct net_device *dev = vi->dev; struct scatterlist sg; @@ -3661,24 +3640,28 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) * update (VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET below) and return directly. 
 	 */
 	if (vi->has_rss && !netif_is_rxfh_configured(dev)) {
-		memcpy(&old_rss, &vi->rss, sizeof(old_rss));
-		if (rss_indirection_table_alloc(&vi->rss, vi->rss_indir_table_size)) {
-			vi->rss.indirection_table = old_rss.indirection_table;
+		old_rss_hdr = vi->rss_hdr;
+		old_rss_trailer = vi->rss_trailer;
+		vi->rss_hdr = kzalloc(virtnet_rss_hdr_size(vi), GFP_KERNEL);
+		if (!vi->rss_hdr) {
+			vi->rss_hdr = old_rss_hdr;
 			return -ENOMEM;
 		}
 
+		*vi->rss_hdr = *old_rss_hdr;
 		virtnet_rss_update_by_qpairs(vi, queue_pairs);
 
 		if (!virtnet_commit_rss_command(vi)) {
 			/* restore ctrl_rss if commit_rss_command failed */
-			rss_indirection_table_free(&vi->rss);
-			memcpy(&vi->rss, &old_rss, sizeof(old_rss));
+			kfree(vi->rss_hdr);
+			vi->rss_hdr = old_rss_hdr;
+			vi->rss_trailer = old_rss_trailer;
 
 			dev_warn(&dev->dev, "Fail to set num of queue pairs to %d, because committing RSS failed\n",
 				 queue_pairs);
 			return -EINVAL;
 		}
-		rss_indirection_table_free(&old_rss);
+		kfree(old_rss_hdr);
 
 		goto succ;
 	}
@@ -4121,28 +4104,12 @@ static int virtnet_set_ringparam(struct net_device *dev,
 static bool virtnet_commit_rss_command(struct virtnet_info *vi)
 {
 	struct net_device *dev = vi->dev;
-	struct scatterlist sgs[4];
-	unsigned int sg_buf_size;
+	struct scatterlist sgs[2];
 
 	/* prepare sgs */
-	sg_init_table(sgs, 4);
-
-	sg_buf_size = offsetof(struct virtio_net_ctrl_rss, hash_cfg_reserved);
-	sg_set_buf(&sgs[0], &vi->rss, sg_buf_size);
-
-	if (vi->has_rss) {
-		sg_buf_size = sizeof(uint16_t) * vi->rss_indir_table_size;
-		sg_set_buf(&sgs[1], vi->rss.indirection_table, sg_buf_size);
-	} else {
-		sg_set_buf(&sgs[1], &vi->rss.hash_cfg_reserved, sizeof(uint16_t));
-	}
-
-	sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key)
-			- offsetof(struct virtio_net_ctrl_rss, max_tx_vq);
-	sg_set_buf(&sgs[2], &vi->rss.max_tx_vq, sg_buf_size);
-
-	sg_buf_size = vi->rss_key_size;
-	sg_set_buf(&sgs[3], vi->rss.key, sg_buf_size);
+	sg_init_table(sgs, 2);
+	sg_set_buf(&sgs[0], vi->rss_hdr, virtnet_rss_hdr_size(vi));
+	sg_set_buf(&sgs[1], &vi->rss_trailer, virtnet_rss_trailer_size(vi));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
 				  vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG
@@ -4159,17 +4126,17 @@ err:
 
 static void virtnet_init_default_rss(struct virtnet_info *vi)
 {
-	vi->rss.hash_types = cpu_to_le32(vi->rss_hash_types_supported);
+	vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_supported);
 	vi->rss_hash_types_saved = vi->rss_hash_types_supported;
-	vi->rss.indirection_table_mask = vi->rss_indir_table_size
+	vi->rss_hdr->indirection_table_mask = vi->rss_indir_table_size
 						? cpu_to_le16(vi->rss_indir_table_size - 1) : 0;
-	vi->rss.unclassified_queue = 0;
+	vi->rss_hdr->unclassified_queue = 0;
 
 	virtnet_rss_update_by_qpairs(vi, vi->curr_queue_pairs);
 
-	vi->rss.hash_key_length = vi->rss_key_size;
+	vi->rss_trailer.hash_key_length = vi->rss_key_size;
 
-	netdev_rss_key_fill(vi->rss.key, vi->rss_key_size);
+	netdev_rss_key_fill(vi->rss_hash_key_data, vi->rss_key_size);
 }
 
 static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info)
@@ -4280,7 +4247,7 @@ static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *
 
 	if (new_hashtypes != vi->rss_hash_types_saved) {
 		vi->rss_hash_types_saved = new_hashtypes;
-		vi->rss.hash_types = cpu_to_le32(vi->rss_hash_types_saved);
+		vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved);
 		if (vi->dev->features & NETIF_F_RXHASH)
 			return virtnet_commit_rss_command(vi);
 	}
@@ -5460,11 +5427,11 @@ static int virtnet_get_rxfh(struct net_device *dev,
 
 	if (rxfh->indir) {
 		for (i = 0; i < vi->rss_indir_table_size; ++i)
-			rxfh->indir[i] = le16_to_cpu(vi->rss.indirection_table[i]);
+			rxfh->indir[i] = le16_to_cpu(vi->rss_hdr->indirection_table[i]);
 	}
 
 	if (rxfh->key)
-		memcpy(rxfh->key, vi->rss.key, vi->rss_key_size);
+		memcpy(rxfh->key, vi->rss_hash_key_data, vi->rss_key_size);
 
 	rxfh->hfunc = ETH_RSS_HASH_TOP;
 
@@ -5488,7 +5455,7 @@ static int virtnet_set_rxfh(struct net_device *dev,
 			return -EOPNOTSUPP;
 
 		for (i = 0; i < vi->rss_indir_table_size; ++i)
-			vi->rss.indirection_table[i] = cpu_to_le16(rxfh->indir[i]);
+			vi->rss_hdr->indirection_table[i] = cpu_to_le16(rxfh->indir[i]);
 
 		update = true;
 	}
@@ -5500,7 +5467,7 @@ static int virtnet_set_rxfh(struct net_device *dev,
 		if (!vi->has_rss && !vi->has_rss_hash_report)
 			return -EOPNOTSUPP;
 
-		memcpy(vi->rss.key, rxfh->key, vi->rss_key_size);
+		memcpy(vi->rss_hash_key_data, rxfh->key, vi->rss_key_size);
 
 		update = true;
 	}
@@ -6109,9 +6076,9 @@ static int virtnet_set_features(struct net_device *dev,
 
 	if ((dev->features ^ features) & NETIF_F_RXHASH) {
 		if (features & NETIF_F_RXHASH)
-			vi->rss.hash_types = cpu_to_le32(vi->rss_hash_types_saved);
+			vi->rss_hdr->hash_types = cpu_to_le32(vi->rss_hash_types_saved);
 		else
-			vi->rss.hash_types = cpu_to_le32(VIRTIO_NET_HASH_REPORT_NONE);
+			vi->rss_hdr->hash_types = cpu_to_le32(VIRTIO_NET_HASH_REPORT_NONE);
 
 		if (!virtnet_commit_rss_command(vi))
 			return -EINVAL;
@@ -6801,9 +6768,11 @@ static int virtnet_probe(struct virtio_device *vdev)
 			virtio_cread16(vdev, offsetof(struct virtio_net_config,
 				rss_max_indirection_table_length));
 	}
-	err = rss_indirection_table_alloc(&vi->rss, vi->rss_indir_table_size);
-	if (err)
+	vi->rss_hdr = kzalloc(virtnet_rss_hdr_size(vi), GFP_KERNEL);
+	if (!vi->rss_hdr) {
+		err = -ENOMEM;
 		goto free;
+	}
 
 	if (vi->has_rss || vi->has_rss_hash_report) {
 		vi->rss_key_size =
@@ -7082,7 +7051,7 @@ static void virtnet_remove(struct virtio_device *vdev)
 
 	remove_vq_common(vi);
 
-	rss_indirection_table_free(&vi->rss);
+	kfree(vi->rss_hdr);
 
 	free_netdev(vi->dev);
 }
-- 
2.50.1
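
The commit message of the patch above points out that the new structures
follow the wire order of VIRTIO_NET_CTRL_MQ_RSS_CONFIG, which is why two
scatter-gather entries are now enough: one for the header up to and
including the indirection table, one for the trailer starting at max_tx_vq.
The stand-alone user-space sketch below mirrors that split to show how the
two sizes are computed. The struct and field names are local illustrations
of the layout described in the patch, not the kernel's virtio headers, and
a fixed-size key array stands in for the kernel's flexible one; endianness
is also omitted (the kernel stores these fields as __le16/__le32, as the
previous patch in the series shows).

/* Sketch only: mirrors the header/trailer split of the RSS command. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define RSS_MAX_KEY_SIZE 40	/* same value as VIRTIO_NET_RSS_MAX_KEY_SIZE */

struct rss_config_hdr {			/* fields in wire order */
	uint32_t hash_types;
	uint16_t indirection_table_mask;
	uint16_t unclassified_queue;
	uint16_t indirection_table[];	/* table entries follow directly */
};

struct rss_config_trailer {
	uint16_t max_tx_vq;
	uint8_t hash_key_length;
	uint8_t hash_key_data[RSS_MAX_KEY_SIZE];
};

int main(void)
{
	uint16_t table_entries = 128;	/* example indirection table size */
	uint8_t key_len = RSS_MAX_KEY_SIZE;

	/* Rough equivalents of virtnet_rss_hdr_size()/virtnet_rss_trailer_size(). */
	size_t hdr_size = sizeof(struct rss_config_hdr) +
			  table_entries * sizeof(uint16_t);
	size_t trailer_size = offsetof(struct rss_config_trailer, hash_key_data) +
			      key_len;

	struct rss_config_hdr *hdr = calloc(1, hdr_size);

	if (!hdr)
		return 1;
	hdr->indirection_table_mask = table_entries - 1;

	/* The whole command is just these two contiguous regions. */
	printf("sg[0] (header + indirection table): %zu bytes\n", hdr_size);
	printf("sg[1] (max_tx_vq + key): %zu bytes\n", trailer_size);

	free(hdr);
	return 0;
}
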
From 4944be2f5ad8c74b93e4e272f3a0f1a136bbc438 Mon Sep 17 00:00:00 2001
From: Akihiko Odaki
Date: Fri, 21 Mar 2025 15:48:35 +0900
Subject: [PATCH 15/16] virtio_net: Allocate rss_hdr with devres

virtnet_probe() lacks the code to free rss_hdr in its error path.
Allocate rss_hdr with devres so that it will be automatically freed.

Fixes: 86a48a00efdf ("virtio_net: Support dynamic rss indirection table size")
Signed-off-by: Akihiko Odaki
Acked-by: Jason Wang
Reviewed-by: Xuan Zhuo
Acked-by: Michael S. Tsirkin
Tested-by: Lei Yang
Link: https://patch.msgid.link/20250321-virtio-v2-4-33afb8f4640b@daynix.com
Signed-off-by: Jakub Kicinski
---
 drivers/net/virtio_net.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e46241c6fdab..aac5a5184ba9 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -3642,7 +3642,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 	if (vi->has_rss && !netif_is_rxfh_configured(dev)) {
 		old_rss_hdr = vi->rss_hdr;
 		old_rss_trailer = vi->rss_trailer;
-		vi->rss_hdr = kzalloc(virtnet_rss_hdr_size(vi), GFP_KERNEL);
+		vi->rss_hdr = devm_kzalloc(&dev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL);
 		if (!vi->rss_hdr) {
 			vi->rss_hdr = old_rss_hdr;
 			return -ENOMEM;
@@ -3653,7 +3653,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 
 		if (!virtnet_commit_rss_command(vi)) {
 			/* restore ctrl_rss if commit_rss_command failed */
-			kfree(vi->rss_hdr);
+			devm_kfree(&dev->dev, vi->rss_hdr);
 			vi->rss_hdr = old_rss_hdr;
 			vi->rss_trailer = old_rss_trailer;
 
@@ -3661,7 +3661,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 				 queue_pairs);
 			return -EINVAL;
 		}
-		kfree(old_rss_hdr);
+		devm_kfree(&dev->dev, old_rss_hdr);
 
 		goto succ;
 	}
@@ -6768,7 +6768,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 			virtio_cread16(vdev, offsetof(struct virtio_net_config,
 				rss_max_indirection_table_length));
 	}
-	vi->rss_hdr = kzalloc(virtnet_rss_hdr_size(vi), GFP_KERNEL);
+	vi->rss_hdr = devm_kzalloc(&vdev->dev, virtnet_rss_hdr_size(vi), GFP_KERNEL);
 	if (!vi->rss_hdr) {
 		err = -ENOMEM;
 		goto free;
@@ -7051,8 +7051,6 @@ static void virtnet_remove(struct virtio_device *vdev)
 
 	remove_vq_common(vi);
 
-	kfree(vi->rss_hdr);
-
 	free_netdev(vi->dev);
 }
 
-- 
2.50.1
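
For context on the fix above: devres ties an allocation to a struct device,
so memory obtained with devm_kzalloc() is released automatically when probe
fails later or when the device goes away, and error paths need no explicit
kfree(). A minimal, hypothetical platform-driver sketch of that pattern
follows; the "foo" driver and its private struct are invented for
illustration, while devm_kzalloc()/devm_kcalloc() are the real devres APIs.

// Sketch of the devres allocation pattern; "foo" names are hypothetical.
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

struct foo_priv {
	u32 *table;
};

static int foo_probe(struct platform_device *pdev)
{
	struct foo_priv *priv;

	/* Freed automatically if probe fails later or on device removal. */
	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	priv->table = devm_kcalloc(&pdev->dev, 128, sizeof(*priv->table),
				   GFP_KERNEL);
	if (!priv->table)
		return -ENOMEM;	/* no kfree(priv) needed on this error path */

	platform_set_drvdata(pdev, priv);
	return 0;
}

static struct platform_driver foo_driver = {
	.probe = foo_probe,
	.driver = {
		.name = "foo-devres-demo",
	},
};
module_platform_driver(foo_driver);

MODULE_DESCRIPTION("devres allocation sketch");
MODULE_LICENSE("GPL");
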
From ba6f418fbf64bb8c0e98dc1b548c151beeedd16c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Mon, 24 Mar 2025 15:45:27 -0700
Subject: [PATCH 16/16] net: bubble up taking netdev instance lock to callers of net_devmem_unbind_dmabuf()

A recent commit added taking the netdev instance lock in
netdev_nl_bind_rx_doit(), but didn't remove it in
net_devmem_unbind_dmabuf() which it calls from an error path.

Always expect the callers of net_devmem_unbind_dmabuf() to hold the
lock. This is consistent with net_devmem_bind_dmabuf().

(Not so) coincidentally this also protects mp_param with the instance
lock, which the rest of this series needs.

Fixes: 1d22d3060b9b ("net: drop rtnl_lock for queue_mgmt operations")
Reviewed-by: Mina Almasry
Acked-by: Stanislav Fomichev
Link: https://patch.msgid.link/20250324224537.248800-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski
---
 net/core/devmem.c      | 2 --
 net/core/netdev-genl.c | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/devmem.c b/net/core/devmem.c
index 6802e82a4d03..ee145a2aa41c 100644
--- a/net/core/devmem.c
+++ b/net/core/devmem.c
@@ -128,12 +128,10 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
 		rxq->mp_params.mp_priv = NULL;
 		rxq->mp_params.mp_ops = NULL;
 
-		netdev_lock(binding->dev);
 		rxq_idx = get_netdev_rx_queue_index(rxq);
 
 		err = netdev_rx_queue_restart(binding->dev, rxq_idx);
 		WARN_ON(err && err != -ENETDOWN);
-		netdev_unlock(binding->dev);
 	}
 
 	xa_erase(&net_devmem_dmabuf_bindings, binding->id);
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index a186fea63c09..9e4882a22407 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -947,7 +947,9 @@ void netdev_nl_sock_priv_destroy(struct netdev_nl_sock *priv)
 
 	mutex_lock(&priv->lock);
 	list_for_each_entry_safe(binding, temp, &priv->bindings, list) {
+		netdev_lock(binding->dev);
 		net_devmem_unbind_dmabuf(binding);
+		netdev_unlock(binding->dev);
 	}
 	mutex_unlock(&priv->lock);
 }
-- 
2.50.1
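
The last patch is a change of locking convention rather than of the lock
itself: net_devmem_unbind_dmabuf() stops taking the netdev instance lock and
every caller must hold it instead, mirroring net_devmem_bind_dmabuf(). Below
is a self-contained sketch of that "caller holds the lock" convention in
plain C with pthreads; all names are invented and this is not kernel code,
it only illustrates why widening the lock scope to the caller avoids taking
the same lock twice and covers the whole operation.

/* Stand-alone illustration of moving lock acquisition into the caller. */
#include <pthread.h>
#include <stdio.h>

struct dev {
	pthread_mutex_t lock;	/* stands in for the per-netdev instance lock */
	int bound;
};

/* Convention: callers must hold dev->lock; the helper no longer locks. */
static void unbind_locked(struct dev *d)
{
	d->bound = 0;
}

static void teardown(struct dev *d)
{
	/* One lock scope in the caller covers the whole teardown, and a
	 * caller that already holds the lock cannot deadlock on a nested
	 * acquisition inside the helper. */
	pthread_mutex_lock(&d->lock);
	unbind_locked(d);
	pthread_mutex_unlock(&d->lock);
}

int main(void)
{
	struct dev d = { .lock = PTHREAD_MUTEX_INITIALIZER, .bound = 1 };

	teardown(&d);
	printf("bound=%d\n", d.bound);
	return 0;
}
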