From 5f27092328ce47104d2d569bab4d6ec329cb6fec Mon Sep 17 00:00:00 2001 From: Hans-Frieder Vogt Date: Sat, 22 Mar 2025 11:45:54 +0100 Subject: [PATCH 01/16] net: phy: aquantia: search for firmware-name in fwnode Allow the firmware name of an Aquantia PHY alternatively be provided by the property "firmware-name" of a swnode. This software node may be provided by the MAC or MDIO driver. Signed-off-by: Hans-Frieder Vogt Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250322-tn9510-v3a-v7-3-672a9a3d8628@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/phy/aquantia/aquantia_firmware.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/aquantia/aquantia_firmware.c b/drivers/net/phy/aquantia/aquantia_firmware.c index dab3af80593f..bbbcc9736b00 100644 --- a/drivers/net/phy/aquantia/aquantia_firmware.c +++ b/drivers/net/phy/aquantia/aquantia_firmware.c @@ -328,10 +328,11 @@ static int aqr_firmware_load_fs(struct phy_device *phydev) const char *fw_name; int ret; - ret = of_property_read_string(dev->of_node, "firmware-name", - &fw_name); - if (ret) + ret = device_property_read_string(dev, "firmware-name", &fw_name); + if (ret) { + phydev_err(phydev, "failed to read firmware-name: %d\n", ret); return ret; + } ret = request_firmware(&fw, fw_name, dev); if (ret) { -- 2.51.0 From e31e67f58cf2acc6ef25462b1479a2a8d8f82e57 Mon Sep 17 00:00:00 2001 From: Hans-Frieder Vogt Date: Sat, 22 Mar 2025 11:45:55 +0100 Subject: [PATCH 02/16] net: phy: aquantia: add essential functions to aqr105 driver This patch makes functions that were provided for aqr107 applicable to aqr105, or replaces generic functions with specific ones. Since the aqr105 was introduced before NBASE-T was defined (or 802.3bz), there are a number of vendor specific registers involved in the definition of the advertisement, in auto-negotiation and in the setting of the speed. The functions have been written following the downstream driver for TN4010 cards with aqr105 PHY, and use code from aqr107 functions wherever it seemed to make sense. Signed-off-by: Hans-Frieder Vogt Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250322-tn9510-v3a-v7-4-672a9a3d8628@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/phy/aquantia/aquantia_main.c | 239 ++++++++++++++++++++++- 1 file changed, 237 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/aquantia/aquantia_main.c b/drivers/net/phy/aquantia/aquantia_main.c index 86b0e63de5d8..08b1c9cc902b 100644 --- a/drivers/net/phy/aquantia/aquantia_main.c +++ b/drivers/net/phy/aquantia/aquantia_main.c @@ -50,6 +50,7 @@ #define MDIO_AN_VEND_PROV_1000BASET_HALF BIT(14) #define MDIO_AN_VEND_PROV_5000BASET_FULL BIT(11) #define MDIO_AN_VEND_PROV_2500BASET_FULL BIT(10) +#define MDIO_AN_VEND_PROV_EXC_PHYID_INFO BIT(6) #define MDIO_AN_VEND_PROV_DOWNSHIFT_EN BIT(4) #define MDIO_AN_VEND_PROV_DOWNSHIFT_MASK GENMASK(3, 0) #define MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT 4 @@ -333,6 +334,238 @@ static int aqr_read_status(struct phy_device *phydev) return genphy_c45_read_status(phydev); } +static int aqr105_get_features(struct phy_device *phydev) +{ + int ret; + + /* Normal feature discovery */ + ret = genphy_c45_pma_read_abilities(phydev); + if (ret) + return ret; + + /* The AQR105 PHY misses to indicate the 2.5G and 5G modes, so add them + * here + */ + linkmode_set_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + phydev->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + phydev->supported); + + /* The AQR105 PHY suppports both RJ45 and SFP+ interfaces */ + linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, phydev->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, phydev->supported); + + return 0; +} + +static int aqr105_setup_forced(struct phy_device *phydev) +{ + int vend = MDIO_AN_VEND_PROV_EXC_PHYID_INFO; + int ctrl10 = 0; + int adv = ADVERTISE_CSMA; + int ret; + + switch (phydev->speed) { + case SPEED_100: + adv |= ADVERTISE_100FULL; + break; + case SPEED_1000: + adv |= ADVERTISE_NPAGE; + if (phydev->duplex == DUPLEX_FULL) + vend |= MDIO_AN_VEND_PROV_1000BASET_FULL; + else + vend |= MDIO_AN_VEND_PROV_1000BASET_HALF; + break; + case SPEED_2500: + adv |= (ADVERTISE_NPAGE | ADVERTISE_RESV); + vend |= MDIO_AN_VEND_PROV_2500BASET_FULL; + break; + case SPEED_5000: + adv |= (ADVERTISE_NPAGE | ADVERTISE_RESV); + vend |= MDIO_AN_VEND_PROV_5000BASET_FULL; + break; + case SPEED_10000: + adv |= (ADVERTISE_NPAGE | ADVERTISE_RESV); + ctrl10 |= MDIO_AN_10GBT_CTRL_ADV10G; + break; + default: + return -EINVAL; + } + ret = phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE, adv); + if (ret < 0) + return ret; + ret = phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_VEND_PROV, vend); + if (ret < 0) + return ret; + ret = phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL, ctrl10); + if (ret < 0) + return ret; + + /* set by vendor driver, but should be on by default */ + ret = phy_set_bits_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1, + MDIO_AN_CTRL1_XNP); + if (ret < 0) + return ret; + + return genphy_c45_an_disable_aneg(phydev); +} + +static int aqr105_config_aneg(struct phy_device *phydev) +{ + bool changed = false; + u16 reg; + int ret; + + ret = aqr_set_mdix(phydev, phydev->mdix_ctrl); + if (ret < 0) + return ret; + if (ret > 0) + changed = true; + + if (phydev->autoneg == AUTONEG_DISABLE) + return aqr105_setup_forced(phydev); + + ret = genphy_c45_an_config_aneg(phydev); + if (ret < 0) + return ret; + if (ret > 0) + changed = true; + + /* Clause 45 has no standardized support for 1000BaseT, therefore + * use vendor registers for this mode. + */ + reg = 0; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + phydev->advertising)) + reg |= MDIO_AN_VEND_PROV_1000BASET_FULL; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, + phydev->advertising)) + reg |= MDIO_AN_VEND_PROV_1000BASET_HALF; + + /* Handle the case when the 2.5G and 5G speeds are not advertised */ + if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + phydev->advertising)) + reg |= MDIO_AN_VEND_PROV_2500BASET_FULL; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + phydev->advertising)) + reg |= MDIO_AN_VEND_PROV_5000BASET_FULL; + + ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_VEND_PROV, + MDIO_AN_VEND_PROV_1000BASET_HALF | + MDIO_AN_VEND_PROV_1000BASET_FULL | + MDIO_AN_VEND_PROV_2500BASET_FULL | + MDIO_AN_VEND_PROV_5000BASET_FULL, reg); + if (ret < 0) + return ret; + if (ret > 0) + changed = true; + + return genphy_c45_check_and_restart_aneg(phydev, changed); +} + +static int aqr105_read_rate(struct phy_device *phydev) +{ + int val; + + val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_TX_VEND_STATUS1); + if (val < 0) + return val; + + if (val & MDIO_AN_TX_VEND_STATUS1_FULL_DUPLEX) + phydev->duplex = DUPLEX_FULL; + else + phydev->duplex = DUPLEX_HALF; + + switch (FIELD_GET(MDIO_AN_TX_VEND_STATUS1_RATE_MASK, val)) { + case MDIO_AN_TX_VEND_STATUS1_10BASET: + phydev->speed = SPEED_10; + break; + case MDIO_AN_TX_VEND_STATUS1_100BASETX: + phydev->speed = SPEED_100; + break; + case MDIO_AN_TX_VEND_STATUS1_1000BASET: + phydev->speed = SPEED_1000; + break; + case MDIO_AN_TX_VEND_STATUS1_2500BASET: + phydev->speed = SPEED_2500; + break; + case MDIO_AN_TX_VEND_STATUS1_5000BASET: + phydev->speed = SPEED_5000; + break; + case MDIO_AN_TX_VEND_STATUS1_10GBASET: + phydev->speed = SPEED_10000; + break; + default: + phydev->speed = SPEED_UNKNOWN; + } + + return 0; +} + +static int aqr105_read_status(struct phy_device *phydev) +{ + int ret; + int val; + + ret = aqr_read_status(phydev); + if (ret) + return ret; + + if (!phydev->link || phydev->autoneg == AUTONEG_DISABLE) + return 0; + + /** + * The status register is not immediately correct on line side link up. + * Poll periodically until it reflects the correct ON state. + * Only return fail for read error, timeout defaults to OFF state. + */ + ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_PHYXS, + MDIO_PHYXS_VEND_IF_STATUS, val, + (FIELD_GET(MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK, val) != + MDIO_PHYXS_VEND_IF_STATUS_TYPE_OFF), + AQR107_OP_IN_PROG_SLEEP, + AQR107_OP_IN_PROG_TIMEOUT, false); + if (ret && ret != -ETIMEDOUT) + return ret; + + switch (FIELD_GET(MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK, val)) { + case MDIO_PHYXS_VEND_IF_STATUS_TYPE_KR: + phydev->interface = PHY_INTERFACE_MODE_10GKR; + break; + case MDIO_PHYXS_VEND_IF_STATUS_TYPE_KX: + phydev->interface = PHY_INTERFACE_MODE_1000BASEKX; + break; + case MDIO_PHYXS_VEND_IF_STATUS_TYPE_XFI: + phydev->interface = PHY_INTERFACE_MODE_10GBASER; + break; + case MDIO_PHYXS_VEND_IF_STATUS_TYPE_USXGMII: + phydev->interface = PHY_INTERFACE_MODE_USXGMII; + break; + case MDIO_PHYXS_VEND_IF_STATUS_TYPE_XAUI: + phydev->interface = PHY_INTERFACE_MODE_XAUI; + break; + case MDIO_PHYXS_VEND_IF_STATUS_TYPE_SGMII: + phydev->interface = PHY_INTERFACE_MODE_SGMII; + break; + case MDIO_PHYXS_VEND_IF_STATUS_TYPE_RXAUI: + phydev->interface = PHY_INTERFACE_MODE_RXAUI; + break; + case MDIO_PHYXS_VEND_IF_STATUS_TYPE_OCSGMII: + phydev->interface = PHY_INTERFACE_MODE_2500BASEX; + break; + case MDIO_PHYXS_VEND_IF_STATUS_TYPE_OFF: + default: + phydev->link = false; + phydev->interface = PHY_INTERFACE_MODE_NA; + break; + } + + /* Read rate from vendor register */ + return aqr105_read_rate(phydev); +} + static int aqr107_read_rate(struct phy_device *phydev) { u32 config_reg; @@ -911,11 +1144,13 @@ static struct phy_driver aqr_driver[] = { { PHY_ID_MATCH_MODEL(PHY_ID_AQR105), .name = "Aquantia AQR105", - .config_aneg = aqr_config_aneg, + .get_features = aqr105_get_features, .probe = aqr107_probe, + .config_init = aqr107_config_init, + .config_aneg = aqr105_config_aneg, .config_intr = aqr_config_intr, .handle_interrupt = aqr_handle_interrupt, - .read_status = aqr_read_status, + .read_status = aqr105_read_status, .suspend = aqr107_suspend, .resume = aqr107_resume, }, -- 2.51.0 From 25b6a6d29d4082f6ac231c056ac321a996eb55c9 Mon Sep 17 00:00:00 2001 From: Hans-Frieder Vogt Date: Sat, 22 Mar 2025 11:45:56 +0100 Subject: [PATCH 03/16] net: tn40xx: create swnode for mdio and aqr105 phy and add to mdiobus In case of an AQR105-based device, create a software node for the mdio function, with a child node for the Aquantia AQR105 PHY, providing a firmware-name (and a bit more, which may be used for future checks) to allow the PHY to load a MAC specific firmware from the file system. The name of the PHY software node follows the naming convention suggested in the patch for the mdiobus_scan function (in the same patch series). Signed-off-by: Hans-Frieder Vogt Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250322-tn9510-v3a-v7-5-672a9a3d8628@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/tehuti/tn40.c | 5 +- drivers/net/ethernet/tehuti/tn40.h | 33 ++++++++++ drivers/net/ethernet/tehuti/tn40_mdio.c | 82 ++++++++++++++++++++++++- 3 files changed, 117 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/tehuti/tn40.c b/drivers/net/ethernet/tehuti/tn40.c index 259bdac24cf2..a4dd04fc6d89 100644 --- a/drivers/net/ethernet/tehuti/tn40.c +++ b/drivers/net/ethernet/tehuti/tn40.c @@ -1778,7 +1778,7 @@ static int tn40_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ret = tn40_phy_register(priv); if (ret) { dev_err(&pdev->dev, "failed to set up PHY.\n"); - goto err_free_irq; + goto err_cleanup_swnodes; } ret = tn40_priv_init(priv); @@ -1795,6 +1795,8 @@ static int tn40_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_unregister_phydev: tn40_phy_unregister(priv); +err_cleanup_swnodes: + tn40_swnodes_cleanup(priv); err_free_irq: pci_free_irq_vectors(pdev); err_unset_drvdata: @@ -1816,6 +1818,7 @@ static void tn40_remove(struct pci_dev *pdev) unregister_netdev(ndev); tn40_phy_unregister(priv); + tn40_swnodes_cleanup(priv); pci_free_irq_vectors(priv->pdev); pci_set_drvdata(pdev, NULL); iounmap(priv->regs); diff --git a/drivers/net/ethernet/tehuti/tn40.h b/drivers/net/ethernet/tehuti/tn40.h index 490781fe5120..25da8686d469 100644 --- a/drivers/net/ethernet/tehuti/tn40.h +++ b/drivers/net/ethernet/tehuti/tn40.h @@ -4,10 +4,13 @@ #ifndef _TN40_H_ #define _TN40_H_ +#include #include "tn40_regs.h" #define TN40_DRV_NAME "tn40xx" +#define PCI_DEVICE_ID_TEHUTI_TN9510 0x4025 + #define TN40_MDIO_SPEED_1MHZ (1) #define TN40_MDIO_SPEED_6MHZ (6) @@ -102,10 +105,39 @@ struct tn40_txdb { int size; /* Number of elements in the db */ }; +#define NODE_PROP(_NAME, _PROP) ( \ + (const struct software_node) { \ + .name = _NAME, \ + .properties = _PROP, \ + }) + +#define NODE_PAR_PROP(_NAME, _PAR, _PROP) ( \ + (const struct software_node) { \ + .name = _NAME, \ + .parent = _PAR, \ + .properties = _PROP, \ + }) + +enum tn40_swnodes { + SWNODE_MDIO, + SWNODE_PHY, + SWNODE_MAX +}; + +struct tn40_nodes { + char phy_name[32]; + char mdio_name[32]; + struct property_entry phy_props[3]; + struct software_node swnodes[SWNODE_MAX]; + const struct software_node *group[SWNODE_MAX + 1]; +}; + struct tn40_priv { struct net_device *ndev; struct pci_dev *pdev; + struct tn40_nodes nodes; + struct napi_struct napi; /* RX FIFOs: 1 for data (full) descs, and 2 for free descs */ struct tn40_rxd_fifo rxd_fifo0; @@ -225,6 +257,7 @@ static inline void tn40_write_reg(struct tn40_priv *priv, u32 reg, u32 val) int tn40_set_link_speed(struct tn40_priv *priv, u32 speed); +void tn40_swnodes_cleanup(struct tn40_priv *priv); int tn40_mdiobus_init(struct tn40_priv *priv); int tn40_phy_register(struct tn40_priv *priv); diff --git a/drivers/net/ethernet/tehuti/tn40_mdio.c b/drivers/net/ethernet/tehuti/tn40_mdio.c index af18615d64a8..5bb0cbc87d06 100644 --- a/drivers/net/ethernet/tehuti/tn40_mdio.c +++ b/drivers/net/ethernet/tehuti/tn40_mdio.c @@ -14,6 +14,8 @@ (FIELD_PREP(TN40_MDIO_PRTAD_MASK, (port)))) #define TN40_MDIO_CMD_READ BIT(15) +#define AQR105_FIRMWARE "tehuti/aqr105-tn40xx.cld" + static void tn40_mdio_set_speed(struct tn40_priv *priv, u32 speed) { void __iomem *regs = priv->regs; @@ -111,6 +113,56 @@ static int tn40_mdio_write_c45(struct mii_bus *mii_bus, int addr, int devnum, return tn40_mdio_write(mii_bus->priv, addr, devnum, regnum, val); } +/* registers an mdio node and an aqr105 PHY at address 1 + * tn40_mdio-%id { + * ethernet-phy@1 { + * compatible = "ethernet-phy-id03a1.b4a3"; + * reg = <1>; + * firmware-name = AQR105_FIRMWARE; + * }; + * }; + */ +static int tn40_swnodes_register(struct tn40_priv *priv) +{ + struct tn40_nodes *nodes = &priv->nodes; + struct pci_dev *pdev = priv->pdev; + struct software_node *swnodes; + u32 id; + + id = pci_dev_id(pdev); + + snprintf(nodes->phy_name, sizeof(nodes->phy_name), "ethernet-phy@1"); + snprintf(nodes->mdio_name, sizeof(nodes->mdio_name), "tn40_mdio-%x", + id); + + swnodes = nodes->swnodes; + + swnodes[SWNODE_MDIO] = NODE_PROP(nodes->mdio_name, NULL); + + nodes->phy_props[0] = PROPERTY_ENTRY_STRING("compatible", + "ethernet-phy-id03a1.b4a3"); + nodes->phy_props[1] = PROPERTY_ENTRY_U32("reg", 1); + nodes->phy_props[2] = PROPERTY_ENTRY_STRING("firmware-name", + AQR105_FIRMWARE); + swnodes[SWNODE_PHY] = NODE_PAR_PROP(nodes->phy_name, + &swnodes[SWNODE_MDIO], + nodes->phy_props); + + nodes->group[SWNODE_PHY] = &swnodes[SWNODE_PHY]; + nodes->group[SWNODE_MDIO] = &swnodes[SWNODE_MDIO]; + return software_node_register_node_group(nodes->group); +} + +void tn40_swnodes_cleanup(struct tn40_priv *priv) +{ + /* cleanup of swnodes is only needed for AQR105-based cards */ + if (priv->pdev->device == PCI_DEVICE_ID_TEHUTI_TN9510) { + fwnode_handle_put(dev_fwnode(&priv->mdio->dev)); + device_remove_software_node(&priv->mdio->dev); + software_node_unregister_node_group(priv->nodes.group); + } +} + int tn40_mdiobus_init(struct tn40_priv *priv) { struct pci_dev *pdev = priv->pdev; @@ -129,14 +181,40 @@ int tn40_mdiobus_init(struct tn40_priv *priv) bus->read_c45 = tn40_mdio_read_c45; bus->write_c45 = tn40_mdio_write_c45; + priv->mdio = bus; + + /* provide swnodes for AQR105-based cards only */ + if (pdev->device == PCI_DEVICE_ID_TEHUTI_TN9510) { + ret = tn40_swnodes_register(priv); + if (ret) { + pr_err("swnodes failed\n"); + return ret; + } + + ret = device_add_software_node(&bus->dev, + priv->nodes.group[SWNODE_MDIO]); + if (ret) { + dev_err(&pdev->dev, + "device_add_software_node failed: %d\n", ret); + goto err_swnodes_unregister; + } + } ret = devm_mdiobus_register(&pdev->dev, bus); if (ret) { dev_err(&pdev->dev, "failed to register mdiobus %d %u %u\n", ret, bus->state, MDIOBUS_UNREGISTERED); - return ret; + goto err_swnodes_cleanup; } tn40_mdio_set_speed(priv, TN40_MDIO_SPEED_6MHZ); - priv->mdio = bus; return 0; + +err_swnodes_unregister: + software_node_unregister_node_group(priv->nodes.group); + return ret; +err_swnodes_cleanup: + tn40_swnodes_cleanup(priv); + return ret; } + +MODULE_FIRMWARE(AQR105_FIRMWARE); -- 2.51.0 From 07cfe3a557564ad7d982625ce1f5d8b05a016f68 Mon Sep 17 00:00:00 2001 From: Hans-Frieder Vogt Date: Sat, 22 Mar 2025 11:45:57 +0100 Subject: [PATCH 04/16] net: tn40xx: prepare tn40xx driver to find phy of the TN9510 card Prepare the tn40xx driver to load for Tehuti TN9510 cards, which require bit 3 in the register TN40_REG_MDIO_CMD_STAT to be set. The function of bit 3 is unclear, but may have something to do with the length of the preamble in the MDIO communication. If bit 3 is not set, the PHY will not be found when performing a scan for PHYs. Use the available tn40_mdio_set_speed function which includes setting bit 3. Just move the function to before the devm_mdio_register function, which scans the mdio bus for PHYs. Signed-off-by: Hans-Frieder Vogt Reviewed-by: Andrew Lunn Reviewed-by: FUJITA Tomonori Link: https://patch.msgid.link/20250322-tn9510-v3a-v7-6-672a9a3d8628@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/tehuti/tn40_mdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/tehuti/tn40_mdio.c b/drivers/net/ethernet/tehuti/tn40_mdio.c index 5bb0cbc87d06..fb1a4a2e4dbc 100644 --- a/drivers/net/ethernet/tehuti/tn40_mdio.c +++ b/drivers/net/ethernet/tehuti/tn40_mdio.c @@ -200,13 +200,13 @@ int tn40_mdiobus_init(struct tn40_priv *priv) } } + tn40_mdio_set_speed(priv, TN40_MDIO_SPEED_6MHZ); ret = devm_mdiobus_register(&pdev->dev, bus); if (ret) { dev_err(&pdev->dev, "failed to register mdiobus %d %u %u\n", ret, bus->state, MDIOBUS_UNREGISTERED); goto err_swnodes_cleanup; } - tn40_mdio_set_speed(priv, TN40_MDIO_SPEED_6MHZ); return 0; err_swnodes_unregister: -- 2.51.0 From 53377b5c2952097527b01ce2f1d9a9332f042f70 Mon Sep 17 00:00:00 2001 From: Hans-Frieder Vogt Date: Sat, 22 Mar 2025 11:45:58 +0100 Subject: [PATCH 05/16] net: tn40xx: add pci-id of the aqr105-based Tehuti TN4010 cards Add the PCI-ID of the AQR105-based Tehuti TN4010 cards to allow loading of the tn40xx driver on these cards. Here, I chose the detailed definition with the subvendor ID similar to the QT2025 cards with the PCI-ID TEHUTI:0x4022, because there is a card with an AQ2104 hiding amongst the AQR105 cards, and they all come with the same PCI-ID (TEHUTI:0x4025). But the AQ2104 is currently not supported. Signed-off-by: Hans-Frieder Vogt Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250322-tn9510-v3a-v7-7-672a9a3d8628@gmx.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/tehuti/tn40.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/tehuti/tn40.c b/drivers/net/ethernet/tehuti/tn40.c index a4dd04fc6d89..558b791a97ed 100644 --- a/drivers/net/ethernet/tehuti/tn40.c +++ b/drivers/net/ethernet/tehuti/tn40.c @@ -1835,6 +1835,10 @@ static const struct pci_device_id tn40_id_table[] = { PCI_VENDOR_ID_ASUSTEK, 0x8709) }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_TEHUTI, 0x4022, PCI_VENDOR_ID_EDIMAX, 0x8103) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_TEHUTI, PCI_DEVICE_ID_TEHUTI_TN9510, + PCI_VENDOR_ID_TEHUTI, 0x3015) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_TEHUTI, PCI_DEVICE_ID_TEHUTI_TN9510, + PCI_VENDOR_ID_EDIMAX, 0x8102) }, { } }; -- 2.51.0 From bf2986fcf82a449441f9ee4335df19be19e83970 Mon Sep 17 00:00:00 2001 From: Minjoong Kim Date: Sat, 22 Mar 2025 10:52:00 +0000 Subject: [PATCH 06/16] atm: Fix NULL pointer dereference When MPOA_cache_impos_rcvd() receives the msg, it can trigger Null Pointer Dereference Vulnerability if both entry and holding_time are NULL. Because there is only for the situation where entry is NULL and holding_time exists, it can be passed when both entry and holding_time are NULL. If these are NULL, the entry will be passd to eg_cache_put() as parameter and it is referenced by entry->use code in it. kasan log: [ 3.316691] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000006:I [ 3.317568] KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] [ 3.318188] CPU: 3 UID: 0 PID: 79 Comm: ex Not tainted 6.14.0-rc2 #102 [ 3.318601] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 [ 3.319298] RIP: 0010:eg_cache_remove_entry+0xa5/0x470 [ 3.319677] Code: c1 f7 6e fd 48 c7 c7 00 7e 38 b2 e8 95 64 54 fd 48 c7 c7 40 7e 38 b2 48 89 ee e80 [ 3.321220] RSP: 0018:ffff88800583f8a8 EFLAGS: 00010006 [ 3.321596] RAX: 0000000000000006 RBX: ffff888005989000 RCX: ffffffffaecc2d8e [ 3.322112] RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000030 [ 3.322643] RBP: 0000000000000000 R08: 0000000000000000 R09: fffffbfff6558b88 [ 3.323181] R10: 0000000000000003 R11: 203a207972746e65 R12: 1ffff11000b07f15 [ 3.323707] R13: dffffc0000000000 R14: ffff888005989000 R15: ffff888005989068 [ 3.324185] FS: 000000001b6313c0(0000) GS:ffff88806d380000(0000) knlGS:0000000000000000 [ 3.325042] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3.325545] CR2: 00000000004b4b40 CR3: 000000000248e000 CR4: 00000000000006f0 [ 3.326430] Call Trace: [ 3.326725] [ 3.326927] ? die_addr+0x3c/0xa0 [ 3.327330] ? exc_general_protection+0x161/0x2a0 [ 3.327662] ? asm_exc_general_protection+0x26/0x30 [ 3.328214] ? vprintk_emit+0x15e/0x420 [ 3.328543] ? eg_cache_remove_entry+0xa5/0x470 [ 3.328910] ? eg_cache_remove_entry+0x9a/0x470 [ 3.329294] ? __pfx_eg_cache_remove_entry+0x10/0x10 [ 3.329664] ? console_unlock+0x107/0x1d0 [ 3.329946] ? __pfx_console_unlock+0x10/0x10 [ 3.330283] ? do_syscall_64+0xa6/0x1a0 [ 3.330584] ? entry_SYSCALL_64_after_hwframe+0x47/0x7f [ 3.331090] ? __pfx_prb_read_valid+0x10/0x10 [ 3.331395] ? down_trylock+0x52/0x80 [ 3.331703] ? vprintk_emit+0x15e/0x420 [ 3.331986] ? __pfx_vprintk_emit+0x10/0x10 [ 3.332279] ? down_trylock+0x52/0x80 [ 3.332527] ? _printk+0xbf/0x100 [ 3.332762] ? __pfx__printk+0x10/0x10 [ 3.333007] ? _raw_write_lock_irq+0x81/0xe0 [ 3.333284] ? __pfx__raw_write_lock_irq+0x10/0x10 [ 3.333614] msg_from_mpoad+0x1185/0x2750 [ 3.333893] ? __build_skb_around+0x27b/0x3a0 [ 3.334183] ? __pfx_msg_from_mpoad+0x10/0x10 [ 3.334501] ? __alloc_skb+0x1c0/0x310 [ 3.334809] ? __pfx___alloc_skb+0x10/0x10 [ 3.335283] ? _raw_spin_lock+0xe0/0xe0 [ 3.335632] ? finish_wait+0x8d/0x1e0 [ 3.335975] vcc_sendmsg+0x684/0xba0 [ 3.336250] ? __pfx_vcc_sendmsg+0x10/0x10 [ 3.336587] ? __pfx_autoremove_wake_function+0x10/0x10 [ 3.337056] ? fdget+0x176/0x3e0 [ 3.337348] __sys_sendto+0x4a2/0x510 [ 3.337663] ? __pfx___sys_sendto+0x10/0x10 [ 3.337969] ? ioctl_has_perm.constprop.0.isra.0+0x284/0x400 [ 3.338364] ? sock_ioctl+0x1bb/0x5a0 [ 3.338653] ? __rseq_handle_notify_resume+0x825/0xd20 [ 3.339017] ? __pfx_sock_ioctl+0x10/0x10 [ 3.339316] ? __pfx___rseq_handle_notify_resume+0x10/0x10 [ 3.339727] ? selinux_file_ioctl+0xa4/0x260 [ 3.340166] __x64_sys_sendto+0xe0/0x1c0 [ 3.340526] ? syscall_exit_to_user_mode+0x123/0x140 [ 3.340898] do_syscall_64+0xa6/0x1a0 [ 3.341170] entry_SYSCALL_64_after_hwframe+0x77/0x7f [ 3.341533] RIP: 0033:0x44a380 [ 3.341757] Code: 0f 1f 84 00 00 00 00 00 66 90 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c00 [ 3.343078] RSP: 002b:00007ffc1d404098 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 3.343631] RAX: ffffffffffffffda RBX: 00007ffc1d404458 RCX: 000000000044a380 [ 3.344306] RDX: 000000000000019c RSI: 00007ffc1d4040b0 RDI: 0000000000000003 [ 3.344833] RBP: 00007ffc1d404260 R08: 0000000000000000 R09: 0000000000000000 [ 3.345381] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 [ 3.346015] R13: 00007ffc1d404448 R14: 00000000004c17d0 R15: 0000000000000001 [ 3.346503] [ 3.346679] Modules linked in: [ 3.346956] ---[ end trace 0000000000000000 ]--- [ 3.347315] RIP: 0010:eg_cache_remove_entry+0xa5/0x470 [ 3.347737] Code: c1 f7 6e fd 48 c7 c7 00 7e 38 b2 e8 95 64 54 fd 48 c7 c7 40 7e 38 b2 48 89 ee e80 [ 3.349157] RSP: 0018:ffff88800583f8a8 EFLAGS: 00010006 [ 3.349517] RAX: 0000000000000006 RBX: ffff888005989000 RCX: ffffffffaecc2d8e [ 3.350103] RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000030 [ 3.350610] RBP: 0000000000000000 R08: 0000000000000000 R09: fffffbfff6558b88 [ 3.351246] R10: 0000000000000003 R11: 203a207972746e65 R12: 1ffff11000b07f15 [ 3.351785] R13: dffffc0000000000 R14: ffff888005989000 R15: ffff888005989068 [ 3.352404] FS: 000000001b6313c0(0000) GS:ffff88806d380000(0000) knlGS:0000000000000000 [ 3.353099] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3.353544] CR2: 00000000004b4b40 CR3: 000000000248e000 CR4: 00000000000006f0 [ 3.354072] note: ex[79] exited with irqs disabled [ 3.354458] note: ex[79] exited with preempt_count 1 Signed-off-by: Minjoong Kim Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250322105200.14981-1-pwn9uin@gmail.com Signed-off-by: Jakub Kicinski --- net/atm/mpc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 324e3ab96bb3..12da0269275c 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -1314,6 +1314,8 @@ static void MPOA_cache_impos_rcvd(struct k_message *msg, holding_time = msg->content.eg_info.holding_time; dprintk("(%s) entry = %p, holding_time = %u\n", mpc->dev->name, entry, holding_time); + if (entry == NULL && !holding_time) + return; if (entry == NULL && holding_time) { entry = mpc->eg_ops->add_entry(msg, mpc); mpc->eg_ops->put(entry); -- 2.51.0 From a44940d094afa2e04b5b164b1e136fc18bcb4a2d Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Mon, 24 Mar 2025 18:32:34 +0800 Subject: [PATCH 07/16] net: libwx: fix Tx descriptor content for some tunnel packets The length of skb header was incorrectly calculated when transmit a tunnel packet with outer IPv6 extension header, or a IP over IP packet which has inner IPv6 header. Thus the correct Tx context descriptor cannot be composed, resulting in Tx ring hang. Fixes: 3403960cdf86 ("net: wangxun: libwx add tx offload functions") Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/20250324103235.823096-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 61 +++++++++++++-------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 2b3d6586f44a..9294a9d8c554 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1082,26 +1082,6 @@ static void wx_tx_ctxtdesc(struct wx_ring *tx_ring, u32 vlan_macip_lens, context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); } -static void wx_get_ipv6_proto(struct sk_buff *skb, int offset, u8 *nexthdr) -{ - struct ipv6hdr *hdr = (struct ipv6hdr *)(skb->data + offset); - - *nexthdr = hdr->nexthdr; - offset += sizeof(struct ipv6hdr); - while (ipv6_ext_hdr(*nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - if (*nexthdr == NEXTHDR_NONE) - return; - hp = skb_header_pointer(skb, offset, sizeof(_hdr), &_hdr); - if (!hp) - return; - if (*nexthdr == NEXTHDR_FRAGMENT) - break; - *nexthdr = hp->nexthdr; - } -} - union network_header { struct iphdr *ipv4; struct ipv6hdr *ipv6; @@ -1112,6 +1092,8 @@ static u8 wx_encode_tx_desc_ptype(const struct wx_tx_buffer *first) { u8 tun_prot = 0, l4_prot = 0, ptype = 0; struct sk_buff *skb = first->skb; + unsigned char *exthdr, *l4_hdr; + __be16 frag_off; if (skb->encapsulation) { union network_header hdr; @@ -1122,14 +1104,18 @@ static u8 wx_encode_tx_desc_ptype(const struct wx_tx_buffer *first) ptype = WX_PTYPE_TUN_IPV4; break; case htons(ETH_P_IPV6): - wx_get_ipv6_proto(skb, skb_network_offset(skb), &tun_prot); + l4_hdr = skb_transport_header(skb); + exthdr = skb_network_header(skb) + sizeof(struct ipv6hdr); + tun_prot = ipv6_hdr(skb)->nexthdr; + if (l4_hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, &tun_prot, &frag_off); ptype = WX_PTYPE_TUN_IPV6; break; default: return ptype; } - if (tun_prot == IPPROTO_IPIP) { + if (tun_prot == IPPROTO_IPIP || tun_prot == IPPROTO_IPV6) { hdr.raw = (void *)inner_ip_hdr(skb); ptype |= WX_PTYPE_PKT_IPIP; } else if (tun_prot == IPPROTO_UDP) { @@ -1166,7 +1152,11 @@ static u8 wx_encode_tx_desc_ptype(const struct wx_tx_buffer *first) l4_prot = hdr.ipv4->protocol; break; case 6: - wx_get_ipv6_proto(skb, skb_inner_network_offset(skb), &l4_prot); + l4_hdr = skb_inner_transport_header(skb); + exthdr = skb_inner_network_header(skb) + sizeof(struct ipv6hdr); + l4_prot = inner_ipv6_hdr(skb)->nexthdr; + if (l4_hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, &l4_prot, &frag_off); ptype |= WX_PTYPE_PKT_IPV6; break; default: @@ -1179,7 +1169,11 @@ static u8 wx_encode_tx_desc_ptype(const struct wx_tx_buffer *first) ptype = WX_PTYPE_PKT_IP; break; case htons(ETH_P_IPV6): - wx_get_ipv6_proto(skb, skb_network_offset(skb), &l4_prot); + l4_hdr = skb_transport_header(skb); + exthdr = skb_network_header(skb) + sizeof(struct ipv6hdr); + l4_prot = ipv6_hdr(skb)->nexthdr; + if (l4_hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, &l4_prot, &frag_off); ptype = WX_PTYPE_PKT_IP | WX_PTYPE_PKT_IPV6; break; default: @@ -1269,13 +1263,20 @@ static int wx_tso(struct wx_ring *tx_ring, struct wx_tx_buffer *first, /* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */ if (enc) { + unsigned char *exthdr, *l4_hdr; + __be16 frag_off; + switch (first->protocol) { case htons(ETH_P_IP): tun_prot = ip_hdr(skb)->protocol; first->tx_flags |= WX_TX_FLAGS_OUTER_IPV4; break; case htons(ETH_P_IPV6): + l4_hdr = skb_transport_header(skb); + exthdr = skb_network_header(skb) + sizeof(struct ipv6hdr); tun_prot = ipv6_hdr(skb)->nexthdr; + if (l4_hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, &tun_prot, &frag_off); break; default: break; @@ -1298,6 +1299,7 @@ static int wx_tso(struct wx_ring *tx_ring, struct wx_tx_buffer *first, WX_TXD_TUNNEL_LEN_SHIFT); break; case IPPROTO_IPIP: + case IPPROTO_IPV6: tunhdr_eiplen_tunlen = (((char *)inner_ip_hdr(skb) - (char *)ip_hdr(skb)) >> 2) << WX_TXD_OUTER_IPLEN_SHIFT; @@ -1341,6 +1343,8 @@ static void wx_tx_csum(struct wx_ring *tx_ring, struct wx_tx_buffer *first, vlan_macip_lens = skb_network_offset(skb) << WX_TXD_MACLEN_SHIFT; } else { + unsigned char *exthdr, *l4_hdr; + __be16 frag_off; u8 l4_prot = 0; union { struct iphdr *ipv4; @@ -1362,7 +1366,12 @@ static void wx_tx_csum(struct wx_ring *tx_ring, struct wx_tx_buffer *first, tun_prot = ip_hdr(skb)->protocol; break; case htons(ETH_P_IPV6): + l4_hdr = skb_transport_header(skb); + exthdr = skb_network_header(skb) + sizeof(struct ipv6hdr); tun_prot = ipv6_hdr(skb)->nexthdr; + if (l4_hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &tun_prot, &frag_off); break; default: return; @@ -1386,6 +1395,7 @@ static void wx_tx_csum(struct wx_ring *tx_ring, struct wx_tx_buffer *first, WX_TXD_TUNNEL_LEN_SHIFT); break; case IPPROTO_IPIP: + case IPPROTO_IPV6: tunhdr_eiplen_tunlen = (((char *)inner_ip_hdr(skb) - (char *)ip_hdr(skb)) >> 2) << WX_TXD_OUTER_IPLEN_SHIFT; @@ -1408,7 +1418,10 @@ static void wx_tx_csum(struct wx_ring *tx_ring, struct wx_tx_buffer *first, break; case 6: vlan_macip_lens |= (transport_hdr.raw - network_hdr.raw) >> 1; + exthdr = network_hdr.raw + sizeof(struct ipv6hdr); l4_prot = network_hdr.ipv6->nexthdr; + if (transport_hdr.raw != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, &l4_prot, &frag_off); break; default: break; -- 2.51.0 From c7d82913d5f9e97860772ee4051eaa66b56a6273 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Mon, 24 Mar 2025 18:32:35 +0800 Subject: [PATCH 08/16] net: libwx: fix Tx L4 checksum The hardware only supports L4 checksum offload for TCP/UDP/SCTP protocol. There was a bug to set Tx checksum flag for the other protocol that results in Tx ring hang. Fix to compute software checksum for these packets. Fixes: 3403960cdf86 ("net: wangxun: libwx add tx offload functions") Signed-off-by: Jiawen Wu Link: https://patch.msgid.link/20250324103235.823096-2-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 9294a9d8c554..497abf2723a5 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -1337,6 +1337,7 @@ static void wx_tx_csum(struct wx_ring *tx_ring, struct wx_tx_buffer *first, u8 tun_prot = 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { +csum_failed: if (!(first->tx_flags & WX_TX_FLAGS_HW_VLAN) && !(first->tx_flags & WX_TX_FLAGS_CC)) return; @@ -1441,7 +1442,8 @@ static void wx_tx_csum(struct wx_ring *tx_ring, struct wx_tx_buffer *first, WX_TXD_L4LEN_SHIFT; break; default: - break; + skb_checksum_help(skb); + goto csum_failed; } /* update TX checksum flag */ -- 2.51.0 From 2c5ac026fd1421cf6a78770b48570b2563ef40b7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 24 Mar 2025 16:39:29 +0200 Subject: [PATCH 09/16] =?utf8?q?net:=20phy:=20Introduce=20PHY=5FID=5FSIZE?= =?utf8?q?=20=E2=80=94=20minimum=20size=20for=20PHY=20ID=20string?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The PHY_ID_FMT defines the format specifier "%s:%02x" to form the PHY ID string, where the maximum of the first part is defined in MII_BUS_ID_SIZE, including NUL terminator, and the second part is implied to be 3 as the maximum address is limited to 32, meaning that 2 hex digits is more than enough, plus ':' (colon) delimiter. However, some drivers, which are using PHY_ID_FMT, customise buffer size and do that incorrectly. Introduce a new constant PHY_ID_SIZE that makes the minimum required size explicit, so drivers are encouraged to use it. Suggested-by: "Russell King (Oracle)" Signed-off-by: Andy Shevchenko Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20250324144751.1271761-2-andriy.shevchenko@linux.intel.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/phy.h b/include/linux/phy.h index bfdbdc538910..a2bfae80c449 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -292,6 +292,7 @@ static inline long rgmii_clock(int speed) /* Used when trying to connect to a specific phy (mii bus id:phy device id) */ #define PHY_ID_FMT "%s:%02x" +#define PHY_ID_SIZE (MII_BUS_ID_SIZE + 3) #define MII_BUS_ID_SIZE 61 -- 2.51.0 From 61997271a5a7dcd5a7dd5f0d81e4cb045e1db9ef Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 24 Mar 2025 16:39:30 +0200 Subject: [PATCH 10/16] net: usb: asix: ax88772: Increase phy_name size MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit GCC compiler (Debian 14.2.0-17) is not happy about printing into a too short buffer (when build with `make W=1`): drivers/net/usb/ax88172a.c:311:9: note: ‘snprintf’ output between 4 and 66 bytes into a destination of size 20 Indeed, the buffer size is chosen based on some assumptions, while in general the assigned name might not fit. Increase the buffer size to cover the minimum required one. With that, change snprintf() to use sizeof() instead of the hard coded value. While at it, make sure that the PHY address is not bigger than the allowed maximum. Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20250324144751.1271761-3-andriy.shevchenko@linux.intel.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/ax88172a.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c index e47bb125048d..f613e4bc68c8 100644 --- a/drivers/net/usb/ax88172a.c +++ b/drivers/net/usb/ax88172a.c @@ -18,8 +18,8 @@ struct ax88172a_private { struct mii_bus *mdio; struct phy_device *phydev; - char phy_name[20]; - u16 phy_addr; + char phy_name[PHY_ID_SIZE]; + u8 phy_addr; u16 oldmode; int use_embdphy; struct asix_rx_fixup_info rx_fixup_info; @@ -210,7 +210,11 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf) ret = asix_read_phy_addr(dev, priv->use_embdphy); if (ret < 0) goto free; - + if (ret >= PHY_MAX_ADDR) { + netdev_err(dev->net, "Invalid PHY address %#x\n", ret); + ret = -ENODEV; + goto free; + } priv->phy_addr = ret; ax88172a_reset_phy(dev, priv->use_embdphy); @@ -308,7 +312,7 @@ static int ax88172a_reset(struct usbnet *dev) rx_ctl); /* Connect to PHY */ - snprintf(priv->phy_name, 20, PHY_ID_FMT, + snprintf(priv->phy_name, sizeof(priv->phy_name), PHY_ID_FMT, priv->mdio->id, priv->phy_addr); priv->phydev = phy_connect(dev->net, priv->phy_name, -- 2.51.0 From 70facbf978ac90c6da17a3de2a8dd111b06f1bac Mon Sep 17 00:00:00 2001 From: Daniel Hsu Date: Tue, 25 Mar 2025 16:10:08 +0800 Subject: [PATCH 11/16] mctp: Fix incorrect tx flow invalidation condition in mctp-i2c Previously, the condition for invalidating the tx flow in mctp_i2c_invalidate_tx_flow() checked if `rc` was nonzero. However, this could incorrectly trigger the invalidation even when `rc > 0` was returned as a success status. This patch updates the condition to explicitly check for `rc < 0`, ensuring that only error cases trigger the invalidation. Signed-off-by: Daniel Hsu Reviewed-by: Jeremy Kerr Signed-off-by: David S. Miller --- drivers/net/mctp/mctp-i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index d74d47dd6e04..f782d93f826e 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c @@ -537,7 +537,7 @@ static void mctp_i2c_xmit(struct mctp_i2c_dev *midev, struct sk_buff *skb) rc = __i2c_transfer(midev->adapter, &msg, 1); /* on tx errors, the flow can no longer be considered valid */ - if (rc) + if (rc < 0) mctp_i2c_invalidate_tx_flow(midev, skb); break; -- 2.51.0 From 3ece3e8e5976c49c3f887e5923f998eabd54ff40 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 Mar 2025 13:05:35 +0100 Subject: [PATCH 12/16] PCI/MSI: Handle the NOMASK flag correctly for all PCI/MSI backends The conversion of the XEN specific global variable pci_msi_ignore_mask to a MSI domain flag, missed the facts that: 1) Legacy architectures do not provide a interrupt domain 2) Parent MSI domains do not necessarily have a domain info attached Both cases result in an unconditional NULL pointer dereference. This was unfortunatly missed in review and testing revealed it late. Cure this by using the existing pci_msi_domain_supports() helper, which handles all possible cases correctly. Fixes: c3164d2e0d18 ("PCI/MSI: Convert pci_msi_ignore_mask to per MSI domain flag") Reported-by: Daniel Gomez Reported-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Tested-by: Marek Szyprowski Tested-by: Borislav Petkov Tested-by: Daniel Gomez Link: https://lore.kernel.org/all/87iknwyp2o.ffs@tglx Closes: https://lore.kernel.org/all/qn7fzggcj6qe6r6gdbwcz23pzdz2jx64aldccmsuheabhmjgrt@tawf5nfwuvw7 --- drivers/pci/msi/msi.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index d74162880d83..7058d59e7c5f 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -285,8 +285,6 @@ static void pci_msi_set_enable(struct pci_dev *dev, int enable) static int msi_setup_msi_desc(struct pci_dev *dev, int nvec, struct irq_affinity_desc *masks) { - const struct irq_domain *d = dev_get_msi_domain(&dev->dev); - const struct msi_domain_info *info = d->host_data; struct msi_desc desc; u16 control; @@ -297,7 +295,7 @@ static int msi_setup_msi_desc(struct pci_dev *dev, int nvec, /* Lies, damned lies, and MSIs */ if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING) control |= PCI_MSI_FLAGS_MASKBIT; - if (info->flags & MSI_FLAG_NO_MASK) + if (pci_msi_domain_supports(dev, MSI_FLAG_NO_MASK, DENY_LEGACY)) control &= ~PCI_MSI_FLAGS_MASKBIT; desc.nvec_used = nvec; @@ -604,20 +602,18 @@ static void __iomem *msix_map_region(struct pci_dev *dev, */ void msix_prepare_msi_desc(struct pci_dev *dev, struct msi_desc *desc) { - const struct irq_domain *d = dev_get_msi_domain(&dev->dev); - const struct msi_domain_info *info = d->host_data; - desc->nvec_used = 1; desc->pci.msi_attrib.is_msix = 1; desc->pci.msi_attrib.is_64 = 1; desc->pci.msi_attrib.default_irq = dev->irq; desc->pci.mask_base = dev->msix_base; - desc->pci.msi_attrib.can_mask = !(info->flags & MSI_FLAG_NO_MASK) && - !desc->pci.msi_attrib.is_virtual; - if (desc->pci.msi_attrib.can_mask) { + + if (!pci_msi_domain_supports(dev, MSI_FLAG_NO_MASK, DENY_LEGACY) && + !desc->pci.msi_attrib.is_virtual) { void __iomem *addr = pci_msix_desc_addr(desc); + desc->pci.msi_attrib.can_mask = 1; desc->pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL); } } @@ -715,8 +711,6 @@ static int msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, int nvec, struct irq_affinity *affd) { - const struct irq_domain *d = dev_get_msi_domain(&dev->dev); - const struct msi_domain_info *info = d->host_data; int ret, tsize; u16 control; @@ -747,7 +741,7 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, /* Disable INTX */ pci_intx_for_msi(dev, 0); - if (!(info->flags & MSI_FLAG_NO_MASK)) { + if (!pci_msi_domain_supports(dev, MSI_FLAG_NO_MASK, DENY_LEGACY)) { /* * Ensure that all table entries are masked to prevent * stale entries from firing in a crash kernel. -- 2.51.0 From 705094f6556d88540e1076e432cbca6b596421dd Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 26 Mar 2025 15:01:48 +1100 Subject: [PATCH 13/16] unix: fix up for "apparmor: add fine grained af_unix mediation" After merging the apparmor tree, today's linux-next build (x86_64 allmodconfig) failed like this: security/apparmor/af_unix.c: In function 'unix_state_double_lock': security/apparmor/af_unix.c:627:17: error: implicit declaration of function 'unix_state_lock'; did you mean 'unix_state_double_lock'? [-Wimplicit-function-declaration] 627 | unix_state_lock(sk1); | ^~~~~~~~~~~~~~~ | unix_state_double_lock security/apparmor/af_unix.c: In function 'unix_state_double_unlock': security/apparmor/af_unix.c:642:17: error: implicit declaration of function 'unix_state_unlock'; did you mean 'unix_state_double_lock'? [-Wimplicit-function-declaration] 642 | unix_state_unlock(sk1); | ^~~~~~~~~~~~~~~~~ | unix_state_double_lock Caused by commit c05e705812d1 ("apparmor: add fine grained af_unix mediation") interacting with commit 84960bf24031 ("af_unix: Move internal definitions to net/unix/.") from the net-next tree. Reviewed-by: Kuniyuki Iwashima Signed-off-by: Stephen Rothwell Link: https://patch.msgid.link/20250326150148.72d9138d@canb.auug.org.au Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 3 +++ net/unix/af_unix.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b588069ece7e..1af1841b7601 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -55,4 +55,7 @@ struct unix_sock { #define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk) #define unix_peer(sk) (unix_sk(sk)->peer) +#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) +#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) + #endif diff --git a/net/unix/af_unix.h b/net/unix/af_unix.h index ed4aedc42813..59db179df9bb 100644 --- a/net/unix/af_unix.h +++ b/net/unix/af_unix.h @@ -8,9 +8,6 @@ #define UNIX_HASH_SIZE (256 * 2) #define UNIX_HASH_BITS 8 -#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) -#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) - struct sock *unix_peer_get(struct sock *sk); struct unix_skb_parms { -- 2.51.0 From 23f00807619d15063d676218f36c5dfeda1eb420 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Tue, 25 Mar 2025 11:02:26 +0200 Subject: [PATCH 14/16] rtnetlink: Allocate vfinfo size for VF GUIDs when supported Commit 30aad41721e0 ("net/core: Add support for getting VF GUIDs") added support for getting VF port and node GUIDs in netlink ifinfo messages, but their size was not taken into consideration in the function that allocates the netlink message, causing the following warning when a netlink message is filled with many VF port and node GUIDs: # echo 64 > /sys/bus/pci/devices/0000\:08\:00.0/sriov_numvfs # ip link show dev ib0 RTNETLINK answers: Message too long Cannot send link get request: Message too long Kernel warning: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1930 at net/core/rtnetlink.c:4151 rtnl_getlink+0x586/0x5a0 Modules linked in: xt_conntrack xt_MASQUERADE nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter overlay mlx5_ib macsec mlx5_core tls rpcrdma rdma_ucm ib_uverbs ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm iw_cm ib_ipoib fuse ib_cm ib_core CPU: 2 UID: 0 PID: 1930 Comm: ip Not tainted 6.14.0-rc2+ #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:rtnl_getlink+0x586/0x5a0 Code: cb 82 e8 3d af 0a 00 4d 85 ff 0f 84 08 ff ff ff 4c 89 ff 41 be ea ff ff ff e8 66 63 5b ff 49 c7 07 80 4f cb 82 e9 36 fc ff ff <0f> 0b e9 16 fe ff ff e8 de a0 56 00 66 66 2e 0f 1f 84 00 00 00 00 RSP: 0018:ffff888113557348 EFLAGS: 00010246 RAX: 00000000ffffffa6 RBX: ffff88817e87aa34 RCX: dffffc0000000000 RDX: 0000000000000003 RSI: 0000000000000000 RDI: ffff88817e87afb8 RBP: 0000000000000009 R08: ffffffff821f44aa R09: 0000000000000000 R10: ffff8881260f79a8 R11: ffff88817e87af00 R12: ffff88817e87aa00 R13: ffffffff8563d300 R14: 00000000ffffffa6 R15: 00000000ffffffff FS: 00007f63a5dbf280(0000) GS:ffff88881ee00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f63a5ba4493 CR3: 00000001700fe002 CR4: 0000000000772eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __warn+0xa5/0x230 ? rtnl_getlink+0x586/0x5a0 ? report_bug+0x22d/0x240 ? handle_bug+0x53/0xa0 ? exc_invalid_op+0x14/0x50 ? asm_exc_invalid_op+0x16/0x20 ? skb_trim+0x6a/0x80 ? rtnl_getlink+0x586/0x5a0 ? __pfx_rtnl_getlink+0x10/0x10 ? rtnetlink_rcv_msg+0x1e5/0x860 ? __pfx___mutex_lock+0x10/0x10 ? rcu_is_watching+0x34/0x60 ? __pfx_lock_acquire+0x10/0x10 ? stack_trace_save+0x90/0xd0 ? filter_irq_stacks+0x1d/0x70 ? kasan_save_stack+0x30/0x40 ? kasan_save_stack+0x20/0x40 ? kasan_save_track+0x10/0x30 rtnetlink_rcv_msg+0x21c/0x860 ? entry_SYSCALL_64_after_hwframe+0x76/0x7e ? __pfx_rtnetlink_rcv_msg+0x10/0x10 ? arch_stack_walk+0x9e/0xf0 ? rcu_is_watching+0x34/0x60 ? lock_acquire+0xd5/0x410 ? rcu_is_watching+0x34/0x60 netlink_rcv_skb+0xe0/0x210 ? __pfx_rtnetlink_rcv_msg+0x10/0x10 ? __pfx_netlink_rcv_skb+0x10/0x10 ? rcu_is_watching+0x34/0x60 ? __pfx___netlink_lookup+0x10/0x10 ? lock_release+0x62/0x200 ? netlink_deliver_tap+0xfd/0x290 ? rcu_is_watching+0x34/0x60 ? lock_release+0x62/0x200 ? netlink_deliver_tap+0x95/0x290 netlink_unicast+0x31f/0x480 ? __pfx_netlink_unicast+0x10/0x10 ? rcu_is_watching+0x34/0x60 ? lock_acquire+0xd5/0x410 netlink_sendmsg+0x369/0x660 ? lock_release+0x62/0x200 ? __pfx_netlink_sendmsg+0x10/0x10 ? import_ubuf+0xb9/0xf0 ? __import_iovec+0x254/0x2b0 ? lock_release+0x62/0x200 ? __pfx_netlink_sendmsg+0x10/0x10 ____sys_sendmsg+0x559/0x5a0 ? __pfx_____sys_sendmsg+0x10/0x10 ? __pfx_copy_msghdr_from_user+0x10/0x10 ? rcu_is_watching+0x34/0x60 ? do_read_fault+0x213/0x4a0 ? rcu_is_watching+0x34/0x60 ___sys_sendmsg+0xe4/0x150 ? __pfx____sys_sendmsg+0x10/0x10 ? do_fault+0x2cc/0x6f0 ? handle_pte_fault+0x2e3/0x3d0 ? __pfx_handle_pte_fault+0x10/0x10 ? preempt_count_sub+0x14/0xc0 ? __down_read_trylock+0x150/0x270 ? __handle_mm_fault+0x404/0x8e0 ? __pfx___handle_mm_fault+0x10/0x10 ? lock_release+0x62/0x200 ? __rcu_read_unlock+0x65/0x90 ? rcu_is_watching+0x34/0x60 __sys_sendmsg+0xd5/0x150 ? __pfx___sys_sendmsg+0x10/0x10 ? __up_read+0x192/0x480 ? lock_release+0x62/0x200 ? __rcu_read_unlock+0x65/0x90 ? rcu_is_watching+0x34/0x60 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f63a5b13367 Code: 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 RSP: 002b:00007fff8c726bc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000067b687c2 RCX: 00007f63a5b13367 RDX: 0000000000000000 RSI: 00007fff8c726c30 RDI: 0000000000000004 RBP: 00007fff8c726cb8 R08: 0000000000000000 R09: 0000000000000034 R10: 00007fff8c726c7c R11: 0000000000000246 R12: 0000000000000001 R13: 0000000000000000 R14: 00007fff8c726cd0 R15: 00007fff8c726cd0 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0xd08/0x2830 softirqs last enabled at (0): [] copy_process+0xd08/0x2830 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace 0000000000000000 ]--- Thus, when calculating ifinfo message size, take VF GUIDs sizes into account when supported. Fixes: 30aad41721e0 ("net/core: Add support for getting VF GUIDs") Signed-off-by: Mark Zhang Reviewed-by: Maher Sanalla Signed-off-by: Mark Bloch Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20250325090226.749730-1-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5a24a30dfc2d..334db17be37d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1177,6 +1177,9 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, /* IFLA_VF_STATS_TX_DROPPED */ nla_total_size_64bit(sizeof(__u64))); } + if (dev->netdev_ops->ndo_get_vf_guid) + size += num_vfs * 2 * + nla_total_size(sizeof(struct ifla_vf_guid)); return size; } else return 0; -- 2.51.0 From 67d1a8956d2d62fe6b4c13ebabb57806098511d8 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Tue, 25 Mar 2025 10:58:41 +0100 Subject: [PATCH 15/16] rndis_host: Flag RNDIS modems as WWAN devices Set FLAG_WWAN instead of FLAG_ETHERNET for RNDIS interfaces on Mobile Broadband Modems, as opposed to regular Ethernet adapters. Otherwise NetworkManager gets confused, misjudges the device type, and wouldn't know it should connect a modem to get the device to work. What would be the result depends on ModemManager version -- older ModemManager would end up disconnecting a device after an unsuccessful probe attempt (if it connected without needing to unlock a SIM), while a newer one might spawn a separate PPP connection over a tty interface instead, resulting in a general confusion and no end of chaos. The only way to get this work reliably is to fix the device type and have good enough version ModemManager (or equivalent). Fixes: 63ba395cd7a5 ("rndis_host: support Novatel Verizon USB730L") Signed-off-by: Lubomir Rintel Link: https://patch.msgid.link/20250325095842.1567999-1-lkundrak@v3.sk Signed-off-by: Jakub Kicinski --- drivers/net/usb/rndis_host.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 7b3739b29c8f..bb0bf1415872 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -630,6 +630,16 @@ static const struct driver_info zte_rndis_info = { .tx_fixup = rndis_tx_fixup, }; +static const struct driver_info wwan_rndis_info = { + .description = "Mobile Broadband RNDIS device", + .flags = FLAG_WWAN | FLAG_POINTTOPOINT | FLAG_FRAMING_RN | FLAG_NO_SETINT, + .bind = rndis_bind, + .unbind = rndis_unbind, + .status = rndis_status, + .rx_fixup = rndis_rx_fixup, + .tx_fixup = rndis_tx_fixup, +}; + /*-------------------------------------------------------------------------*/ static const struct usb_device_id products [] = { @@ -666,9 +676,11 @@ static const struct usb_device_id products [] = { USB_INTERFACE_INFO(USB_CLASS_WIRELESS_CONTROLLER, 1, 3), .driver_info = (unsigned long) &rndis_info, }, { - /* Novatel Verizon USB730L */ + /* Mobile Broadband Modem, seen in Novatel Verizon USB730L and + * Telit FN990A (RNDIS) + */ USB_INTERFACE_INFO(USB_CLASS_MISC, 4, 1), - .driver_info = (unsigned long) &rndis_info, + .driver_info = (unsigned long)&wwan_rndis_info, }, { }, // END }; -- 2.51.0 From 2eb6c6a34cb1c22b09b219390cdff0f02cd90258 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 25 Mar 2025 10:54:27 -0700 Subject: [PATCH 16/16] net: move replay logic to tc_modify_qdisc Eric reports that by the time we call netdev_lock_ops after rtnl_unlock/rtnl_lock, the dev might point to an invalid device. As suggested by Jakub in [0], move rtnl lock/unlock and request_module outside of qdisc_create. This removes extra complexity with relocking the netdev. 0: https://lore.kernel.org/netdev/20250325032803.1542c15e@kernel.org/ Fixes: a0527ee2df3f ("net: hold netdev instance lock during qdisc ndo_setup_tc") Reported-by: Eric Dumazet Link: https://lore.kernel.org/netdev/20250305163732.2766420-1-sdf@fomichev.me/T/#me8dfd778ea4c4463acab55644e3f9836bc608771 Signed-off-by: Stanislav Fomichev Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250325175427.3818808-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- net/sched/sch_api.c | 73 +++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 46 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index defb05c1fba4..f74a097f54ae 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1267,38 +1267,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev, struct qdisc_size_table *stab; ops = qdisc_lookup_ops(kind); -#ifdef CONFIG_MODULES - if (ops == NULL && kind != NULL) { - char name[IFNAMSIZ]; - if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) { - /* We dropped the RTNL semaphore in order to - * perform the module load. So, even if we - * succeeded in loading the module we have to - * tell the caller to replay the request. We - * indicate this using -EAGAIN. - * We replay the request because the device may - * go away in the mean time. - */ - netdev_unlock_ops(dev); - rtnl_unlock(); - request_module(NET_SCH_ALIAS_PREFIX "%s", name); - rtnl_lock(); - netdev_lock_ops(dev); - ops = qdisc_lookup_ops(kind); - if (ops != NULL) { - /* We will try again qdisc_lookup_ops, - * so don't keep a reference. - */ - module_put(ops->owner); - err = -EAGAIN; - goto err_out; - } - } - } -#endif - - err = -ENOENT; if (!ops) { + err = -ENOENT; NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown"); goto err_out; } @@ -1623,8 +1593,7 @@ static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack, struct net_device *dev, struct nlattr *tca[TCA_MAX + 1], - struct tcmsg *tcm, - bool *replay) + struct tcmsg *tcm) { struct Qdisc *q = NULL; struct Qdisc *p = NULL; @@ -1789,13 +1758,8 @@ create_n_graft2: tcm->tcm_parent, tcm->tcm_handle, tca, &err, extack); } - if (q == NULL) { - if (err == -EAGAIN) { - *replay = true; - return 0; - } + if (!q) return err; - } graft: err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack); @@ -1808,6 +1772,27 @@ graft: return 0; } +static void request_qdisc_module(struct nlattr *kind) +{ + struct Qdisc_ops *ops; + char name[IFNAMSIZ]; + + if (!kind) + return; + + ops = qdisc_lookup_ops(kind); + if (ops) { + module_put(ops->owner); + return; + } + + if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) { + rtnl_unlock(); + request_module(NET_SCH_ALIAS_PREFIX "%s", name); + rtnl_lock(); + } +} + /* * Create/change qdisc. */ @@ -1818,27 +1803,23 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; struct tcmsg *tcm; - bool replay; int err; -replay: - /* Reinit, just in case something touches this. */ err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; + request_qdisc_module(tca[TCA_KIND]); + tcm = nlmsg_data(n); dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return -ENODEV; - replay = false; netdev_lock_ops(dev); - err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm, &replay); + err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm); netdev_unlock_ops(dev); - if (replay) - goto replay; return err; } -- 2.51.0