From adc36d0914f629e12700ef223f146e1c65d01c57 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Fri, 25 Apr 2025 10:57:36 +0200 Subject: [PATCH 01/16] net: tulip: Use pure PCI devres API The currently used function pci_request_regions() is one of the problematic "hybrid devres" PCI functions, which are sometimes managed through devres, and sometimes not (depending on whether pci_enable_device() or pcim_enable_device() has been called before). The PCI subsystem wants to remove this behavior and, therefore, needs to port all users to functions that don't have this problem. Replace pci_request_regions() with pcim_request_all_regions(). Signed-off-by: Philipp Stanner Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250425085740.65304-5-phasta@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/dec/tulip/tulip_core.c | 2 +- drivers/net/ethernet/dec/tulip/winbond-840.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index c8c53121557f..bec76e7bf5dd 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -1411,7 +1411,7 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* grab all resources from both PIO and MMIO regions, as we * don't want anyone else messing around with our hardware */ - if (pci_request_regions(pdev, DRV_NAME)) + if (pcim_request_all_regions(pdev, DRV_NAME)) return -ENODEV; ioaddr = pcim_iomap(pdev, TULIP_BAR, tulip_tbl[chip_idx].io_size); diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 5930cdec6f2f..e593273b2867 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -375,7 +375,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; SET_NETDEV_DEV(dev, &pdev->dev); - if (pci_request_regions(pdev, DRV_NAME)) + if (pcim_request_all_regions(pdev, DRV_NAME)) goto err_out_netdev; ioaddr = pci_iomap(pdev, TULIP_BAR, netdev_res_size); -- 2.51.0 From 2a5a74947a2bdd8ba437f79e746c23724182c662 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Fri, 25 Apr 2025 10:57:37 +0200 Subject: [PATCH 02/16] net: ethernet: natsemi: Use pure PCI devres API The currently used function pci_request_regions() is one of the problematic "hybrid devres" PCI functions, which are sometimes managed through devres, and sometimes not (depending on whether pci_enable_device() or pcim_enable_device() has been called before). The PCI subsystem wants to remove this behavior and, therefore, needs to port all users to functions that don't have this problem. Replace pci_request_regions() with pcim_request_all_regions(). Signed-off-by: Philipp Stanner Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250425085740.65304-6-phasta@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/natsemi/natsemi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 05606692e631..dd279788cf9e 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -846,7 +846,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; SET_NETDEV_DEV(dev, &pdev->dev); - i = pci_request_regions(pdev, DRV_NAME); + i = pcim_request_all_regions(pdev, DRV_NAME); if (i) goto err_pci_request_regions; -- 2.51.0 From 6e5f7a5b5e0c6623742298fe0d5a0d3df6be1afc Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Fri, 25 Apr 2025 10:57:38 +0200 Subject: [PATCH 03/16] net: ethernet: sis900: Use pure PCI devres API The currently used function pci_request_regions() is one of the problematic "hybrid devres" PCI functions, which are sometimes managed through devres, and sometimes not (depending on whether pci_enable_device() or pcim_enable_device() has been called before). The PCI subsystem wants to remove this behavior and, therefore, needs to port all users to functions that don't have this problem. Replace pci_request_regions() with pcim_request_all_regions(). Signed-off-by: Philipp Stanner Reviewed-by: Jacob Keller Acked-by: Daniele Venzano Link: https://patch.msgid.link/20250425085740.65304-7-phasta@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sis/sis900.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 332cbd725900..df869f82cae8 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -468,7 +468,7 @@ static int sis900_probe(struct pci_dev *pci_dev, SET_NETDEV_DEV(net_dev, &pci_dev->dev); /* We do a request_region() to register /proc/ioports info. */ - ret = pci_request_regions(pci_dev, "sis900"); + ret = pcim_request_all_regions(pci_dev, "sis900"); if (ret) goto err_out; -- 2.51.0 From fad4d94d9ae5a67b76b934e544be91220b682ef1 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Fri, 25 Apr 2025 10:57:39 +0200 Subject: [PATCH 04/16] net: mdio: thunder: Use pure PCI devres API The currently used function pci_request_regions() is one of the problematic "hybrid devres" PCI functions, which are sometimes managed through devres, and sometimes not (depending on whether pci_enable_device() or pcim_enable_device() has been called before). The PCI subsystem wants to remove this behavior and, therefore, needs to port all users to functions that don't have this problem. Furthermore, the PCI function being managed implies that it's not necessary to call pci_release_regions() manually. Remove the calls to pci_release_regions(). Replace pci_request_regions() with pcim_request_all_regions(). Signed-off-by: Philipp Stanner Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250425085740.65304-8-phasta@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-thunder.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/mdio/mdio-thunder.c b/drivers/net/mdio/mdio-thunder.c index 1e1aa72b1eff..a3047f7258a7 100644 --- a/drivers/net/mdio/mdio-thunder.c +++ b/drivers/net/mdio/mdio-thunder.c @@ -40,16 +40,16 @@ static int thunder_mdiobus_pci_probe(struct pci_dev *pdev, return err; } - err = pci_request_regions(pdev, KBUILD_MODNAME); + err = pcim_request_all_regions(pdev, KBUILD_MODNAME); if (err) { - dev_err(&pdev->dev, "pci_request_regions failed\n"); + dev_err(&pdev->dev, "pcim_request_all_regions failed\n"); goto err_disable_device; } nexus->bar0 = pcim_iomap(pdev, 0, pci_resource_len(pdev, 0)); if (!nexus->bar0) { err = -ENOMEM; - goto err_release_regions; + goto err_disable_device; } i = 0; @@ -107,9 +107,6 @@ static int thunder_mdiobus_pci_probe(struct pci_dev *pdev, } return 0; -err_release_regions: - pci_release_regions(pdev); - err_disable_device: pci_set_drvdata(pdev, NULL); return err; @@ -129,7 +126,6 @@ static void thunder_mdiobus_pci_remove(struct pci_dev *pdev) mdiobus_unregister(bus->mii_bus); oct_mdio_writeq(0, bus->register_base + SMI_EN); } - pci_release_regions(pdev); pci_set_drvdata(pdev, NULL); } -- 2.51.0 From 06133ddc3590845c91d8bd44425b170fce11837f Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Fri, 25 Apr 2025 10:57:40 +0200 Subject: [PATCH 05/16] net: thunder_bgx: Use pure PCI devres API The currently used function pci_request_regions() is one of the problematic "hybrid devres" PCI functions, which are sometimes managed through devres, and sometimes not (depending on whether pci_enable_device() or pcim_enable_device() has been called before). The PCI subsystem wants to remove this behavior and, therefore, needs to port all users to functions that don't have this problem. Furthermore, the PCI function being managed implies that it's not necessary to call pci_release_regions() manually. Remove the calls to pci_release_regions(). Replace pci_request_regions() with pcim_request_all_regions(). Signed-off-by: Philipp Stanner Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250425085740.65304-9-phasta@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 608cc6af5af1..c9369bdd04e0 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1605,7 +1605,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return dev_err_probe(dev, err, "Failed to enable PCI device\n"); } - err = pci_request_regions(pdev, DRV_NAME); + err = pcim_request_all_regions(pdev, DRV_NAME); if (err) { dev_err(dev, "PCI request regions failed 0x%x\n", err); goto err_disable_device; @@ -1616,7 +1616,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!bgx->reg_base) { dev_err(dev, "BGX: Cannot map CSR memory space, aborting\n"); err = -ENOMEM; - goto err_release_regions; + goto err_disable_device; } set_max_bgx_per_node(pdev); @@ -1688,8 +1688,6 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_enable: bgx_vnic[bgx->bgx_id] = NULL; pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); -err_release_regions: - pci_release_regions(pdev); err_disable_device: pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); @@ -1710,7 +1708,6 @@ static void bgx_remove(struct pci_dev *pdev) pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); bgx_vnic[bgx->bgx_id] = NULL; - pci_release_regions(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } -- 2.51.0 From 1549bd06e340eaac2c317a045c493c3e9cc454cf Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Fri, 25 Apr 2025 10:57:41 +0200 Subject: [PATCH 06/16] net: thunder_bgx: Don't disable PCI device manually thunder_bgx's PCI device is enabled with pcim_enable_device(), a managed devres function which ensures that the device gets enabled on driver detach automatically. Remove the calls to pci_disable_device(). Signed-off-by: Philipp Stanner Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250425085740.65304-10-phasta@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index c9369bdd04e0..3b7ad744b2dd 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1608,7 +1608,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = pcim_request_all_regions(pdev, DRV_NAME); if (err) { dev_err(dev, "PCI request regions failed 0x%x\n", err); - goto err_disable_device; + goto err_zero_drv_data; } /* MAP configuration registers */ @@ -1616,7 +1616,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!bgx->reg_base) { dev_err(dev, "BGX: Cannot map CSR memory space, aborting\n"); err = -ENOMEM; - goto err_disable_device; + goto err_zero_drv_data; } set_max_bgx_per_node(pdev); @@ -1688,8 +1688,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_enable: bgx_vnic[bgx->bgx_id] = NULL; pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); -err_disable_device: - pci_disable_device(pdev); +err_zero_drv_data: pci_set_drvdata(pdev, NULL); return err; } @@ -1708,7 +1707,6 @@ static void bgx_remove(struct pci_dev *pdev) pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); bgx_vnic[bgx->bgx_id] = NULL; - pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } -- 2.51.0 From ef7d33e174564d6294edd00ca797e2b0aac76259 Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Fri, 25 Apr 2025 14:40:57 +0800 Subject: [PATCH 07/16] rtase: Modify the format specifier in snprintf to %u Modify the format specifier in snprintf to %u. Signed-off-by: Justin Lai Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250425064057.30035-1-justinlai0215@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/rtase/rtase_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index 6251548d50ff..700e9ae8e833 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -1114,7 +1114,7 @@ static int rtase_open(struct net_device *dev) /* request other interrupts to handle multiqueue */ for (i = 1; i < tp->int_nums; i++) { ivec = &tp->int_vector[i]; - snprintf(ivec->name, sizeof(ivec->name), "%s_int%i", + snprintf(ivec->name, sizeof(ivec->name), "%s_int%u", tp->dev->name, i); ret = request_irq(ivec->irq, rtase_q_interrupt, 0, ivec->name, ivec); -- 2.51.0 From a427e7f99b710f3547f0bfb91c4371acb56e1c84 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 23 Apr 2025 20:46:44 +0000 Subject: [PATCH 08/16] tools/Makefile: Add ynl target Add targets to build, clean, and install ynl headers, libynl.a, and python tooling. Signed-off-by: Joe Damato Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250423204647.190784-1-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- tools/Makefile | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tools/Makefile b/tools/Makefile index 5e1254eb66de..c31cbbd12c45 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -41,6 +41,7 @@ help: @echo ' mm - misc mm tools' @echo ' wmi - WMI interface examples' @echo ' x86_energy_perf_policy - Intel energy policy tool' + @echo ' ynl - ynl headers, library, and python tool' @echo '' @echo 'You can do:' @echo ' $$ make -C tools/ _install' @@ -118,11 +119,14 @@ freefall: FORCE kvm_stat: FORCE $(call descend,kvm/$@) +ynl: FORCE + $(call descend,net/ynl) + all: acpi counter cpupower gpio hv firewire \ perf selftests bootconfig spi turbostat usb \ virtio mm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat wmi \ - debugging tracing thermal thermometer thermal-engine + debugging tracing thermal thermometer thermal-engine ynl acpi_install: $(call descend,power/$(@:_install=),install) @@ -157,13 +161,16 @@ freefall_install: kvm_stat_install: $(call descend,kvm/$(@:_install=),install) +ynl_install: + $(call descend,net/$(@:_install=),install) + install: acpi_install counter_install cpupower_install gpio_install \ hv_install firewire_install iio_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install mm_install bpf_install x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install \ wmi_install debugging_install intel-speed-select_install \ - tracing_install thermometer_install thermal-engine_install + tracing_install thermometer_install thermal-engine_install ynl_install acpi_clean: $(call descend,power/acpi,clean) @@ -214,12 +221,15 @@ freefall_clean: build_clean: $(call descend,build,clean) +ynl_clean: + $(call descend,net/$(@:_clean=),clean) + clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \ perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \ mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean \ gpio_clean objtool_clean leds_clean wmi_clean firmware_clean debugging_clean \ intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean \ - sched_ext_clean + sched_ext_clean ynl_clean .PHONY: FORCE -- 2.51.0 From 0d15a26b247d25cd012134bf8825128fedb15cc9 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Thu, 24 Apr 2025 15:23:16 +0530 Subject: [PATCH 09/16] net: ti: icssg-prueth: Add ICSSG FW Stats The ICSSG firmware maintains set of stats called PA_STATS. Currently the driver only dumps 4 stats. Add support for dumping more stats. The offset for different stats are defined as MACROs in icssg_switch_map.h file. All the offsets are for Slice0. Slice1 offsets are slice0 + 4. The offset calculation is taken care while reading the stats in emac_update_hardware_stats(). The statistics are documented in Documentation/networking/device_drivers/icssg_prueth.rst Reviewed-by: Simon Horman Signed-off-by: MD Danish Anwar Link: https://patch.msgid.link/20250424095316.2643573-1-danishanwar@ti.com Signed-off-by: Jakub Kicinski --- .../device_drivers/ethernet/index.rst | 1 + .../ethernet/ti/icssg_prueth.rst | 56 ++++++++++++++++++ drivers/net/ethernet/ti/icssg/icssg_common.c | 24 +++++++- drivers/net/ethernet/ti/icssg/icssg_prueth.h | 2 +- drivers/net/ethernet/ti/icssg/icssg_stats.c | 8 +-- drivers/net/ethernet/ti/icssg/icssg_stats.h | 58 ++++++++++++------- .../net/ethernet/ti/icssg/icssg_switch_map.h | 33 +++++++++++ 7 files changed, 151 insertions(+), 31 deletions(-) create mode 100644 Documentation/networking/device_drivers/ethernet/ti/icssg_prueth.rst diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index 05d822b904b4..f9ed93c1da35 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -55,6 +55,7 @@ Contents: ti/cpsw_switchdev ti/am65_nuss_cpsw_switchdev ti/tlan + ti/icssg_prueth wangxun/txgbe wangxun/ngbe diff --git a/Documentation/networking/device_drivers/ethernet/ti/icssg_prueth.rst b/Documentation/networking/device_drivers/ethernet/ti/icssg_prueth.rst new file mode 100644 index 000000000000..da21ddf431bb --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/ti/icssg_prueth.rst @@ -0,0 +1,56 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================== +Texas Instruments ICSSG PRUETH ethernet driver +============================================== + +:Version: 1.0 + +ICSSG Firmware +============== + +Every ICSSG core has two Programmable Real-Time Unit(PRUs), two auxiliary +Real-Time Transfer Unit (RTUs), and two Transmit Real-Time Transfer Units +(TX_PRUs). Each one of these runs its own firmware. The firmwares combnined are +referred as ICSSG Firmware. + +Firmware Statistics +=================== + +The ICSSG firmware maintains certain statistics which are dumped by the driver +via ``ethtool -S `` + +These statistics are as follows, + + - ``FW_RTU_PKT_DROP``: Diagnostic error counter which increments when RTU drops a locally injected packet due to port being disabled or rule violation. + - ``FW_Q0_OVERFLOW``: TX overflow counter for queue0 + - ``FW_Q1_OVERFLOW``: TX overflow counter for queue1 + - ``FW_Q2_OVERFLOW``: TX overflow counter for queue2 + - ``FW_Q3_OVERFLOW``: TX overflow counter for queue3 + - ``FW_Q4_OVERFLOW``: TX overflow counter for queue4 + - ``FW_Q5_OVERFLOW``: TX overflow counter for queue5 + - ``FW_Q6_OVERFLOW``: TX overflow counter for queue6 + - ``FW_Q7_OVERFLOW``: TX overflow counter for queue7 + - ``FW_DROPPED_PKT``: This counter is incremented when a packet is dropped at PRU because of rule violation. + - ``FW_RX_ERROR``: Incremented if there was a CRC error or Min/Max frame error at PRU + - ``FW_RX_DS_INVALID``: Incremented when RTU detects Data Status invalid condition + - ``FW_TX_DROPPED_PACKET``: Counter for packets dropped via TX Port + - ``FW_TX_TS_DROPPED_PACKET``: Counter for packets with TS flag dropped via TX Port + - ``FW_INF_PORT_DISABLED``: Incremented when RX frame is dropped due to port being disabled + - ``FW_INF_SAV``: Incremented when RX frame is dropped due to Source Address violation + - ``FW_INF_SA_DL``: Incremented when RX frame is dropped due to Source Address being in the denylist + - ``FW_INF_PORT_BLOCKED``: Incremented when RX frame is dropped due to port being blocked and frame being a special frame + - ``FW_INF_DROP_TAGGED`` : Incremented when RX frame is dropped for being tagged + - ``FW_INF_DROP_PRIOTAGGED``: Incremented when RX frame is dropped for being priority tagged + - ``FW_INF_DROP_NOTAG``: Incremented when RX frame is dropped for being untagged + - ``FW_INF_DROP_NOTMEMBER``: Incremented when RX frame is dropped for port not being member of VLAN + - ``FW_RX_EOF_SHORT_FRMERR``: Incremented if End Of Frame (EOF) task is scheduled without seeing RX_B1 + - ``FW_RX_B0_DROP_EARLY_EOF``: Incremented when frame is dropped due to Early EOF + - ``FW_TX_JUMBO_FRM_CUTOFF``: Incremented when frame is cut off to prevent packet size > 2000 Bytes + - ``FW_RX_EXP_FRAG_Q_DROP``: Incremented when express frame is received in the same queue as the previous fragment + - ``FW_RX_FIFO_OVERRUN``: RX fifo overrun counter + - ``FW_CUT_THR_PKT``: Incremented when a packet is forwarded using Cut-Through forwarding method + - ``FW_HOST_RX_PKT_CNT``: Number of valid packets sent by Rx PRU to Host on PSI + - ``FW_HOST_TX_PKT_CNT``: Number of valid packets copied by RTU0 to Tx queues + - ``FW_HOST_EGRESS_Q_PRE_OVERFLOW``: Host Egress Q (Pre-emptible) Overflow Counter + - ``FW_HOST_EGRESS_Q_EXP_OVERFLOW``: Host Egress Q (Pre-emptible) Overflow Counter diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index b4be76e13a2f..79f2d86acf21 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -1318,10 +1318,28 @@ void icssg_ndo_get_stats64(struct net_device *ndev, stats->rx_over_errors = emac_get_stat_by_name(emac, "rx_over_errors"); stats->multicast = emac_get_stat_by_name(emac, "rx_multicast_frames"); - stats->rx_errors = ndev->stats.rx_errors; - stats->rx_dropped = ndev->stats.rx_dropped; + stats->rx_errors = ndev->stats.rx_errors + + emac_get_stat_by_name(emac, "FW_RX_ERROR") + + emac_get_stat_by_name(emac, "FW_RX_EOF_SHORT_FRMERR") + + emac_get_stat_by_name(emac, "FW_RX_B0_DROP_EARLY_EOF") + + emac_get_stat_by_name(emac, "FW_RX_EXP_FRAG_Q_DROP") + + emac_get_stat_by_name(emac, "FW_RX_FIFO_OVERRUN"); + stats->rx_dropped = ndev->stats.rx_dropped + + emac_get_stat_by_name(emac, "FW_DROPPED_PKT") + + emac_get_stat_by_name(emac, "FW_INF_PORT_DISABLED") + + emac_get_stat_by_name(emac, "FW_INF_SAV") + + emac_get_stat_by_name(emac, "FW_INF_SA_DL") + + emac_get_stat_by_name(emac, "FW_INF_PORT_BLOCKED") + + emac_get_stat_by_name(emac, "FW_INF_DROP_TAGGED") + + emac_get_stat_by_name(emac, "FW_INF_DROP_PRIOTAGGED") + + emac_get_stat_by_name(emac, "FW_INF_DROP_NOTAG") + + emac_get_stat_by_name(emac, "FW_INF_DROP_NOTMEMBER"); stats->tx_errors = ndev->stats.tx_errors; - stats->tx_dropped = ndev->stats.tx_dropped; + stats->tx_dropped = ndev->stats.tx_dropped + + emac_get_stat_by_name(emac, "FW_RTU_PKT_DROP") + + emac_get_stat_by_name(emac, "FW_TX_DROPPED_PACKET") + + emac_get_stat_by_name(emac, "FW_TX_TS_DROPPED_PACKET") + + emac_get_stat_by_name(emac, "FW_TX_JUMBO_FRM_CUTOFF"); } EXPORT_SYMBOL_GPL(icssg_ndo_get_stats64); diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index b6be4aa57a61..23c465f1ce7f 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -54,7 +54,7 @@ #define ICSSG_MAX_RFLOWS 8 /* per slice */ -#define ICSSG_NUM_PA_STATS 4 +#define ICSSG_NUM_PA_STATS 32 #define ICSSG_NUM_MIIG_STATS 60 /* Number of ICSSG related stats */ #define ICSSG_NUM_STATS (ICSSG_NUM_MIIG_STATS + ICSSG_NUM_PA_STATS) diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.c b/drivers/net/ethernet/ti/icssg/icssg_stats.c index 6f0edae38ea2..e8241e998aa9 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_stats.c +++ b/drivers/net/ethernet/ti/icssg/icssg_stats.c @@ -11,7 +11,6 @@ #define ICSSG_TX_PACKET_OFFSET 0xA0 #define ICSSG_TX_BYTE_OFFSET 0xEC -#define ICSSG_FW_STATS_BASE 0x0248 static u32 stats_base[] = { 0x54c, /* Slice 0 stats start */ 0xb18, /* Slice 1 stats start */ @@ -46,9 +45,8 @@ void emac_update_hardware_stats(struct prueth_emac *emac) if (prueth->pa_stats) { for (i = 0; i < ARRAY_SIZE(icssg_all_pa_stats); i++) { - reg = ICSSG_FW_STATS_BASE + - icssg_all_pa_stats[i].offset * - PRUETH_NUM_MACS + slice * sizeof(u32); + reg = icssg_all_pa_stats[i].offset + + slice * sizeof(u32); regmap_read(prueth->pa_stats, reg, &val); emac->pa_stats[i] += val; } @@ -80,7 +78,7 @@ int emac_get_stat_by_name(struct prueth_emac *emac, char *stat_name) if (emac->prueth->pa_stats) { for (i = 0; i < ARRAY_SIZE(icssg_all_pa_stats); i++) { if (!strcmp(icssg_all_pa_stats[i].name, stat_name)) - return emac->pa_stats[icssg_all_pa_stats[i].offset / sizeof(u32)]; + return emac->pa_stats[i]; } } diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.h b/drivers/net/ethernet/ti/icssg/icssg_stats.h index e88b919f532c..5ec0b38e0c67 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_stats.h +++ b/drivers/net/ethernet/ti/icssg/icssg_stats.h @@ -155,24 +155,10 @@ static const struct icssg_miig_stats icssg_all_miig_stats[] = { ICSSG_MIIG_STATS(tx_bytes, true), }; -/** - * struct pa_stats_regs - ICSSG Firmware maintained PA Stats register - * @fw_rx_cnt: Number of valid packets sent by Rx PRU to Host on PSI - * @fw_tx_cnt: Number of valid packets copied by RTU0 to Tx queues - * @fw_tx_pre_overflow: Host Egress Q (Pre-emptible) Overflow Counter - * @fw_tx_exp_overflow: Host Egress Q (Express) Overflow Counter - */ -struct pa_stats_regs { - u32 fw_rx_cnt; - u32 fw_tx_cnt; - u32 fw_tx_pre_overflow; - u32 fw_tx_exp_overflow; -}; - -#define ICSSG_PA_STATS(field) \ -{ \ - #field, \ - offsetof(struct pa_stats_regs, field), \ +#define ICSSG_PA_STATS(field) \ +{ \ + #field, \ + field, \ } struct icssg_pa_stats { @@ -181,10 +167,38 @@ struct icssg_pa_stats { }; static const struct icssg_pa_stats icssg_all_pa_stats[] = { - ICSSG_PA_STATS(fw_rx_cnt), - ICSSG_PA_STATS(fw_tx_cnt), - ICSSG_PA_STATS(fw_tx_pre_overflow), - ICSSG_PA_STATS(fw_tx_exp_overflow), + ICSSG_PA_STATS(FW_RTU_PKT_DROP), + ICSSG_PA_STATS(FW_Q0_OVERFLOW), + ICSSG_PA_STATS(FW_Q1_OVERFLOW), + ICSSG_PA_STATS(FW_Q2_OVERFLOW), + ICSSG_PA_STATS(FW_Q3_OVERFLOW), + ICSSG_PA_STATS(FW_Q4_OVERFLOW), + ICSSG_PA_STATS(FW_Q5_OVERFLOW), + ICSSG_PA_STATS(FW_Q6_OVERFLOW), + ICSSG_PA_STATS(FW_Q7_OVERFLOW), + ICSSG_PA_STATS(FW_DROPPED_PKT), + ICSSG_PA_STATS(FW_RX_ERROR), + ICSSG_PA_STATS(FW_RX_DS_INVALID), + ICSSG_PA_STATS(FW_TX_DROPPED_PACKET), + ICSSG_PA_STATS(FW_TX_TS_DROPPED_PACKET), + ICSSG_PA_STATS(FW_INF_PORT_DISABLED), + ICSSG_PA_STATS(FW_INF_SAV), + ICSSG_PA_STATS(FW_INF_SA_DL), + ICSSG_PA_STATS(FW_INF_PORT_BLOCKED), + ICSSG_PA_STATS(FW_INF_DROP_TAGGED), + ICSSG_PA_STATS(FW_INF_DROP_PRIOTAGGED), + ICSSG_PA_STATS(FW_INF_DROP_NOTAG), + ICSSG_PA_STATS(FW_INF_DROP_NOTMEMBER), + ICSSG_PA_STATS(FW_RX_EOF_SHORT_FRMERR), + ICSSG_PA_STATS(FW_RX_B0_DROP_EARLY_EOF), + ICSSG_PA_STATS(FW_TX_JUMBO_FRM_CUTOFF), + ICSSG_PA_STATS(FW_RX_EXP_FRAG_Q_DROP), + ICSSG_PA_STATS(FW_RX_FIFO_OVERRUN), + ICSSG_PA_STATS(FW_CUT_THR_PKT), + ICSSG_PA_STATS(FW_HOST_RX_PKT_CNT), + ICSSG_PA_STATS(FW_HOST_TX_PKT_CNT), + ICSSG_PA_STATS(FW_HOST_EGRESS_Q_PRE_OVERFLOW), + ICSSG_PA_STATS(FW_HOST_EGRESS_Q_EXP_OVERFLOW), }; #endif /* __NET_TI_ICSSG_STATS_H */ diff --git a/drivers/net/ethernet/ti/icssg/icssg_switch_map.h b/drivers/net/ethernet/ti/icssg/icssg_switch_map.h index 424a7e945ea8..490a9cc06fb0 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_switch_map.h +++ b/drivers/net/ethernet/ti/icssg/icssg_switch_map.h @@ -231,4 +231,37 @@ /* Start of 32 bits PA_STAT counters */ #define PA_STAT_32b_START_OFFSET 0x0080 +#define FW_RTU_PKT_DROP 0x0088 +#define FW_Q0_OVERFLOW 0x0090 +#define FW_Q1_OVERFLOW 0x0098 +#define FW_Q2_OVERFLOW 0x00A0 +#define FW_Q3_OVERFLOW 0x00A8 +#define FW_Q4_OVERFLOW 0x00B0 +#define FW_Q5_OVERFLOW 0x00B8 +#define FW_Q6_OVERFLOW 0x00C0 +#define FW_Q7_OVERFLOW 0x00C8 +#define FW_DROPPED_PKT 0x00F8 +#define FW_RX_ERROR 0x0100 +#define FW_RX_DS_INVALID 0x0108 +#define FW_TX_DROPPED_PACKET 0x0110 +#define FW_TX_TS_DROPPED_PACKET 0x0118 +#define FW_INF_PORT_DISABLED 0x0120 +#define FW_INF_SAV 0x0128 +#define FW_INF_SA_DL 0x0130 +#define FW_INF_PORT_BLOCKED 0x0138 +#define FW_INF_DROP_TAGGED 0x0140 +#define FW_INF_DROP_PRIOTAGGED 0x0148 +#define FW_INF_DROP_NOTAG 0x0150 +#define FW_INF_DROP_NOTMEMBER 0x0158 +#define FW_RX_EOF_SHORT_FRMERR 0x0188 +#define FW_RX_B0_DROP_EARLY_EOF 0x0190 +#define FW_TX_JUMBO_FRM_CUTOFF 0x0198 +#define FW_RX_EXP_FRAG_Q_DROP 0x01A0 +#define FW_RX_FIFO_OVERRUN 0x01A8 +#define FW_CUT_THR_PKT 0x01B0 +#define FW_HOST_RX_PKT_CNT 0x0248 +#define FW_HOST_TX_PKT_CNT 0x0250 +#define FW_HOST_EGRESS_Q_PRE_OVERFLOW 0x0258 +#define FW_HOST_EGRESS_Q_EXP_OVERFLOW 0x0260 + #endif /* __NET_TI_ICSSG_SWITCH_MAP_H */ -- 2.51.0 From 32607a332cfea5a4b2a185f3e3d605a9bf4f8df0 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 24 Apr 2025 10:35:18 -0400 Subject: [PATCH 10/16] ipv4: prefer multipath nexthop that matches source address With multipath routes, try to ensure that packets leave on the device that is associated with the source address. Avoid the following tcpdump example: veth0 Out IP 10.1.0.2.38640 > 10.2.0.3.8000: Flags [S] veth1 Out IP 10.1.0.2.38648 > 10.2.0.3.8000: Flags [S] Which can happen easily with the most straightforward setup: ip addr add 10.0.0.1/24 dev veth0 ip addr add 10.1.0.1/24 dev veth1 ip route add 10.2.0.3 nexthop via 10.0.0.2 dev veth0 \ nexthop via 10.1.0.2 dev veth1 This is apparently considered WAI, based on the comment in ip_route_output_key_hash_rcu: * 2. Moreover, we are allowed to send packets with saddr * of another iface. --ANK It may be ok for some uses of multipath, but not all. For instance, when using two ISPs, a router may drop packets with unknown source. The behavior occurs because tcp_v4_connect makes three route lookups when establishing a connection: 1. ip_route_connect calls to select a source address, with saddr zero. 2. ip_route_connect calls again now that saddr and daddr are known. 3. ip_route_newports calls again after a source port is also chosen. With a route with multiple nexthops, each lookup may make a different choice depending on available entropy to fib_select_multipath. So it is possible for 1 to select the saddr from the first entry, but 3 to select the second entry. Leading to the above situation. Address this by preferring a match that matches the flowi4 saddr. This will make 2 and 3 make the same choice as 1. Continue to update the backup choice until a choice that matches saddr is found. Do this in fib_select_multipath itself, rather than passing an fl4_oif constraint, to avoid changing non-multipath route selection. Commit e6b45241c57a ("ipv4: reset flowi parameters on route connect") shows how that may cause regressions. Also read ipv4.sysctl_fib_multipath_use_neigh only once. No need to refresh in the loop. This does not happen in IPv6, which performs only one lookup. Signed-off-by: Willem de Bruijn Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20250424143549.669426-2-willemdebruijn.kernel@gmail.com Signed-off-by: Paolo Abeni --- include/net/ip_fib.h | 3 ++- net/ipv4/fib_semantics.c | 39 +++++++++++++++++++++++++-------------- net/ipv4/route.c | 2 +- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e3864b74e92a..48bb3cf41469 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -574,7 +574,8 @@ static inline u32 fib_multipath_hash_from_keys(const struct net *net, int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack); -void fib_select_multipath(struct fib_result *res, int hash); +void fib_select_multipath(struct fib_result *res, int hash, + const struct flowi4 *fl4); void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 5326f1501af0..2371f311a1e1 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -2170,34 +2170,45 @@ static bool fib_good_nh(const struct fib_nh *nh) return !!(state & NUD_VALID); } -void fib_select_multipath(struct fib_result *res, int hash) +void fib_select_multipath(struct fib_result *res, int hash, + const struct flowi4 *fl4) { struct fib_info *fi = res->fi; struct net *net = fi->fib_net; - bool first = false; + bool found = false; + bool use_neigh; + __be32 saddr; if (unlikely(res->fi->nh)) { nexthop_path_fib_result(res, hash); return; } + use_neigh = READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh); + saddr = fl4 ? fl4->saddr : 0; + change_nexthops(fi) { - if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) { - if (!fib_good_nh(nexthop_nh)) - continue; - if (!first) { - res->nh_sel = nhsel; - res->nhc = &nexthop_nh->nh_common; - first = true; - } + if (use_neigh && !fib_good_nh(nexthop_nh)) + continue; + + if (!found) { + res->nh_sel = nhsel; + res->nhc = &nexthop_nh->nh_common; + found = !saddr || nexthop_nh->nh_saddr == saddr; } if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound)) continue; - res->nh_sel = nhsel; - res->nhc = &nexthop_nh->nh_common; - return; + if (!saddr || nexthop_nh->nh_saddr == saddr) { + res->nh_sel = nhsel; + res->nhc = &nexthop_nh->nh_common; + return; + } + + if (found) + return; + } endfor_nexthops(fi); } #endif @@ -2212,7 +2223,7 @@ void fib_select_path(struct net *net, struct fib_result *res, if (fib_info_num_path(res->fi) > 1) { int h = fib_multipath_hash(net, fl4, skb, NULL); - fib_select_multipath(res, h); + fib_select_multipath(res, h, fl4); } else #endif diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 49cffbe83802..e5e4c71be3af 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2154,7 +2154,7 @@ ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, if (res->fi && fib_info_num_path(res->fi) > 1) { int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys); - fib_select_multipath(res, h); + fib_select_multipath(res, h, NULL); IPCB(skb)->flags |= IPSKB_MULTIPATH; } #endif -- 2.51.0 From 65e9024643c7512ade3aedbb341e11d77ed7abc2 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 24 Apr 2025 10:35:19 -0400 Subject: [PATCH 11/16] ip: load balance tcp connections to single dst addr and port Load balance new TCP connections across nexthops also when they connect to the same service at a single remote address and port. This affects only port-based multipath hashing: fib_multipath_hash_policy 1 or 3. Local connections must choose both a source address and port when connecting to a remote service, in ip_route_connect. This "chicken-and-egg problem" (commit 2d7192d6cbab ("ipv4: Sanitize and simplify ip_route_{connect,newports}()")) is resolved by first selecting a source address, by looking up a route using the zero wildcard source port and address. As a result multiple connections to the same destination address and port have no entropy in fib_multipath_hash. This is not a problem when forwarding, as skb-based hashing has a 4-tuple. Nor when establishing UDP connections, as autobind there selects a port before reaching ip_route_connect. Load balance also TCP, by using a random port in fib_multipath_hash. Port assignment in inet_hash_connect is not atomic with ip_route_connect. Thus ports are unpredictable, effectively random. Implementation details: Do not actually pass a random fl4_sport, as that affects not only hashing, but routing more broadly, and can match a source port based policy route, which existing wildcard port 0 will not. Instead, define a new wildcard flowi flag that is used only for hashing. Selecting a random source is equivalent to just selecting a random hash entirely. But for code clarity, follow the normal 4-tuple hash process and only update this field. fib_multipath_hash can be reached with zero sport from other code paths, so explicitly pass this flowi flag, rather than trying to infer this case in the function itself. Signed-off-by: Willem de Bruijn Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20250424143549.669426-3-willemdebruijn.kernel@gmail.com Signed-off-by: Paolo Abeni --- include/net/flow.h | 1 + include/net/route.h | 3 +++ net/ipv4/route.c | 13 ++++++++++--- net/ipv6/route.c | 13 ++++++++++--- net/ipv6/tcp_ipv6.c | 2 ++ 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/include/net/flow.h b/include/net/flow.h index 2a3f0c42f092..a1839c278d87 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -39,6 +39,7 @@ struct flowi_common { #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 #define FLOWI_FLAG_L3MDEV_OIF 0x04 +#define FLOWI_FLAG_ANY_SPORT 0x08 __u32 flowic_secid; kuid_t flowic_uid; __u32 flowic_multipath_hash; diff --git a/include/net/route.h b/include/net/route.h index c605fd5ec0c0..8e39aa822cf9 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -326,6 +326,9 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, if (inet_test_bit(TRANSPARENT, sk)) flow_flags |= FLOWI_FLAG_ANYSRC; + if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !sport) + flow_flags |= FLOWI_FLAG_ANY_SPORT; + flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), protocol, flow_flags, dst, src, dport, sport, sk->sk_uid); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e5e4c71be3af..507b2e5dec50 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2037,8 +2037,12 @@ static u32 fib_multipath_custom_hash_fl4(const struct net *net, hash_keys.addrs.v4addrs.dst = fl4->daddr; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) hash_keys.basic.ip_proto = fl4->flowi4_proto; - if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) - hash_keys.ports.src = fl4->fl4_sport; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) { + if (fl4->flowi4_flags & FLOWI_FLAG_ANY_SPORT) + hash_keys.ports.src = (__force __be16)get_random_u16(); + else + hash_keys.ports.src = fl4->fl4_sport; + } if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = fl4->fl4_dport; @@ -2093,7 +2097,10 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; - hash_keys.ports.src = fl4->fl4_sport; + if (fl4->flowi4_flags & FLOWI_FLAG_ANY_SPORT) + hash_keys.ports.src = (__force __be16)get_random_u16(); + else + hash_keys.ports.src = fl4->fl4_sport; hash_keys.ports.dst = fl4->fl4_dport; hash_keys.basic.ip_proto = fl4->flowi4_proto; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d0351e95d916..aa6b45bd3515 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2492,8 +2492,12 @@ static u32 rt6_multipath_custom_hash_fl6(const struct net *net, hash_keys.basic.ip_proto = fl6->flowi6_proto; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL) hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); - if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) - hash_keys.ports.src = fl6->fl6_sport; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) { + if (fl6->flowi6_flags & FLOWI_FLAG_ANY_SPORT) + hash_keys.ports.src = (__force __be16)get_random_u16(); + else + hash_keys.ports.src = fl6->fl6_sport; + } if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = fl6->fl6_dport; @@ -2547,7 +2551,10 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; hash_keys.addrs.v6addrs.src = fl6->saddr; hash_keys.addrs.v6addrs.dst = fl6->daddr; - hash_keys.ports.src = fl6->fl6_sport; + if (fl6->flowi6_flags & FLOWI_FLAG_ANY_SPORT) + hash_keys.ports.src = (__force __be16)get_random_u16(); + else + hash_keys.ports.src = fl6->fl6_sport; hash_keys.ports.dst = fl6->fl6_dport; hash_keys.basic.ip_proto = fl6->flowi6_proto; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7dcb33f879ee..e8e68a142649 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -267,6 +267,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6.fl6_sport) + fl6.flowi6_flags = FLOWI_FLAG_ANY_SPORT; fl6.flowi6_uid = sk->sk_uid; opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); -- 2.51.0 From 4d0dac499bf384fe3f42acc30906d304c3499dd8 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 24 Apr 2025 10:35:20 -0400 Subject: [PATCH 12/16] selftests/net: test tcp connection load balancing Verify that TCP connections use both routes when connecting multiple times to a remote service over a two nexthop multipath route. Use socat to create the connections. Use tc prio + tc filter to count routes taken, counting SYN packets across the two egress devices. Also verify that the saddr matches that of the device. To avoid flaky tests when testing inherently randomized behavior, set a low bar and pass if even a single SYN is observed on each device. Signed-off-by: Willem de Bruijn Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/20250424143549.669426-4-willemdebruijn.kernel@gmail.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/fib_tests.sh | 120 ++++++++++++++++++++++- tools/testing/selftests/net/lib.sh | 24 +++++ 2 files changed, 143 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 3ea6f886a210..c58dc4ac2810 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -11,7 +11,7 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \ ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \ ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \ - ipv4_mpath_list ipv6_mpath_list" + ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance" VERBOSE=0 PAUSE_ON_FAIL=no @@ -1085,6 +1085,35 @@ route_setup() set +e } +forwarding_cleanup() +{ + cleanup_ns $ns3 + + route_cleanup +} + +# extend route_setup with an ns3 reachable through ns2 over both devices +forwarding_setup() +{ + forwarding_cleanup + + route_setup + + setup_ns ns3 + + ip link add veth5 netns $ns3 type veth peer name veth6 netns $ns2 + ip -netns $ns3 link set veth5 up + ip -netns $ns2 link set veth6 up + + ip -netns $ns3 -4 addr add dev veth5 172.16.105.1/24 + ip -netns $ns2 -4 addr add dev veth6 172.16.105.2/24 + ip -netns $ns3 -4 route add 172.16.100.0/22 via 172.16.105.2 + + ip -netns $ns3 -6 addr add dev veth5 2001:db8:105::1/64 nodad + ip -netns $ns2 -6 addr add dev veth6 2001:db8:105::2/64 nodad + ip -netns $ns3 -6 route add 2001:db8:101::/33 via 2001:db8:105::2 +} + # assumption is that basic add of a single path route works # otherwise just adding an address on an interface is broken ipv6_rt_add() @@ -2600,6 +2629,93 @@ ipv6_mpath_list_test() route_cleanup } +tc_set_flower_counter__saddr_syn() { + tc_set_flower_counter $1 $2 $3 "src_ip $4 ip_proto tcp tcp_flags 0x2" +} + +ip_mpath_balance_dep_check() +{ + if [ ! -x "$(command -v socat)" ]; then + echo "socat command not found. Skipping test" + return 1 + fi + + if [ ! -x "$(command -v jq)" ]; then + echo "jq command not found. Skipping test" + return 1 + fi +} + +ip_mpath_balance() { + local -r ipver=$1 + local -r daddr=$2 + local -r num_conn=20 + + for i in $(seq 1 $num_conn); do + ip netns exec $ns3 socat $ipver TCP-LISTEN:8000 STDIO >/dev/null & + sleep 0.02 + echo -n a | ip netns exec $ns1 socat $ipver STDIO TCP:$daddr:8000 + done + + local -r syn0="$(tc_get_flower_counter $ns1 veth1)" + local -r syn1="$(tc_get_flower_counter $ns1 veth3)" + local -r syns=$((syn0+syn1)) + + [ "$VERBOSE" = "1" ] && echo "multipath: syns seen: ($syn0,$syn1)" + + [[ $syns -ge $num_conn ]] && [[ $syn0 -gt 0 ]] && [[ $syn1 -gt 0 ]] +} + +ipv4_mpath_balance_test() +{ + echo + echo "IPv4 multipath load balance test" + + ip_mpath_balance_dep_check || return 1 + forwarding_setup + + $IP route add 172.16.105.1 \ + nexthop via 172.16.101.2 \ + nexthop via 172.16.103.2 + + ip netns exec $ns1 \ + sysctl -q -w net.ipv4.fib_multipath_hash_policy=1 + + tc_set_flower_counter__saddr_syn $ns1 4 veth1 172.16.101.1 + tc_set_flower_counter__saddr_syn $ns1 4 veth3 172.16.103.1 + + ip_mpath_balance -4 172.16.105.1 + + log_test $? 0 "IPv4 multipath loadbalance" + + forwarding_cleanup +} + +ipv6_mpath_balance_test() +{ + echo + echo "IPv6 multipath load balance test" + + ip_mpath_balance_dep_check || return 1 + forwarding_setup + + $IP route add 2001:db8:105::1\ + nexthop via 2001:db8:101::2 \ + nexthop via 2001:db8:103::2 + + ip netns exec $ns1 \ + sysctl -q -w net.ipv6.fib_multipath_hash_policy=1 + + tc_set_flower_counter__saddr_syn $ns1 6 veth1 2001:db8:101::1 + tc_set_flower_counter__saddr_syn $ns1 6 veth3 2001:db8:103::1 + + ip_mpath_balance -6 "[2001:db8:105::1]" + + log_test $? 0 "IPv6 multipath loadbalance" + + forwarding_cleanup +} + ################################################################################ # usage @@ -2683,6 +2799,8 @@ do fib6_gc_test|ipv6_gc) fib6_gc_test;; ipv4_mpath_list) ipv4_mpath_list_test;; ipv6_mpath_list) ipv6_mpath_list_test;; + ipv4_mpath_balance) ipv4_mpath_balance_test;; + ipv6_mpath_balance) ipv6_mpath_balance_test;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 701905eeff66..7e1e56318625 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -270,6 +270,30 @@ tc_rule_handle_stats_get() .options.actions[0].stats$selector" } +# attach a qdisc with two children match/no-match and a flower filter to match +tc_set_flower_counter() { + local -r ns=$1 + local -r ipver=$2 + local -r dev=$3 + local -r flower_expr=$4 + + tc -n $ns qdisc add dev $dev root handle 1: prio bands 2 \ + priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + + tc -n $ns qdisc add dev $dev parent 1:1 handle 11: pfifo + tc -n $ns qdisc add dev $dev parent 1:2 handle 12: pfifo + + tc -n $ns filter add dev $dev parent 1: protocol ipv$ipver \ + flower $flower_expr classid 1:2 +} + +tc_get_flower_counter() { + local -r ns=$1 + local -r dev=$2 + + tc -n $ns -j -s qdisc show dev $dev handle 12: | jq .[0].packets +} + ret_set_ksft_status() { local ksft_status=$1; shift -- 2.51.0 From 6fc54c408dc9103bf98470b3877b741c44642baa Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Wed, 19 Mar 2025 11:26:39 +0100 Subject: [PATCH 13/16] igb: Link IRQs to NAPI instances Link IRQs to NAPI instances via netdev-genl API. This allows users to query that information via netlink: |$ ./tools/net/ynl/pyynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \ | --dump napi-get --json='{"ifindex": 2}' |[{'defer-hard-irqs': 0, | 'gro-flush-timeout': 0, | 'id': 8204, | 'ifindex': 2, | 'irq': 127, | 'irq-suspend-timeout': 0}, | {'defer-hard-irqs': 0, | 'gro-flush-timeout': 0, | 'id': 8203, | 'ifindex': 2, | 'irq': 126, | 'irq-suspend-timeout': 0}, | {'defer-hard-irqs': 0, | 'gro-flush-timeout': 0, | 'id': 8202, | 'ifindex': 2, | 'irq': 125, | 'irq-suspend-timeout': 0}, | {'defer-hard-irqs': 0, | 'gro-flush-timeout': 0, | 'id': 8201, | 'ifindex': 2, | 'irq': 124, | 'irq-suspend-timeout': 0}] |$ cat /proc/interrupts | grep enp2s0 |123: 0 1 IR-PCI-MSIX-0000:02:00.0 0-edge enp2s0 |124: 0 7 IR-PCI-MSIX-0000:02:00.0 1-edge enp2s0-TxRx-0 |125: 0 0 IR-PCI-MSIX-0000:02:00.0 2-edge enp2s0-TxRx-1 |126: 0 5 IR-PCI-MSIX-0000:02:00.0 3-edge enp2s0-TxRx-2 |127: 0 0 IR-PCI-MSIX-0000:02:00.0 4-edge enp2s0-TxRx-3 Reviewed-by: Joe Damato Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Kurt Kanzenbach Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index c646c71915f0..401af6dce2a8 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -947,6 +947,9 @@ static int igb_request_msix(struct igb_adapter *adapter) q_vector); if (err) goto err_free; + + netif_napi_set_irq(&q_vector->napi, + adapter->msix_entries[vector].vector); } igb_configure_msix(adapter); -- 2.51.0 From b75a1dea500f4397a46e971cd41e613b7f18e317 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Wed, 19 Mar 2025 11:26:40 +0100 Subject: [PATCH 14/16] igb: Link queues to NAPI instances Link queues to NAPI instances via netdev-genl API. This is required to use XDP/ZC busy polling. See commit 5ef44b3cb43b ("xsk: Bring back busy polling support") for details. This also allows users to query the info with netlink: |$ ./tools/net/ynl/pyynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \ | --dump queue-get --json='{"ifindex": 2}' |[{'id': 0, 'ifindex': 2, 'napi-id': 8201, 'type': 'rx'}, | {'id': 1, 'ifindex': 2, 'napi-id': 8202, 'type': 'rx'}, | {'id': 2, 'ifindex': 2, 'napi-id': 8203, 'type': 'rx'}, | {'id': 3, 'ifindex': 2, 'napi-id': 8204, 'type': 'rx'}, | {'id': 0, 'ifindex': 2, 'napi-id': 8201, 'type': 'tx'}, | {'id': 1, 'ifindex': 2, 'napi-id': 8202, 'type': 'tx'}, | {'id': 2, 'ifindex': 2, 'napi-id': 8203, 'type': 'tx'}, | {'id': 3, 'ifindex': 2, 'napi-id': 8204, 'type': 'tx'}] Add rtnl locking to PCI error handlers, because netif_queue_set_napi() requires the lock held. While at __igb_open() use RCT coding style. Signed-off-by: Kurt Kanzenbach Reviewed-by: Joe Damato Tested-by: Sweta Kumari Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb.h | 2 ++ drivers/net/ethernet/intel/igb/igb_main.c | 43 ++++++++++++++++++++--- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 02f340280d20..79eca385a751 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -722,6 +722,8 @@ enum igb_boards { extern char igb_driver_name[]; +void igb_set_queue_napi(struct igb_adapter *adapter, int q_idx, + struct napi_struct *napi); int igb_xmit_xdp_ring(struct igb_adapter *adapter, struct igb_ring *ring, struct xdp_frame *xdpf); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 401af6dce2a8..0535cc72b11b 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2099,6 +2099,22 @@ static void igb_check_swap_media(struct igb_adapter *adapter) wr32(E1000_CTRL_EXT, ctrl_ext); } +void igb_set_queue_napi(struct igb_adapter *adapter, int vector, + struct napi_struct *napi) +{ + struct igb_q_vector *q_vector = adapter->q_vector[vector]; + + if (q_vector->rx.ring) + netif_queue_set_napi(adapter->netdev, + q_vector->rx.ring->queue_index, + NETDEV_QUEUE_TYPE_RX, napi); + + if (q_vector->tx.ring) + netif_queue_set_napi(adapter->netdev, + q_vector->tx.ring->queue_index, + NETDEV_QUEUE_TYPE_TX, napi); +} + /** * igb_up - Open the interface and prepare it to handle traffic * @adapter: board private structure @@ -2106,6 +2122,7 @@ static void igb_check_swap_media(struct igb_adapter *adapter) int igb_up(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + struct napi_struct *napi; int i; /* hardware has been reset, we need to reload some things */ @@ -2113,8 +2130,11 @@ int igb_up(struct igb_adapter *adapter) clear_bit(__IGB_DOWN, &adapter->state); - for (i = 0; i < adapter->num_q_vectors; i++) - napi_enable(&(adapter->q_vector[i]->napi)); + for (i = 0; i < adapter->num_q_vectors; i++) { + napi = &adapter->q_vector[i]->napi; + napi_enable(napi); + igb_set_queue_napi(adapter, i, napi); + } if (adapter->flags & IGB_FLAG_HAS_MSIX) igb_configure_msix(adapter); @@ -2184,6 +2204,7 @@ void igb_down(struct igb_adapter *adapter) for (i = 0; i < adapter->num_q_vectors; i++) { if (adapter->q_vector[i]) { napi_synchronize(&adapter->q_vector[i]->napi); + igb_set_queue_napi(adapter, i, NULL); napi_disable(&adapter->q_vector[i]->napi); } } @@ -4116,8 +4137,9 @@ static int igb_sw_init(struct igb_adapter *adapter) static int __igb_open(struct net_device *netdev, bool resuming) { struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; + struct e1000_hw *hw = &adapter->hw; + struct napi_struct *napi; int err; int i; @@ -4169,8 +4191,11 @@ static int __igb_open(struct net_device *netdev, bool resuming) /* From here on the code is the same as igb_up() */ clear_bit(__IGB_DOWN, &adapter->state); - for (i = 0; i < adapter->num_q_vectors; i++) - napi_enable(&(adapter->q_vector[i]->napi)); + for (i = 0; i < adapter->num_q_vectors; i++) { + napi = &adapter->q_vector[i]->napi; + napi_enable(napi); + igb_set_queue_napi(adapter, i, napi); + } /* Clear any pending interrupts. */ rd32(E1000_TSICR); @@ -9677,8 +9702,11 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, if (state == pci_channel_io_perm_failure) return PCI_ERS_RESULT_DISCONNECT; + rtnl_lock(); if (netif_running(netdev)) igb_down(adapter); + rtnl_unlock(); + pci_disable_device(pdev); /* Request a slot reset. */ @@ -9737,16 +9765,21 @@ static void igb_io_resume(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); + rtnl_lock(); if (netif_running(netdev)) { if (!test_bit(__IGB_DOWN, &adapter->state)) { dev_dbg(&pdev->dev, "Resuming from non-fatal error, do nothing.\n"); + rtnl_unlock(); return; } + if (igb_up(adapter)) { dev_err(&pdev->dev, "igb_up failed after reset\n"); + rtnl_unlock(); return; } } + rtnl_unlock(); netif_device_attach(netdev); -- 2.51.0 From fc0fb1f116e97012d90bb77a6032972564ed8fd6 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Wed, 19 Mar 2025 11:26:41 +0100 Subject: [PATCH 15/16] igb: Add support for persistent NAPI config Use netif_napi_add_config() to assign persistent per-NAPI config. This is useful for preserving NAPI settings when changing queue counts or for user space programs using SO_INCOMING_NAPI_ID. Reviewed-by: Joe Damato Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Kurt Kanzenbach Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 0535cc72b11b..d9205573886e 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1197,7 +1197,8 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, return -ENOMEM; /* initialize NAPI */ - netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll); + netif_napi_add_config(adapter->netdev, &q_vector->napi, igb_poll, + v_idx); /* tie q_vector and adapter together */ adapter->q_vector[v_idx] = q_vector; -- 2.51.0 From a22ed15c99a052c6ea015b516acfd7e02bcdb995 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Wed, 19 Mar 2025 11:26:42 +0100 Subject: [PATCH 16/16] igb: Get rid of spurious interrupts When running the igc with XDP/ZC in busy polling mode with deferral of hard interrupts, interrupts still happen from time to time. That is caused by the igb task watchdog which triggers Rx interrupts periodically. That mechanism has been introduced to overcome skb/memory allocation failures [1]. So the Rx clean functions stop processing the Rx ring in case of such failure. The task watchdog triggers Rx interrupts periodically in the hope that memory became available in the mean time. The current behavior is undesirable for real time applications, because the driver induced Rx interrupts trigger also the softirq processing. However, all real time packets should be processed by the application which uses the busy polling method. Therefore, only trigger the Rx interrupts in case of real allocation failures. Introduce a new flag for signaling that condition. Follow the same logic as in commit 8dcf2c212078 ("igc: Get rid of spurious interrupts"). [1] - https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git/commit/?id=3be507547e6177e5c808544bd6a2efa2c7f1d436 Reviewed-by: Joe Damato Reviewed-by: Aleksandr Loktionov Tested-by: Sweta Kumari Signed-off-by: Kurt Kanzenbach Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb.h | 3 ++- drivers/net/ethernet/intel/igb/igb_main.c | 29 +++++++++++++++++++---- drivers/net/ethernet/intel/igb/igb_xsk.c | 1 + 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 79eca385a751..f34ead8243e9 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -391,7 +391,8 @@ enum e1000_ring_flags_t { IGB_RING_FLAG_RX_LB_VLAN_BSWAP, IGB_RING_FLAG_TX_CTX_IDX, IGB_RING_FLAG_TX_DETECT_HANG, - IGB_RING_FLAG_TX_DISABLED + IGB_RING_FLAG_TX_DISABLED, + IGB_RING_FLAG_RX_ALLOC_FAILED, }; #define ring_uses_large_buffer(ring) \ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index d9205573886e..9e9a5900e6e5 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5755,11 +5755,29 @@ no_wait: if (adapter->flags & IGB_FLAG_HAS_MSIX) { u32 eics = 0; - for (i = 0; i < adapter->num_q_vectors; i++) - eics |= adapter->q_vector[i]->eims_value; - wr32(E1000_EICS, eics); + for (i = 0; i < adapter->num_q_vectors; i++) { + struct igb_q_vector *q_vector = adapter->q_vector[i]; + struct igb_ring *rx_ring; + + if (!q_vector->rx.ring) + continue; + + rx_ring = adapter->rx_ring[q_vector->rx.ring->queue_index]; + + if (test_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { + eics |= q_vector->eims_value; + clear_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); + } + } + if (eics) + wr32(E1000_EICS, eics); } else { - wr32(E1000_ICS, E1000_ICS_RXDMT0); + struct igb_ring *rx_ring = adapter->rx_ring[0]; + + if (test_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags)) { + clear_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); + wr32(E1000_ICS, E1000_ICS_RXDMT0); + } } igb_spoof_check(adapter); @@ -9090,6 +9108,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) if (!xdp_res && !skb) { rx_ring->rx_stats.alloc_failed++; rx_buffer->pagecnt_bias++; + set_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); break; } @@ -9149,6 +9168,7 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, page = dev_alloc_pages(igb_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_failed++; + set_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); return false; } @@ -9165,6 +9185,7 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, __free_pages(page, igb_rx_pg_order(rx_ring)); rx_ring->rx_stats.alloc_failed++; + set_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); return false; } diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c b/drivers/net/ethernet/intel/igb/igb_xsk.c index 157d43787fa0..5cf67ba29269 100644 --- a/drivers/net/ethernet/intel/igb/igb_xsk.c +++ b/drivers/net/ethernet/intel/igb/igb_xsk.c @@ -415,6 +415,7 @@ int igb_clean_rx_irq_zc(struct igb_q_vector *q_vector, /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->rx_stats.alloc_failed++; + set_bit(IGB_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags); break; } -- 2.51.0