From 231646cb6a8cc4e94943daf223d72aefe242ec23 Mon Sep 17 00:00:00 2001 From: Nelson Escobar Date: Wed, 13 Nov 2024 23:56:34 +0000 Subject: [PATCH 01/16] enic: Make MSI-X I/O interrupts come after the other required ones The VIC hardware has a constraint that the MSIX interrupt used for errors be specified as a 7 bit number. Before this patch, it was allocated after the I/O interrupts, which would cause a problem if 128 or more I/O interrupts are in use. So make the required interrupts come before the I/O interrupts to guarantee the error interrupt offset never exceeds 7 bits. Co-developed-by: John Daley Signed-off-by: John Daley Co-developed-by: Satish Kharat Signed-off-by: Satish Kharat Reviewed-by: Simon Horman Signed-off-by: Nelson Escobar Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20241113-remove_vic_resource_limits-v4-2-a34cf8570c67@cisco.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cisco/enic/enic.h | 20 +++++++++++++++----- drivers/net/ethernet/cisco/enic/enic_res.c | 11 +++++++---- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 07459eac2592..ec83a273d1ca 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -280,18 +280,28 @@ static inline unsigned int enic_msix_wq_intr(struct enic *enic, return enic->cq[enic_cq_wq(enic, wq)].interrupt_offset; } -static inline unsigned int enic_msix_err_intr(struct enic *enic) -{ - return enic->rq_count + enic->wq_count; -} +/* MSIX interrupts are organized as the error interrupt, then the notify + * interrupt followed by all the I/O interrupts. The error interrupt needs + * to fit in 7 bits due to hardware constraints + */ +#define ENIC_MSIX_RESERVED_INTR 2 +#define ENIC_MSIX_ERR_INTR 0 +#define ENIC_MSIX_NOTIFY_INTR 1 +#define ENIC_MSIX_IO_INTR_BASE ENIC_MSIX_RESERVED_INTR +#define ENIC_MSIX_MIN_INTR (ENIC_MSIX_RESERVED_INTR + 2) #define ENIC_LEGACY_IO_INTR 0 #define ENIC_LEGACY_ERR_INTR 1 #define ENIC_LEGACY_NOTIFY_INTR 2 +static inline unsigned int enic_msix_err_intr(struct enic *enic) +{ + return ENIC_MSIX_ERR_INTR; +} + static inline unsigned int enic_msix_notify_intr(struct enic *enic) { - return enic->rq_count + enic->wq_count + 1; + return ENIC_MSIX_NOTIFY_INTR; } static inline bool enic_is_err_intr(struct enic *enic, int intr) diff --git a/drivers/net/ethernet/cisco/enic/enic_res.c b/drivers/net/ethernet/cisco/enic/enic_res.c index 60be09acb9fd..72b51e9d8d1a 100644 --- a/drivers/net/ethernet/cisco/enic/enic_res.c +++ b/drivers/net/ethernet/cisco/enic/enic_res.c @@ -221,9 +221,12 @@ void enic_init_vnic_resources(struct enic *enic) switch (intr_mode) { case VNIC_DEV_INTR_MODE_INTX: + error_interrupt_enable = 1; + error_interrupt_offset = ENIC_LEGACY_ERR_INTR; + break; case VNIC_DEV_INTR_MODE_MSIX: error_interrupt_enable = 1; - error_interrupt_offset = enic->intr_count - 2; + error_interrupt_offset = enic_msix_err_intr(enic); break; default: error_interrupt_enable = 0; @@ -249,15 +252,15 @@ void enic_init_vnic_resources(struct enic *enic) /* Init CQ resources * - * CQ[0 - n+m-1] point to INTR[0] for INTx, MSI - * CQ[0 - n+m-1] point to INTR[0 - n+m-1] for MSI-X + * All CQs point to INTR[0] for INTx, MSI + * CQ[i] point to INTR[ENIC_MSIX_IO_INTR_BASE + i] for MSI-X */ for (i = 0; i < enic->cq_count; i++) { switch (intr_mode) { case VNIC_DEV_INTR_MODE_MSIX: - interrupt_offset = i; + interrupt_offset = ENIC_MSIX_IO_INTR_BASE + i; break; default: interrupt_offset = 0; -- 2.51.0 From 5aee3324724a7a449a1f2bdaee209cfa80fa80b1 Mon Sep 17 00:00:00 2001 From: Nelson Escobar Date: Wed, 13 Nov 2024 23:56:35 +0000 Subject: [PATCH 02/16] enic: Save resource counts we read from HW Save the resources counts for wq,rq,cq, and interrupts in *_avail variables so that we don't lose the information when adjusting the counts we are actually using. Report the wq_avail and rq_avail as the channel maximums in 'ethtool -l' output. Co-developed-by: John Daley Signed-off-by: John Daley Co-developed-by: Satish Kharat Signed-off-by: Satish Kharat Signed-off-by: Nelson Escobar Reviewed-by: Simon Horman Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20241113-remove_vic_resource_limits-v4-3-a34cf8570c67@cisco.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cisco/enic/enic.h | 4 ++++ .../net/ethernet/cisco/enic/enic_ethtool.c | 4 ++-- drivers/net/ethernet/cisco/enic/enic_res.c | 19 ++++++++++++------- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index ec83a273d1ca..98d6e0b82552 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -206,23 +206,27 @@ struct enic { /* work queue cache line section */ ____cacheline_aligned struct enic_wq wq[ENIC_WQ_MAX]; + unsigned int wq_avail; unsigned int wq_count; u16 loop_enable; u16 loop_tag; /* receive queue cache line section */ ____cacheline_aligned struct enic_rq rq[ENIC_RQ_MAX]; + unsigned int rq_avail; unsigned int rq_count; struct vxlan_offload vxlan; struct napi_struct napi[ENIC_RQ_MAX + ENIC_WQ_MAX]; /* interrupt resource cache line section */ ____cacheline_aligned struct vnic_intr intr[ENIC_INTR_MAX]; + unsigned int intr_avail; unsigned int intr_count; u32 __iomem *legacy_pba; /* memory-mapped */ /* completion queue cache line section */ ____cacheline_aligned struct vnic_cq cq[ENIC_CQ_MAX]; + unsigned int cq_avail; unsigned int cq_count; struct enic_rfs_flw_tbl rfs_h; u32 rx_copybreak; diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index de07fa9374d2..95b071153fed 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -695,8 +695,8 @@ static void enic_get_channels(struct net_device *netdev, switch (vnic_dev_get_intr_mode(enic->vdev)) { case VNIC_DEV_INTR_MODE_MSIX: - channels->max_rx = ENIC_RQ_MAX; - channels->max_tx = ENIC_WQ_MAX; + channels->max_rx = min(enic->rq_avail, ENIC_RQ_MAX); + channels->max_tx = min(enic->wq_avail, ENIC_WQ_MAX); channels->rx_count = enic->rq_count; channels->tx_count = enic->wq_count; break; diff --git a/drivers/net/ethernet/cisco/enic/enic_res.c b/drivers/net/ethernet/cisco/enic/enic_res.c index 72b51e9d8d1a..126125199833 100644 --- a/drivers/net/ethernet/cisco/enic/enic_res.c +++ b/drivers/net/ethernet/cisco/enic/enic_res.c @@ -187,16 +187,21 @@ void enic_free_vnic_resources(struct enic *enic) void enic_get_res_counts(struct enic *enic) { - enic->wq_count = vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ); - enic->rq_count = vnic_dev_get_res_count(enic->vdev, RES_TYPE_RQ); - enic->cq_count = vnic_dev_get_res_count(enic->vdev, RES_TYPE_CQ); - enic->intr_count = vnic_dev_get_res_count(enic->vdev, - RES_TYPE_INTR_CTRL); + enic->wq_avail = vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ); + enic->rq_avail = vnic_dev_get_res_count(enic->vdev, RES_TYPE_RQ); + enic->cq_avail = vnic_dev_get_res_count(enic->vdev, RES_TYPE_CQ); + enic->intr_avail = vnic_dev_get_res_count(enic->vdev, + RES_TYPE_INTR_CTRL); + + enic->wq_count = enic->wq_avail; + enic->rq_count = enic->rq_avail; + enic->cq_count = enic->cq_avail; + enic->intr_count = enic->intr_avail; dev_info(enic_get_dev(enic), "vNIC resources avail: wq %d rq %d cq %d intr %d\n", - enic->wq_count, enic->rq_count, - enic->cq_count, enic->intr_count); + enic->wq_avail, enic->rq_avail, + enic->cq_avail, enic->intr_avail); } void enic_init_vnic_resources(struct enic *enic) -- 2.51.0 From a64e5492ca908bb4f8c286b5761507792a0e03cd Mon Sep 17 00:00:00 2001 From: Nelson Escobar Date: Wed, 13 Nov 2024 23:56:36 +0000 Subject: [PATCH 03/16] enic: Allocate arrays in enic struct based on VIC config Allocate wq, rq, cq, intr, and napi arrays based on the number of resources configured in the VIC. Co-developed-by: John Daley Signed-off-by: John Daley Co-developed-by: Satish Kharat Signed-off-by: Satish Kharat Signed-off-by: Nelson Escobar Reviewed-by: Simon Horman Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20241113-remove_vic_resource_limits-v4-4-a34cf8570c67@cisco.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cisco/enic/enic.h | 24 +++--- drivers/net/ethernet/cisco/enic/enic_main.c | 84 +++++++++++++++++++-- 2 files changed, 87 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 98d6e0b82552..10b7e02ba4d0 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -23,10 +23,8 @@ #define ENIC_BARS_MAX 6 -#define ENIC_WQ_MAX 8 -#define ENIC_RQ_MAX 8 -#define ENIC_CQ_MAX (ENIC_WQ_MAX + ENIC_RQ_MAX) -#define ENIC_INTR_MAX (ENIC_CQ_MAX + 2) +#define ENIC_WQ_MAX 256 +#define ENIC_RQ_MAX 256 #define ENIC_WQ_NAPI_BUDGET 256 @@ -184,8 +182,8 @@ struct enic { struct work_struct reset; struct work_struct tx_hang_reset; struct work_struct change_mtu_work; - struct msix_entry msix_entry[ENIC_INTR_MAX]; - struct enic_msix_entry msix[ENIC_INTR_MAX]; + struct msix_entry *msix_entry; + struct enic_msix_entry *msix; u32 msg_enable; spinlock_t devcmd_lock; u8 mac_addr[ETH_ALEN]; @@ -204,28 +202,24 @@ struct enic { bool enic_api_busy; struct enic_port_profile *pp; - /* work queue cache line section */ - ____cacheline_aligned struct enic_wq wq[ENIC_WQ_MAX]; + struct enic_wq *wq; unsigned int wq_avail; unsigned int wq_count; u16 loop_enable; u16 loop_tag; - /* receive queue cache line section */ - ____cacheline_aligned struct enic_rq rq[ENIC_RQ_MAX]; + struct enic_rq *rq; unsigned int rq_avail; unsigned int rq_count; struct vxlan_offload vxlan; - struct napi_struct napi[ENIC_RQ_MAX + ENIC_WQ_MAX]; + struct napi_struct *napi; - /* interrupt resource cache line section */ - ____cacheline_aligned struct vnic_intr intr[ENIC_INTR_MAX]; + struct vnic_intr *intr; unsigned int intr_avail; unsigned int intr_count; u32 __iomem *legacy_pba; /* memory-mapped */ - /* completion queue cache line section */ - ____cacheline_aligned struct vnic_cq cq[ENIC_CQ_MAX]; + struct vnic_cq *cq; unsigned int cq_avail; unsigned int cq_count; struct enic_rfs_flw_tbl rfs_h; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index eb00058b6c68..564202e81a71 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -940,7 +940,7 @@ static void enic_get_stats(struct net_device *netdev, net_stats->rx_errors = stats->rx.rx_errors; net_stats->multicast = stats->rx.rx_multicast_frames_ok; - for (i = 0; i < ENIC_RQ_MAX; i++) { + for (i = 0; i < enic->rq_count; i++) { struct enic_rq_stats *rqs = &enic->rq[i].stats; if (!enic->rq[i].vrq.ctrl) @@ -1792,7 +1792,7 @@ static void enic_free_intr(struct enic *enic) free_irq(enic->pdev->irq, enic); break; case VNIC_DEV_INTR_MODE_MSIX: - for (i = 0; i < ARRAY_SIZE(enic->msix); i++) + for (i = 0; i < enic->intr_count; i++) if (enic->msix[i].requested) free_irq(enic->msix_entry[i].vector, enic->msix[i].devid); @@ -1859,7 +1859,7 @@ static int enic_request_intr(struct enic *enic) enic->msix[intr].isr = enic_isr_msix_notify; enic->msix[intr].devid = enic; - for (i = 0; i < ARRAY_SIZE(enic->msix); i++) + for (i = 0; i < enic->intr_count; i++) enic->msix[i].requested = 0; for (i = 0; i < enic->intr_count; i++) { @@ -2456,8 +2456,7 @@ static int enic_set_intr_mode(struct enic *enic) * (the last INTR is used for notifications) */ - BUG_ON(ARRAY_SIZE(enic->msix_entry) < n + m + 2); - for (i = 0; i < n + m + 2; i++) + for (i = 0; i < enic->intr_avail; i++) enic->msix_entry[i].entry = i; /* Use multiple RQs if RSS is enabled @@ -2674,6 +2673,71 @@ static const struct netdev_stat_ops enic_netdev_stat_ops = { .get_base_stats = enic_get_base_stats, }; +static void enic_free_enic_resources(struct enic *enic) +{ + kfree(enic->wq); + enic->wq = NULL; + + kfree(enic->rq); + enic->rq = NULL; + + kfree(enic->cq); + enic->cq = NULL; + + kfree(enic->napi); + enic->napi = NULL; + + kfree(enic->msix_entry); + enic->msix_entry = NULL; + + kfree(enic->msix); + enic->msix = NULL; + + kfree(enic->intr); + enic->intr = NULL; +} + +static int enic_alloc_enic_resources(struct enic *enic) +{ + enic->wq = kcalloc(enic->wq_avail, sizeof(struct enic_wq), GFP_KERNEL); + if (!enic->wq) + goto free_queues; + + enic->rq = kcalloc(enic->rq_avail, sizeof(struct enic_rq), GFP_KERNEL); + if (!enic->rq) + goto free_queues; + + enic->cq = kcalloc(enic->cq_avail, sizeof(struct vnic_cq), GFP_KERNEL); + if (!enic->cq) + goto free_queues; + + enic->napi = kcalloc(enic->wq_avail + enic->rq_avail, + sizeof(struct napi_struct), GFP_KERNEL); + if (!enic->napi) + goto free_queues; + + enic->msix_entry = kcalloc(enic->intr_avail, sizeof(struct msix_entry), + GFP_KERNEL); + if (!enic->msix_entry) + goto free_queues; + + enic->msix = kcalloc(enic->intr_avail, sizeof(struct enic_msix_entry), + GFP_KERNEL); + if (!enic->msix) + goto free_queues; + + enic->intr = kcalloc(enic->intr_avail, sizeof(struct vnic_intr), + GFP_KERNEL); + if (!enic->intr) + goto free_queues; + + return 0; + +free_queues: + enic_free_enic_resources(enic); + return -ENOMEM; +} + static void enic_dev_deinit(struct enic *enic) { unsigned int i; @@ -2691,6 +2755,7 @@ static void enic_dev_deinit(struct enic *enic) enic_free_vnic_resources(enic); enic_clear_intr_mode(enic); enic_free_affinity_hint(enic); + enic_free_enic_resources(enic); } static void enic_kdump_kernel_config(struct enic *enic) @@ -2734,6 +2799,12 @@ static int enic_dev_init(struct enic *enic) enic_get_res_counts(enic); + err = enic_alloc_enic_resources(enic); + if (err) { + dev_err(dev, "Failed to allocate enic resources\n"); + return err; + } + /* modify resource count if we are in kdump_kernel */ enic_kdump_kernel_config(enic); @@ -2746,7 +2817,7 @@ static int enic_dev_init(struct enic *enic) if (err) { dev_err(dev, "Failed to set intr mode based on resource " "counts and system capabilities, aborting\n"); - return err; + goto err_out_free_vnic_resources; } /* Allocate and configure vNIC resources @@ -2788,6 +2859,7 @@ err_out_free_vnic_resources: enic_free_affinity_hint(enic); enic_clear_intr_mode(enic); enic_free_vnic_resources(enic); + enic_free_enic_resources(enic); return err; } -- 2.51.0 From cc94d6c4d40c070ca95db8e43e92c4ee1fff5009 Mon Sep 17 00:00:00 2001 From: Nelson Escobar Date: Wed, 13 Nov 2024 23:56:37 +0000 Subject: [PATCH 04/16] enic: Adjust used MSI-X wq/rq/cq/interrupt resources in a more robust way Instead of failing to use MSI-X if resources aren't configured exactly right, use the resources we do have. Since we could start using large numbers of rq resources, we do limit the rq count to what netif_get_num_default_rss_queues() recommends. Co-developed-by: John Daley Signed-off-by: John Daley Co-developed-by: Satish Kharat Signed-off-by: Satish Kharat Reviewed-by: Simon Horman Signed-off-by: Nelson Escobar Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20241113-remove_vic_resource_limits-v4-5-a34cf8570c67@cisco.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cisco/enic/enic_main.c | 116 ++++++++++---------- 1 file changed, 57 insertions(+), 59 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 564202e81a71..8b07899462d0 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2442,85 +2442,86 @@ static void enic_tx_hang_reset(struct work_struct *work) static int enic_set_intr_mode(struct enic *enic) { - unsigned int n = min_t(unsigned int, enic->rq_count, ENIC_RQ_MAX); - unsigned int m = min_t(unsigned int, enic->wq_count, ENIC_WQ_MAX); unsigned int i; + int num_intr; /* Set interrupt mode (INTx, MSI, MSI-X) depending * on system capabilities. * - * Try MSI-X first + * We need a minimum of 1 RQ, 1 WQ, and 2 CQs * - * We need n RQs, m WQs, n+m CQs, and n+m+2 INTRs - * (the second to last INTR is used for WQ/RQ errors) - * (the last INTR is used for notifications) */ - for (i = 0; i < enic->intr_avail; i++) - enic->msix_entry[i].entry = i; - - /* Use multiple RQs if RSS is enabled - */ - - if (ENIC_SETTING(enic, RSS) && - enic->config.intr_mode < 1 && - enic->rq_count >= n && - enic->wq_count >= m && - enic->cq_count >= n + m && - enic->intr_count >= n + m + 2) { - - if (pci_enable_msix_range(enic->pdev, enic->msix_entry, - n + m + 2, n + m + 2) > 0) { - - enic->rq_count = n; - enic->wq_count = m; - enic->cq_count = n + m; - enic->intr_count = n + m + 2; - - vnic_dev_set_intr_mode(enic->vdev, - VNIC_DEV_INTR_MODE_MSIX); - - return 0; - } + if (enic->rq_avail < 1 || enic->wq_avail < 1 || enic->cq_avail < 2) { + dev_err(enic_get_dev(enic), + "Not enough resources available rq: %d wq: %d cq: %d\n", + enic->rq_avail, enic->wq_avail, + enic->cq_avail); + return -ENOSPC; } + /* if RSS isn't set, then we can only use one RQ */ + if (!ENIC_SETTING(enic, RSS)) + enic->rq_avail = 1; + + /* Try MSI-X first */ if (enic->config.intr_mode < 1 && - enic->rq_count >= 1 && - enic->wq_count >= m && - enic->cq_count >= 1 + m && - enic->intr_count >= 1 + m + 2) { - if (pci_enable_msix_range(enic->pdev, enic->msix_entry, - 1 + m + 2, 1 + m + 2) > 0) { - - enic->rq_count = 1; - enic->wq_count = m; - enic->cq_count = 1 + m; - enic->intr_count = 1 + m + 2; + enic->intr_avail >= ENIC_MSIX_MIN_INTR) { + unsigned int max_queues; + unsigned int rq_default; + unsigned int rq_avail; + unsigned int wq_avail; + + for (i = 0; i < enic->intr_avail; i++) + enic->msix_entry[i].entry = i; + + num_intr = pci_enable_msix_range(enic->pdev, enic->msix_entry, + ENIC_MSIX_MIN_INTR, + enic->intr_avail); + if (num_intr > 0) { + wq_avail = min(enic->wq_avail, ENIC_WQ_MAX); + rq_default = netif_get_num_default_rss_queues(); + rq_avail = min3(enic->rq_avail, ENIC_RQ_MAX, rq_default); + max_queues = min(enic->cq_avail, + enic->intr_avail - ENIC_MSIX_RESERVED_INTR); + + if (wq_avail + rq_avail <= max_queues) { + enic->rq_count = rq_avail; + enic->wq_count = wq_avail; + } else { + /* recalculate wq/rq count */ + if (rq_avail < wq_avail) { + enic->rq_count = min(rq_avail, max_queues / 2); + enic->wq_count = max_queues - enic->rq_count; + } else { + enic->wq_count = min(wq_avail, max_queues / 2); + enic->rq_count = max_queues - enic->wq_count; + } + } + enic->cq_count = enic->rq_count + enic->wq_count; + enic->intr_count = enic->cq_count + ENIC_MSIX_RESERVED_INTR; vnic_dev_set_intr_mode(enic->vdev, - VNIC_DEV_INTR_MODE_MSIX); - + VNIC_DEV_INTR_MODE_MSIX); + enic->intr_avail = num_intr; return 0; } } /* Next try MSI * - * We need 1 RQ, 1 WQ, 2 CQs, and 1 INTR + * We need 1 INTR */ if (enic->config.intr_mode < 2 && - enic->rq_count >= 1 && - enic->wq_count >= 1 && - enic->cq_count >= 2 && - enic->intr_count >= 1 && + enic->intr_avail >= 1 && !pci_enable_msi(enic->pdev)) { enic->rq_count = 1; enic->wq_count = 1; enic->cq_count = 2; enic->intr_count = 1; - + enic->intr_avail = 1; vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_MSI); return 0; @@ -2528,23 +2529,20 @@ static int enic_set_intr_mode(struct enic *enic) /* Next try INTx * - * We need 1 RQ, 1 WQ, 2 CQs, and 3 INTRs + * We need 3 INTRs * (the first INTR is used for WQ/RQ) * (the second INTR is used for WQ/RQ errors) * (the last INTR is used for notifications) */ if (enic->config.intr_mode < 3 && - enic->rq_count >= 1 && - enic->wq_count >= 1 && - enic->cq_count >= 2 && - enic->intr_count >= 3) { + enic->intr_avail >= 3) { enic->rq_count = 1; enic->wq_count = 1; enic->cq_count = 2; enic->intr_count = 3; - + enic->intr_avail = 3; vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_INTX); return 0; @@ -2762,8 +2760,8 @@ static void enic_kdump_kernel_config(struct enic *enic) { if (is_kdump_kernel()) { dev_info(enic_get_dev(enic), "Running from within kdump kernel. Using minimal resources\n"); - enic->rq_count = 1; - enic->wq_count = 1; + enic->rq_avail = 1; + enic->wq_avail = 1; enic->config.rq_desc_count = ENIC_MIN_RQ_DESCS; enic->config.wq_desc_count = ENIC_MIN_WQ_DESCS; enic->config.mtu = min_t(u16, 1500, enic->config.mtu); -- 2.51.0 From 374f6c04df8e03c6f60eafbd1e02ac875017b85e Mon Sep 17 00:00:00 2001 From: Nelson Escobar Date: Wed, 13 Nov 2024 23:56:38 +0000 Subject: [PATCH 05/16] enic: Move enic resource adjustments to separate function Move the enic resource adjustments out of enic_set_intr_mode() and into its own function, enic_adjust_resources(). Co-developed-by: John Daley Signed-off-by: John Daley Co-developed-by: Satish Kharat Signed-off-by: Satish Kharat Reviewed-by: Simon Horman Signed-off-by: Nelson Escobar Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20241113-remove_vic_resource_limits-v4-6-a34cf8570c67@cisco.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cisco/enic/enic_main.c | 127 +++++++++++--------- 1 file changed, 70 insertions(+), 57 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 8b07899462d0..84e85c9e2bf5 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2448,30 +2448,11 @@ static int enic_set_intr_mode(struct enic *enic) /* Set interrupt mode (INTx, MSI, MSI-X) depending * on system capabilities. * - * We need a minimum of 1 RQ, 1 WQ, and 2 CQs - * + * Try MSI-X first */ - if (enic->rq_avail < 1 || enic->wq_avail < 1 || enic->cq_avail < 2) { - dev_err(enic_get_dev(enic), - "Not enough resources available rq: %d wq: %d cq: %d\n", - enic->rq_avail, enic->wq_avail, - enic->cq_avail); - return -ENOSPC; - } - - /* if RSS isn't set, then we can only use one RQ */ - if (!ENIC_SETTING(enic, RSS)) - enic->rq_avail = 1; - - /* Try MSI-X first */ if (enic->config.intr_mode < 1 && enic->intr_avail >= ENIC_MSIX_MIN_INTR) { - unsigned int max_queues; - unsigned int rq_default; - unsigned int rq_avail; - unsigned int wq_avail; - for (i = 0; i < enic->intr_avail; i++) enic->msix_entry[i].entry = i; @@ -2479,28 +2460,6 @@ static int enic_set_intr_mode(struct enic *enic) ENIC_MSIX_MIN_INTR, enic->intr_avail); if (num_intr > 0) { - wq_avail = min(enic->wq_avail, ENIC_WQ_MAX); - rq_default = netif_get_num_default_rss_queues(); - rq_avail = min3(enic->rq_avail, ENIC_RQ_MAX, rq_default); - max_queues = min(enic->cq_avail, - enic->intr_avail - ENIC_MSIX_RESERVED_INTR); - - if (wq_avail + rq_avail <= max_queues) { - enic->rq_count = rq_avail; - enic->wq_count = wq_avail; - } else { - /* recalculate wq/rq count */ - if (rq_avail < wq_avail) { - enic->rq_count = min(rq_avail, max_queues / 2); - enic->wq_count = max_queues - enic->rq_count; - } else { - enic->wq_count = min(wq_avail, max_queues / 2); - enic->rq_count = max_queues - enic->wq_count; - } - } - enic->cq_count = enic->rq_count + enic->wq_count; - enic->intr_count = enic->cq_count + ENIC_MSIX_RESERVED_INTR; - vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_MSIX); enic->intr_avail = num_intr; @@ -2516,14 +2475,8 @@ static int enic_set_intr_mode(struct enic *enic) if (enic->config.intr_mode < 2 && enic->intr_avail >= 1 && !pci_enable_msi(enic->pdev)) { - - enic->rq_count = 1; - enic->wq_count = 1; - enic->cq_count = 2; - enic->intr_count = 1; enic->intr_avail = 1; vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_MSI); - return 0; } @@ -2537,14 +2490,8 @@ static int enic_set_intr_mode(struct enic *enic) if (enic->config.intr_mode < 3 && enic->intr_avail >= 3) { - - enic->rq_count = 1; - enic->wq_count = 1; - enic->cq_count = 2; - enic->intr_count = 3; enic->intr_avail = 3; vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_INTX); - return 0; } @@ -2569,6 +2516,67 @@ static void enic_clear_intr_mode(struct enic *enic) vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN); } +static int enic_adjust_resources(struct enic *enic) +{ + unsigned int max_queues; + unsigned int rq_default; + unsigned int rq_avail; + unsigned int wq_avail; + + if (enic->rq_avail < 1 || enic->wq_avail < 1 || enic->cq_avail < 2) { + dev_err(enic_get_dev(enic), + "Not enough resources available rq: %d wq: %d cq: %d\n", + enic->rq_avail, enic->wq_avail, + enic->cq_avail); + return -ENOSPC; + } + + /* if RSS isn't set, then we can only use one RQ */ + if (!ENIC_SETTING(enic, RSS)) + enic->rq_avail = 1; + + switch (vnic_dev_get_intr_mode(enic->vdev)) { + case VNIC_DEV_INTR_MODE_INTX: + case VNIC_DEV_INTR_MODE_MSI: + enic->rq_count = 1; + enic->wq_count = 1; + enic->cq_count = 2; + enic->intr_count = enic->intr_avail; + break; + case VNIC_DEV_INTR_MODE_MSIX: + /* Adjust the number of wqs/rqs/cqs/interrupts that will be + * used based on which resource is the most constrained + */ + wq_avail = min(enic->wq_avail, ENIC_WQ_MAX); + rq_default = netif_get_num_default_rss_queues(); + rq_avail = min3(enic->rq_avail, ENIC_RQ_MAX, rq_default); + max_queues = min(enic->cq_avail, + enic->intr_avail - ENIC_MSIX_RESERVED_INTR); + if (wq_avail + rq_avail <= max_queues) { + enic->rq_count = rq_avail; + enic->wq_count = wq_avail; + } else { + /* recalculate wq/rq count */ + if (rq_avail < wq_avail) { + enic->rq_count = min(rq_avail, max_queues / 2); + enic->wq_count = max_queues - enic->rq_count; + } else { + enic->wq_count = min(wq_avail, max_queues / 2); + enic->rq_count = max_queues - enic->wq_count; + } + } + enic->cq_count = enic->rq_count + enic->wq_count; + enic->intr_count = enic->cq_count + ENIC_MSIX_RESERVED_INTR; + + break; + default: + dev_err(enic_get_dev(enic), "Unknown interrupt mode\n"); + return -EINVAL; + } + + return 0; +} + static void enic_get_queue_stats_rx(struct net_device *dev, int idx, struct netdev_queue_stats_rx *rxs) { @@ -2807,9 +2815,7 @@ static int enic_dev_init(struct enic *enic) */ enic_kdump_kernel_config(enic); - /* Set interrupt mode based on resource counts and system - * capabilities - */ + /* Set interrupt mode based on system capabilities */ err = enic_set_intr_mode(enic); if (err) { @@ -2818,6 +2824,13 @@ static int enic_dev_init(struct enic *enic) goto err_out_free_vnic_resources; } + /* Adjust resource counts based on most constrained resources */ + err = enic_adjust_resources(enic); + if (err) { + dev_err(dev, "Failed to adjust resources\n"); + goto err_out_free_vnic_resources; + } + /* Allocate and configure vNIC resources */ -- 2.51.0 From a28ccf1d6c102c8d874e8d75d676a2f870b75fa9 Mon Sep 17 00:00:00 2001 From: Nelson Escobar Date: Wed, 13 Nov 2024 23:56:39 +0000 Subject: [PATCH 06/16] enic: Move kdump check into enic_adjust_resources() Move the kdump check into enic_adjust_resources() so that everything that modifies resources is in the same function. Co-developed-by: John Daley Signed-off-by: John Daley Co-developed-by: Satish Kharat Signed-off-by: Satish Kharat Reviewed-by: Simon Horman Signed-off-by: Nelson Escobar Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20241113-remove_vic_resource_limits-v4-7-a34cf8570c67@cisco.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cisco/enic/enic_main.c | 25 ++++++++------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 84e85c9e2bf5..9913952ccb42 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2531,6 +2531,15 @@ static int enic_adjust_resources(struct enic *enic) return -ENOSPC; } + if (is_kdump_kernel()) { + dev_info(enic_get_dev(enic), "Running from within kdump kernel. Using minimal resources\n"); + enic->rq_avail = 1; + enic->wq_avail = 1; + enic->config.rq_desc_count = ENIC_MIN_RQ_DESCS; + enic->config.wq_desc_count = ENIC_MIN_WQ_DESCS; + enic->config.mtu = min_t(u16, 1500, enic->config.mtu); + } + /* if RSS isn't set, then we can only use one RQ */ if (!ENIC_SETTING(enic, RSS)) enic->rq_avail = 1; @@ -2764,18 +2773,6 @@ static void enic_dev_deinit(struct enic *enic) enic_free_enic_resources(enic); } -static void enic_kdump_kernel_config(struct enic *enic) -{ - if (is_kdump_kernel()) { - dev_info(enic_get_dev(enic), "Running from within kdump kernel. Using minimal resources\n"); - enic->rq_avail = 1; - enic->wq_avail = 1; - enic->config.rq_desc_count = ENIC_MIN_RQ_DESCS; - enic->config.wq_desc_count = ENIC_MIN_WQ_DESCS; - enic->config.mtu = min_t(u16, 1500, enic->config.mtu); - } -} - static int enic_dev_init(struct enic *enic) { struct device *dev = enic_get_dev(enic); @@ -2811,10 +2808,6 @@ static int enic_dev_init(struct enic *enic) return err; } - /* modify resource count if we are in kdump_kernel - */ - enic_kdump_kernel_config(enic); - /* Set interrupt mode based on system capabilities */ err = enic_set_intr_mode(enic); -- 2.51.0 From e51edeaf3506654ebd62c16e0ddf58da271b5200 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Wed, 13 Nov 2024 18:46:44 +0000 Subject: [PATCH 07/16] net/netlink: Correct the comment on netlink message max cap Since commit d35c99ff77ec ("netlink: do not enter direct reclaim from netlink_dump()") the cap is 32KiB. Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://patch.msgid.link/20241113-tcp-md5-diag-prep-v2-5-00a2a7feb1fa@gmail.com Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2ea4763a2004..8c78b29683a6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2264,7 +2264,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken) goto errout_skb; /* NLMSG_GOODSIZE is small to avoid high order allocations being - * required, but it makes sense to _attempt_ a 16K bytes allocation + * required, but it makes sense to _attempt_ a 32KiB allocation * to reduce number of system calls on dump operations, if user * ever provided a big enough buffer. */ @@ -2286,7 +2286,7 @@ static int netlink_dump(struct sock *sk, bool lock_taken) goto errout_skb; /* Trim skb to allocated size. User is expected to provide buffer as - * large as max(min_dump_alloc, 16KiB (mac_recvmsg_len capped at + * large as max(min_dump_alloc, 32KiB (max_recvmsg_len capped at * netlink_recvmsg())). dump will pack as many smaller messages as * could fit within the allocated skb. skb is typically allocated * with larger space than required (could be as much as near 2x the -- 2.51.0 From 11ee317d883ef111b8c36228437eaffea7b49bbc Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 14 Nov 2024 10:20:12 +0000 Subject: [PATCH 08/16] octeontx2-pf: Fix spelling mistake "reprentator" -> "representor" There is a spelling mistake in a NL_SET_ERR_MSG_MOD error message. Fix it. Signed-off-by: Colin Ian King Link: https://patch.msgid.link/20241114102012.1868514-1-colin.i.king@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/rep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c index ae58d0601b45..232b10740c13 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c @@ -687,7 +687,7 @@ int rvu_rep_create(struct otx2_nic *priv, struct netlink_ext_ack *extack) err = register_netdev(ndev); if (err) { NL_SET_ERR_MSG_MOD(extack, - "PFVF reprentator registration failed"); + "PFVF representor registration failed"); free_netdev(ndev); goto exit; } -- 2.51.0 From 221a9c1df790fa711d65daf5ba05d0addc279153 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 14 Nov 2024 03:00:11 -0800 Subject: [PATCH 09/16] net: netpoll: Individualize the skb pool The current implementation of the netpoll system uses a global skb pool, which can lead to inefficient memory usage and waste when targets are disabled or no longer in use. This can result in a significant amount of memory being unnecessarily allocated and retained, potentially causing performance issues and limiting the availability of resources for other system components. Modify the netpoll system to assign a skb pool to each target instead of using a global one. This approach allows for more fine-grained control over memory allocation and deallocation, ensuring that resources are only allocated and retained as needed. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20241114-skb_buffers_v2-v3-1-9be9f52a8b69@debian.org Signed-off-by: Jakub Kicinski --- include/linux/netpoll.h | 1 + net/core/netpoll.c | 31 +++++++++++++------------------ 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index cd4e28db0cbd..77635b885c18 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -32,6 +32,7 @@ struct netpoll { bool ipv6; u16 local_port, remote_port; u8 remote_mac[ETH_ALEN]; + struct sk_buff_head skb_pool; }; struct netpoll_info { diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 94b7f07a952f..719c9aae845f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -45,9 +45,6 @@ #define MAX_UDP_CHUNK 1460 #define MAX_SKBS 32 - -static struct sk_buff_head skb_pool; - #define USEC_PER_POLL 50 #define MAX_SKB_SIZE \ @@ -234,20 +231,23 @@ void netpoll_poll_enable(struct net_device *dev) up(&ni->dev_lock); } -static void refill_skbs(void) +static void refill_skbs(struct netpoll *np) { + struct sk_buff_head *skb_pool; struct sk_buff *skb; unsigned long flags; - spin_lock_irqsave(&skb_pool.lock, flags); - while (skb_pool.qlen < MAX_SKBS) { + skb_pool = &np->skb_pool; + + spin_lock_irqsave(&skb_pool->lock, flags); + while (skb_pool->qlen < MAX_SKBS) { skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); if (!skb) break; - __skb_queue_tail(&skb_pool, skb); + __skb_queue_tail(skb_pool, skb); } - spin_unlock_irqrestore(&skb_pool.lock, flags); + spin_unlock_irqrestore(&skb_pool->lock, flags); } static void zap_completion_queue(void) @@ -284,12 +284,12 @@ static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) struct sk_buff *skb; zap_completion_queue(); - refill_skbs(); + refill_skbs(np); repeat: skb = alloc_skb(len, GFP_ATOMIC); if (!skb) - skb = skb_dequeue(&skb_pool); + skb = skb_dequeue(&np->skb_pool); if (!skb) { if (++count < 10) { @@ -673,6 +673,8 @@ int netpoll_setup(struct netpoll *np) struct in_device *in_dev; int err; + skb_queue_head_init(&np->skb_pool); + rtnl_lock(); if (np->dev_name[0]) { struct net *net = current->nsproxy->net_ns; @@ -773,7 +775,7 @@ put_noaddr: } /* fill up the skb queue */ - refill_skbs(); + refill_skbs(np); err = __netpoll_setup(np, ndev); if (err) @@ -792,13 +794,6 @@ unlock: } EXPORT_SYMBOL(netpoll_setup); -static int __init netpoll_init(void) -{ - skb_queue_head_init(&skb_pool); - return 0; -} -core_initcall(netpoll_init); - static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) { struct netpoll_info *npinfo = -- 2.51.0 From 6c59f16f1770481a6ee684720ec55b1e38b3a4b2 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 14 Nov 2024 03:00:12 -0800 Subject: [PATCH 10/16] net: netpoll: flush skb pool during cleanup The netpoll subsystem maintains a pool of 32 pre-allocated SKBs per instance, but these SKBs are not freed when the netpoll user is brought down. This leads to memory waste as these buffers remain allocated but unused. Add skb_pool_flush() to properly clean up these SKBs when netconsole is terminated, improving memory efficiency. Signed-off-by: Breno Leitao Link: https://patch.msgid.link/20241114-skb_buffers_v2-v3-2-9be9f52a8b69@debian.org Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 719c9aae845f..00e1e4a32902 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -531,6 +531,14 @@ static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr) return -1; } +static void skb_pool_flush(struct netpoll *np) +{ + struct sk_buff_head *skb_pool; + + skb_pool = &np->skb_pool; + skb_queue_purge_reason(skb_pool, SKB_CONSUMED); +} + int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; @@ -779,10 +787,12 @@ put_noaddr: err = __netpoll_setup(np, ndev); if (err) - goto put; + goto flush; rtnl_unlock(); return 0; +flush: + skb_pool_flush(np); put: DEBUG_NET_WARN_ON_ONCE(np->dev); if (ip_overwritten) @@ -830,6 +840,8 @@ void __netpoll_cleanup(struct netpoll *np) call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info); } else RCU_INIT_POINTER(np->dev->npinfo, NULL); + + skb_pool_flush(np); } EXPORT_SYMBOL_GPL(__netpoll_cleanup); -- 2.51.0 From fdb53791195ce8fe99ba5e538689f5b3e5968ced Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Thu, 14 Nov 2024 19:25:48 +0800 Subject: [PATCH 11/16] rtase: Modify the name of the goto label Modify the name of the goto label in rtase_init_one(). Signed-off-by: Justin Lai Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241114112549.376101-2-justinlai0215@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/rtase/rtase_main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index f8777b7663d3..874994d9ceb9 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -2115,7 +2115,7 @@ static int rtase_init_one(struct pci_dev *pdev, ret = rtase_alloc_interrupt(pdev, tp); if (ret < 0) { dev_err(&pdev->dev, "unable to alloc MSIX/MSI\n"); - goto err_out_1; + goto err_out_del_napi; } rtase_init_netdev_ops(dev); @@ -2148,7 +2148,7 @@ static int rtase_init_one(struct pci_dev *pdev, GFP_KERNEL); if (!tp->tally_vaddr) { ret = -ENOMEM; - goto err_out; + goto err_out_free_dma; } rtase_tally_counter_clear(tp); @@ -2159,13 +2159,13 @@ static int rtase_init_one(struct pci_dev *pdev, ret = register_netdev(dev); if (ret != 0) - goto err_out; + goto err_out_free_dma; netdev_dbg(dev, "%pM, IRQ %d\n", dev->dev_addr, dev->irq); return 0; -err_out: +err_out_free_dma: if (tp->tally_vaddr) { dma_free_coherent(&pdev->dev, sizeof(*tp->tally_vaddr), @@ -2175,7 +2175,7 @@ err_out: tp->tally_vaddr = NULL; } -err_out_1: +err_out_del_napi: for (i = 0; i < tp->int_nums; i++) { ivec = &tp->int_vector[i]; netif_napi_del(&ivec->napi); -- 2.51.0 From 39007e1c1c7cb28104bf2b8f1f3e73aee3dea4c4 Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Thu, 14 Nov 2024 19:25:49 +0800 Subject: [PATCH 12/16] rtase: Modify the content format of the enum rtase_registers Remove unnecessary spaces. Signed-off-by: Justin Lai Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241114112549.376101-3-justinlai0215@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/rtase/rtase.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/rtase/rtase.h b/drivers/net/ethernet/realtek/rtase/rtase.h index 583c33930f88..942f1e531a85 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase.h +++ b/drivers/net/ethernet/realtek/rtase/rtase.h @@ -170,7 +170,7 @@ enum rtase_registers { RTASE_INT_MITI_TX = 0x0A00, RTASE_INT_MITI_RX = 0x0A80, - RTASE_VLAN_ENTRY_0 = 0xAC80, + RTASE_VLAN_ENTRY_0 = 0xAC80, }; enum rtase_desc_status_bit { -- 2.51.0 From 4b42fbc6bd8f73d9ded535d8c61ccaa837ff3bd4 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 14 Nov 2024 15:09:53 +0100 Subject: [PATCH 13/16] ndo_fdb_add: Add a parameter to report whether notification was sent Currently when FDB entries are added to or deleted from a VXLAN netdevice, the VXLAN driver emits one notification, including the VXLAN-specific attributes. The core however always sends a notification as well, a generic one. Thus two notifications are unnecessarily sent for these operations. A similar situation comes up with bridge driver, which also emits notifications on its own: # ip link add name vx type vxlan id 1000 dstport 4789 # bridge monitor fdb & [1] 1981693 # bridge fdb add de:ad:be:ef:13:37 dev vx self dst 192.0.2.1 de:ad:be:ef:13:37 dev vx dst 192.0.2.1 self permanent de:ad:be:ef:13:37 dev vx self permanent In order to prevent this duplicity, add a paremeter to ndo_fdb_add, bool *notified. The flag is primed to false, and if the callee sends a notification on its own, it sets it to true, thus informing the core that it should not generate another notification. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/cbf6ae8195e85cbf922f8058ce4eba770f3b71ed.1731589511.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 ++- drivers/net/ethernet/intel/ice/ice_main.c | 4 +++- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/mscc/ocelot_net.c | 2 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 2 +- drivers/net/macvlan.c | 2 +- drivers/net/vxlan/vxlan_core.c | 5 ++++- include/linux/netdevice.h | 5 ++++- net/bridge/br_fdb.c | 12 +++++++----- net/bridge/br_private.h | 2 +- net/core/rtnetlink.c | 9 ++++++--- 12 files changed, 32 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 55fb362eb508..ab5febf83ec3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13095,12 +13095,13 @@ static int i40e_get_phys_port_id(struct net_device *netdev, * @addr: the MAC address entry being added * @vid: VLAN ID * @flags: instructions from stack about fdb operation + * @notified: whether notification was emitted * @extack: netlink extended ack, unused currently */ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags, + u16 flags, bool *notified, struct netlink_ext_ack *extack) { struct i40e_netdev_priv *np = netdev_priv(dev); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index a6f586f9bfd1..c875036f654b 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6125,12 +6125,14 @@ ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate) * @addr: the MAC address entry being added * @vid: VLAN ID * @flags: instructions from stack about fdb operation + * @notified: whether notification was emitted * @extack: netlink extended ack */ static int ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags, struct netlink_ext_ack __always_unused *extack) + u16 flags, bool *notified, + struct netlink_ext_ack __always_unused *extack) { int err; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index f1d088168723..f0528bd13184 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2486,7 +2486,7 @@ static int igb_set_features(struct net_device *netdev, static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags, + u16 flags, bool *notified, struct netlink_ext_ack *extack) { /* guarantee we can provide a unique filter for the unicast address */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 8b8404d8c946..adc9392463ce 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9954,7 +9954,7 @@ static int ixgbe_set_features(struct net_device *netdev, static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags, + u16 flags, bool *notified, struct netlink_ext_ack *extack) { /* guarantee we can provide a unique filter for the unicast address */ diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 7c9540a71725..4f15ba2c5525 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -730,7 +730,7 @@ static void ocelot_get_stats64(struct net_device *dev, static int ocelot_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, - u16 vid, u16 flags, + u16 vid, u16 flags, bool *notified, struct netlink_ext_ack *extack) { struct ocelot_port_private *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index b3588a1ebc25..2484cebd97d4 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -394,7 +394,7 @@ static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *netdev, const unsigned char *addr, u16 vid, u16 flags, - struct netlink_ext_ack *extack) + bool *notified, struct netlink_ext_ack *extack) { struct qlcnic_adapter *adapter = netdev_priv(netdev); int err = 0; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index edbd5afcec41..dfb462e63248 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1024,7 +1024,7 @@ static int macvlan_vlan_rx_kill_vid(struct net_device *dev, static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags, + u16 flags, bool *notified, struct netlink_ext_ack *extack) { struct macvlan_dev *vlan = netdev_priv(dev); diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 42b07bc2b107..22f17c5c7549 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1241,7 +1241,7 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 flags, - struct netlink_ext_ack *extack) + bool *notified, struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(dev); /* struct net *net = dev_net(vxlan->dev); */ @@ -1277,6 +1277,9 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], nhid, true, extack); spin_unlock_bh(&vxlan->hash_lock[hash_index]); + if (!err) + *notified = true; + return err; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0aae346d919e..6a7fd191e1ee 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1248,8 +1248,10 @@ struct netdev_net_notifier { * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, * const unsigned char *addr, u16 vid, u16 flags, - * struct netlink_ext_ack *extack); + * bool *notified, struct netlink_ext_ack *extack); * Adds an FDB entry to dev for addr. + * Callee shall set *notified to true if it sent any appropriate + * notification(s). Otherwise core will send a generic one. * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, * const unsigned char *addr, u16 vid) @@ -1525,6 +1527,7 @@ struct net_device_ops { const unsigned char *addr, u16 vid, u16 flags, + bool *notified, struct netlink_ext_ack *extack); int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 77f110035df1..5f29958f3ddd 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -1152,7 +1152,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[], - struct netlink_ext_ack *extack) + bool *notified, struct netlink_ext_ack *extack) { int err = 0; @@ -1183,6 +1183,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, spin_unlock_bh(&br->hash_lock); } + if (!err) + *notified = true; return err; } @@ -1195,7 +1197,7 @@ static const struct nla_policy br_nda_fdb_pol[NFEA_MAX + 1] = { int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags, - struct netlink_ext_ack *extack) + bool *notified, struct netlink_ext_ack *extack) { struct nlattr *nfea_tb[NFEA_MAX + 1], *attr; struct net_bridge_vlan_group *vg; @@ -1258,10 +1260,10 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], /* VID was specified, so use it. */ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb, - extack); + notified, extack); } else { err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb, - extack); + notified, extack); if (err || !vg || !vg->num_vlans) goto out; @@ -1273,7 +1275,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], if (!br_vlan_should_use(v)) continue; err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid, - nfea_tb, extack); + nfea_tb, notified, extack); if (err) goto out; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 041f6e571a20..ebfc59049ec1 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -858,7 +858,7 @@ int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack); int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags, - struct netlink_ext_ack *extack); + bool *notified, struct netlink_ext_ack *extack); int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *fdev, int *idx); int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 327fa4957929..f31b2436cde5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4578,9 +4578,10 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops = br_dev->netdev_ops; + bool notified = false; err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid, - nlh->nlmsg_flags, extack); + nlh->nlmsg_flags, ¬ified, extack); if (err) goto out; else @@ -4589,16 +4590,18 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, /* Embedded bridge, macvlan, and any other device support */ if ((ndm->ndm_flags & NTF_SELF)) { + bool notified = false; + if (dev->netdev_ops->ndo_fdb_add) err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr, vid, nlh->nlmsg_flags, - extack); + ¬ified, extack); else err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, nlh->nlmsg_flags); - if (!err) { + if (!err && !notified) { rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH, ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; -- 2.51.0 From 42575ad5aab932273475d1ec3e7881cb5a05420e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 14 Nov 2024 15:09:54 +0100 Subject: [PATCH 14/16] ndo_fdb_del: Add a parameter to report whether notification was sent In a similar fashion to ndo_fdb_add, which was covered in the previous patch, add the bool *notified argument to ndo_fdb_del. Callees that send a notification on their own set the flag to true. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/06b1acf4953ef0a5ed153ef1f32d7292044f2be6.1731589511.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 4 +++- drivers/net/ethernet/mscc/ocelot_net.c | 2 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 2 +- drivers/net/macvlan.c | 2 +- drivers/net/vxlan/vxlan_core.c | 5 ++++- include/linux/netdevice.h | 9 +++++++-- net/bridge/br_fdb.c | 15 ++++++++------- net/bridge/br_private.h | 2 +- net/core/rtnetlink.c | 11 ++++++++--- 9 files changed, 34 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c875036f654b..b79848fe2a9e 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6166,12 +6166,14 @@ ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[], * @dev: the net device pointer * @addr: the MAC address entry being added * @vid: VLAN ID + * @notified: whether notification was emitted * @extack: netlink extended ack */ static int ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, - __always_unused u16 vid, struct netlink_ext_ack *extack) + __always_unused u16 vid, bool *notified, + struct netlink_ext_ack *extack) { int err; diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 4f15ba2c5525..558e03301aa8 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -744,7 +744,7 @@ static int ocelot_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], static int ocelot_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - struct netlink_ext_ack *extack) + bool *notified, struct netlink_ext_ack *extack) { struct ocelot_port_private *priv = netdev_priv(dev); struct ocelot_port *ocelot_port = &priv->port; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 2484cebd97d4..eb69121df726 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -367,7 +367,7 @@ static int qlcnic_set_mac(struct net_device *netdev, void *p) static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *netdev, - const unsigned char *addr, u16 vid, + const unsigned char *addr, u16 vid, bool *notified, struct netlink_ext_ack *extack) { struct qlcnic_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index dfb462e63248..fed4fe2a4748 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1049,7 +1049,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, - const unsigned char *addr, u16 vid, + const unsigned char *addr, u16 vid, bool *notified, struct netlink_ext_ack *extack) { struct macvlan_dev *vlan = netdev_priv(dev); diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 22f17c5c7549..9ea63059d52d 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1319,7 +1319,7 @@ out: /* Delete entry (via netlink) */ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, - const unsigned char *addr, u16 vid, + const unsigned char *addr, u16 vid, bool *notified, struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(dev); @@ -1341,6 +1341,9 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], true); spin_unlock_bh(&vxlan->hash_lock[hash_index]); + if (!err) + *notified = true; + return err; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6a7fd191e1ee..ecc686409161 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1254,8 +1254,11 @@ struct netdev_net_notifier { * notification(s). Otherwise core will send a generic one. * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, - * const unsigned char *addr, u16 vid) + * const unsigned char *addr, u16 vid + * bool *notified, struct netlink_ext_ack *extack); * Deletes the FDB entry from dev corresponding to addr. + * Callee shall set *notified to true if it sent any appropriate + * notification(s). Otherwise core will send a generic one. * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, @@ -1533,7 +1536,9 @@ struct net_device_ops { struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, - u16 vid, struct netlink_ext_ack *extack); + u16 vid, + bool *notified, + struct netlink_ext_ack *extack); int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 5f29958f3ddd..82bac2426631 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -1287,7 +1287,7 @@ out: static int fdb_delete_by_addr_and_port(struct net_bridge *br, const struct net_bridge_port *p, - const u8 *addr, u16 vlan) + const u8 *addr, u16 vlan, bool *notified) { struct net_bridge_fdb_entry *fdb; @@ -1296,18 +1296,19 @@ static int fdb_delete_by_addr_and_port(struct net_bridge *br, return -ENOENT; fdb_delete(br, fdb, true); + *notified = true; return 0; } static int __br_fdb_delete(struct net_bridge *br, const struct net_bridge_port *p, - const unsigned char *addr, u16 vid) + const unsigned char *addr, u16 vid, bool *notified) { int err; spin_lock_bh(&br->hash_lock); - err = fdb_delete_by_addr_and_port(br, p, addr, vid); + err = fdb_delete_by_addr_and_port(br, p, addr, vid, notified); spin_unlock_bh(&br->hash_lock); return err; @@ -1316,7 +1317,7 @@ static int __br_fdb_delete(struct net_bridge *br, /* Remove neighbor entry with RTM_DELNEIGH */ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, - const unsigned char *addr, u16 vid, + const unsigned char *addr, u16 vid, bool *notified, struct netlink_ext_ack *extack) { struct net_bridge_vlan_group *vg; @@ -1339,19 +1340,19 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], } if (vid) { - err = __br_fdb_delete(br, p, addr, vid); + err = __br_fdb_delete(br, p, addr, vid, notified); } else { struct net_bridge_vlan *v; err = -ENOENT; - err &= __br_fdb_delete(br, p, addr, 0); + err &= __br_fdb_delete(br, p, addr, 0, notified); if (!vg || !vg->num_vlans) return err; list_for_each_entry(v, &vg->vlan_list, vlist) { if (!br_vlan_should_use(v)) continue; - err &= __br_fdb_delete(br, p, addr, v->vid); + err &= __br_fdb_delete(br, p, addr, v->vid, notified); } } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index ebfc59049ec1..9853cfbb9d14 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -853,7 +853,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - struct netlink_ext_ack *extack); + bool *notified, struct netlink_ext_ack *extack); int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack); int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f31b2436cde5..dd142f444659 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4701,11 +4701,13 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); + bool notified = false; ops = br_dev->netdev_ops; if (!del_bulk) { if (ops->ndo_fdb_del) - err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack); + err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, + ¬ified, extack); } else { if (ops->ndo_fdb_del_bulk) err = ops->ndo_fdb_del_bulk(nlh, dev, extack); @@ -4719,10 +4721,13 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, /* Embedded bridge, macvlan, and any other device support */ if (ndm->ndm_flags & NTF_SELF) { + bool notified = false; + ops = dev->netdev_ops; if (!del_bulk) { if (ops->ndo_fdb_del) - err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack); + err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, + ¬ified, extack); else err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid); } else { @@ -4733,7 +4738,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, } if (!err) { - if (!del_bulk) + if (!del_bulk && !notified) rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH, ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; -- 2.51.0 From b219bcfcc92e9bd50c6277ac68cb75f64b403e5e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 14 Nov 2024 15:09:55 +0100 Subject: [PATCH 15/16] selftests: net: lib: Move logging from forwarding/lib.sh here Many net selftests invent their own logging helpers. These really should be in a library sourced by these tests. Currently forwarding/lib.sh has a suite of perfectly fine logging helpers, but sourcing a forwarding/ library from a higher-level directory smells of layering violation. In this patch, move the logging helpers to net/lib.sh so that every net test can use them. Together with the logging helpers, it's also necessary to move pause_on_fail(), and EXIT_STATUS and RET. Existing lib.sh users might be using these same names for their functions or variables. However lib.sh is always sourced near the top of the file (checked), and whatever new definitions will simply override the ones provided by lib.sh. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Acked-by: Shuah Khan Link: https://patch.msgid.link/edd3785a3bd72ffbe1409300989e993ee50ae98b.1731589511.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/lib.sh | 113 ----------------- tools/testing/selftests/net/lib.sh | 115 ++++++++++++++++++ 2 files changed, 115 insertions(+), 113 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 89c25f72b10c..41dd14c42c48 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -48,7 +48,6 @@ declare -A NETIFS=( : "${WAIT_TIME:=5}" # Whether to pause on, respectively, after a failure and before cleanup. -: "${PAUSE_ON_FAIL:=no}" : "${PAUSE_ON_CLEANUP:=no}" # Whether to create virtual interfaces, and what netdevice type they should be. @@ -446,22 +445,6 @@ done ############################################################################## # Helpers -# Exit status to return at the end. Set in case one of the tests fails. -EXIT_STATUS=0 -# Per-test return value. Clear at the beginning of each test. -RET=0 - -ret_set_ksft_status() -{ - local ksft_status=$1; shift - local msg=$1; shift - - RET=$(ksft_status_merge $RET $ksft_status) - if (( $? )); then - retmsg=$msg - fi -} - # Whether FAILs should be interpreted as XFAILs. Internal. FAIL_TO_XFAIL= @@ -535,102 +518,6 @@ xfail_on_veth() fi } -log_test_result() -{ - local test_name=$1; shift - local opt_str=$1; shift - local result=$1; shift - local retmsg=$1; shift - - printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" - if [[ $retmsg ]]; then - printf "\t%s\n" "$retmsg" - fi -} - -pause_on_fail() -{ - if [[ $PAUSE_ON_FAIL == yes ]]; then - echo "Hit enter to continue, 'q' to quit" - read a - [[ $a == q ]] && exit 1 - fi -} - -handle_test_result_pass() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" " OK " -} - -handle_test_result_fail() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" FAIL "$retmsg" - pause_on_fail -} - -handle_test_result_xfail() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" XFAIL "$retmsg" - pause_on_fail -} - -handle_test_result_skip() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" SKIP "$retmsg" -} - -log_test() -{ - local test_name=$1 - local opt_str=$2 - - if [[ $# -eq 2 ]]; then - opt_str="($opt_str)" - fi - - if ((RET == ksft_pass)); then - handle_test_result_pass "$test_name" "$opt_str" - elif ((RET == ksft_xfail)); then - handle_test_result_xfail "$test_name" "$opt_str" - elif ((RET == ksft_skip)); then - handle_test_result_skip "$test_name" "$opt_str" - else - handle_test_result_fail "$test_name" "$opt_str" - fi - - EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET) - return $RET -} - -log_test_skip() -{ - RET=$ksft_skip retmsg= log_test "$@" -} - -log_test_xfail() -{ - RET=$ksft_xfail retmsg= log_test "$@" -} - -log_info() -{ - local msg=$1 - - echo "INFO: $msg" -} - not() { "$@" diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index c8991cc6bf28..691318b1ec55 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -9,6 +9,9 @@ source "$net_dir/lib/sh/defer.sh" : "${WAIT_TIMEOUT:=20}" +# Whether to pause on after a failure. +: "${PAUSE_ON_FAIL:=no}" + BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms # Kselftest framework constants. @@ -20,6 +23,11 @@ ksft_skip=4 # namespace list created by setup_ns NS_LIST=() +# Exit status to return at the end. Set in case one of the tests fails. +EXIT_STATUS=0 +# Per-test return value. Clear at the beginning of each test. +RET=0 + ############################################################################## # Helpers @@ -236,3 +244,110 @@ tc_rule_handle_stats_get() | jq ".[] | select(.options.handle == $handle) | \ .options.actions[0].stats$selector" } + +ret_set_ksft_status() +{ + local ksft_status=$1; shift + local msg=$1; shift + + RET=$(ksft_status_merge $RET $ksft_status) + if (( $? )); then + retmsg=$msg + fi +} + +log_test_result() +{ + local test_name=$1; shift + local opt_str=$1; shift + local result=$1; shift + local retmsg=$1; shift + + printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" + if [[ $retmsg ]]; then + printf "\t%s\n" "$retmsg" + fi +} + +pause_on_fail() +{ + if [[ $PAUSE_ON_FAIL == yes ]]; then + echo "Hit enter to continue, 'q' to quit" + read a + [[ $a == q ]] && exit 1 + fi +} + +handle_test_result_pass() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" " OK " +} + +handle_test_result_fail() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" FAIL "$retmsg" + pause_on_fail +} + +handle_test_result_xfail() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" XFAIL "$retmsg" + pause_on_fail +} + +handle_test_result_skip() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" SKIP "$retmsg" +} + +log_test() +{ + local test_name=$1 + local opt_str=$2 + + if [[ $# -eq 2 ]]; then + opt_str="($opt_str)" + fi + + if ((RET == ksft_pass)); then + handle_test_result_pass "$test_name" "$opt_str" + elif ((RET == ksft_xfail)); then + handle_test_result_xfail "$test_name" "$opt_str" + elif ((RET == ksft_skip)); then + handle_test_result_skip "$test_name" "$opt_str" + else + handle_test_result_fail "$test_name" "$opt_str" + fi + + EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET) + return $RET +} + +log_test_skip() +{ + RET=$ksft_skip retmsg= log_test "$@" +} + +log_test_xfail() +{ + RET=$ksft_xfail retmsg= log_test "$@" +} + +log_info() +{ + local msg=$1 + + echo "INFO: $msg" +} -- 2.51.0 From 601d9d70a40a8ccf93f41a153dd4c9aa1db60d57 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 14 Nov 2024 15:09:56 +0100 Subject: [PATCH 16/16] selftests: net: lib: Move tests_run from forwarding/lib.sh here It would be good to use the same mechanism for scheduling and dispatching general net tests as the many forwarding tests already use. To that end, move the logging helpers to net/lib.sh so that every net test can use them. Existing lib.sh users might be using the name themselves. However lib.sh is always sourced near the top of the file (checked), and whatever new definition will simply override the one provided by lib.sh. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Acked-by: Shuah Khan Link: https://patch.msgid.link/a6fc083486493425b2c61185c327845b6ce3233a.1731589511.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/lib.sh | 10 ---------- tools/testing/selftests/net/lib.sh | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 41dd14c42c48..d28dbf27c1f0 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1285,16 +1285,6 @@ matchall_sink_create() action drop } -tests_run() -{ - local current_test - - for current_test in ${TESTS:-$ALL_TESTS}; do - in_defer_scope \ - $current_test - done -} - cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 691318b1ec55..4f52b8e48a3a 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -351,3 +351,13 @@ log_info() echo "INFO: $msg" } + +tests_run() +{ + local current_test + + for current_test in ${TESTS:-$ALL_TESTS}; do + in_defer_scope \ + $current_test + done +} -- 2.51.0