From 51de3600093429e3b712e5f091d767babc5dd6df Mon Sep 17 00:00:00 2001 From: Ying Lu Date: Wed, 2 Apr 2025 16:58:59 +0800 Subject: [PATCH 01/16] usbnet:fix NPE during rx_complete MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Missing usbnet_going_away Check in Critical Path. The usb_submit_urb function lacks a usbnet_going_away validation, whereas __usbnet_queue_skb includes this check. This inconsistency creates a race condition where: A URB request may succeed, but the corresponding SKB data fails to be queued. Subsequent processes: (e.g., rx_complete → defer_bh → __skb_unlink(skb, list)) attempt to access skb->next, triggering a NULL pointer dereference (Kernel Panic). Fixes: 04e906839a05 ("usbnet: fix cyclical race on disconnect with work queue") Cc: stable@vger.kernel.org Signed-off-by: Ying Lu Link: https://patch.msgid.link/4c9ef2efaa07eb7f9a5042b74348a67e5a3a7aea.1743584159.git.luying1@xiaomi.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index aeab2308b150..724b93aa4f7e 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -530,7 +530,8 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags) netif_device_present (dev->net) && test_bit(EVENT_DEV_OPEN, &dev->flags) && !test_bit (EVENT_RX_HALT, &dev->flags) && - !test_bit (EVENT_DEV_ASLEEP, &dev->flags)) { + !test_bit (EVENT_DEV_ASLEEP, &dev->flags) && + !usbnet_going_away(dev)) { switch (retval = usb_submit_urb (urb, GFP_ATOMIC)) { case -EPIPE: usbnet_defer_kevent (dev, EVENT_RX_HALT); @@ -551,8 +552,7 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags) tasklet_schedule (&dev->bh); break; case 0: - if (!usbnet_going_away(dev)) - __usbnet_queue_skb(&dev->rxq, skb, rx_start); + __usbnet_queue_skb(&dev->rxq, skb, rx_start); } } else { netif_dbg(dev, ifdown, dev->net, "rx: stopped\n"); -- 2.50.1 From 4d0ab3a6885e3e9040310a8d8f54503366083626 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 2 Apr 2025 14:42:23 +0300 Subject: [PATCH 02/16] ipv6: Start path selection from the first nexthop Cited commit transitioned IPv6 path selection to use hash-threshold instead of modulo-N. With hash-threshold, each nexthop is assigned a region boundary in the multipath hash function's output space and a nexthop is chosen if the calculated hash is smaller than the nexthop's region boundary. Hash-threshold does not work correctly if path selection does not start with the first nexthop. For example, if fib6_select_path() is always passed the last nexthop in the group, then it will always be chosen because its region boundary covers the entire hash function's output space. Fix this by starting the selection process from the first nexthop and do not consider nexthops for which rt6_score_route() provided a negative score. Fixes: 3d709f69a3e7 ("ipv6: Use hash-threshold instead of modulo-N") Reported-by: Stanislav Fomichev Closes: https://lore.kernel.org/netdev/Z9RIyKZDNoka53EO@mini-arch/ Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20250402114224.293392-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c3406a0d45bd..864f0002034b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -412,11 +412,35 @@ static bool rt6_check_expired(const struct rt6_info *rt) return false; } +static struct fib6_info * +rt6_multipath_first_sibling_rcu(const struct fib6_info *rt) +{ + struct fib6_info *iter; + struct fib6_node *fn; + + fn = rcu_dereference(rt->fib6_node); + if (!fn) + goto out; + iter = rcu_dereference(fn->leaf); + if (!iter) + goto out; + + while (iter) { + if (iter->fib6_metric == rt->fib6_metric && + rt6_qualify_for_ecmp(iter)) + return iter; + iter = rcu_dereference(iter->fib6_next); + } + +out: + return NULL; +} + void fib6_select_path(const struct net *net, struct fib6_result *res, struct flowi6 *fl6, int oif, bool have_oif_match, const struct sk_buff *skb, int strict) { - struct fib6_info *match = res->f6i; + struct fib6_info *first, *match = res->f6i; struct fib6_info *sibling; if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) @@ -440,10 +464,18 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, return; } - if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound)) + first = rt6_multipath_first_sibling_rcu(match); + if (!first) goto out; - list_for_each_entry_rcu(sibling, &match->fib6_siblings, + if (fl6->mp_hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) && + rt6_score_route(first->fib6_nh, first->fib6_flags, oif, + strict) >= 0) { + match = first; + goto out; + } + + list_for_each_entry_rcu(sibling, &first->fib6_siblings, fib6_siblings) { const struct fib6_nh *nh = sibling->fib6_nh; int nh_upper_bound; -- 2.50.1 From 8b8e0dd357165e0258d9f9cdab5366720ed2f619 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 2 Apr 2025 14:42:24 +0300 Subject: [PATCH 03/16] ipv6: Do not consider link down nexthops in path selection Nexthops whose link is down are not supposed to be considered during path selection when the "ignore_routes_with_linkdown" sysctl is set. This is done by assigning them a negative region boundary. However, when comparing the computed hash (unsigned) with the region boundary (signed), the negative region boundary is treated as unsigned, resulting in incorrect nexthop selection. Fix by treating the computed hash as signed. Note that the computed hash is always in range of [0, 2^31 - 1]. Fixes: 3d709f69a3e7 ("ipv6: Use hash-threshold instead of modulo-N") Signed-off-by: Ido Schimmel Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250402114224.293392-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 864f0002034b..ab12b816ab94 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -442,6 +442,7 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, { struct fib6_info *first, *match = res->f6i; struct fib6_info *sibling; + int hash; if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) goto out; @@ -468,7 +469,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, if (!first) goto out; - if (fl6->mp_hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) && + hash = fl6->mp_hash; + if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) && rt6_score_route(first->fib6_nh, first->fib6_flags, oif, strict) >= 0) { match = first; @@ -481,7 +483,7 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, int nh_upper_bound; nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound); - if (fl6->mp_hash > nh_upper_bound) + if (hash > nh_upper_bound) continue; if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0) break; -- 2.50.1 From fda8c491db2a90ff3e6fbbae58e495b4ddddeca3 Mon Sep 17 00:00:00 2001 From: Henry Martin Date: Wed, 2 Apr 2025 21:50:36 +0800 Subject: [PATCH 04/16] arcnet: Add NULL check in com20020pci_probe() devm_kasprintf() returns NULL when memory allocation fails. Currently, com20020pci_probe() does not check for this case, which results in a NULL pointer dereference. Add NULL check after devm_kasprintf() to prevent this issue and ensure no resources are left allocated. Fixes: 6b17a597fc2f ("arcnet: restoring support for multiple Sohard Arcnet cards") Signed-off-by: Henry Martin Link: https://patch.msgid.link/20250402135036.44697-1-bsdhenrymartin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/arcnet/com20020-pci.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index c5e571ec94c9..0472bcdff130 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -251,18 +251,33 @@ static int com20020pci_probe(struct pci_dev *pdev, card->tx_led.default_trigger = devm_kasprintf(&pdev->dev, GFP_KERNEL, "arc%d-%d-tx", dev->dev_id, i); + if (!card->tx_led.default_trigger) { + ret = -ENOMEM; + goto err_free_arcdev; + } card->tx_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "pci:green:tx:%d-%d", dev->dev_id, i); - + if (!card->tx_led.name) { + ret = -ENOMEM; + goto err_free_arcdev; + } card->tx_led.dev = &dev->dev; card->recon_led.brightness_set = led_recon_set; card->recon_led.default_trigger = devm_kasprintf(&pdev->dev, GFP_KERNEL, "arc%d-%d-recon", dev->dev_id, i); + if (!card->recon_led.default_trigger) { + ret = -ENOMEM; + goto err_free_arcdev; + } card->recon_led.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "pci:red:recon:%d-%d", dev->dev_id, i); + if (!card->recon_led.name) { + ret = -ENOMEM; + goto err_free_arcdev; + } card->recon_led.dev = &dev->dev; ret = devm_led_classdev_register(&pdev->dev, &card->tx_led); -- 2.50.1 From 053f3ff67d7feefc75797863f3d84b47ad47086f Mon Sep 17 00:00:00 2001 From: Dave Marquardt Date: Wed, 2 Apr 2025 10:44:03 -0500 Subject: [PATCH 05/16] net: ibmveth: make veth_pool_store stop hanging v2: - Created a single error handling unlock and exit in veth_pool_store - Greatly expanded commit message with previous explanatory-only text Summary: Use rtnl_mutex to synchronize veth_pool_store with itself, ibmveth_close and ibmveth_open, preventing multiple calls in a row to napi_disable. Background: Two (or more) threads could call veth_pool_store through writing to /sys/devices/vio/30000002/pool*/*. You can do this easily with a little shell script. This causes a hang. I configured LOCKDEP, compiled ibmveth.c with DEBUG, and built a new kernel. I ran this test again and saw: Setting pool0/active to 0 Setting pool1/active to 1 [ 73.911067][ T4365] ibmveth 30000002 eth0: close starting Setting pool1/active to 1 Setting pool1/active to 0 [ 73.911367][ T4366] ibmveth 30000002 eth0: close starting [ 73.916056][ T4365] ibmveth 30000002 eth0: close complete [ 73.916064][ T4365] ibmveth 30000002 eth0: open starting [ 110.808564][ T712] systemd-journald[712]: Sent WATCHDOG=1 notification. [ 230.808495][ T712] systemd-journald[712]: Sent WATCHDOG=1 notification. [ 243.683786][ T123] INFO: task stress.sh:4365 blocked for more than 122 seconds. [ 243.683827][ T123] Not tainted 6.14.0-01103-g2df0c02dab82-dirty #8 [ 243.683833][ T123] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 243.683838][ T123] task:stress.sh state:D stack:28096 pid:4365 tgid:4365 ppid:4364 task_flags:0x400040 flags:0x00042000 [ 243.683852][ T123] Call Trace: [ 243.683857][ T123] [c00000000c38f690] [0000000000000001] 0x1 (unreliable) [ 243.683868][ T123] [c00000000c38f840] [c00000000001f908] __switch_to+0x318/0x4e0 [ 243.683878][ T123] [c00000000c38f8a0] [c000000001549a70] __schedule+0x500/0x12a0 [ 243.683888][ T123] [c00000000c38f9a0] [c00000000154a878] schedule+0x68/0x210 [ 243.683896][ T123] [c00000000c38f9d0] [c00000000154ac80] schedule_preempt_disabled+0x30/0x50 [ 243.683904][ T123] [c00000000c38fa00] [c00000000154dbb0] __mutex_lock+0x730/0x10f0 [ 243.683913][ T123] [c00000000c38fb10] [c000000001154d40] napi_enable+0x30/0x60 [ 243.683921][ T123] [c00000000c38fb40] [c000000000f4ae94] ibmveth_open+0x68/0x5dc [ 243.683928][ T123] [c00000000c38fbe0] [c000000000f4aa20] veth_pool_store+0x220/0x270 [ 243.683936][ T123] [c00000000c38fc70] [c000000000826278] sysfs_kf_write+0x68/0xb0 [ 243.683944][ T123] [c00000000c38fcb0] [c0000000008240b8] kernfs_fop_write_iter+0x198/0x2d0 [ 243.683951][ T123] [c00000000c38fd00] [c00000000071b9ac] vfs_write+0x34c/0x650 [ 243.683958][ T123] [c00000000c38fdc0] [c00000000071bea8] ksys_write+0x88/0x150 [ 243.683966][ T123] [c00000000c38fe10] [c0000000000317f4] system_call_exception+0x124/0x340 [ 243.683973][ T123] [c00000000c38fe50] [c00000000000d05c] system_call_vectored_common+0x15c/0x2ec ... [ 243.684087][ T123] Showing all locks held in the system: [ 243.684095][ T123] 1 lock held by khungtaskd/123: [ 243.684099][ T123] #0: c00000000278e370 (rcu_read_lock){....}-{1:2}, at: debug_show_all_locks+0x50/0x248 [ 243.684114][ T123] 4 locks held by stress.sh/4365: [ 243.684119][ T123] #0: c00000003a4cd3f8 (sb_writers#3){.+.+}-{0:0}, at: ksys_write+0x88/0x150 [ 243.684132][ T123] #1: c000000041aea888 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x154/0x2d0 [ 243.684143][ T123] #2: c0000000366fb9a8 (kn->active#64){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x160/0x2d0 [ 243.684155][ T123] #3: c000000035ff4cb8 (&dev->lock){+.+.}-{3:3}, at: napi_enable+0x30/0x60 [ 243.684166][ T123] 5 locks held by stress.sh/4366: [ 243.684170][ T123] #0: c00000003a4cd3f8 (sb_writers#3){.+.+}-{0:0}, at: ksys_write+0x88/0x150 [ 243.684183][ T123] #1: c00000000aee2288 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x154/0x2d0 [ 243.684194][ T123] #2: c0000000366f4ba8 (kn->active#64){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x160/0x2d0 [ 243.684205][ T123] #3: c000000035ff4cb8 (&dev->lock){+.+.}-{3:3}, at: napi_disable+0x30/0x60 [ 243.684216][ T123] #4: c0000003ff9bbf18 (&rq->__lock){-.-.}-{2:2}, at: __schedule+0x138/0x12a0 From the ibmveth debug, two threads are calling veth_pool_store, which calls ibmveth_close and ibmveth_open. Here's the sequence: T4365 T4366 ----------------- ----------------- --------- veth_pool_store veth_pool_store ibmveth_close ibmveth_close napi_disable napi_disable ibmveth_open napi_enable <- HANG ibmveth_close calls napi_disable at the top and ibmveth_open calls napi_enable at the top. https://docs.kernel.org/networking/napi.html]] says The control APIs are not idempotent. Control API calls are safe against concurrent use of datapath APIs but an incorrect sequence of control API calls may result in crashes, deadlocks, or race conditions. For example, calling napi_disable() multiple times in a row will deadlock. In the normal open and close paths, rtnl_mutex is acquired to prevent other callers. This is missing from veth_pool_store. Use rtnl_mutex in veth_pool_store fixes these hangs. Signed-off-by: Dave Marquardt Fixes: 860f242eb534 ("[PATCH] ibmveth change buffer pools dynamically") Reviewed-by: Nick Child Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250402154403.386744-1-davemarq@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmveth.c | 39 +++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index b619a3ec245b..04192190beba 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1802,18 +1802,22 @@ static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr, long value = simple_strtol(buf, NULL, 10); long rc; + rtnl_lock(); + if (attr == &veth_active_attr) { if (value && !pool->active) { if (netif_running(netdev)) { if (ibmveth_alloc_buffer_pool(pool)) { netdev_err(netdev, "unable to alloc pool\n"); - return -ENOMEM; + rc = -ENOMEM; + goto unlock_err; } pool->active = 1; ibmveth_close(netdev); - if ((rc = ibmveth_open(netdev))) - return rc; + rc = ibmveth_open(netdev); + if (rc) + goto unlock_err; } else { pool->active = 1; } @@ -1833,48 +1837,59 @@ static ssize_t veth_pool_store(struct kobject *kobj, struct attribute *attr, if (i == IBMVETH_NUM_BUFF_POOLS) { netdev_err(netdev, "no active pool >= MTU\n"); - return -EPERM; + rc = -EPERM; + goto unlock_err; } if (netif_running(netdev)) { ibmveth_close(netdev); pool->active = 0; - if ((rc = ibmveth_open(netdev))) - return rc; + rc = ibmveth_open(netdev); + if (rc) + goto unlock_err; } pool->active = 0; } } else if (attr == &veth_num_attr) { if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) { - return -EINVAL; + rc = -EINVAL; + goto unlock_err; } else { if (netif_running(netdev)) { ibmveth_close(netdev); pool->size = value; - if ((rc = ibmveth_open(netdev))) - return rc; + rc = ibmveth_open(netdev); + if (rc) + goto unlock_err; } else { pool->size = value; } } } else if (attr == &veth_size_attr) { if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE) { - return -EINVAL; + rc = -EINVAL; + goto unlock_err; } else { if (netif_running(netdev)) { ibmveth_close(netdev); pool->buff_size = value; - if ((rc = ibmveth_open(netdev))) - return rc; + rc = ibmveth_open(netdev); + if (rc) + goto unlock_err; } else { pool->buff_size = value; } } } + rtnl_unlock(); /* kick the interrupt handler to allocate/deallocate pools */ ibmveth_interrupt(netdev->irq, netdev); return count; + +unlock_err: + rtnl_unlock(); + return rc; } -- 2.50.1 From ec304b70d46bd2ed66541c5b57b63276529e05b1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Apr 2025 18:34:04 -0700 Subject: [PATCH 06/16] net: move mp dev config validation to __net_mp_open_rxq() devmem code performs a number of safety checks to avoid having to reimplement all of them in the drivers. Move those to __net_mp_open_rxq() and reuse that function for binding to make sure that io_uring ZC also benefits from them. While at it rename the queue ID variable to rxq_idx in __net_mp_open_rxq(), we touch most of the relevant lines. The XArray insertion is reordered after the netdev_rx_queue_restart() call, otherwise we'd need to duplicate the queue index check or risk inserting an invalid pointer. The XArray allocation failures should be extremely rare. Reviewed-by: Mina Almasry Acked-by: Stanislav Fomichev Fixes: 6e18ed929d3b ("net: add helpers for setting a memory provider on an rx queue") Link: https://patch.msgid.link/20250403013405.2827250-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/page_pool/memory_provider.h | 6 +++ net/core/devmem.c | 50 +++++-------------------- net/core/netdev-genl.c | 6 --- net/core/netdev_rx_queue.c | 49 ++++++++++++++++++------ 4 files changed, 54 insertions(+), 57 deletions(-) diff --git a/include/net/page_pool/memory_provider.h b/include/net/page_pool/memory_provider.h index b3e665897767..ada4f968960a 100644 --- a/include/net/page_pool/memory_provider.h +++ b/include/net/page_pool/memory_provider.h @@ -6,6 +6,7 @@ #include struct netdev_rx_queue; +struct netlink_ext_ack; struct sk_buff; struct memory_provider_ops { @@ -24,8 +25,13 @@ void net_mp_niov_clear_page_pool(struct net_iov *niov); int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, struct pp_memory_provider_params *p); +int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *p, + struct netlink_ext_ack *extack); void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, struct pp_memory_provider_params *old_p); +void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *old_p); /** * net_mp_netmem_place_in_cache() - give a netmem to a page pool diff --git a/net/core/devmem.c b/net/core/devmem.c index ee145a2aa41c..f2ce3c2ebc97 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -8,7 +8,6 @@ */ #include -#include #include #include #include @@ -143,57 +142,28 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, struct net_devmem_dmabuf_binding *binding, struct netlink_ext_ack *extack) { + struct pp_memory_provider_params mp_params = { + .mp_priv = binding, + .mp_ops = &dmabuf_devmem_ops, + }; struct netdev_rx_queue *rxq; u32 xa_idx; int err; - if (rxq_idx >= dev->real_num_rx_queues) { - NL_SET_ERR_MSG(extack, "rx queue index out of range"); - return -ERANGE; - } - - if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { - NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); - return -EINVAL; - } - - if (dev->cfg->hds_thresh) { - NL_SET_ERR_MSG(extack, "hds-thresh is not zero"); - return -EINVAL; - } + err = __net_mp_open_rxq(dev, rxq_idx, &mp_params, extack); + if (err) + return err; rxq = __netif_get_rx_queue(dev, rxq_idx); - if (rxq->mp_params.mp_ops) { - NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); - return -EEXIST; - } - -#ifdef CONFIG_XDP_SOCKETS - if (rxq->pool) { - NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP"); - return -EBUSY; - } -#endif - err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b, GFP_KERNEL); if (err) - return err; - - rxq->mp_params.mp_priv = binding; - rxq->mp_params.mp_ops = &dmabuf_devmem_ops; - - err = netdev_rx_queue_restart(dev, rxq_idx); - if (err) - goto err_xa_erase; + goto err_close_rxq; return 0; -err_xa_erase: - rxq->mp_params.mp_priv = NULL; - rxq->mp_params.mp_ops = NULL; - xa_erase(&binding->bound_rxqs, xa_idx); - +err_close_rxq: + __net_mp_close_rxq(dev, rxq_idx, &mp_params); return err; } diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 3afeaa8c5dc5..5d7af50fe702 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -874,12 +874,6 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) goto err_unlock; } - if (dev_xdp_prog_count(netdev)) { - NL_SET_ERR_MSG(info->extack, "unable to bind dmabuf to device with XDP program attached"); - err = -EEXIST; - goto err_unlock; - } - binding = net_devmem_bind_dmabuf(netdev, dmabuf_fd, info->extack); if (IS_ERR(binding)) { err = PTR_ERR(binding); diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index 3af716f77a13..556b5393ec9f 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include #include @@ -86,8 +87,9 @@ err_free_new_mem: } EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL"); -static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, - struct pp_memory_provider_params *p) +int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *p, + struct netlink_ext_ack *extack) { struct netdev_rx_queue *rxq; int ret; @@ -95,16 +97,41 @@ static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, if (!netdev_need_ops_lock(dev)) return -EOPNOTSUPP; - if (ifq_idx >= dev->real_num_rx_queues) + if (rxq_idx >= dev->real_num_rx_queues) return -EINVAL; - ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues); + rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues); - rxq = __netif_get_rx_queue(dev, ifq_idx); - if (rxq->mp_params.mp_ops) + if (rxq_idx >= dev->real_num_rx_queues) { + NL_SET_ERR_MSG(extack, "rx queue index out of range"); + return -ERANGE; + } + if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { + NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); + return -EINVAL; + } + if (dev->cfg->hds_thresh) { + NL_SET_ERR_MSG(extack, "hds-thresh is not zero"); + return -EINVAL; + } + if (dev_xdp_prog_count(dev)) { + NL_SET_ERR_MSG(extack, "unable to custom memory provider to device with XDP program attached"); return -EEXIST; + } + + rxq = __netif_get_rx_queue(dev, rxq_idx); + if (rxq->mp_params.mp_ops) { + NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); + return -EEXIST; + } +#ifdef CONFIG_XDP_SOCKETS + if (rxq->pool) { + NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP"); + return -EBUSY; + } +#endif rxq->mp_params = *p; - ret = netdev_rx_queue_restart(dev, ifq_idx); + ret = netdev_rx_queue_restart(dev, rxq_idx); if (ret) { rxq->mp_params.mp_ops = NULL; rxq->mp_params.mp_priv = NULL; @@ -112,19 +139,19 @@ static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, return ret; } -int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, +int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, struct pp_memory_provider_params *p) { int ret; netdev_lock(dev); - ret = __net_mp_open_rxq(dev, ifq_idx, p); + ret = __net_mp_open_rxq(dev, rxq_idx, p, NULL); netdev_unlock(dev); return ret; } -static void __net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, - struct pp_memory_provider_params *old_p) +void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, + const struct pp_memory_provider_params *old_p) { struct netdev_rx_queue *rxq; -- 2.50.1 From 34f71de3f548eba0604c9cbabc1eb68b2f81fa0f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Apr 2025 18:34:05 -0700 Subject: [PATCH 07/16] net: avoid false positive warnings in __net_mp_close_rxq() Commit under Fixes solved the problem of spurious warnings when we uninstall an MP from a device while its down. The __net_mp_close_rxq() which is used by io_uring was not fixed. Move the fix over and reuse __net_mp_close_rxq() in the devmem path. Acked-by: Stanislav Fomichev Fixes: a70f891e0fa0 ("net: devmem: do not WARN conditionally after netdev_rx_queue_restart()") Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20250403013405.2827250-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 12 +++++------- net/core/netdev_rx_queue.c | 4 +++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/core/devmem.c b/net/core/devmem.c index f2ce3c2ebc97..6e27a47d0493 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -116,21 +116,19 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) struct netdev_rx_queue *rxq; unsigned long xa_idx; unsigned int rxq_idx; - int err; if (binding->list.next) list_del(&binding->list); xa_for_each(&binding->bound_rxqs, xa_idx, rxq) { - WARN_ON(rxq->mp_params.mp_priv != binding); - - rxq->mp_params.mp_priv = NULL; - rxq->mp_params.mp_ops = NULL; + const struct pp_memory_provider_params mp_params = { + .mp_priv = binding, + .mp_ops = &dmabuf_devmem_ops, + }; rxq_idx = get_netdev_rx_queue_index(rxq); - err = netdev_rx_queue_restart(binding->dev, rxq_idx); - WARN_ON(err && err != -ENETDOWN); + __net_mp_close_rxq(binding->dev, rxq_idx, &mp_params); } xa_erase(&net_devmem_dmabuf_bindings, binding->id); diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index 556b5393ec9f..d126f10197bf 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -154,6 +154,7 @@ void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, const struct pp_memory_provider_params *old_p) { struct netdev_rx_queue *rxq; + int err; if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues)) return; @@ -173,7 +174,8 @@ void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, rxq->mp_params.mp_ops = NULL; rxq->mp_params.mp_priv = NULL; - WARN_ON(netdev_rx_queue_restart(dev, ifq_idx)); + err = netdev_rx_queue_restart(dev, ifq_idx); + WARN_ON(err && err != -ENETDOWN); } void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, -- 2.50.1 From 0802c32d4b03a26604c2db2c8a63b34a80361305 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Apr 2025 18:37:03 -0700 Subject: [PATCH 08/16] netlink: specs: rt_addr: fix the spec format / schema failures The spec is mis-formatted, schema validation says: Failed validating 'type' in schema['properties']['operations']['properties']['list']['items']['properties']['dump']['properties']['request']['properties']['value']: {'minimum': 0, 'type': 'integer'} On instance['operations']['list'][3]['dump']['request']['value']: '58 - ifa-family' The ifa-family clearly wants to be part of an attribute list. Reviewed-by: Jacob Keller Reviewed-by: Donald Hunter Reviewed-by: Yuyang Huang Fixes: 4f280376e531 ("selftests/net: Add selftest for IPv4 RTM_GETMULTICAST support") Link: https://patch.msgid.link/20250403013706.2828322-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_addr.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/netlink/specs/rt_addr.yaml b/Documentation/netlink/specs/rt_addr.yaml index 5dd5469044c7..3bc9b6f9087e 100644 --- a/Documentation/netlink/specs/rt_addr.yaml +++ b/Documentation/netlink/specs/rt_addr.yaml @@ -187,6 +187,7 @@ operations: dump: request: value: 58 + attributes: - ifa-family reply: value: 58 -- 2.50.1 From 524c03585fda36584cc7ada49a1827666d37eb4e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Apr 2025 18:37:04 -0700 Subject: [PATCH 09/16] netlink: specs: rt_addr: fix get multi command name Command names should match C defines, codegens may depend on it. Reviewed-by: Jacob Keller Fixes: 4f280376e531 ("selftests/net: Add selftest for IPv4 RTM_GETMULTICAST support") Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250403013706.2828322-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_addr.yaml | 2 +- tools/testing/selftests/net/rtnetlink.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/netlink/specs/rt_addr.yaml b/Documentation/netlink/specs/rt_addr.yaml index 3bc9b6f9087e..1650dc3f091a 100644 --- a/Documentation/netlink/specs/rt_addr.yaml +++ b/Documentation/netlink/specs/rt_addr.yaml @@ -169,7 +169,7 @@ operations: value: 20 attributes: *ifaddr-all - - name: getmaddrs + name: getmulticast doc: Get / dump IPv4/IPv6 multicast addresses. attribute-set: addr-attrs fixed-header: ifaddrmsg diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py index 80950888800b..69436415d56e 100755 --- a/tools/testing/selftests/net/rtnetlink.py +++ b/tools/testing/selftests/net/rtnetlink.py @@ -12,7 +12,7 @@ def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None: At least the loopback interface should have this address. """ - addresses = rtnl.getmaddrs({"ifa-family": socket.AF_INET}, dump=True) + addresses = rtnl.getmulticast({"ifa-family": socket.AF_INET}, dump=True) all_host_multicasts = [ addr for addr in addresses if addr['ifa-multicast'] == IPV4_ALL_HOSTS_MULTICAST -- 2.50.1 From 0c8e30252d9fe8127f90b7a0a293872b368ebf3c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Apr 2025 18:37:05 -0700 Subject: [PATCH 10/16] netlink: specs: rt_addr: pull the ifa- prefix out of the names YAML specs don't normally include the C prefix name in the name of the YAML attr. Remove the ifa- prefix from all attributes in addr-attrs and specify name-prefix instead. This is a bit risky, hopefully there aren't many users out there. Fixes: dfb0f7d9d979 ("doc/netlink: Add spec for rt addr messages") Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250403013706.2828322-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_addr.yaml | 39 ++++++++++++------------ tools/testing/selftests/net/rtnetlink.py | 2 +- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/Documentation/netlink/specs/rt_addr.yaml b/Documentation/netlink/specs/rt_addr.yaml index 1650dc3f091a..df6b23f06a22 100644 --- a/Documentation/netlink/specs/rt_addr.yaml +++ b/Documentation/netlink/specs/rt_addr.yaml @@ -78,45 +78,46 @@ definitions: attribute-sets: - name: addr-attrs + name-prefix: ifa- attributes: - - name: ifa-address + name: address type: binary display-hint: ipv4 - - name: ifa-local + name: local type: binary display-hint: ipv4 - - name: ifa-label + name: label type: string - - name: ifa-broadcast + name: broadcast type: binary display-hint: ipv4 - - name: ifa-anycast + name: anycast type: binary - - name: ifa-cacheinfo + name: cacheinfo type: binary struct: ifa-cacheinfo - - name: ifa-multicast + name: multicast type: binary - - name: ifa-flags + name: flags type: u32 enum: ifa-flags enum-as-flags: true - - name: ifa-rt-priority + name: rt-priority type: u32 - - name: ifa-target-netnsid + name: target-netnsid type: binary - - name: ifa-proto + name: proto type: u8 @@ -137,10 +138,10 @@ operations: - ifa-prefixlen - ifa-scope - ifa-index - - ifa-address - - ifa-label - - ifa-local - - ifa-cacheinfo + - address + - label + - local + - cacheinfo - name: deladdr doc: Remove address @@ -154,8 +155,8 @@ operations: - ifa-prefixlen - ifa-scope - ifa-index - - ifa-address - - ifa-local + - address + - local - name: getaddr doc: Dump address information. @@ -182,8 +183,8 @@ operations: reply: value: 58 attributes: &mcaddr-attrs - - ifa-multicast - - ifa-cacheinfo + - multicast + - cacheinfo dump: request: value: 58 diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py index 69436415d56e..e9ad5e88da97 100755 --- a/tools/testing/selftests/net/rtnetlink.py +++ b/tools/testing/selftests/net/rtnetlink.py @@ -15,7 +15,7 @@ def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None: addresses = rtnl.getmulticast({"ifa-family": socket.AF_INET}, dump=True) all_host_multicasts = [ - addr for addr in addresses if addr['ifa-multicast'] == IPV4_ALL_HOSTS_MULTICAST + addr for addr in addresses if addr['multicast'] == IPV4_ALL_HOSTS_MULTICAST ] ksft_ge(len(all_host_multicasts), 1, -- 2.50.1 From 1a1eba0e9899c286914032c78708c614b016704b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Apr 2025 18:37:06 -0700 Subject: [PATCH 11/16] netlink: specs: rt_route: pull the ifa- prefix out of the names YAML specs don't normally include the C prefix name in the name of the YAML attr. Remove the ifa- prefix from all attributes in route-attrs and metrics and specify name-prefix instead. This is a bit risky, hopefully there aren't many users out there. Fixes: 023289b4f582 ("doc/netlink: Add spec for rt route messages") Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250403013706.2828322-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_route.yaml | 180 +++++++++++----------- 1 file changed, 91 insertions(+), 89 deletions(-) diff --git a/Documentation/netlink/specs/rt_route.yaml b/Documentation/netlink/specs/rt_route.yaml index a674103e5bc4..292469c7d4b9 100644 --- a/Documentation/netlink/specs/rt_route.yaml +++ b/Documentation/netlink/specs/rt_route.yaml @@ -80,165 +80,167 @@ definitions: attribute-sets: - name: route-attrs + name-prefix: rta- attributes: - - name: rta-dst + name: dst type: binary display-hint: ipv4 - - name: rta-src + name: src type: binary display-hint: ipv4 - - name: rta-iif + name: iif type: u32 - - name: rta-oif + name: oif type: u32 - - name: rta-gateway + name: gateway type: binary display-hint: ipv4 - - name: rta-priority + name: priority type: u32 - - name: rta-prefsrc + name: prefsrc type: binary display-hint: ipv4 - - name: rta-metrics + name: metrics type: nest - nested-attributes: rta-metrics + nested-attributes: metrics - - name: rta-multipath + name: multipath type: binary - - name: rta-protoinfo # not used + name: protoinfo # not used type: binary - - name: rta-flow + name: flow type: u32 - - name: rta-cacheinfo + name: cacheinfo type: binary struct: rta-cacheinfo - - name: rta-session # not used + name: session # not used type: binary - - name: rta-mp-algo # not used + name: mp-algo # not used type: binary - - name: rta-table + name: table type: u32 - - name: rta-mark + name: mark type: u32 - - name: rta-mfc-stats + name: mfc-stats type: binary - - name: rta-via + name: via type: binary - - name: rta-newdst + name: newdst type: binary - - name: rta-pref + name: pref type: u8 - - name: rta-encap-type + name: encap-type type: u16 - - name: rta-encap + name: encap type: binary # tunnel specific nest - - name: rta-expires + name: expires type: u32 - - name: rta-pad + name: pad type: binary - - name: rta-uid + name: uid type: u32 - - name: rta-ttl-propagate + name: ttl-propagate type: u8 - - name: rta-ip-proto + name: ip-proto type: u8 - - name: rta-sport + name: sport type: u16 - - name: rta-dport + name: dport type: u16 - - name: rta-nh-id + name: nh-id type: u32 - - name: rta-flowlabel + name: flowlabel type: u32 byte-order: big-endian display-hint: hex - - name: rta-metrics + name: metrics + name-prefix: rtax- attributes: - - name: rtax-unspec + name: unspec type: unused value: 0 - - name: rtax-lock + name: lock type: u32 - - name: rtax-mtu + name: mtu type: u32 - - name: rtax-window + name: window type: u32 - - name: rtax-rtt + name: rtt type: u32 - - name: rtax-rttvar + name: rttvar type: u32 - - name: rtax-ssthresh + name: ssthresh type: u32 - - name: rtax-cwnd + name: cwnd type: u32 - - name: rtax-advmss + name: advmss type: u32 - - name: rtax-reordering + name: reordering type: u32 - - name: rtax-hoplimit + name: hoplimit type: u32 - - name: rtax-initcwnd + name: initcwnd type: u32 - - name: rtax-features + name: features type: u32 - - name: rtax-rto-min + name: rto-min type: u32 - - name: rtax-initrwnd + name: initrwnd type: u32 - - name: rtax-quickack + name: quickack type: u32 - - name: rtax-cc-algo + name: cc-algo type: string - - name: rtax-fastopen-no-cookie + name: fastopen-no-cookie type: u32 operations: @@ -254,18 +256,18 @@ operations: value: 26 attributes: - rtm-family - - rta-src + - src - rtm-src-len - - rta-dst + - dst - rtm-dst-len - - rta-iif - - rta-oif - - rta-ip-proto - - rta-sport - - rta-dport - - rta-mark - - rta-uid - - rta-flowlabel + - iif + - oif + - ip-proto + - sport + - dport + - mark + - uid + - flowlabel reply: value: 24 attributes: &all-route-attrs @@ -278,34 +280,34 @@ operations: - rtm-scope - rtm-type - rtm-flags - - rta-dst - - rta-src - - rta-iif - - rta-oif - - rta-gateway - - rta-priority - - rta-prefsrc - - rta-metrics - - rta-multipath - - rta-flow - - rta-cacheinfo - - rta-table - - rta-mark - - rta-mfc-stats - - rta-via - - rta-newdst - - rta-pref - - rta-encap-type - - rta-encap - - rta-expires - - rta-pad - - rta-uid - - rta-ttl-propagate - - rta-ip-proto - - rta-sport - - rta-dport - - rta-nh-id - - rta-flowlabel + - dst + - src + - iif + - oif + - gateway + - priority + - prefsrc + - metrics + - multipath + - flow + - cacheinfo + - table + - mark + - mfc-stats + - via + - newdst + - pref + - encap-type + - encap + - expires + - pad + - uid + - ttl-propagate + - ip-proto + - sport + - dport + - nh-id + - flowlabel dump: request: value: 26 -- 2.50.1 From 94f68c0f99a548d33a102672690100bf76a7c460 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 3 Apr 2025 07:56:36 -0700 Subject: [PATCH 12/16] selftests: net: amt: indicate progress in the stress test Our CI expects output from the test at least once every 10 minutes. The AMT test when running on debug kernel is just on the edge of that time for the stress test. Improve the output: - print the name of the test first, before starting it, - output a dot every 10% of the way. Output after: TEST: amt discovery [ OK ] TEST: IPv4 amt multicast forwarding [ OK ] TEST: IPv6 amt multicast forwarding [ OK ] TEST: IPv4 amt traffic forwarding torture .......... [ OK ] TEST: IPv6 amt traffic forwarding torture .......... [ OK ] Reviewed-by: Taehee Yoo Link: https://patch.msgid.link/20250403145636.2891166-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/amt.sh | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh index d458b45c775b..3ef209cacb8e 100755 --- a/tools/testing/selftests/net/amt.sh +++ b/tools/testing/selftests/net/amt.sh @@ -194,15 +194,21 @@ test_remote_ip() send_mcast_torture4() { - ip netns exec "${SOURCE}" bash -c \ - 'cat /dev/urandom | head -c 1G | nc -w 1 -u 239.0.0.1 4001' + for i in `seq 10`; do + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 100M | nc -w 1 -u 239.0.0.1 4001' + echo -n "." + done } send_mcast_torture6() { - ip netns exec "${SOURCE}" bash -c \ - 'cat /dev/urandom | head -c 1G | nc -w 1 -u ff0e::5:6 6001' + for i in `seq 10`; do + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 100M | nc -w 1 -u ff0e::5:6 6001' + echo -n "." + done } check_features() @@ -278,10 +284,12 @@ wait $pid || err=$? if [ $err -eq 1 ]; then ERR=1 fi +printf "TEST: %-50s" "IPv4 amt traffic forwarding torture" send_mcast_torture4 -printf "TEST: %-60s [ OK ]\n" "IPv4 amt traffic forwarding torture" +printf " [ OK ]\n" +printf "TEST: %-50s" "IPv6 amt traffic forwarding torture" send_mcast_torture6 -printf "TEST: %-60s [ OK ]\n" "IPv6 amt traffic forwarding torture" +printf " [ OK ]\n" sleep 5 if [ "${ERR}" -eq 1 ]; then echo "Some tests failed." >&2 -- 2.50.1 From 216a61d33c0728a8cf1650aaed2c523c6ce16354 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 4 Apr 2025 12:21:25 +0000 Subject: [PATCH 13/16] net: ethtool: fix ethtool_ringparam_get_cfg() returns a hds_thresh value always as 0. When hds-thresh is configured, ethnl_set_rings() is called, and it calls ethtool_ringparam_get_cfg() to get ringparameters from .get_ringparam() callback and dev->cfg. Both hds_config and hds_thresh values should be set from dev->cfg, not from .get_ringparam(). But ethtool_ringparam_get_cfg() sets only hds_config from dev->cfg. So, ethtool_ringparam_get_cfg() returns always a hds_thresh as 0. If an input value of hds-thresh is 0, a hds_thresh value from ethtool_ringparam_get_cfg() are same. So ethnl_set_rings() does nothing and returns immediately. It causes a bug that setting a hds-thresh value to 0 is not working. Reproducer: modprobe netdevsim echo 1 > /sys/bus/netdevsim/new_device ethtool -G eth0 hds-thresh 100 ethtool -G eth0 hds-thresh 0 ethtool -g eth0 #hds-thresh value should be 0, but it shows 100. The tools/testing/selftests/drivers/net/hds.py can test it too with applying a following patch for hds.py. Fixes: 928459bbda19 ("net: ethtool: populate the default HDS params in the core") Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250404122126.1555648-2-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- net/ethtool/common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 0cb6da1f692a..49bea6b45bd5 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -830,6 +830,7 @@ void ethtool_ringparam_get_cfg(struct net_device *dev, /* Driver gives us current state, we want to return current config */ kparam->tcp_data_split = dev->cfg->hds_config; + kparam->hds_thresh = dev->cfg->hds_thresh; } static void ethtool_init_tsinfo(struct kernel_ethtool_ts_info *info) -- 2.50.1 From 22d3a63d5321326cb05a6dff7d2c488236cf56f2 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 4 Apr 2025 12:21:26 +0000 Subject: [PATCH 14/16] selftests: drv-net: test random value for hds-thresh hds.py has been testing 0(set_hds_thresh_zero()), MAX(set_hds_thresh_max()), GT(set_hds_thresh_gt()) values for hds-thresh. However if a hds-thresh value was already 0, set_hds_thresh_zero() can't test properly. So, it tests random value first and then tests 0, MAX, GT values. Testing bnxt: TAP version 13 1..13 ok 1 hds.get_hds ok 2 hds.get_hds_thresh ok 3 hds.set_hds_disable # SKIP disabling of HDS not supported by the device ok 4 hds.set_hds_enable ok 5 hds.set_hds_thresh_random ok 6 hds.set_hds_thresh_zero ok 7 hds.set_hds_thresh_max ok 8 hds.set_hds_thresh_gt ok 9 hds.set_xdp ok 10 hds.enabled_set_xdp ok 11 hds.ioctl ok 12 hds.ioctl_set_xdp ok 13 hds.ioctl_enabled_set_xdp # Totals: pass:12 fail:0 xfail:0 xpass:0 skip:1 error:0 Testing lo: TAP version 13 1..13 ok 1 hds.get_hds # SKIP tcp-data-split not supported by device ok 2 hds.get_hds_thresh # SKIP hds-thresh not supported by device ok 3 hds.set_hds_disable # SKIP ring-set not supported by the device ok 4 hds.set_hds_enable # SKIP ring-set not supported by the device ok 5 hds.set_hds_thresh_random # SKIP hds-thresh not supported by device ok 6 hds.set_hds_thresh_zero # SKIP ring-set not supported by the device ok 7 hds.set_hds_thresh_max # SKIP hds-thresh not supported by device ok 8 hds.set_hds_thresh_gt # SKIP hds-thresh not supported by device ok 9 hds.set_xdp # SKIP tcp-data-split not supported by device ok 10 hds.enabled_set_xdp # SKIP tcp-data-split not supported by device ok 11 hds.ioctl # SKIP tcp-data-split not supported by device ok 12 hds.ioctl_set_xdp # SKIP tcp-data-split not supported by device ok 13 hds.ioctl_enabled_set_xdp # SKIP tcp-data-split not supported by device # Totals: pass:0 fail:0 xfail:0 xpass:0 skip:13 error:0 Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250404122126.1555648-3-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hds.py | 33 +++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 8b7f6acad15f..7c90a040ce45 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -6,7 +6,7 @@ import os from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx from lib.py import CmdExitFailure, EthtoolFamily, NlError from lib.py import NetDrvEnv -from lib.py import defer, ethtool, ip +from lib.py import defer, ethtool, ip, random def _get_hds_mode(cfg, netnl) -> str: @@ -109,6 +109,36 @@ def set_hds_thresh_zero(cfg, netnl) -> None: ksft_eq(0, rings['hds-thresh']) +def set_hds_thresh_random(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + if 'hds-thresh-max' not in rings: + raise KsftSkipEx('hds-thresh-max not defined by device') + + if rings['hds-thresh-max'] < 2: + raise KsftSkipEx('hds-thresh-max is too small') + elif rings['hds-thresh-max'] == 2: + hds_thresh = 1 + else: + while True: + hds_thresh = random.randint(1, rings['hds-thresh-max'] - 1) + if hds_thresh != rings['hds-thresh']: + break + + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_thresh}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + ksft_eq(hds_thresh, rings['hds-thresh']) + def set_hds_thresh_max(cfg, netnl) -> None: try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) @@ -243,6 +273,7 @@ def main() -> None: get_hds_thresh, set_hds_disable, set_hds_enable, + set_hds_thresh_random, set_hds_thresh_zero, set_hds_thresh_max, set_hds_thresh_gt, -- 2.50.1 From 54f5fafcced113c7d203f6848f4f14840e74e9d1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 5 Apr 2025 20:57:51 -0700 Subject: [PATCH 15/16] ipv6: Fix null-ptr-deref in addrconf_add_ifaddr(). The cited commit placed netdev_lock_ops() just after __dev_get_by_index() in addrconf_add_ifaddr(), where dev could be NULL as reported. [0] Let's call netdev_lock_ops() only when dev is not NULL. [0]: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000198: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000cc0-0x0000000000000cc7] CPU: 3 UID: 0 PID: 12032 Comm: syz.0.15 Not tainted 6.14.0-13408-g9f867ba24d36 #1 PREEMPT(full) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:addrconf_add_ifaddr (./include/net/netdev_lock.h:30 ./include/net/netdev_lock.h:41 net/ipv6/addrconf.c:3157) Code: 8b b4 24 94 00 00 00 4c 89 ef e8 7e 4c 2f ff 4c 8d b0 c5 0c 00 00 48 89 c3 48 b8 00 00 00 00 00 fc ff df 4c 89 f2 48 c1 ea 03 <0f> b6 04 02 4c 89 f2 83 e2 07 38 d0 7f 08 80 RSP: 0018:ffffc90015b0faa0 EFLAGS: 00010213 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000198 RSI: ffffffff893162f2 RDI: ffff888078cb0338 RBP: ffffc90015b0fbb0 R08: 0000000000000000 R09: fffffbfff20cbbe2 R10: ffffc90015b0faa0 R11: 0000000000000000 R12: 1ffff92002b61f54 R13: ffff888078cb0000 R14: 0000000000000cc5 R15: ffff888078cb0000 FS: 00007f92559ed640(0000) GS:ffff8882a8659000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f92559ecfc8 CR3: 000000001c39e000 CR4: 00000000000006f0 Call Trace: inet6_ioctl (net/ipv6/af_inet6.c:580) sock_do_ioctl (net/socket.c:1196) sock_ioctl (net/socket.c:1314) __x64_sys_ioctl (fs/ioctl.c:52 fs/ioctl.c:906 fs/ioctl.c:892 fs/ioctl.c:892) do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130 RIP: 0033:0x7f9254b9c62d Code: 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff f8 RSP: 002b:00007f92559ecf98 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f9254d65f80 RCX: 00007f9254b9c62d RDX: 0000000020000040 RSI: 0000000000008916 RDI: 0000000000000003 RBP: 00007f9254c264d3 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007f9254d65f80 R15: 00007f92559cd000 Modules linked in: Fixes: 8965c160b8f7 ("net: use netif_disable_lro in ipv6_add_dev") Reported-by: syzkaller Reported-by: Hui Guo Closes: https://lore.kernel.org/netdev/CAHOo4gK+tdU1B14Kh6tg-tNPqnQ1qGLfinONFVC43vmgEPnXXw@mail.gmail.com/ Signed-off-by: Kuniyuki Iwashima Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250406035755.69238-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c3b908fccbc1..9c52ed23ff23 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3154,12 +3154,13 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) rtnl_net_lock(net); dev = __dev_get_by_index(net, ireq.ifr6_ifindex); - netdev_lock_ops(dev); - if (dev) + if (dev) { + netdev_lock_ops(dev); err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL); - else + netdev_unlock_ops(dev); + } else { err = -ENODEV; - netdev_unlock_ops(dev); + } rtnl_net_unlock(net); return err; } -- 2.50.1 From 04efcee6ef8d0f01eef495db047e7216d6e6e38f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 4 Apr 2025 09:11:22 -0700 Subject: [PATCH 16/16] net: hold instance lock during NETDEV_CHANGE Cosmin reports an issue with ipv6_add_dev being called from NETDEV_CHANGE notifier: [ 3455.008776] ? ipv6_add_dev+0x370/0x620 [ 3455.010097] ipv6_find_idev+0x96/0xe0 [ 3455.010725] addrconf_add_dev+0x1e/0xa0 [ 3455.011382] addrconf_init_auto_addrs+0xb0/0x720 [ 3455.013537] addrconf_notify+0x35f/0x8d0 [ 3455.014214] notifier_call_chain+0x38/0xf0 [ 3455.014903] netdev_state_change+0x65/0x90 [ 3455.015586] linkwatch_do_dev+0x5a/0x70 [ 3455.016238] rtnl_getlink+0x241/0x3e0 [ 3455.019046] rtnetlink_rcv_msg+0x177/0x5e0 Similarly, linkwatch might get to ipv6_add_dev without ops lock: [ 3456.656261] ? ipv6_add_dev+0x370/0x620 [ 3456.660039] ipv6_find_idev+0x96/0xe0 [ 3456.660445] addrconf_add_dev+0x1e/0xa0 [ 3456.660861] addrconf_init_auto_addrs+0xb0/0x720 [ 3456.661803] addrconf_notify+0x35f/0x8d0 [ 3456.662236] notifier_call_chain+0x38/0xf0 [ 3456.662676] netdev_state_change+0x65/0x90 [ 3456.663112] linkwatch_do_dev+0x5a/0x70 [ 3456.663529] __linkwatch_run_queue+0xeb/0x200 [ 3456.663990] linkwatch_event+0x21/0x30 [ 3456.664399] process_one_work+0x211/0x610 [ 3456.664828] worker_thread+0x1cc/0x380 [ 3456.665691] kthread+0xf4/0x210 Reclassify NETDEV_CHANGE as a notifier that consistently runs under the instance lock. Link: https://lore.kernel.org/netdev/aac073de8beec3e531c86c101b274d434741c28e.camel@nvidia.com/ Reported-by: Cosmin Ratiu Tested-by: Cosmin Ratiu Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250404161122.3907628-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- Documentation/networking/netdevices.rst | 10 +++++---- include/linux/netdevice.h | 2 ++ include/linux/rtnetlink.h | 2 +- net/core/dev.c | 11 +--------- net/core/dev_api.c | 16 ++++++++++++++ net/core/link_watch.c | 28 ++++++++++++++++++++----- net/core/lock_debug.c | 2 +- net/core/rtnetlink.c | 15 +++++++------ net/ethtool/ioctl.c | 2 +- net/hsr/hsr_device.c | 6 +++--- 10 files changed, 63 insertions(+), 31 deletions(-) diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index 6c2d8945f597..eab601ab2db0 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -338,10 +338,11 @@ operations directly under the netdev instance lock. Devices drivers are encouraged to rely on the instance lock where possible. For the (mostly software) drivers that need to interact with the core stack, -there are two sets of interfaces: ``dev_xxx`` and ``netif_xxx`` (e.g., -``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx`` functions handle -acquiring the instance lock themselves, while the ``netif_xxx`` functions -assume that the driver has already acquired the instance lock. +there are two sets of interfaces: ``dev_xxx``/``netdev_xxx`` and ``netif_xxx`` +(e.g., ``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx``/``netdev_xxx`` +functions handle acquiring the instance lock themselves, while the +``netif_xxx`` functions assume that the driver has already acquired +the instance lock. Notifiers and netdev instance lock ================================== @@ -354,6 +355,7 @@ For devices with locked ops, currently only the following notifiers are running under the lock: * ``NETDEV_REGISTER`` * ``NETDEV_UP`` +* ``NETDEV_CHANGE`` The following notifiers are running without the lock: * ``NETDEV_UNREGISTER`` diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cf3b6445817b..2d11d013cabe 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4429,6 +4429,7 @@ void linkwatch_fire_event(struct net_device *dev); * pending work list (if queued). */ void linkwatch_sync_dev(struct net_device *dev); +void __linkwatch_sync_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present @@ -4974,6 +4975,7 @@ void dev_set_rx_mode(struct net_device *dev); int dev_set_promiscuity(struct net_device *dev, int inc); int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); +void netif_state_change(struct net_device *dev); void netdev_state_change(struct net_device *dev); void __netdev_notify_peers(struct net_device *dev); void netdev_notify_peers(struct net_device *dev); diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ccaaf4c7d5f6..ea39dd23a197 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -240,6 +240,6 @@ rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); } -void netdev_set_operstate(struct net_device *dev, int newstate); +void netif_set_operstate(struct net_device *dev, int newstate); #endif /* __LINUX_RTNETLINK_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 0608605cfc24..75e104322ad5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1518,15 +1518,7 @@ void netdev_features_change(struct net_device *dev) } EXPORT_SYMBOL(netdev_features_change); -/** - * netdev_state_change - device changes state - * @dev: device to cause notification - * - * Called to indicate a device has changed state. This function calls - * the notifier chains for netdev_chain and sends a NEWLINK message - * to the routing socket. - */ -void netdev_state_change(struct net_device *dev) +void netif_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { struct netdev_notifier_change_info change_info = { @@ -1538,7 +1530,6 @@ void netdev_state_change(struct net_device *dev) rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL, 0, NULL); } } -EXPORT_SYMBOL(netdev_state_change); /** * __netdev_notify_peers - notify network peers about existence of @dev, diff --git a/net/core/dev_api.c b/net/core/dev_api.c index 90bafb0b1b8c..90898cd540ce 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -327,3 +327,19 @@ int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) return ret; } EXPORT_SYMBOL_GPL(dev_xdp_propagate); + +/** + * netdev_state_change() - device changes state + * @dev: device to cause notification + * + * Called to indicate a device has changed state. This function calls + * the notifier chains for netdev_chain and sends a NEWLINK message + * to the routing socket. + */ +void netdev_state_change(struct net_device *dev) +{ + netdev_lock_ops(dev); + netif_state_change(dev); + netdev_unlock_ops(dev); +} +EXPORT_SYMBOL(netdev_state_change); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index cb04ef2b9807..864f3bbc3a4c 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -183,7 +183,7 @@ static void linkwatch_do_dev(struct net_device *dev) else dev_deactivate(dev); - netdev_state_change(dev); + netif_state_change(dev); } /* Note: our callers are responsible for calling netdev_tracker_free(). * This is the reason we use __dev_put() instead of dev_put(). @@ -240,7 +240,9 @@ static void __linkwatch_run_queue(int urgent_only) */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); spin_unlock_irq(&lweventlist_lock); + netdev_lock_ops(dev); linkwatch_do_dev(dev); + netdev_unlock_ops(dev); do_dev--; spin_lock_irq(&lweventlist_lock); } @@ -253,25 +255,41 @@ static void __linkwatch_run_queue(int urgent_only) spin_unlock_irq(&lweventlist_lock); } -void linkwatch_sync_dev(struct net_device *dev) +static bool linkwatch_clean_dev(struct net_device *dev) { unsigned long flags; - int clean = 0; + bool clean = false; spin_lock_irqsave(&lweventlist_lock, flags); if (!list_empty(&dev->link_watch_list)) { list_del_init(&dev->link_watch_list); - clean = 1; + clean = true; /* We must release netdev tracker under * the spinlock protection. */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); } spin_unlock_irqrestore(&lweventlist_lock, flags); - if (clean) + + return clean; +} + +void __linkwatch_sync_dev(struct net_device *dev) +{ + netdev_ops_assert_locked(dev); + + if (linkwatch_clean_dev(dev)) linkwatch_do_dev(dev); } +void linkwatch_sync_dev(struct net_device *dev) +{ + if (linkwatch_clean_dev(dev)) { + netdev_lock_ops(dev); + linkwatch_do_dev(dev); + netdev_unlock_ops(dev); + } +} /* Must be called with the rtnl semaphore held */ void linkwatch_run_queue(void) diff --git a/net/core/lock_debug.c b/net/core/lock_debug.c index b7f22dc92a6f..941e26c1343d 100644 --- a/net/core/lock_debug.c +++ b/net/core/lock_debug.c @@ -20,11 +20,11 @@ int netdev_debug_event(struct notifier_block *nb, unsigned long event, switch (cmd) { case NETDEV_REGISTER: case NETDEV_UP: + case NETDEV_CHANGE: netdev_ops_assert_locked(dev); fallthrough; case NETDEV_DOWN: case NETDEV_REBOOT: - case NETDEV_CHANGE: case NETDEV_UNREGISTER: case NETDEV_CHANGEMTU: case NETDEV_CHANGEADDR: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c23852835050..d8d03ff87a3b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1043,7 +1043,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, } EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); -void netdev_set_operstate(struct net_device *dev, int newstate) +void netif_set_operstate(struct net_device *dev, int newstate) { unsigned int old = READ_ONCE(dev->operstate); @@ -1052,9 +1052,9 @@ void netdev_set_operstate(struct net_device *dev, int newstate) return; } while (!try_cmpxchg(&dev->operstate, &old, newstate)); - netdev_state_change(dev); + netif_state_change(dev); } -EXPORT_SYMBOL(netdev_set_operstate); +EXPORT_SYMBOL(netif_set_operstate); static void set_operstate(struct net_device *dev, unsigned char transition) { @@ -1080,7 +1080,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition) break; } - netdev_set_operstate(dev, operstate); + netif_set_operstate(dev, operstate); } static unsigned int rtnl_dev_get_flags(const struct net_device *dev) @@ -3396,7 +3396,7 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, errout: if (status & DO_SETLINK_MODIFIED) { if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY) - netdev_state_change(dev); + netif_state_change(dev); if (err < 0) net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n", @@ -3676,8 +3676,11 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, nla_len(tb[IFLA_BROADCAST])); if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - if (tb[IFLA_OPERSTATE]) + if (tb[IFLA_OPERSTATE]) { + netdev_lock_ops(dev); set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + netdev_unlock_ops(dev); + } if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); if (tb[IFLA_GROUP]) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 221639407c72..8262cc10f98d 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -60,7 +60,7 @@ static struct devlink *netdev_to_devlink_get(struct net_device *dev) u32 ethtool_op_get_link(struct net_device *dev) { /* Synchronize carrier state with link watch, see also rtnl_getlink() */ - linkwatch_sync_dev(dev); + __linkwatch_sync_dev(dev); return netif_carrier_ok(dev) ? 1 : 0; } diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 439cfb7ad5d1..1b1b700ec05e 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -33,14 +33,14 @@ static void hsr_set_operstate(struct hsr_port *master, bool has_carrier) struct net_device *dev = master->dev; if (!is_admin_up(dev)) { - netdev_set_operstate(dev, IF_OPER_DOWN); + netif_set_operstate(dev, IF_OPER_DOWN); return; } if (has_carrier) - netdev_set_operstate(dev, IF_OPER_UP); + netif_set_operstate(dev, IF_OPER_UP); else - netdev_set_operstate(dev, IF_OPER_LOWERLAYERDOWN); + netif_set_operstate(dev, IF_OPER_LOWERLAYERDOWN); } static bool hsr_check_carrier(struct hsr_port *master) -- 2.50.1