From 63fb100bf5241ffdfa404b49c6a443cadd08447c Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 2 May 2025 10:52:41 +0200 Subject: [PATCH 01/16] net: ethtool: netlink: Use netdev_hold for dumpit() operations Move away from dev_hold and use netdev_hold with a local reftracker when performing a DUMP on each netdev. Signed-off-by: Maxime Chevallier Link: https://patch.msgid.link/20250502085242.248645-4-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- net/ethtool/netlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index c8ed5a6f1e92..9de828df46cd 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -603,18 +603,19 @@ static int ethnl_default_dumpit(struct sk_buff *skb, { struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb); struct net *net = sock_net(skb->sk); + netdevice_tracker dev_tracker; struct net_device *dev; int ret = 0; rcu_read_lock(); for_each_netdev_dump(net, dev, ctx->pos_ifindex) { - dev_hold(dev); + netdev_hold(dev, &dev_tracker, GFP_ATOMIC); rcu_read_unlock(); ret = ethnl_default_dump_one(skb, dev, ctx, genl_info_dump(cb)); rcu_read_lock(); - dev_put(dev); + netdev_put(dev, &dev_tracker); if (ret < 0 && ret != -EOPNOTSUPP) { if (likely(skb->len)) -- 2.51.0 From c2dbda07662eb84dfe07887775d08eb1536c2d22 Mon Sep 17 00:00:00 2001 From: Ruben Wauters Date: Thu, 1 May 2025 21:23:55 +0100 Subject: [PATCH 02/16] ipv4: ip_tunnel: Replace strcpy use with strscpy Use of strcpy is decpreated, replaces the use of strcpy with strscpy as recommended. strscpy was chosen as it requires a NUL terminated non-padded string, which is the case here. I am aware there is an explicit bounds check above the second instance, however using strscpy protects against buffer overflows in any future code, and there is no good reason I can see to not use it. I have also replaced the scrscpy above that had 3 params with the version using 2 params. These are functionally equivalent, but it is cleaner to have both using 2 params. Signed-off-by: Ruben Wauters Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250501202935.46318-1-rubenru09@aol.com Signed-off-by: Jakub Kicinski --- net/ipv4/ip_tunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 3913ec89ad20..678b8f96e3e9 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -243,11 +243,11 @@ static struct net_device *__ip_tunnel_create(struct net *net, if (parms->name[0]) { if (!dev_valid_name(parms->name)) goto failed; - strscpy(name, parms->name, IFNAMSIZ); + strscpy(name, parms->name); } else { if (strlen(ops->kind) > (IFNAMSIZ - 3)) goto failed; - strcpy(name, ops->kind); + strscpy(name, ops->kind); strcat(name, "%d"); } -- 2.51.0 From 8c2e6b26ffe243be1e78f5a4bfb1a857d6e6f6d6 Mon Sep 17 00:00:00 2001 From: Jon Kohler Date: Wed, 30 Apr 2025 19:04:28 -0700 Subject: [PATCH 03/16] vhost/net: Defer TX queue re-enable until after sendmsg In handle_tx_copy, TX batching processes packets below ~PAGE_SIZE and batches up to 64 messages before calling sock->sendmsg. Currently, when there are no more messages on the ring to dequeue, handle_tx_copy re-enables kicks on the ring *before* firing off the batch sendmsg. However, sock->sendmsg incurs a non-zero delay, especially if it needs to wake up a thread (e.g., another vhost worker). If the guest submits additional messages immediately after the last ring check and disablement, it triggers an EPT_MISCONFIG vmexit to attempt to kick the vhost worker. This may happen while the worker is still processing the sendmsg, leading to wasteful exit(s). This is particularly problematic for single-threaded guest submission threads, as they must exit, wait for the exit to be processed (potentially involving a TTWU), and then resume. In scenarios like a constant stream of UDP messages, this results in a sawtooth pattern where the submitter frequently vmexits, and the vhost-net worker alternates between sleeping and waking. A common solution is to configure vhost-net busy polling via userspace (e.g., qemu poll-us). However, treating the sendmsg as the "busy" period by keeping kicks disabled during the final sendmsg and performing one additional ring check afterward provides a significant performance improvement without any excess busy poll cycles. If messages are found in the ring after the final sendmsg, requeue the TX handler. This ensures fairness for the RX handler and allows vhost_run_work_list to cond_resched() as needed. Test Case TX VM: taskset -c 2 iperf3 -c rx-ip-here -t 60 -p 5200 -b 0 -u -i 5 RX VM: taskset -c 2 iperf3 -s -p 5200 -D 6.12.0, each worker backed by tun interface with IFF_NAPI setup. Note: TCP side is largely unchanged as that was copy bound 6.12.0 unpatched EPT_MISCONFIG/second: 5411 Datagrams/second: ~382k Interval Transfer Bitrate Lost/Total Datagrams 0.00-30.00 sec 15.5 GBytes 4.43 Gbits/sec 0/11481630 (0%) sender 6.12.0 patched EPT_MISCONFIG/second: 58 (~93x reduction) Datagrams/second: ~650k (~1.7x increase) Interval Transfer Bitrate Lost/Total Datagrams 0.00-30.00 sec 26.4 GBytes 7.55 Gbits/sec 0/19554720 (0%) sender Acked-by: Jason Wang Signed-off-by: Jon Kohler Acked-by: Michael S. Tsirkin Link: https://patch.msgid.link/20250501020428.1889162-1-jon@nutanix.com Signed-off-by: Jakub Kicinski --- drivers/vhost/net.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index b9b9e9d40951..7cbfc7d718b3 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -755,10 +755,10 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) int err; int sent_pkts = 0; bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX); + bool busyloop_intr; do { - bool busyloop_intr = false; - + busyloop_intr = false; if (nvq->done_idx == VHOST_NET_BATCH) vhost_tx_batch(net, nvq, sock, &msg); @@ -769,13 +769,10 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) break; /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { - if (unlikely(busyloop_intr)) { - vhost_poll_queue(&vq->poll); - } else if (unlikely(vhost_enable_notify(&net->dev, - vq))) { - vhost_disable_notify(&net->dev, vq); - continue; - } + /* Kicks are disabled at this point, break loop and + * process any remaining batched packets. Queue will + * be re-enabled afterwards. + */ break; } @@ -825,7 +822,22 @@ done: ++nvq->done_idx; } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); + /* Kicks are still disabled, dispatch any remaining batched msgs. */ vhost_tx_batch(net, nvq, sock, &msg); + + if (unlikely(busyloop_intr)) + /* If interrupted while doing busy polling, requeue the + * handler to be fair handle_rx as well as other tasks + * waiting on cpu. + */ + vhost_poll_queue(&vq->poll); + else + /* All of our work has been completed; however, before + * leaving the TX handler, do one last check for work, + * and requeue handler if necessary. If there is no work, + * queue will be reenabled. + */ + vhost_net_busy_poll_try_queue(net, vq); } static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) -- 2.51.0 From b30978515430508afda2c6c838c6fcd4de9971e9 Mon Sep 17 00:00:00 2001 From: Dave Marquardt Date: Thu, 1 May 2025 14:49:42 -0500 Subject: [PATCH 04/16] net: ibmveth: Indented struct ibmveth_adapter correctly Made struct ibmveth_adapter follow indentation rules Signed-off-by: Dave Marquardt Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250501194944.283729-2-davemarq@linux.ibm.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ibm/ibmveth.h | 64 +++++++++++++++--------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 8468e2c59d7a..0f72ce54e7cf 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -134,38 +134,38 @@ struct ibmveth_rx_q { }; struct ibmveth_adapter { - struct vio_dev *vdev; - struct net_device *netdev; - struct napi_struct napi; - unsigned int mcastFilterSize; - void * buffer_list_addr; - void * filter_list_addr; - void *tx_ltb_ptr[IBMVETH_MAX_QUEUES]; - unsigned int tx_ltb_size; - dma_addr_t tx_ltb_dma[IBMVETH_MAX_QUEUES]; - dma_addr_t buffer_list_dma; - dma_addr_t filter_list_dma; - struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS]; - struct ibmveth_rx_q rx_queue; - int rx_csum; - int large_send; - bool is_active_trunk; - - u64 fw_ipv6_csum_support; - u64 fw_ipv4_csum_support; - u64 fw_large_send_support; - /* adapter specific stats */ - u64 replenish_task_cycles; - u64 replenish_no_mem; - u64 replenish_add_buff_failure; - u64 replenish_add_buff_success; - u64 rx_invalid_buffer; - u64 rx_no_buffer; - u64 tx_map_failed; - u64 tx_send_failed; - u64 tx_large_packets; - u64 rx_large_packets; - /* Ethtool settings */ + struct vio_dev *vdev; + struct net_device *netdev; + struct napi_struct napi; + unsigned int mcastFilterSize; + void *buffer_list_addr; + void *filter_list_addr; + void *tx_ltb_ptr[IBMVETH_MAX_QUEUES]; + unsigned int tx_ltb_size; + dma_addr_t tx_ltb_dma[IBMVETH_MAX_QUEUES]; + dma_addr_t buffer_list_dma; + dma_addr_t filter_list_dma; + struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS]; + struct ibmveth_rx_q rx_queue; + int rx_csum; + int large_send; + bool is_active_trunk; + + u64 fw_ipv6_csum_support; + u64 fw_ipv4_csum_support; + u64 fw_large_send_support; + /* adapter specific stats */ + u64 replenish_task_cycles; + u64 replenish_no_mem; + u64 replenish_add_buff_failure; + u64 replenish_add_buff_success; + u64 rx_invalid_buffer; + u64 rx_no_buffer; + u64 tx_map_failed; + u64 tx_send_failed; + u64 tx_large_packets; + u64 rx_large_packets; + /* Ethtool settings */ u8 duplex; u32 speed; }; -- 2.51.0 From 2c91e2319ed95f9b7608c9ac2ebd1a070918f1fc Mon Sep 17 00:00:00 2001 From: Dave Marquardt Date: Thu, 1 May 2025 14:49:43 -0500 Subject: [PATCH 05/16] net: ibmveth: Reset the adapter when unexpected states are detected Reset the adapter through new function ibmveth_reset, called in WARN_ON situations. Removed conflicting and unneeded forward declaration. Signed-off-by: Dave Marquardt Link: https://patch.msgid.link/20250501194944.283729-3-davemarq@linux.ibm.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ibm/ibmveth.c | 118 ++++++++++++++++++++++++----- drivers/net/ethernet/ibm/ibmveth.h | 1 + 2 files changed, 100 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 04192190beba..cff494739bc9 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -39,8 +39,6 @@ #include "ibmveth.h" static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance); -static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter, - bool reuse); static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev); static struct kobj_type ktype_veth_pool; @@ -231,7 +229,10 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, index = pool->free_map[free_index]; skb = NULL; - BUG_ON(index == IBM_VETH_INVALID_MAP); + if (WARN_ON(index == IBM_VETH_INVALID_MAP)) { + schedule_work(&adapter->work); + goto bad_index_failure; + } /* are we allocating a new buffer or recycling an old one */ if (pool->skbuff[index]) @@ -300,6 +301,7 @@ failure: DMA_FROM_DEVICE); dev_kfree_skb_any(pool->skbuff[index]); pool->skbuff[index] = NULL; +bad_index_failure: adapter->replenish_add_buff_failure++; mb(); @@ -370,20 +372,36 @@ static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter, } } -/* remove a buffer from a pool */ -static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, - u64 correlator, bool reuse) +/** + * ibmveth_remove_buffer_from_pool - remove a buffer from a pool + * @adapter: adapter instance + * @correlator: identifies pool and index + * @reuse: whether to reuse buffer + * + * Return: + * * %0 - success + * * %-EINVAL - correlator maps to pool or index out of range + * * %-EFAULT - pool and index map to null skb + */ +static int ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, + u64 correlator, bool reuse) { unsigned int pool = correlator >> 32; unsigned int index = correlator & 0xffffffffUL; unsigned int free_index; struct sk_buff *skb; - BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); - BUG_ON(index >= adapter->rx_buff_pool[pool].size); + if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) || + WARN_ON(index >= adapter->rx_buff_pool[pool].size)) { + schedule_work(&adapter->work); + return -EINVAL; + } skb = adapter->rx_buff_pool[pool].skbuff[index]; - BUG_ON(skb == NULL); + if (WARN_ON(!skb)) { + schedule_work(&adapter->work); + return -EFAULT; + } /* if we are going to reuse the buffer then keep the pointers around * but mark index as available. replenish will see the skb pointer and @@ -411,6 +429,8 @@ static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter, mb(); atomic_dec(&(adapter->rx_buff_pool[pool].available)); + + return 0; } /* get the current buffer on the rx queue */ @@ -420,24 +440,44 @@ static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *ada unsigned int pool = correlator >> 32; unsigned int index = correlator & 0xffffffffUL; - BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); - BUG_ON(index >= adapter->rx_buff_pool[pool].size); + if (WARN_ON(pool >= IBMVETH_NUM_BUFF_POOLS) || + WARN_ON(index >= adapter->rx_buff_pool[pool].size)) { + schedule_work(&adapter->work); + return NULL; + } return adapter->rx_buff_pool[pool].skbuff[index]; } -static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter, - bool reuse) +/** + * ibmveth_rxq_harvest_buffer - Harvest buffer from pool + * + * @adapter: pointer to adapter + * @reuse: whether to reuse buffer + * + * Context: called from ibmveth_poll + * + * Return: + * * %0 - success + * * other - non-zero return from ibmveth_remove_buffer_from_pool + */ +static int ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter, + bool reuse) { u64 cor; + int rc; cor = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator; - ibmveth_remove_buffer_from_pool(adapter, cor, reuse); + rc = ibmveth_remove_buffer_from_pool(adapter, cor, reuse); + if (unlikely(rc)) + return rc; if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) { adapter->rx_queue.index = 0; adapter->rx_queue.toggle = !adapter->rx_queue.toggle; } + + return 0; } static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx) @@ -709,6 +749,35 @@ static int ibmveth_close(struct net_device *netdev) return 0; } +/** + * ibmveth_reset - Handle scheduled reset work + * + * @w: pointer to work_struct embedded in adapter structure + * + * Context: This routine acquires rtnl_mutex and disables its NAPI through + * ibmveth_close. It can't be called directly in a context that has + * already acquired rtnl_mutex or disabled its NAPI, or directly from + * a poll routine. + * + * Return: void + */ +static void ibmveth_reset(struct work_struct *w) +{ + struct ibmveth_adapter *adapter = container_of(w, struct ibmveth_adapter, work); + struct net_device *netdev = adapter->netdev; + + netdev_dbg(netdev, "reset starting\n"); + + rtnl_lock(); + + dev_close(adapter->netdev); + dev_open(adapter->netdev, NULL); + + rtnl_unlock(); + + netdev_dbg(netdev, "reset complete\n"); +} + static int ibmveth_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { @@ -1324,7 +1393,8 @@ restart_poll: wmb(); /* suggested by larson1 */ adapter->rx_invalid_buffer++; netdev_dbg(netdev, "recycling invalid buffer\n"); - ibmveth_rxq_harvest_buffer(adapter, true); + if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true))) + break; } else { struct sk_buff *skb, *new_skb; int length = ibmveth_rxq_frame_length(adapter); @@ -1334,6 +1404,8 @@ restart_poll: __sum16 iph_check = 0; skb = ibmveth_rxq_get_buffer(adapter); + if (unlikely(!skb)) + break; /* if the large packet bit is set in the rx queue * descriptor, the mss will be written by PHYP eight @@ -1357,10 +1429,12 @@ restart_poll: if (rx_flush) ibmveth_flush_buffer(skb->data, length + offset); - ibmveth_rxq_harvest_buffer(adapter, true); + if (unlikely(ibmveth_rxq_harvest_buffer(adapter, true))) + break; skb = new_skb; } else { - ibmveth_rxq_harvest_buffer(adapter, false); + if (unlikely(ibmveth_rxq_harvest_buffer(adapter, false))) + break; skb_reserve(skb, offset); } @@ -1407,7 +1481,10 @@ restart_poll: * then check once more to make sure we are done. */ lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_ENABLE); - BUG_ON(lpar_rc != H_SUCCESS); + if (WARN_ON(lpar_rc != H_SUCCESS)) { + schedule_work(&adapter->work); + goto out; + } if (ibmveth_rxq_pending_buffer(adapter) && napi_schedule(napi)) { lpar_rc = h_vio_signal(adapter->vdev->unit_address, @@ -1428,7 +1505,7 @@ static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance) if (napi_schedule_prep(&adapter->napi)) { lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); - BUG_ON(lpar_rc != H_SUCCESS); + WARN_ON(lpar_rc != H_SUCCESS); __napi_schedule(&adapter->napi); } return IRQ_HANDLED; @@ -1670,6 +1747,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->vdev = dev; adapter->netdev = netdev; + INIT_WORK(&adapter->work, ibmveth_reset); adapter->mcastFilterSize = be32_to_cpu(*mcastFilterSize_p); ibmveth_init_link_settings(netdev); @@ -1762,6 +1840,8 @@ static void ibmveth_remove(struct vio_dev *dev) struct ibmveth_adapter *adapter = netdev_priv(netdev); int i; + cancel_work_sync(&adapter->work); + for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) kobject_put(&adapter->rx_buff_pool[i].kobj); diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 0f72ce54e7cf..b0a2460ec9f9 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -137,6 +137,7 @@ struct ibmveth_adapter { struct vio_dev *vdev; struct net_device *netdev; struct napi_struct napi; + struct work_struct work; unsigned int mcastFilterSize; void *buffer_list_addr; void *filter_list_addr; -- 2.51.0 From 8a97de243df51c740e62388f9858c7f2e3603495 Mon Sep 17 00:00:00 2001 From: Dave Marquardt Date: Thu, 1 May 2025 14:49:44 -0500 Subject: [PATCH 06/16] net: ibmveth: added KUnit tests for some buffer pool functions Added KUnit tests for ibmveth_remove_buffer_from_pool and ibmveth_rxq_get_buffer under new IBMVETH_KUNIT_TEST config option. Signed-off-by: Dave Marquardt Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250501194944.283729-4-davemarq@linux.ibm.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ibm/Kconfig | 13 +++ drivers/net/ethernet/ibm/ibmveth.c | 129 +++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) diff --git a/drivers/net/ethernet/ibm/Kconfig b/drivers/net/ethernet/ibm/Kconfig index c0c112d95b89..4f4b23465c47 100644 --- a/drivers/net/ethernet/ibm/Kconfig +++ b/drivers/net/ethernet/ibm/Kconfig @@ -27,6 +27,19 @@ config IBMVETH To compile this driver as a module, choose M here. The module will be called ibmveth. +config IBMVETH_KUNIT_TEST + bool "KUnit test for IBM LAN Virtual Ethernet support" if !KUNIT_ALL_TESTS + depends on KUNIT + depends on KUNIT=y && IBMVETH=y + default KUNIT_ALL_TESTS + help + This builds unit tests for the IBM LAN Virtual Ethernet driver. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + source "drivers/net/ethernet/ibm/emac/Kconfig" config EHEA diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index cff494739bc9..45143467286e 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -2042,3 +2042,132 @@ static void __exit ibmveth_module_exit(void) module_init(ibmveth_module_init); module_exit(ibmveth_module_exit); + +#ifdef CONFIG_IBMVETH_KUNIT_TEST +#include + +/** + * ibmveth_reset_kunit - reset routine for running in KUnit environment + * + * @w: pointer to work_struct embedded in adapter structure + * + * Context: Called in the KUnit environment. Does nothing. + * + * Return: void + */ +static void ibmveth_reset_kunit(struct work_struct *w) +{ + netdev_dbg(NULL, "reset_kunit starting\n"); + netdev_dbg(NULL, "reset_kunit complete\n"); +} + +/** + * ibmveth_remove_buffer_from_pool_test - unit test for some of + * ibmveth_remove_buffer_from_pool + * @test: pointer to kunit structure + * + * Tests the error returns from ibmveth_remove_buffer_from_pool. + * ibmveth_remove_buffer_from_pool also calls WARN_ON, so dmesg should be + * checked to see that these warnings happened. + * + * Return: void + */ +static void ibmveth_remove_buffer_from_pool_test(struct kunit *test) +{ + struct ibmveth_adapter *adapter = kunit_kzalloc(test, sizeof(*adapter), GFP_KERNEL); + struct ibmveth_buff_pool *pool; + u64 correlator; + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter); + + INIT_WORK(&adapter->work, ibmveth_reset_kunit); + + /* Set sane values for buffer pools */ + for (int i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i, + pool_count[i], pool_size[i], + pool_active[i]); + + pool = &adapter->rx_buff_pool[0]; + pool->skbuff = kunit_kcalloc(test, pool->size, sizeof(void *), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pool->skbuff); + + correlator = ((u64)IBMVETH_NUM_BUFF_POOLS << 32) | 0; + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, false)); + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, true)); + + correlator = ((u64)0 << 32) | adapter->rx_buff_pool[0].size; + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, false)); + KUNIT_EXPECT_EQ(test, -EINVAL, ibmveth_remove_buffer_from_pool(adapter, correlator, true)); + + correlator = (u64)0 | 0; + pool->skbuff[0] = NULL; + KUNIT_EXPECT_EQ(test, -EFAULT, ibmveth_remove_buffer_from_pool(adapter, correlator, false)); + KUNIT_EXPECT_EQ(test, -EFAULT, ibmveth_remove_buffer_from_pool(adapter, correlator, true)); + + flush_work(&adapter->work); +} + +/** + * ibmveth_rxq_get_buffer_test - unit test for ibmveth_rxq_get_buffer + * @test: pointer to kunit structure + * + * Tests ibmveth_rxq_get_buffer. ibmveth_rxq_get_buffer also calls WARN_ON for + * the NULL returns, so dmesg should be checked to see that these warnings + * happened. + * + * Return: void + */ +static void ibmveth_rxq_get_buffer_test(struct kunit *test) +{ + struct ibmveth_adapter *adapter = kunit_kzalloc(test, sizeof(*adapter), GFP_KERNEL); + struct sk_buff *skb = kunit_kzalloc(test, sizeof(*skb), GFP_KERNEL); + struct ibmveth_buff_pool *pool; + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb); + + INIT_WORK(&adapter->work, ibmveth_reset_kunit); + + adapter->rx_queue.queue_len = 1; + adapter->rx_queue.index = 0; + adapter->rx_queue.queue_addr = kunit_kzalloc(test, sizeof(struct ibmveth_rx_q_entry), + GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, adapter->rx_queue.queue_addr); + + /* Set sane values for buffer pools */ + for (int i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) + ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i, + pool_count[i], pool_size[i], + pool_active[i]); + + pool = &adapter->rx_buff_pool[0]; + pool->skbuff = kunit_kcalloc(test, pool->size, sizeof(void *), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pool->skbuff); + + adapter->rx_queue.queue_addr[0].correlator = (u64)IBMVETH_NUM_BUFF_POOLS << 32 | 0; + KUNIT_EXPECT_PTR_EQ(test, NULL, ibmveth_rxq_get_buffer(adapter)); + + adapter->rx_queue.queue_addr[0].correlator = (u64)0 << 32 | adapter->rx_buff_pool[0].size; + KUNIT_EXPECT_PTR_EQ(test, NULL, ibmveth_rxq_get_buffer(adapter)); + + pool->skbuff[0] = skb; + adapter->rx_queue.queue_addr[0].correlator = (u64)0 << 32 | 0; + KUNIT_EXPECT_PTR_EQ(test, skb, ibmveth_rxq_get_buffer(adapter)); + + flush_work(&adapter->work); +} + +static struct kunit_case ibmveth_test_cases[] = { + KUNIT_CASE(ibmveth_remove_buffer_from_pool_test), + KUNIT_CASE(ibmveth_rxq_get_buffer_test), + {} +}; + +static struct kunit_suite ibmveth_test_suite = { + .name = "ibmveth-kunit-test", + .test_cases = ibmveth_test_cases, +}; + +kunit_test_suite(ibmveth_test_suite); +#endif -- 2.51.0 From 8f0ae19346ce1cadf17f5ea6b01e7b6eb815e2fd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 2 May 2025 18:18:56 -0700 Subject: [PATCH 07/16] selftests: net: exit cleanly on SIGTERM / timeout ksft runner sends 2 SIGTERMs in a row if a test runs out of time. Handle this in a similar way we handle SIGINT - cleanup and stop running further tests. Because we get 2 signals we need a bit of logic to ignore the subsequent one, they come immediately one after the other (due to commit 9616cb34b08e ("kselftest/runner.sh: Propagate SIGTERM to runner child")). This change makes sure we run cleanup (scheduled defer()s) and also print a stack trace on SIGTERM, which doesn't happen by default. Tests occasionally hang in NIPA and it's impossible to tell what they are waiting from or doing. Signed-off-by: Jakub Kicinski Link: https://patch.msgid.link/20250503011856.46308-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/lib/py/ksft.py | 24 +++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 3cfad0fd4570..61287c203b6e 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -3,6 +3,7 @@ import builtins import functools import inspect +import signal import sys import time import traceback @@ -26,6 +27,10 @@ class KsftXfailEx(Exception): pass +class KsftTerminate(KeyboardInterrupt): + pass + + def ksft_pr(*objs, **kwargs): print("#", *objs, **kwargs) @@ -193,6 +198,17 @@ def ksft_setup(env): return env +def _ksft_intr(signum, frame): + # ksft runner.sh sends 2 SIGTERMs in a row on a timeout + # if we don't ignore the second one it will stop us from handling cleanup + global term_cnt + term_cnt += 1 + if term_cnt == 1: + raise KsftTerminate() + else: + ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...") + + def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases = cases or [] @@ -205,6 +221,10 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases.append(value) break + global term_cnt + term_cnt = 0 + prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr) + totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} print("TAP version 13") @@ -233,7 +253,7 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): for line in tb.strip().split('\n'): ksft_pr("Exception|", line) if stop: - ksft_pr("Stopping tests due to KeyboardInterrupt.") + ksft_pr(f"Stopping tests due to {type(e).__name__}.") KSFT_RESULT = False cnt_key = 'fail' @@ -248,6 +268,8 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): if stop: break + signal.signal(signal.SIGTERM, prev_sigterm) + print( f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0" ) -- 2.51.0 From fbaeb7b0f0ffb660bcc91dd5f96eaf8d99721656 Mon Sep 17 00:00:00 2001 From: Mohsin Bashir Date: Fri, 2 May 2025 19:01:45 -0700 Subject: [PATCH 08/16] eth: fbnic: fix `tx_dropped` counting Fix the tracking of rtnl_link_stats.tx_dropped. The counter `tmi.drop.frames` is being double counted whereas, the counter `tti.cm_drop.frames` is being skipped. Fixes: f2957147ae7a ("eth: fbnic: add support for TTI HW stats") Signed-off-by: Jakub Kicinski Signed-off-by: Mohsin Bashir Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250503020145.1868252-1-mohsin.bashr@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_netdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index d699f58dda21..f994cbf1857d 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -425,9 +425,9 @@ static void fbnic_get_stats64(struct net_device *dev, /* Record drops from Tx HW Datapath */ tx_dropped += fbd->hw_stats.tmi.drop.frames.value + + fbd->hw_stats.tti.cm_drop.frames.value + fbd->hw_stats.tti.frame_drop.frames.value + - fbd->hw_stats.tti.tbi_drop.frames.value + - fbd->hw_stats.tmi.drop.frames.value; + fbd->hw_stats.tti.tbi_drop.frames.value; for (i = 0; i < fbn->num_tx_queues; i++) { struct fbnic_ring *txr = fbn->tx[i]; -- 2.51.0 From f3b265358b911fe9e495619bdfa7797749474f95 Mon Sep 17 00:00:00 2001 From: Michael Klein Date: Sun, 4 May 2025 19:29:11 +0200 Subject: [PATCH 09/16] net: phy: realtek: remove unsed RTL821x_PHYSR* macros These macros have there since the first revision but were never used, so let's just remove them. Signed-off-by: Michael Klein Link: https://patch.msgid.link/20250504172916.243185-2-michael@fossekall.de Signed-off-by: Paolo Abeni --- drivers/net/phy/realtek/realtek_main.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 05c4f4d394a5..a6d21dfb1073 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -18,10 +18,6 @@ #include "realtek.h" -#define RTL821x_PHYSR 0x11 -#define RTL821x_PHYSR_DUPLEX BIT(13) -#define RTL821x_PHYSR_SPEED GENMASK(15, 14) - #define RTL821x_INER 0x12 #define RTL8211B_INER_INIT 0x6400 #define RTL8211E_INER_LINK_STATUS BIT(10) -- 2.51.0 From 7c6fa3ffd2650347b1d37f028e232e53d617c1af Mon Sep 17 00:00:00 2001 From: Michael Klein Date: Sun, 4 May 2025 19:29:12 +0200 Subject: [PATCH 10/16] net: phy: realtek: Clean up RTL821x ExtPage access Factor out RTL8211E extension page access code to rtl821x_modify_ext_page() and clean up rtl8211e_config_init() Signed-off-by: Michael Klein Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250504172916.243185-3-michael@fossekall.de Signed-off-by: Paolo Abeni --- drivers/net/phy/realtek/realtek_main.c | 38 ++++++++++++++++---------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index a6d21dfb1073..0f005a449719 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -26,7 +26,9 @@ #define RTL821x_INSR 0x13 #define RTL821x_EXT_PAGE_SELECT 0x1e + #define RTL821x_PAGE_SELECT 0x1f +#define RTL821x_SET_EXT_PAGE 0x07 #define RTL8211F_PHYCR1 0x18 #define RTL8211F_PHYCR2 0x19 @@ -69,9 +71,12 @@ #define RTL8211F_ALDPS_ENABLE BIT(2) #define RTL8211F_ALDPS_XTAL_OFF BIT(12) +#define RTL8211E_RGMII_EXT_PAGE 0xa4 +#define RTL8211E_RGMII_DELAY 0x1c #define RTL8211E_CTRL_DELAY BIT(13) #define RTL8211E_TX_DELAY BIT(12) #define RTL8211E_RX_DELAY BIT(11) +#define RTL8211E_DELAY_MASK GENMASK(13, 11) #define RTL8201F_ISR 0x1e #define RTL8201F_ISR_ANERR BIT(15) @@ -151,6 +156,21 @@ static int rtl821x_write_page(struct phy_device *phydev, int page) return __phy_write(phydev, RTL821x_PAGE_SELECT, page); } +static int rtl821x_modify_ext_page(struct phy_device *phydev, u16 ext_page, + u32 regnum, u16 mask, u16 set) +{ + int oldpage, ret = 0; + + oldpage = phy_select_page(phydev, RTL821x_SET_EXT_PAGE); + if (oldpage >= 0) { + ret = __phy_write(phydev, RTL821x_EXT_PAGE_SELECT, ext_page); + if (ret == 0) + ret = __phy_modify(phydev, regnum, mask, set); + } + + return phy_restore_page(phydev, oldpage, ret); +} + static int rtl821x_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; @@ -670,7 +690,6 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, static int rtl8211e_config_init(struct phy_device *phydev) { - int ret = 0, oldpage; u16 val; /* enable TX/RX delay for rgmii-* modes, and disable them for rgmii. */ @@ -700,20 +719,9 @@ static int rtl8211e_config_init(struct phy_device *phydev) * 12 = RX Delay, 11 = TX Delay * 10:0 = Test && debug settings reserved by realtek */ - oldpage = phy_select_page(phydev, 0x7); - if (oldpage < 0) - goto err_restore_page; - - ret = __phy_write(phydev, RTL821x_EXT_PAGE_SELECT, 0xa4); - if (ret) - goto err_restore_page; - - ret = __phy_modify(phydev, 0x1c, RTL8211E_CTRL_DELAY - | RTL8211E_TX_DELAY | RTL8211E_RX_DELAY, - val); - -err_restore_page: - return phy_restore_page(phydev, oldpage, ret); + return rtl821x_modify_ext_page(phydev, RTL8211E_RGMII_EXT_PAGE, + RTL8211E_RGMII_DELAY, + RTL8211E_DELAY_MASK, val); } static int rtl8211b_suspend(struct phy_device *phydev) -- 2.51.0 From 12d40df259e38851a0d973535e6023b33e2ea4f9 Mon Sep 17 00:00:00 2001 From: Michael Klein Date: Sun, 4 May 2025 19:29:13 +0200 Subject: [PATCH 11/16] net: phy: realtek: add RTL8211F register defines Add some more defines for RTL8211F page and register numbers. Signed-off-by: Michael Klein Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250504172916.243185-4-michael@fossekall.de Signed-off-by: Paolo Abeni --- drivers/net/phy/realtek/realtek_main.c | 34 ++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 0f005a449719..ca6d2903b1c9 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -30,11 +30,14 @@ #define RTL821x_PAGE_SELECT 0x1f #define RTL821x_SET_EXT_PAGE 0x07 +/* RTL8211F PHY configuration */ +#define RTL8211F_PHYCR_PAGE 0xa43 #define RTL8211F_PHYCR1 0x18 #define RTL8211F_PHYCR2 0x19 #define RTL8211F_CLKOUT_EN BIT(0) #define RTL8211F_PHYCR2_PHY_EEE_ENABLE BIT(5) +#define RTL8211F_INSR_PAGE 0xa43 #define RTL8211F_INSR 0x1d /* RTL8211F WOL interrupt configuration */ @@ -55,6 +58,8 @@ #define RTL8211F_PHYSICAL_ADDR_WORD1 17 #define RTL8211F_PHYSICAL_ADDR_WORD2 18 +/* RTL8211F LED configuration */ +#define RTL8211F_LEDCR_PAGE 0xd04 #define RTL8211F_LEDCR 0x10 #define RTL8211F_LEDCR_MODE BIT(15) #define RTL8211F_LEDCR_ACT_TXRX BIT(4) @@ -64,7 +69,13 @@ #define RTL8211F_LEDCR_MASK GENMASK(4, 0) #define RTL8211F_LEDCR_SHIFT 5 +/* RTL8211F RGMII configuration */ +#define RTL8211F_RGMII_PAGE 0xd08 + +#define RTL8211F_TXCR 0x11 #define RTL8211F_TX_DELAY BIT(8) + +#define RTL8211F_RXCR 0x15 #define RTL8211F_RX_DELAY BIT(3) #define RTL8211F_ALDPS_PLL_OFF BIT(1) @@ -187,7 +198,7 @@ static int rtl821x_probe(struct phy_device *phydev) return dev_err_probe(dev, PTR_ERR(priv->clk), "failed to get phy clock\n"); - ret = phy_read_paged(phydev, 0xa43, RTL8211F_PHYCR1); + ret = phy_read_paged(phydev, RTL8211F_PHYCR_PAGE, RTL8211F_PHYCR1); if (ret < 0) return ret; @@ -197,7 +208,7 @@ static int rtl821x_probe(struct phy_device *phydev) priv->has_phycr2 = !(phy_id == RTL_8211FVD_PHYID); if (priv->has_phycr2) { - ret = phy_read_paged(phydev, 0xa43, RTL8211F_PHYCR2); + ret = phy_read_paged(phydev, RTL8211F_PHYCR_PAGE, RTL8211F_PHYCR2); if (ret < 0) return ret; @@ -233,7 +244,7 @@ static int rtl8211f_ack_interrupt(struct phy_device *phydev) { int err; - err = phy_read_paged(phydev, 0xa43, RTL8211F_INSR); + err = phy_read_paged(phydev, RTL8211F_INSR_PAGE, RTL8211F_INSR); return (err < 0) ? err : 0; } @@ -376,7 +387,7 @@ static irqreturn_t rtl8211f_handle_interrupt(struct phy_device *phydev) { int irq_status; - irq_status = phy_read_paged(phydev, 0xa43, RTL8211F_INSR); + irq_status = phy_read_paged(phydev, RTL8211F_INSR_PAGE, RTL8211F_INSR); if (irq_status < 0) { phy_error(phydev); return IRQ_NONE; @@ -473,7 +484,7 @@ static int rtl8211f_config_init(struct phy_device *phydev) u16 val_txdly, val_rxdly; int ret; - ret = phy_modify_paged_changed(phydev, 0xa43, RTL8211F_PHYCR1, + ret = phy_modify_paged_changed(phydev, RTL8211F_PHYCR_PAGE, RTL8211F_PHYCR1, RTL8211F_ALDPS_PLL_OFF | RTL8211F_ALDPS_ENABLE | RTL8211F_ALDPS_XTAL_OFF, priv->phycr1); if (ret < 0) { @@ -507,7 +518,8 @@ static int rtl8211f_config_init(struct phy_device *phydev) return 0; } - ret = phy_modify_paged_changed(phydev, 0xd08, 0x11, RTL8211F_TX_DELAY, + ret = phy_modify_paged_changed(phydev, RTL8211F_RGMII_PAGE, + RTL8211F_TXCR, RTL8211F_TX_DELAY, val_txdly); if (ret < 0) { dev_err(dev, "Failed to update the TX delay register\n"); @@ -522,7 +534,8 @@ static int rtl8211f_config_init(struct phy_device *phydev) str_enabled_disabled(val_txdly)); } - ret = phy_modify_paged_changed(phydev, 0xd08, 0x15, RTL8211F_RX_DELAY, + ret = phy_modify_paged_changed(phydev, RTL8211F_RGMII_PAGE, + RTL8211F_RXCR, RTL8211F_RX_DELAY, val_rxdly); if (ret < 0) { dev_err(dev, "Failed to update the RX delay register\n"); @@ -538,14 +551,15 @@ static int rtl8211f_config_init(struct phy_device *phydev) } /* Disable PHY-mode EEE so LPI is passed to the MAC */ - ret = phy_modify_paged(phydev, 0xa43, RTL8211F_PHYCR2, + ret = phy_modify_paged(phydev, RTL8211F_PHYCR_PAGE, RTL8211F_PHYCR2, RTL8211F_PHYCR2_PHY_EEE_ENABLE, 0); if (ret) return ret; if (priv->has_phycr2) { - ret = phy_modify_paged(phydev, 0xa43, RTL8211F_PHYCR2, - RTL8211F_CLKOUT_EN, priv->phycr2); + ret = phy_modify_paged(phydev, RTL8211F_PHYCR_PAGE, + RTL8211F_PHYCR2, RTL8211F_CLKOUT_EN, + priv->phycr2); if (ret < 0) { dev_err(dev, "clkout configuration failed: %pe\n", ERR_PTR(ret)); -- 2.51.0 From 8c4d0172657c1f2d86b9c19172150abcd0e35c39 Mon Sep 17 00:00:00 2001 From: Michael Klein Date: Sun, 4 May 2025 19:29:14 +0200 Subject: [PATCH 12/16] net: phy: realtek: Group RTL82* macro definitions Group macro definitions by PHY in lexicographic order. Within each PHY block, definitions are order by page number and then register number. Signed-off-by: Michael Klein Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250504172916.243185-5-michael@fossekall.de Signed-off-by: Paolo Abeni --- drivers/net/phy/realtek/realtek_main.c | 72 +++++++++++++------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index ca6d2903b1c9..e01b13a9b5c3 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -18,6 +18,16 @@ #include "realtek.h" +#define RTL8201F_IER 0x13 + +#define RTL8201F_ISR 0x1e +#define RTL8201F_ISR_ANERR BIT(15) +#define RTL8201F_ISR_DUPLEX BIT(13) +#define RTL8201F_ISR_LINK BIT(11) +#define RTL8201F_ISR_MASK (RTL8201F_ISR_ANERR | \ + RTL8201F_ISR_DUPLEX | \ + RTL8201F_ISR_LINK) + #define RTL821x_INER 0x12 #define RTL8211B_INER_INIT 0x6400 #define RTL8211E_INER_LINK_STATUS BIT(10) @@ -30,9 +40,21 @@ #define RTL821x_PAGE_SELECT 0x1f #define RTL821x_SET_EXT_PAGE 0x07 +/* RTL8211E extension page 164/0xa4 */ +#define RTL8211E_RGMII_EXT_PAGE 0xa4 +#define RTL8211E_RGMII_DELAY 0x1c +#define RTL8211E_CTRL_DELAY BIT(13) +#define RTL8211E_TX_DELAY BIT(12) +#define RTL8211E_RX_DELAY BIT(11) +#define RTL8211E_DELAY_MASK GENMASK(13, 11) + /* RTL8211F PHY configuration */ #define RTL8211F_PHYCR_PAGE 0xa43 #define RTL8211F_PHYCR1 0x18 +#define RTL8211F_ALDPS_PLL_OFF BIT(1) +#define RTL8211F_ALDPS_ENABLE BIT(2) +#define RTL8211F_ALDPS_XTAL_OFF BIT(12) + #define RTL8211F_PHYCR2 0x19 #define RTL8211F_CLKOUT_EN BIT(0) #define RTL8211F_PHYCR2_PHY_EEE_ENABLE BIT(5) @@ -40,24 +62,6 @@ #define RTL8211F_INSR_PAGE 0xa43 #define RTL8211F_INSR 0x1d -/* RTL8211F WOL interrupt configuration */ -#define RTL8211F_INTBCR_PAGE 0xd40 -#define RTL8211F_INTBCR 0x16 -#define RTL8211F_INTBCR_INTB_PMEB BIT(5) - -/* RTL8211F WOL settings */ -#define RTL8211F_WOL_SETTINGS_PAGE 0xd8a -#define RTL8211F_WOL_SETTINGS_EVENTS 16 -#define RTL8211F_WOL_EVENT_MAGIC BIT(12) -#define RTL8211F_WOL_SETTINGS_STATUS 17 -#define RTL8211F_WOL_STATUS_RESET (BIT(15) | 0x1fff) - -/* RTL8211F Unique phyiscal and multicast address (WOL) */ -#define RTL8211F_PHYSICAL_ADDR_PAGE 0xd8c -#define RTL8211F_PHYSICAL_ADDR_WORD0 16 -#define RTL8211F_PHYSICAL_ADDR_WORD1 17 -#define RTL8211F_PHYSICAL_ADDR_WORD2 18 - /* RTL8211F LED configuration */ #define RTL8211F_LEDCR_PAGE 0xd04 #define RTL8211F_LEDCR 0x10 @@ -78,25 +82,23 @@ #define RTL8211F_RXCR 0x15 #define RTL8211F_RX_DELAY BIT(3) -#define RTL8211F_ALDPS_PLL_OFF BIT(1) -#define RTL8211F_ALDPS_ENABLE BIT(2) -#define RTL8211F_ALDPS_XTAL_OFF BIT(12) +/* RTL8211F WOL interrupt configuration */ +#define RTL8211F_INTBCR_PAGE 0xd40 +#define RTL8211F_INTBCR 0x16 +#define RTL8211F_INTBCR_INTB_PMEB BIT(5) -#define RTL8211E_RGMII_EXT_PAGE 0xa4 -#define RTL8211E_RGMII_DELAY 0x1c -#define RTL8211E_CTRL_DELAY BIT(13) -#define RTL8211E_TX_DELAY BIT(12) -#define RTL8211E_RX_DELAY BIT(11) -#define RTL8211E_DELAY_MASK GENMASK(13, 11) +/* RTL8211F WOL settings */ +#define RTL8211F_WOL_SETTINGS_PAGE 0xd8a +#define RTL8211F_WOL_SETTINGS_EVENTS 16 +#define RTL8211F_WOL_EVENT_MAGIC BIT(12) +#define RTL8211F_WOL_SETTINGS_STATUS 17 +#define RTL8211F_WOL_STATUS_RESET (BIT(15) | 0x1fff) -#define RTL8201F_ISR 0x1e -#define RTL8201F_ISR_ANERR BIT(15) -#define RTL8201F_ISR_DUPLEX BIT(13) -#define RTL8201F_ISR_LINK BIT(11) -#define RTL8201F_ISR_MASK (RTL8201F_ISR_ANERR | \ - RTL8201F_ISR_DUPLEX | \ - RTL8201F_ISR_LINK) -#define RTL8201F_IER 0x13 +/* RTL8211F Unique phyiscal and multicast address (WOL) */ +#define RTL8211F_PHYSICAL_ADDR_PAGE 0xd8c +#define RTL8211F_PHYSICAL_ADDR_WORD0 16 +#define RTL8211F_PHYSICAL_ADDR_WORD1 17 +#define RTL8211F_PHYSICAL_ADDR_WORD2 18 #define RTL822X_VND1_SERDES_OPTION 0x697a #define RTL822X_VND1_SERDES_OPTION_MODE_MASK GENMASK(5, 0) -- 2.51.0 From be1cc96ddf82bb0c0a159751f73239d6d3e9594a Mon Sep 17 00:00:00 2001 From: Michael Klein Date: Sun, 4 May 2025 19:29:15 +0200 Subject: [PATCH 13/16] net: phy: realtek: use __set_bit() in rtl8211f_led_hw_control_get() rtl8211f_led_hw_control_get() does not need atomic bit operations, replace set_bit() by __set_bit(). Signed-off-by: Michael Klein Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250504172916.243185-6-michael@fossekall.de Signed-off-by: Paolo Abeni --- drivers/net/phy/realtek/realtek_main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index e01b13a9b5c3..e26098a2ff27 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -659,17 +659,17 @@ static int rtl8211f_led_hw_control_get(struct phy_device *phydev, u8 index, val &= RTL8211F_LEDCR_MASK; if (val & RTL8211F_LEDCR_LINK_10) - set_bit(TRIGGER_NETDEV_LINK_10, rules); + __set_bit(TRIGGER_NETDEV_LINK_10, rules); if (val & RTL8211F_LEDCR_LINK_100) - set_bit(TRIGGER_NETDEV_LINK_100, rules); + __set_bit(TRIGGER_NETDEV_LINK_100, rules); if (val & RTL8211F_LEDCR_LINK_1000) - set_bit(TRIGGER_NETDEV_LINK_1000, rules); + __set_bit(TRIGGER_NETDEV_LINK_1000, rules); if (val & RTL8211F_LEDCR_ACT_TXRX) { - set_bit(TRIGGER_NETDEV_RX, rules); - set_bit(TRIGGER_NETDEV_TX, rules); + __set_bit(TRIGGER_NETDEV_RX, rules); + __set_bit(TRIGGER_NETDEV_TX, rules); } return 0; -- 2.51.0 From 708686132ba02659267c0cebcc414348ece389a5 Mon Sep 17 00:00:00 2001 From: Michael Klein Date: Sun, 4 May 2025 19:29:16 +0200 Subject: [PATCH 14/16] net: phy: realtek: Add support for PHY LEDs on RTL8211E Like the RTL8211F, the RTL8211E PHY supports up to three LEDs. Add netdev trigger support for them, too. Signed-off-by: Michael Klein Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20250504172916.243185-7-michael@fossekall.de Signed-off-by: Paolo Abeni --- drivers/net/phy/realtek/realtek_main.c | 125 +++++++++++++++++++++++-- 1 file changed, 119 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index e26098a2ff27..301fbe141b9b 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -40,6 +40,20 @@ #define RTL821x_PAGE_SELECT 0x1f #define RTL821x_SET_EXT_PAGE 0x07 +/* RTL8211E extension page 44/0x2c */ +#define RTL8211E_LEDCR_EXT_PAGE 0x2c +#define RTL8211E_LEDCR1 0x1a +#define RTL8211E_LEDCR1_ACT_TXRX BIT(4) +#define RTL8211E_LEDCR1_MASK BIT(4) +#define RTL8211E_LEDCR1_SHIFT 1 + +#define RTL8211E_LEDCR2 0x1c +#define RTL8211E_LEDCR2_LINK_1000 BIT(2) +#define RTL8211E_LEDCR2_LINK_100 BIT(1) +#define RTL8211E_LEDCR2_LINK_10 BIT(0) +#define RTL8211E_LEDCR2_MASK GENMASK(2, 0) +#define RTL8211E_LEDCR2_SHIFT 4 + /* RTL8211E extension page 164/0xa4 */ #define RTL8211E_RGMII_EXT_PAGE 0xa4 #define RTL8211E_RGMII_DELAY 0x1c @@ -145,7 +159,8 @@ #define RTL_8221B_VN_CG 0x001cc84a #define RTL_8251B 0x001cc862 -#define RTL8211F_LED_COUNT 3 +/* RTL8211E and RTL8211F support up to three LEDs */ +#define RTL8211x_LED_COUNT 3 MODULE_DESCRIPTION("Realtek PHY driver"); MODULE_AUTHOR("Johnson Leung"); @@ -169,6 +184,21 @@ static int rtl821x_write_page(struct phy_device *phydev, int page) return __phy_write(phydev, RTL821x_PAGE_SELECT, page); } +static int rtl821x_read_ext_page(struct phy_device *phydev, u16 ext_page, + u32 regnum) +{ + int oldpage, ret = 0; + + oldpage = phy_select_page(phydev, RTL821x_SET_EXT_PAGE); + if (oldpage >= 0) { + ret = __phy_write(phydev, RTL821x_EXT_PAGE_SELECT, ext_page); + if (ret == 0) + ret = __phy_read(phydev, regnum); + } + + return phy_restore_page(phydev, oldpage, ret); +} + static int rtl821x_modify_ext_page(struct phy_device *phydev, u16 ext_page, u32 regnum, u16 mask, u16 set) { @@ -608,7 +638,7 @@ static int rtl821x_resume(struct phy_device *phydev) return 0; } -static int rtl8211f_led_hw_is_supported(struct phy_device *phydev, u8 index, +static int rtl8211x_led_hw_is_supported(struct phy_device *phydev, u8 index, unsigned long rules) { const unsigned long mask = BIT(TRIGGER_NETDEV_LINK_10) | @@ -627,9 +657,11 @@ static int rtl8211f_led_hw_is_supported(struct phy_device *phydev, u8 index, * rates and Active indication always at all three 10+100+1000 * link rates. * This code currently uses mode B only. + * + * RTL8211E PHY LED has one mode, which works like RTL8211F mode B. */ - if (index >= RTL8211F_LED_COUNT) + if (index >= RTL8211x_LED_COUNT) return -EINVAL; /* Filter out any other unsupported triggers. */ @@ -648,7 +680,7 @@ static int rtl8211f_led_hw_control_get(struct phy_device *phydev, u8 index, { int val; - if (index >= RTL8211F_LED_COUNT) + if (index >= RTL8211x_LED_COUNT) return -EINVAL; val = phy_read_paged(phydev, 0xd04, RTL8211F_LEDCR); @@ -681,7 +713,7 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, const u16 mask = RTL8211F_LEDCR_MASK << (RTL8211F_LEDCR_SHIFT * index); u16 reg = 0; - if (index >= RTL8211F_LED_COUNT) + if (index >= RTL8211x_LED_COUNT) return -EINVAL; if (test_bit(TRIGGER_NETDEV_LINK_10, &rules)) @@ -704,6 +736,84 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, return phy_modify_paged(phydev, 0xd04, RTL8211F_LEDCR, mask, reg); } +static int rtl8211e_led_hw_control_get(struct phy_device *phydev, u8 index, + unsigned long *rules) +{ + int ret; + u16 cr1, cr2; + + if (index >= RTL8211x_LED_COUNT) + return -EINVAL; + + ret = rtl821x_read_ext_page(phydev, RTL8211E_LEDCR_EXT_PAGE, + RTL8211E_LEDCR1); + if (ret < 0) + return ret; + + cr1 = ret >> RTL8211E_LEDCR1_SHIFT * index; + if (cr1 & RTL8211E_LEDCR1_ACT_TXRX) { + __set_bit(TRIGGER_NETDEV_RX, rules); + __set_bit(TRIGGER_NETDEV_TX, rules); + } + + ret = rtl821x_read_ext_page(phydev, RTL8211E_LEDCR_EXT_PAGE, + RTL8211E_LEDCR2); + if (ret < 0) + return ret; + + cr2 = ret >> RTL8211E_LEDCR2_SHIFT * index; + if (cr2 & RTL8211E_LEDCR2_LINK_10) + __set_bit(TRIGGER_NETDEV_LINK_10, rules); + + if (cr2 & RTL8211E_LEDCR2_LINK_100) + __set_bit(TRIGGER_NETDEV_LINK_100, rules); + + if (cr2 & RTL8211E_LEDCR2_LINK_1000) + __set_bit(TRIGGER_NETDEV_LINK_1000, rules); + + return ret; +} + +static int rtl8211e_led_hw_control_set(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + const u16 cr1mask = + RTL8211E_LEDCR1_MASK << (RTL8211E_LEDCR1_SHIFT * index); + const u16 cr2mask = + RTL8211E_LEDCR2_MASK << (RTL8211E_LEDCR2_SHIFT * index); + u16 cr1 = 0, cr2 = 0; + int ret; + + if (index >= RTL8211x_LED_COUNT) + return -EINVAL; + + if (test_bit(TRIGGER_NETDEV_RX, &rules) || + test_bit(TRIGGER_NETDEV_TX, &rules)) { + cr1 |= RTL8211E_LEDCR1_ACT_TXRX; + } + + cr1 <<= RTL8211E_LEDCR1_SHIFT * index; + ret = rtl821x_modify_ext_page(phydev, RTL8211E_LEDCR_EXT_PAGE, + RTL8211E_LEDCR1, cr1mask, cr1); + if (ret < 0) + return ret; + + if (test_bit(TRIGGER_NETDEV_LINK_10, &rules)) + cr2 |= RTL8211E_LEDCR2_LINK_10; + + if (test_bit(TRIGGER_NETDEV_LINK_100, &rules)) + cr2 |= RTL8211E_LEDCR2_LINK_100; + + if (test_bit(TRIGGER_NETDEV_LINK_1000, &rules)) + cr2 |= RTL8211E_LEDCR2_LINK_1000; + + cr2 <<= RTL8211E_LEDCR2_SHIFT * index; + ret = rtl821x_modify_ext_page(phydev, RTL8211E_LEDCR_EXT_PAGE, + RTL8211E_LEDCR2, cr2mask, cr2); + + return ret; +} + static int rtl8211e_config_init(struct phy_device *phydev) { u16 val; @@ -1479,6 +1589,9 @@ static struct phy_driver realtek_drvs[] = { .resume = genphy_resume, .read_page = rtl821x_read_page, .write_page = rtl821x_write_page, + .led_hw_is_supported = rtl8211x_led_hw_is_supported, + .led_hw_control_get = rtl8211e_led_hw_control_get, + .led_hw_control_set = rtl8211e_led_hw_control_set, }, { PHY_ID_MATCH_EXACT(0x001cc916), .name = "RTL8211F Gigabit Ethernet", @@ -1494,7 +1607,7 @@ static struct phy_driver realtek_drvs[] = { .read_page = rtl821x_read_page, .write_page = rtl821x_write_page, .flags = PHY_ALWAYS_CALL_SUSPEND, - .led_hw_is_supported = rtl8211f_led_hw_is_supported, + .led_hw_is_supported = rtl8211x_led_hw_is_supported, .led_hw_control_get = rtl8211f_led_hw_control_get, .led_hw_control_set = rtl8211f_led_hw_control_set, }, { -- 2.51.0 From df6a69bc8f31fc34ac1f5408a82e60a3f31d905e Mon Sep 17 00:00:00 2001 From: David Wei Date: Fri, 2 May 2025 21:30:07 -0700 Subject: [PATCH 15/16] io_uring/zcrx: selftests: fix setting ntuple rule into rss Fix ethtool syntax for setting ntuple rule into rss. It should be `context' instead of `action'. Signed-off-by: David Wei Link: https://patch.msgid.link/20250503043007.857215-1-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/iou-zcrx.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index aef43f82edd5..9c03fd777f3d 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -19,8 +19,8 @@ def _get_combined_channels(cfg): return int(values[1]) -def _create_rss_ctx(cfg, chans): - output = ethtool(f"-X {cfg.ifname} context new start {chans - 1} equal 1", host=cfg.remote).stdout +def _create_rss_ctx(cfg, chan): + output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1", host=cfg.remote).stdout values = re.search(r'New RSS context is (\d+)', output).group(1) ctx_id = int(values) return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}", host=cfg.remote)) @@ -32,8 +32,8 @@ def _set_flow_rule(cfg, port, chan): return int(values) -def _set_flow_rule_rss(cfg, port, chan): - output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}", host=cfg.remote).stdout +def _set_flow_rule_rss(cfg, port, ctx_id): + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}", host=cfg.remote).stdout values = re.search(r'ID (\d+)', output).group(1) return int(values) @@ -121,7 +121,7 @@ def test_zcrx_rss(cfg) -> None: ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) - (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans) + (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1) flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id) defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) -- 2.51.0 From 37006af675e8ced690ffb2285d0f15a98323a7de Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 5 May 2025 13:45:10 +0200 Subject: [PATCH 16/16] tools: ynl-gen: allow noncontiguous enums in case the enum has holes, instead of hard stop, generate a validation callback to check valid enum values. Signed-off-by: Jiri Pirko Link: https://patch.msgid.link/20250505114513.53370-2-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 58 ++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 09b87c9a6908..acba03bbe67d 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -389,11 +389,14 @@ class TypeScalar(Type): if 'enum' in self.attr: enum = self.family.consts[self.attr['enum']] low, high = enum.value_range() - if 'min' not in self.checks: - if low != 0 or self.type[0] == 's': - self.checks['min'] = low - if 'max' not in self.checks: - self.checks['max'] = high + if low == None and high == None: + self.checks['sparse'] = True + else: + if 'min' not in self.checks: + if low != 0 or self.type[0] == 's': + self.checks['min'] = low + if 'max' not in self.checks: + self.checks['max'] = high if 'min' in self.checks and 'max' in self.checks: if self.get_limit('min') > self.get_limit('max'): @@ -425,6 +428,8 @@ class TypeScalar(Type): return f"NLA_POLICY_MIN({policy}, {self.get_limit_str('min')})" elif 'max' in self.checks: return f"NLA_POLICY_MAX({policy}, {self.get_limit_str('max')})" + elif 'sparse' in self.checks: + return f"NLA_POLICY_VALIDATE_FN({policy}, &{c_lower(self.enum_name)}_validate)" return super()._attr_policy(policy) def _attr_typol(self): @@ -930,7 +935,7 @@ class EnumSet(SpecEnumSet): high = max([x.value for x in self.entries.values()]) if high - low + 1 != len(self.entries): - raise Exception("Can't get value range for a noncontiguous enum") + return None, None return low, high @@ -2426,6 +2431,46 @@ def print_kernel_policy_ranges(family, cw): cw.nl() +def print_kernel_policy_sparse_enum_validates(family, cw): + first = True + for _, attr_set in family.attr_sets.items(): + if attr_set.subset_of: + continue + + for _, attr in attr_set.items(): + if not attr.request: + continue + if not attr.enum_name: + continue + if 'sparse' not in attr.checks: + continue + + if first: + cw.p('/* Sparse enums validation callbacks */') + first = False + + sign = '' if attr.type[0] == 'u' else '_signed' + suffix = 'ULL' if attr.type[0] == 'u' else 'LL' + cw.write_func_prot('static int', f'{c_lower(attr.enum_name)}_validate', + ['const struct nlattr *attr', 'struct netlink_ext_ack *extack']) + cw.block_start() + cw.block_start(line=f'switch (nla_get_{attr["type"]}(attr))') + enum = family.consts[attr['enum']] + first_entry = True + for entry in enum.entries.values(): + if first_entry: + first_entry = False + else: + cw.p('fallthrough;') + cw.p(f'case {entry.c_name}:') + cw.p('return 0;') + cw.block_end() + cw.p('NL_SET_ERR_MSG_ATTR(extack, attr, "invalid enum value");') + cw.p('return -EINVAL;') + cw.block_end() + cw.nl() + + def print_kernel_op_table_fwd(family, cw, terminate): exported = not kernel_can_gen_family_struct(family) @@ -3097,6 +3142,7 @@ def main(): print_kernel_family_struct_hdr(parsed, cw) else: print_kernel_policy_ranges(parsed, cw) + print_kernel_policy_sparse_enum_validates(parsed, cw) for _, struct in sorted(parsed.pure_nested_structs.items()): if struct.request: -- 2.51.0