From 663ad7481f068057f6f692c5368c47150e855370 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 13 Dec 2024 13:07:11 +0000 Subject: [PATCH 01/16] tools/net/ynl: fix sub-message key lookup for nested attributes Use the correct attribute space for sub-message key lookup in nested attributes when adding attributes. This fixes rt_link where the "kind" key and "data" sub-message are nested attributes in "linkinfo". For example: ./tools/net/ynl/cli.py \ --create \ --spec Documentation/netlink/specs/rt_link.yaml \ --do newlink \ --json '{"link": 99, "linkinfo": { "kind": "vlan", "data": {"id": 4 } } }' Signed-off-by: Donald Hunter Fixes: ab463c4342d1 ("tools/net/ynl: Add support for encoding sub-messages") Link: https://patch.msgid.link/20241213130711.40267-1-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 01ec01a90e76..eea29359a899 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -556,10 +556,10 @@ class YnlFamily(SpecFamily): if attr["type"] == 'nest': nl_type |= Netlink.NLA_F_NESTED attr_payload = b'' - sub_attrs = SpaceAttrs(self.attr_sets[space], value, search_attrs) + sub_space = attr['nested-attributes'] + sub_attrs = SpaceAttrs(self.attr_sets[sub_space], value, search_attrs) for subname, subvalue in value.items(): - attr_payload += self._add_attr(attr['nested-attributes'], - subname, subvalue, sub_attrs) + attr_payload += self._add_attr(sub_space, subname, subvalue, sub_attrs) elif attr["type"] == 'flag': if not value: # If value is absent or false then skip attribute creation. -- 2.50.1 From 9590d32e090ea2751e131ae5273859ca22f5ac14 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 12 Dec 2024 13:31:55 -0800 Subject: [PATCH 02/16] ionic: Fix netdev notifier unregister on failure If register_netdev() fails, then the driver leaks the netdev notifier. Fix this by calling ionic_lif_unregister() on register_netdev() failure. This will also call ionic_lif_unregister_phc() if it has already been registered. Fixes: 30b87ab4c0b3 ("ionic: remove lif list concept") Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20241212213157.12212-2-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 40496587b2b3..3d3f936779f7 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3869,8 +3869,8 @@ int ionic_lif_register(struct ionic_lif *lif) /* only register LIF0 for now */ err = register_netdev(lif->netdev); if (err) { - dev_err(lif->ionic->dev, "Cannot register net device, aborting\n"); - ionic_lif_unregister_phc(lif); + dev_err(lif->ionic->dev, "Cannot register net device: %d, aborting\n", err); + ionic_lif_unregister(lif); return err; } -- 2.50.1 From 746e6ae2e202b062b9deee7bd86d94937997ecd7 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 12 Dec 2024 13:31:56 -0800 Subject: [PATCH 03/16] ionic: no double destroy workqueue There are some FW error handling paths that can cause us to try to destroy the workqueue more than once, so let's be sure we're checking for that. The case where this popped up was in an AER event where the handlers got called in such a way that ionic_reset_prepare() and thus ionic_dev_teardown() got called twice in a row. The second time through the workqueue was already destroyed, and destroy_workqueue() choked on the bad wq pointer. We didn't hit this in AER handler testing before because at that time we weren't using a private workqueue. Later we replaced the use of the system workqueue with our own private workqueue but hadn't rerun the AER handler testing since then. Fixes: 9e25450da700 ("ionic: add private workqueue per-device") Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20241212213157.12212-3-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 9e42d599840d..57edcde9e6f8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -277,7 +277,10 @@ void ionic_dev_teardown(struct ionic *ionic) idev->phy_cmb_pages = 0; idev->cmb_npages = 0; - destroy_workqueue(ionic->wq); + if (ionic->wq) { + destroy_workqueue(ionic->wq); + ionic->wq = NULL; + } mutex_destroy(&idev->cmb_inuse_lock); } -- 2.50.1 From b096d62ba1323391b2db98b7704e2468cf3b1588 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 12 Dec 2024 13:31:57 -0800 Subject: [PATCH 04/16] ionic: use ee->offset when returning sprom data Some calls into ionic_get_module_eeprom() don't use a single full buffer size, but instead multiple calls with an offset. Teach our driver to use the offset correctly so we can respond appropriately to the caller. Fixes: 4d03e00a2140 ("ionic: Add initial ethtool support") Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20241212213157.12212-4-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index dda22fa4448c..9b7f78b6cdb1 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -961,8 +961,8 @@ static int ionic_get_module_eeprom(struct net_device *netdev, len = min_t(u32, sizeof(xcvr->sprom), ee->len); do { - memcpy(data, xcvr->sprom, len); - memcpy(tbuf, xcvr->sprom, len); + memcpy(data, &xcvr->sprom[ee->offset], len); + memcpy(tbuf, &xcvr->sprom[ee->offset], len); /* Let's make sure we got a consistent copy */ if (!memcmp(data, tbuf, len)) -- 2.50.1 From 922b4b955a03d19fea98938f33ef0e62d01f5159 Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Thu, 12 Dec 2024 11:25:58 +0500 Subject: [PATCH 05/16] net: renesas: rswitch: rework ts tags management The existing linked list based implementation of how ts tags are assigned and managed is unsafe against concurrency and corner cases: - element addition in tx processing can race against element removal in ts queue completion, - element removal in ts queue completion can race against element removal in device close, - if a large number of frames gets added to tx queue without ts queue completions in between, elements with duplicate tag values can get added. Use a different implementation, based on per-port used tags bitmaps and saved skb arrays. Safety for addition in tx processing vs removal in ts completion is provided by: tag = find_first_zero_bit(...); smp_mb(); ts_skb[tag]> set_bit(...); vs ts_skb[tag]> smp_mb(); clear_bit(...); Safety for removal in ts completion vs removal in device close is provided by using atomic read-and-clear for rdev->ts_skb[tag]: ts_skb = xchg(&rdev->ts_skb[tag], NULL); if (ts_skb) Fixes: 33f5d733b589 ("net: renesas: rswitch: Improve TX timestamp accuracy") Signed-off-by: Nikita Yushchenko Link: https://patch.msgid.link/20241212062558.436455-1-nikita.yoush@cogentembedded.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rswitch.c | 74 ++++++++++++++------------ drivers/net/ethernet/renesas/rswitch.h | 13 ++--- 2 files changed, 42 insertions(+), 45 deletions(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index dbbbf024e7ab..9ac6e2aad18f 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -547,7 +547,6 @@ static int rswitch_gwca_ts_queue_alloc(struct rswitch_private *priv) desc = &gq->ts_ring[gq->ring_size]; desc->desc.die_dt = DT_LINKFIX; rswitch_desc_set_dptr(&desc->desc, gq->ring_dma); - INIT_LIST_HEAD(&priv->gwca.ts_info_list); return 0; } @@ -1003,9 +1002,10 @@ static int rswitch_gwca_request_irqs(struct rswitch_private *priv) static void rswitch_ts(struct rswitch_private *priv) { struct rswitch_gwca_queue *gq = &priv->gwca.ts_queue; - struct rswitch_gwca_ts_info *ts_info, *ts_info2; struct skb_shared_hwtstamps shhwtstamps; struct rswitch_ts_desc *desc; + struct rswitch_device *rdev; + struct sk_buff *ts_skb; struct timespec64 ts; unsigned int num; u32 tag, port; @@ -1015,23 +1015,28 @@ static void rswitch_ts(struct rswitch_private *priv) dma_rmb(); port = TS_DESC_DPN(__le32_to_cpu(desc->desc.dptrl)); - tag = TS_DESC_TSUN(__le32_to_cpu(desc->desc.dptrl)); - - list_for_each_entry_safe(ts_info, ts_info2, &priv->gwca.ts_info_list, list) { - if (!(ts_info->port == port && ts_info->tag == tag)) - continue; - - memset(&shhwtstamps, 0, sizeof(shhwtstamps)); - ts.tv_sec = __le32_to_cpu(desc->ts_sec); - ts.tv_nsec = __le32_to_cpu(desc->ts_nsec & cpu_to_le32(0x3fffffff)); - shhwtstamps.hwtstamp = timespec64_to_ktime(ts); - skb_tstamp_tx(ts_info->skb, &shhwtstamps); - dev_consume_skb_irq(ts_info->skb); - list_del(&ts_info->list); - kfree(ts_info); - break; - } + if (unlikely(port >= RSWITCH_NUM_PORTS)) + goto next; + rdev = priv->rdev[port]; + tag = TS_DESC_TSUN(__le32_to_cpu(desc->desc.dptrl)); + if (unlikely(tag >= TS_TAGS_PER_PORT)) + goto next; + ts_skb = xchg(&rdev->ts_skb[tag], NULL); + smp_mb(); /* order rdev->ts_skb[] read before bitmap update */ + clear_bit(tag, rdev->ts_skb_used); + + if (unlikely(!ts_skb)) + goto next; + + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + ts.tv_sec = __le32_to_cpu(desc->ts_sec); + ts.tv_nsec = __le32_to_cpu(desc->ts_nsec & cpu_to_le32(0x3fffffff)); + shhwtstamps.hwtstamp = timespec64_to_ktime(ts); + skb_tstamp_tx(ts_skb, &shhwtstamps); + dev_consume_skb_irq(ts_skb); + +next: gq->cur = rswitch_next_queue_index(gq, true, 1); desc = &gq->ts_ring[gq->cur]; } @@ -1576,8 +1581,9 @@ static int rswitch_open(struct net_device *ndev) static int rswitch_stop(struct net_device *ndev) { struct rswitch_device *rdev = netdev_priv(ndev); - struct rswitch_gwca_ts_info *ts_info, *ts_info2; + struct sk_buff *ts_skb; unsigned long flags; + unsigned int tag; netif_tx_stop_all_queues(ndev); @@ -1594,12 +1600,13 @@ static int rswitch_stop(struct net_device *ndev) if (bitmap_empty(rdev->priv->opened_ports, RSWITCH_NUM_PORTS)) iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDID); - list_for_each_entry_safe(ts_info, ts_info2, &rdev->priv->gwca.ts_info_list, list) { - if (ts_info->port != rdev->port) - continue; - dev_kfree_skb_irq(ts_info->skb); - list_del(&ts_info->list); - kfree(ts_info); + for (tag = find_first_bit(rdev->ts_skb_used, TS_TAGS_PER_PORT); + tag < TS_TAGS_PER_PORT; + tag = find_next_bit(rdev->ts_skb_used, TS_TAGS_PER_PORT, tag + 1)) { + ts_skb = xchg(&rdev->ts_skb[tag], NULL); + clear_bit(tag, rdev->ts_skb_used); + if (ts_skb) + dev_kfree_skb(ts_skb); } return 0; @@ -1612,20 +1619,17 @@ static bool rswitch_ext_desc_set_info1(struct rswitch_device *rdev, desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) | INFO1_IPV(GWCA_IPV_NUM) | INFO1_FMT); if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { - struct rswitch_gwca_ts_info *ts_info; + unsigned int tag; - ts_info = kzalloc(sizeof(*ts_info), GFP_ATOMIC); - if (!ts_info) + tag = find_first_zero_bit(rdev->ts_skb_used, TS_TAGS_PER_PORT); + if (tag == TS_TAGS_PER_PORT) return false; + smp_mb(); /* order bitmap read before rdev->ts_skb[] write */ + rdev->ts_skb[tag] = skb_get(skb); + set_bit(tag, rdev->ts_skb_used); skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - rdev->ts_tag++; - desc->info1 |= cpu_to_le64(INFO1_TSUN(rdev->ts_tag) | INFO1_TXC); - - ts_info->skb = skb_get(skb); - ts_info->port = rdev->port; - ts_info->tag = rdev->ts_tag; - list_add_tail(&ts_info->list, &rdev->priv->gwca.ts_info_list); + desc->info1 |= cpu_to_le64(INFO1_TSUN(tag) | INFO1_TXC); skb_tx_timestamp(skb); } diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h index e020800dcc57..d8d4ed7d7f8b 100644 --- a/drivers/net/ethernet/renesas/rswitch.h +++ b/drivers/net/ethernet/renesas/rswitch.h @@ -972,14 +972,6 @@ struct rswitch_gwca_queue { }; }; -struct rswitch_gwca_ts_info { - struct sk_buff *skb; - struct list_head list; - - int port; - u8 tag; -}; - #define RSWITCH_NUM_IRQ_REGS (RSWITCH_MAX_NUM_QUEUES / BITS_PER_TYPE(u32)) struct rswitch_gwca { unsigned int index; @@ -989,7 +981,6 @@ struct rswitch_gwca { struct rswitch_gwca_queue *queues; int num_queues; struct rswitch_gwca_queue ts_queue; - struct list_head ts_info_list; DECLARE_BITMAP(used, RSWITCH_MAX_NUM_QUEUES); u32 tx_irq_bits[RSWITCH_NUM_IRQ_REGS]; u32 rx_irq_bits[RSWITCH_NUM_IRQ_REGS]; @@ -997,6 +988,7 @@ struct rswitch_gwca { }; #define NUM_QUEUES_PER_NDEV 2 +#define TS_TAGS_PER_PORT 256 struct rswitch_device { struct rswitch_private *priv; struct net_device *ndev; @@ -1004,7 +996,8 @@ struct rswitch_device { void __iomem *addr; struct rswitch_gwca_queue *tx_queue; struct rswitch_gwca_queue *rx_queue; - u8 ts_tag; + struct sk_buff *ts_skb[TS_TAGS_PER_PORT]; + DECLARE_BITMAP(ts_skb_used, TS_TAGS_PER_PORT); bool disabled; int port; -- 2.50.1 From b1f3a2f5a742c1e939a73031bd31b9e557a2d77d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 13 Dec 2024 07:22:40 -0800 Subject: [PATCH 06/16] netdev: fix repeated netlink messages in queue dump The context is supposed to record the next queue to dump, not last dumped. If the dump doesn't fit we will restart from the already-dumped queue, duplicating the message. Before this fix and with the selftest improvements later in this series we see: # ./run_kselftest.sh -t drivers/net:queues.py timeout set to 45 selftests: drivers/net: queues.py KTAP version 1 1..2 # Check| At /root/ksft-net-drv/drivers/net/./queues.py, line 32, in get_queues: # Check| ksft_eq(queues, expected) # Check failed 102 != 100 # Check| At /root/ksft-net-drv/drivers/net/./queues.py, line 32, in get_queues: # Check| ksft_eq(queues, expected) # Check failed 101 != 100 not ok 1 queues.get_queues ok 2 queues.addremove_queues # Totals: pass:1 fail:1 xfail:0 xpass:0 skip:0 error:0 not ok 1 selftests: drivers/net: queues.py # exit=1 With the fix: # ./ksft-net-drv/run_kselftest.sh -t drivers/net:queues.py timeout set to 45 selftests: drivers/net: queues.py KTAP version 1 1..2 ok 1 queues.get_queues ok 2 queues.addremove_queues # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:0 error:0 Fixes: 6b6171db7fc8 ("netdev-genl: Add netlink framework functions for queue") Reviewed-by: Joe Damato Link: https://patch.msgid.link/20241213152244.3080955-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/netdev-genl.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 9527dd46e4dc..9f086b190619 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -488,24 +488,21 @@ netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp, struct netdev_nl_dump_ctx *ctx) { int err = 0; - int i; if (!(netdev->flags & IFF_UP)) return err; - for (i = ctx->rxq_idx; i < netdev->real_num_rx_queues;) { - err = netdev_nl_queue_fill_one(rsp, netdev, i, + for (; ctx->rxq_idx < netdev->real_num_rx_queues; ctx->rxq_idx++) { + err = netdev_nl_queue_fill_one(rsp, netdev, ctx->rxq_idx, NETDEV_QUEUE_TYPE_RX, info); if (err) return err; - ctx->rxq_idx = i++; } - for (i = ctx->txq_idx; i < netdev->real_num_tx_queues;) { - err = netdev_nl_queue_fill_one(rsp, netdev, i, + for (; ctx->txq_idx < netdev->real_num_tx_queues; ctx->txq_idx++) { + err = netdev_nl_queue_fill_one(rsp, netdev, ctx->txq_idx, NETDEV_QUEUE_TYPE_TX, info); if (err) return err; - ctx->txq_idx = i++; } return err; -- 2.50.1 From ecc391a541573da46b7ccc188105efedd40aef1b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 13 Dec 2024 07:22:41 -0800 Subject: [PATCH 07/16] netdev: fix repeated netlink messages in queue stats The context is supposed to record the next queue to dump, not last dumped. If the dump doesn't fit we will restart from the already-dumped queue, duplicating the message. Before this fix and with the selftest improvements later in this series we see: # ./run_kselftest.sh -t drivers/net:stats.py timeout set to 45 selftests: drivers/net: stats.py KTAP version 1 1..5 ok 1 stats.check_pause ok 2 stats.check_fec ok 3 stats.pkt_byte_sum # Check| At /root/ksft-net-drv/drivers/net/./stats.py, line 125, in qstat_by_ifindex: # Check| ksft_eq(len(queues[qtype]), len(set(queues[qtype])), # Check failed 45 != 44 repeated queue keys # Check| At /root/ksft-net-drv/drivers/net/./stats.py, line 127, in qstat_by_ifindex: # Check| ksft_eq(len(queues[qtype]), max(queues[qtype]) + 1, # Check failed 45 != 44 missing queue keys # Check| At /root/ksft-net-drv/drivers/net/./stats.py, line 125, in qstat_by_ifindex: # Check| ksft_eq(len(queues[qtype]), len(set(queues[qtype])), # Check failed 45 != 44 repeated queue keys # Check| At /root/ksft-net-drv/drivers/net/./stats.py, line 127, in qstat_by_ifindex: # Check| ksft_eq(len(queues[qtype]), max(queues[qtype]) + 1, # Check failed 45 != 44 missing queue keys # Check| At /root/ksft-net-drv/drivers/net/./stats.py, line 125, in qstat_by_ifindex: # Check| ksft_eq(len(queues[qtype]), len(set(queues[qtype])), # Check failed 103 != 100 repeated queue keys # Check| At /root/ksft-net-drv/drivers/net/./stats.py, line 127, in qstat_by_ifindex: # Check| ksft_eq(len(queues[qtype]), max(queues[qtype]) + 1, # Check failed 103 != 100 missing queue keys # Check| At /root/ksft-net-drv/drivers/net/./stats.py, line 125, in qstat_by_ifindex: # Check| ksft_eq(len(queues[qtype]), len(set(queues[qtype])), # Check failed 102 != 100 repeated queue keys # Check| At /root/ksft-net-drv/drivers/net/./stats.py, line 127, in qstat_by_ifindex: # Check| ksft_eq(len(queues[qtype]), max(queues[qtype]) + 1, # Check failed 102 != 100 missing queue keys not ok 4 stats.qstat_by_ifindex ok 5 stats.check_down # Totals: pass:4 fail:1 xfail:0 xpass:0 skip:0 error:0 With the fix: # ./ksft-net-drv/run_kselftest.sh -t drivers/net:stats.py timeout set to 45 selftests: drivers/net: stats.py KTAP version 1 1..5 ok 1 stats.check_pause ok 2 stats.check_fec ok 3 stats.pkt_byte_sum ok 4 stats.qstat_by_ifindex ok 5 stats.check_down # Totals: pass:5 fail:0 xfail:0 xpass:0 skip:0 error:0 Fixes: ab63a2387cb9 ("netdev: add per-queue statistics") Reviewed-by: Joe Damato Link: https://patch.msgid.link/20241213152244.3080955-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/netdev-genl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 9f086b190619..1be8c7c21d19 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -668,7 +668,7 @@ netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp, i, info); if (err) return err; - ctx->rxq_idx = i++; + ctx->rxq_idx = ++i; } i = ctx->txq_idx; while (ops->get_queue_stats_tx && i < netdev->real_num_tx_queues) { @@ -676,7 +676,7 @@ netdev_nl_stats_by_queue(struct net_device *netdev, struct sk_buff *rsp, i, info); if (err) return err; - ctx->txq_idx = i++; + ctx->txq_idx = ++i; } ctx->rxq_idx = 0; -- 2.50.1 From 0518863407b8dcc7070fdbc1c015046d66777e78 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 13 Dec 2024 07:22:42 -0800 Subject: [PATCH 08/16] selftests: net: support setting recv_size in YNL recv_size parameter allows constraining the buffer size for dumps. It's useful in testing kernel handling of dump continuation, IOW testing dumps which span multiple skbs. Let the tests set this parameter when initializing the YNL family. Keep the normal default, we don't want tests to unintentionally behave very differently than normal code. Reviewed-by: Joe Damato Reviewed-by: Petr Machata Link: https://patch.msgid.link/20241213152244.3080955-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/ynl.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index a0d689d58c57..076a7e8dc3eb 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -32,23 +32,23 @@ except ModuleNotFoundError as e: # Set schema='' to avoid jsonschema validation, it's slow # class EthtoolFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) class RtnlFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) class NetdevFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) class NetshaperFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) -- 2.50.1 From 1234810b1649e9d781aeafd4b23fb1fcfbf95d8f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 13 Dec 2024 07:22:43 -0800 Subject: [PATCH 09/16] selftests: net-drv: queues: sanity check netlink dumps This test already catches a netlink bug fixed by this series, but only when running on HW with many queues. Make sure the netdevsim instance created has a lot of queues, and constrain the size of the recv_buffer used by netlink. While at it test both rx and tx queues. Reviewed-by: Joe Damato Reviewed-by: Petr Machata Link: https://patch.msgid.link/20241213152244.3080955-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/queues.py | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 30f29096e27c..9c5473abbd78 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -8,25 +8,28 @@ from lib.py import cmd import glob -def sys_get_queues(ifname) -> int: - folders = glob.glob(f'/sys/class/net/{ifname}/queues/rx-*') +def sys_get_queues(ifname, qtype='rx') -> int: + folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*') return len(folders) -def nl_get_queues(cfg, nl): +def nl_get_queues(cfg, nl, qtype='rx'): queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True) if queues: - return len([q for q in queues if q['type'] == 'rx']) + return len([q for q in queues if q['type'] == qtype]) return None def get_queues(cfg, nl) -> None: - queues = nl_get_queues(cfg, nl) - if not queues: - raise KsftSkipEx('queue-get not supported by device') + snl = NetdevFamily(recv_size=4096) - expected = sys_get_queues(cfg.dev['ifname']) - ksft_eq(queues, expected) + for qtype in ['rx', 'tx']: + queues = nl_get_queues(cfg, snl, qtype) + if not queues: + raise KsftSkipEx('queue-get not supported by device') + + expected = sys_get_queues(cfg.dev['ifname'], qtype) + ksft_eq(queues, expected) def addremove_queues(cfg, nl) -> None: @@ -57,7 +60,7 @@ def addremove_queues(cfg, nl) -> None: def main() -> None: - with NetDrvEnv(__file__, queue_count=3) as cfg: + with NetDrvEnv(__file__, queue_count=100) as cfg: ksft_run([get_queues, addremove_queues], args=(cfg, NetdevFamily())) ksft_exit() -- 2.50.1 From 5712e323d4c3ad03bba4d28f83e80593171ac3f1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 13 Dec 2024 07:22:44 -0800 Subject: [PATCH 10/16] selftests: net-drv: stats: sanity check netlink dumps Sanity check netlink dumps, to make sure dumps don't have repeated entries or gaps in IDs. Reviewed-by: Petr Machata Link: https://patch.msgid.link/20241213152244.3080955-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/stats.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index 63e3c045a3b2..031ac9def6c0 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -110,6 +110,23 @@ def qstat_by_ifindex(cfg) -> None: ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key) ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key) + # Sanity check the dumps + queues = NetdevFamily(recv_size=4096).qstats_get({"scope": "queue"}, dump=True) + # Reformat the output into {ifindex: {rx: [id, id, ...], tx: [id, id, ...]}} + parsed = {} + for entry in queues: + ifindex = entry["ifindex"] + if ifindex not in parsed: + parsed[ifindex] = {"rx":[], "tx": []} + parsed[ifindex][entry["queue-type"]].append(entry['queue-id']) + # Now, validate + for ifindex, queues in parsed.items(): + for qtype in ['rx', 'tx']: + ksft_eq(len(queues[qtype]), len(set(queues[qtype])), + comment="repeated queue keys") + ksft_eq(len(queues[qtype]), max(queues[qtype]) + 1, + comment="missing queue keys") + # Test invalid dumps # 0 is invalid with ksft_raises(NlError) as cm: @@ -158,7 +175,7 @@ def check_down(cfg) -> None: def main() -> None: - with NetDrvEnv(__file__) as cfg: + with NetDrvEnv(__file__, queue_count=100) as cfg: ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, check_down], args=(cfg, )) -- 2.50.1 From fbbd84af6ba70334335bdeba3ae536cf751c14c6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Dec 2024 12:47:27 +0300 Subject: [PATCH 11/16] chelsio/chtls: prevent potential integer overflow on 32bit The "gl->tot_len" variable is controlled by the user. It comes from process_responses(). On 32bit systems, the "gl->tot_len + sizeof(struct cpl_pass_accept_req) + sizeof(struct rss_header)" addition could have an integer wrapping bug. Use size_add() to prevent this. Fixes: a08943947873 ("crypto: chtls - Register chtls with net tls") Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Reviewed-by: Simon Horman Link: https://patch.msgid.link/c6bfb23c-2db2-4e1b-b8ab-ba3925c82ef5@stanley.mountain Signed-off-by: Jakub Kicinski --- .../net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c index 96fd31d75dfd..daa1ebaef511 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c @@ -346,8 +346,9 @@ static struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl, * driver. Once driver synthesizes cpl_pass_accept_req the skb will go * through the regular cpl_pass_accept_req processing in TOM. */ - skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) - - pktshift, GFP_ATOMIC); + skb = alloc_skb(size_add(gl->tot_len, + sizeof(struct cpl_pass_accept_req)) - + pktshift, GFP_ATOMIC); if (unlikely(!skb)) return NULL; __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) -- 2.50.1 From e78c20f327bd94dabac68b98218dff069a8780f0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Dec 2024 13:36:57 +0100 Subject: [PATCH 12/16] team: Fix feature exposure when no ports are present Small follow-up to align this to an equivalent behavior as the bond driver. The change in 3625920b62c3 ("teaming: fix vlan_features computing") removed the netdevice vlan_features when there is no team port attached, yet it leaves the full set of enc_features intact. Instead, leave the default features as pre 3625920b62c3, and recompute once we do have ports attached. Also, similarly as in bonding case, call the netdev_base_features() helper on the enc_features. Fixes: 3625920b62c3 ("teaming: fix vlan_features computing") Signed-off-by: Daniel Borkmann Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20241213123657.401868-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- drivers/net/team/team_core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index 69ea2c3c76bf..c7690adec8db 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -998,9 +998,13 @@ static void __team_compute_features(struct team *team) unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; + rcu_read_lock(); + if (list_empty(&team->port_list)) + goto done; + vlan_features = netdev_base_features(vlan_features); + enc_features = netdev_base_features(enc_features); - rcu_read_lock(); list_for_each_entry_rcu(port, &team->port_list, list) { vlan_features = netdev_increment_features(vlan_features, port->dev->vlan_features, @@ -1010,11 +1014,11 @@ static void __team_compute_features(struct team *team) port->dev->hw_enc_features, TEAM_ENC_FEATURES); - dst_release_flag &= port->dev->priv_flags; if (port->dev->hard_header_len > max_hard_header_len) max_hard_header_len = port->dev->hard_header_len; } +done: rcu_read_unlock(); team->dev->vlan_features = vlan_features; -- 2.50.1 From 7203d10e93b6e6e1d19481ef7907de6a9133a467 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Dec 2024 17:28:11 +0300 Subject: [PATCH 13/16] net: hinic: Fix cleanup in create_rxqs/txqs() There is a check for NULL at the start of create_txqs() and create_rxqs() which tess if "nic_dev->txqs" is non-NULL. The intention is that if the device is already open and the queues are already created then we don't create them a second time. However, the bug is that if we have an error in the create_txqs() then the pointer doesn't get set back to NULL. The NULL check at the start of the function will say that it's already open when it's not and the device can't be used. Set ->txqs back to NULL on cleanup on error. Fixes: c3e79baf1b03 ("net-next/hinic: Add logical Txq and Rxq") Signed-off-by: Dan Carpenter Reviewed-by: Simon Horman Link: https://patch.msgid.link/0cc98faf-a0ed-4565-a55b-0fa2734bc205@stanley.mountain Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/huawei/hinic/hinic_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 890f213da8d1..ae1f523d6841 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -172,6 +172,7 @@ err_init_txq: hinic_sq_dbgfs_uninit(nic_dev); devm_kfree(&netdev->dev, nic_dev->txqs); + nic_dev->txqs = NULL; return err; } @@ -268,6 +269,7 @@ err_init_rxq: hinic_rq_dbgfs_uninit(nic_dev); devm_kfree(&netdev->dev, nic_dev->rxqs); + nic_dev->rxqs = NULL; return err; } -- 2.50.1 From 94901b7a74d82bfd30420f1d9d00898278fdc8bf Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 12 Dec 2024 22:00:15 +0900 Subject: [PATCH 14/16] rust: net::phy fix module autoloading The alias symbol name was renamed. Adjust module_phy_driver macro to create the proper symbol name to fix module autoloading. Fixes: 054a9cd395a7 ("modpost: rename alias symbol for MODULE_DEVICE_TABLE()") Signed-off-by: FUJITA Tomonori Link: https://patch.msgid.link/20241212130015.238863-1-fujita.tomonori@gmail.com Signed-off-by: Paolo Abeni --- rust/kernel/net/phy.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs index b89c681d97c0..2fbfb6a94c11 100644 --- a/rust/kernel/net/phy.rs +++ b/rust/kernel/net/phy.rs @@ -860,7 +860,7 @@ impl DeviceMask { /// ]; /// #[cfg(MODULE)] /// #[no_mangle] -/// static __mod_mdio__phydev_device_table: [::kernel::bindings::mdio_device_id; 2] = _DEVICE_TABLE; +/// static __mod_device_table__mdio__phydev: [::kernel::bindings::mdio_device_id; 2] = _DEVICE_TABLE; /// ``` #[macro_export] macro_rules! module_phy_driver { @@ -883,7 +883,7 @@ macro_rules! module_phy_driver { #[cfg(MODULE)] #[no_mangle] - static __mod_mdio__phydev_device_table: [$crate::bindings::mdio_device_id; + static __mod_device_table__mdio__phydev: [$crate::bindings::mdio_device_id; $crate::module_phy_driver!(@count_devices $($dev),+) + 1] = _DEVICE_TABLE; }; -- 2.50.1 From 7d2f320e12744e5906a4fab40381060a81d22c12 Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Fri, 13 Dec 2024 18:01:58 +0530 Subject: [PATCH 15/16] net: ethernet: oa_tc6: fix infinite loop error when tx credits becomes 0 SPI thread wakes up to perform SPI transfer whenever there is an TX skb from n/w stack or interrupt from MAC-PHY. Ethernet frame from TX skb is transferred based on the availability tx credits in the MAC-PHY which is reported from the previous SPI transfer. Sometimes there is a possibility that TX skb is available to transmit but there is no tx credits from MAC-PHY. In this case, there will not be any SPI transfer but the thread will be running in an endless loop until tx credits available again. So checking the availability of tx credits along with TX skb will prevent the above infinite loop. When the tx credits available again that will be notified through interrupt which will trigger the SPI transfer to get the available tx credits. Fixes: 53fbde8ab21e ("net: ethernet: oa_tc6: implement transmit path to transfer tx ethernet frames") Reviewed-by: Jacob Keller Signed-off-by: Parthiban Veerasooran Signed-off-by: Paolo Abeni --- drivers/net/ethernet/oa_tc6.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/oa_tc6.c b/drivers/net/ethernet/oa_tc6.c index f9c0dcd965c2..4c8b0ca922b7 100644 --- a/drivers/net/ethernet/oa_tc6.c +++ b/drivers/net/ethernet/oa_tc6.c @@ -1111,8 +1111,9 @@ static int oa_tc6_spi_thread_handler(void *data) /* This kthread will be waken up if there is a tx skb or mac-phy * interrupt to perform spi transfer with tx chunks. */ - wait_event_interruptible(tc6->spi_wq, tc6->waiting_tx_skb || - tc6->int_flag || + wait_event_interruptible(tc6->spi_wq, tc6->int_flag || + (tc6->waiting_tx_skb && + tc6->tx_credits) || kthread_should_stop()); if (kthread_should_stop()) -- 2.50.1 From e592b5110b3e9393881b0a019d86832bbf71a47f Mon Sep 17 00:00:00 2001 From: Parthiban Veerasooran Date: Fri, 13 Dec 2024 18:01:59 +0530 Subject: [PATCH 16/16] net: ethernet: oa_tc6: fix tx skb race condition between reference pointers There are two skb pointers to manage tx skb's enqueued from n/w stack. waiting_tx_skb pointer points to the tx skb which needs to be processed and ongoing_tx_skb pointer points to the tx skb which is being processed. SPI thread prepares the tx data chunks from the tx skb pointed by the ongoing_tx_skb pointer. When the tx skb pointed by the ongoing_tx_skb is processed, the tx skb pointed by the waiting_tx_skb is assigned to ongoing_tx_skb and the waiting_tx_skb pointer is assigned with NULL. Whenever there is a new tx skb from n/w stack, it will be assigned to waiting_tx_skb pointer if it is NULL. Enqueuing and processing of a tx skb handled in two different threads. Consider a scenario where the SPI thread processed an ongoing_tx_skb and it moves next tx skb from waiting_tx_skb pointer to ongoing_tx_skb pointer without doing any NULL check. At this time, if the waiting_tx_skb pointer is NULL then ongoing_tx_skb pointer is also assigned with NULL. After that, if a new tx skb is assigned to waiting_tx_skb pointer by the n/w stack and there is a chance to overwrite the tx skb pointer with NULL in the SPI thread. Finally one of the tx skb will be left as unhandled, resulting packet missing and memory leak. - Consider the below scenario where the TXC reported from the previous transfer is 10 and ongoing_tx_skb holds an tx ethernet frame which can be transported in 20 TXCs and waiting_tx_skb is still NULL. tx_credits = 10; /* 21 are filled in the previous transfer */ ongoing_tx_skb = 20; waiting_tx_skb = NULL; /* Still NULL */ - So, (tc6->ongoing_tx_skb || tc6->waiting_tx_skb) becomes true. - After oa_tc6_prepare_spi_tx_buf_for_tx_skbs() ongoing_tx_skb = 10; waiting_tx_skb = NULL; /* Still NULL */ - Perform SPI transfer. - Process SPI rx buffer to get the TXC from footers. - Now let's assume previously filled 21 TXCs are freed so we are good to transport the next remaining 10 tx chunks from ongoing_tx_skb. tx_credits = 21; ongoing_tx_skb = 10; waiting_tx_skb = NULL; - So, (tc6->ongoing_tx_skb || tc6->waiting_tx_skb) becomes true again. - In the oa_tc6_prepare_spi_tx_buf_for_tx_skbs() ongoing_tx_skb = NULL; waiting_tx_skb = NULL; - Now the below bad case might happen, Thread1 (oa_tc6_start_xmit) Thread2 (oa_tc6_spi_thread_handler) --------------------------- ----------------------------------- - if waiting_tx_skb is NULL - if ongoing_tx_skb is NULL - ongoing_tx_skb = waiting_tx_skb - waiting_tx_skb = skb - waiting_tx_skb = NULL ... - ongoing_tx_skb = NULL - if waiting_tx_skb is NULL - waiting_tx_skb = skb To overcome the above issue, protect the moving of tx skb reference from waiting_tx_skb pointer to ongoing_tx_skb pointer and assigning new tx skb to waiting_tx_skb pointer, so that the other thread can't access the waiting_tx_skb pointer until the current thread completes moving the tx skb reference safely. Fixes: 53fbde8ab21e ("net: ethernet: oa_tc6: implement transmit path to transfer tx ethernet frames") Signed-off-by: Parthiban Veerasooran Reviewed-by: Larysa Zaremba Signed-off-by: Paolo Abeni --- drivers/net/ethernet/oa_tc6.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/oa_tc6.c b/drivers/net/ethernet/oa_tc6.c index 4c8b0ca922b7..db200e4ec284 100644 --- a/drivers/net/ethernet/oa_tc6.c +++ b/drivers/net/ethernet/oa_tc6.c @@ -113,6 +113,7 @@ struct oa_tc6 { struct mii_bus *mdiobus; struct spi_device *spi; struct mutex spi_ctrl_lock; /* Protects spi control transfer */ + spinlock_t tx_skb_lock; /* Protects tx skb handling */ void *spi_ctrl_tx_buf; void *spi_ctrl_rx_buf; void *spi_data_tx_buf; @@ -1004,8 +1005,10 @@ static u16 oa_tc6_prepare_spi_tx_buf_for_tx_skbs(struct oa_tc6 *tc6) for (used_tx_credits = 0; used_tx_credits < tc6->tx_credits; used_tx_credits++) { if (!tc6->ongoing_tx_skb) { + spin_lock_bh(&tc6->tx_skb_lock); tc6->ongoing_tx_skb = tc6->waiting_tx_skb; tc6->waiting_tx_skb = NULL; + spin_unlock_bh(&tc6->tx_skb_lock); } if (!tc6->ongoing_tx_skb) break; @@ -1210,7 +1213,9 @@ netdev_tx_t oa_tc6_start_xmit(struct oa_tc6 *tc6, struct sk_buff *skb) return NETDEV_TX_OK; } + spin_lock_bh(&tc6->tx_skb_lock); tc6->waiting_tx_skb = skb; + spin_unlock_bh(&tc6->tx_skb_lock); /* Wake spi kthread to perform spi transfer */ wake_up_interruptible(&tc6->spi_wq); @@ -1240,6 +1245,7 @@ struct oa_tc6 *oa_tc6_init(struct spi_device *spi, struct net_device *netdev) tc6->netdev = netdev; SET_NETDEV_DEV(netdev, &spi->dev); mutex_init(&tc6->spi_ctrl_lock); + spin_lock_init(&tc6->tx_skb_lock); /* Set the SPI controller to pump at realtime priority */ tc6->spi->rt = true; -- 2.50.1