From c84f6ce918a9e6f4996597cbc62536bbf2247c96 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Apr 2025 00:28:50 +0300 Subject: [PATCH 01/16] net: dsa: mv88e6xxx: avoid unregistering devlink regions which were never registered Russell King reports that a system with mv88e6xxx dereferences a NULL pointer when unbinding this driver: https://lore.kernel.org/netdev/Z_lRkMlTJ1KQ0kVX@shell.armlinux.org.uk/ The crash seems to be in devlink_region_destroy(), which is not NULL tolerant but is given a NULL devlink global region pointer. At least on some chips, some devlink regions are conditionally registered since the blamed commit, see mv88e6xxx_setup_devlink_regions_global(): if (cond && !cond(chip)) continue; These are MV88E6XXX_REGION_STU and MV88E6XXX_REGION_PVT. If the chip does not have an STU or PVT, it should crash like this. To fix the issue, avoid unregistering those regions which are NULL, i.e. were skipped at mv88e6xxx_setup_devlink_regions_global() time. Fixes: 836021a2d0e0 ("net: dsa: mv88e6xxx: Export cross-chip PVT as devlink region") Tested-by: Russell King (Oracle) Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250414212850.2953957-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/devlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c index 795c8df7b6a7..195460a0a0d4 100644 --- a/drivers/net/dsa/mv88e6xxx/devlink.c +++ b/drivers/net/dsa/mv88e6xxx/devlink.c @@ -736,7 +736,8 @@ void mv88e6xxx_teardown_devlink_regions_global(struct dsa_switch *ds) int i; for (i = 0; i < ARRAY_SIZE(mv88e6xxx_regions); i++) - dsa_devlink_region_destroy(chip->regions[i]); + if (chip->regions[i]) + dsa_devlink_region_destroy(chip->regions[i]); } void mv88e6xxx_teardown_devlink_regions_port(struct dsa_switch *ds, int port) -- 2.51.0 From ea08dfc35f83cfc73493c52f63ae4f2e29edfe8d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Apr 2025 00:29:13 +0300 Subject: [PATCH 02/16] net: dsa: mv88e6xxx: fix -ENOENT when deleting VLANs and MST is unsupported Russell King reports that on the ZII dev rev B, deleting a bridge VLAN from a user port fails with -ENOENT: https://lore.kernel.org/netdev/Z_lQXNP0s5-IiJzd@shell.armlinux.org.uk/ This comes from mv88e6xxx_port_vlan_leave() -> mv88e6xxx_mst_put(), which tries to find an MST entry in &chip->msts associated with the SID, but fails and returns -ENOENT as such. But we know that this chip does not support MST at all, so that is not surprising. The question is why does the guard in mv88e6xxx_mst_put() not exit early: if (!sid) return 0; And the answer seems to be simple: the sid comes from vlan.sid which supposedly was previously populated by mv88e6xxx_vtu_get(). But some chip->info->ops->vtu_getnext() implementations do not populate vlan.sid, for example see mv88e6185_g1_vtu_getnext(). In that case, later in mv88e6xxx_port_vlan_leave() we are using a garbage sid which is just residual stack memory. Testing for sid == 0 covers all cases of a non-bridge VLAN or a bridge VLAN mapped to the default MSTI. For some chips, SID 0 is valid and installed by mv88e6xxx_stu_setup(). A chip which does not support the STU would implicitly only support mapping all VLANs to the default MSTI, so although SID 0 is not valid, it would be sufficient, if we were to zero-initialize the vlan structure, to fix the bug, due to the coincidence that a test for vlan.sid == 0 already exists and leads to the same (correct) behavior. Another option which would be sufficient would be to add a test for mv88e6xxx_has_stu() inside mv88e6xxx_mst_put(), symmetric to the one which already exists in mv88e6xxx_mst_get(). But that placement means the caller will have to dereference vlan.sid, which means it will access uninitialized memory, which is not nice even if it ignores it later. So we end up making both modifications, in order to not rely just on the sid == 0 coincidence, but also to avoid having uninitialized structure fields which might get temporarily accessed. Fixes: acaf4d2e36b3 ("net: dsa: mv88e6xxx: MST Offloading") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250414212913.2955253-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 29a89ab4b789..08db846cda8d 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1852,6 +1852,8 @@ static int mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid, if (!chip->info->ops->vtu_getnext) return -EOPNOTSUPP; + memset(entry, 0, sizeof(*entry)); + entry->vid = vid ? vid - 1 : mv88e6xxx_max_vid(chip); entry->valid = false; @@ -1960,7 +1962,16 @@ static int mv88e6xxx_mst_put(struct mv88e6xxx_chip *chip, u8 sid) struct mv88e6xxx_mst *mst, *tmp; int err; - if (!sid) + /* If the SID is zero, it is for a VLAN mapped to the default MSTI, + * and mv88e6xxx_stu_setup() made sure it is always present, and thus, + * should not be removed here. + * + * If the chip lacks STU support, numerically the "sid" variable will + * happen to also be zero, but we don't want to rely on that fact, so + * we explicitly test that first. In that case, there is also nothing + * to do here. + */ + if (!mv88e6xxx_has_stu(chip) || !sid) return 0; list_for_each_entry_safe(mst, tmp, &chip->msts, node) { -- 2.51.0 From 7afb5fb42d4950f33af2732b8147c552659f79b7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Apr 2025 00:29:30 +0300 Subject: [PATCH 03/16] net: dsa: clean up FDB, MDB, VLAN entries on unbind As explained in many places such as commit b117e1e8a86d ("net: dsa: delete dsa_legacy_fdb_add and dsa_legacy_fdb_del"), DSA is written given the assumption that higher layers have balanced additions/deletions. As such, it only makes sense to be extremely vocal when those assumptions are violated and the driver unbinds with entries still present. But Ido Schimmel points out a very simple situation where that is wrong: https://lore.kernel.org/netdev/ZDazSM5UsPPjQuKr@shredder/ (also briefly discussed by me in the aforementioned commit). Basically, while the bridge bypass operations are not something that DSA explicitly documents, and for the majority of DSA drivers this API simply causes them to go to promiscuous mode, that isn't the case for all drivers. Some have the necessary requirements for bridge bypass operations to do something useful - see dsa_switch_supports_uc_filtering(). Although in tools/testing/selftests/net/forwarding/local_termination.sh, we made an effort to popularize better mechanisms to manage address filters on DSA interfaces from user space - namely macvlan for unicast, and setsockopt(IP_ADD_MEMBERSHIP) - through mtools - for multicast, the fact is that 'bridge fdb add ... self static local' also exists as kernel UAPI, and might be useful to someone, even if only for a quick hack. It seems counter-productive to block that path by implementing shim .ndo_fdb_add and .ndo_fdb_del operations which just return -EOPNOTSUPP in order to prevent the ndo_dflt_fdb_add() and ndo_dflt_fdb_del() from running, although we could do that. Accepting that cleanup is necessary seems to be the only option. Especially since we appear to be coming back at this from a different angle as well. Russell King is noticing that the WARN_ON() triggers even for VLANs: https://lore.kernel.org/netdev/Z_li8Bj8bD4-BYKQ@shell.armlinux.org.uk/ What happens in the bug report above is that dsa_port_do_vlan_del() fails, then the VLAN entry lingers on, and then we warn on unbind and leak it. This is not a straight revert of the blamed commit, but we now add an informational print to the kernel log (to still have a way to see that bugs exist), and some extra comments gathered from past years' experience, to justify the logic. Fixes: 0832cd9f1f02 ("net: dsa: warn if port lists aren't empty in dsa_port_teardown") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250414212930.2956310-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index e827775baf2e..e7e32956070a 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1478,12 +1478,44 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd) static void dsa_switch_release_ports(struct dsa_switch *ds) { + struct dsa_mac_addr *a, *tmp; struct dsa_port *dp, *next; + struct dsa_vlan *v, *n; dsa_switch_for_each_port_safe(dp, next, ds) { - WARN_ON(!list_empty(&dp->fdbs)); - WARN_ON(!list_empty(&dp->mdbs)); - WARN_ON(!list_empty(&dp->vlans)); + /* These are either entries that upper layers lost track of + * (probably due to bugs), or installed through interfaces + * where one does not necessarily have to remove them, like + * ndo_dflt_fdb_add(). + */ + list_for_each_entry_safe(a, tmp, &dp->fdbs, list) { + dev_info(ds->dev, + "Cleaning up unicast address %pM vid %u from port %d\n", + a->addr, a->vid, dp->index); + list_del(&a->list); + kfree(a); + } + + list_for_each_entry_safe(a, tmp, &dp->mdbs, list) { + dev_info(ds->dev, + "Cleaning up multicast address %pM vid %u from port %d\n", + a->addr, a->vid, dp->index); + list_del(&a->list); + kfree(a); + } + + /* These are entries that upper layers have lost track of, + * probably due to bugs, but also due to dsa_port_do_vlan_del() + * having failed and the VLAN entry still lingering on. + */ + list_for_each_entry_safe(v, n, &dp->vlans, list) { + dev_info(ds->dev, + "Cleaning up vid %u from port %d\n", + v->vid, dp->index); + list_del(&v->list); + kfree(v); + } + list_del(&dp->list); kfree(dp); } -- 2.51.0 From 8bf108d7161ffc6880ad13a0cc109de3cf631727 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Apr 2025 00:30:01 +0300 Subject: [PATCH 04/16] net: dsa: free routing table on probe failure If complete = true in dsa_tree_setup(), it means that we are the last switch of the tree which is successfully probing, and we should be setting up all switches from our probe path. After "complete" becomes true, dsa_tree_setup_cpu_ports() or any subsequent function may fail. If that happens, the entire tree setup is in limbo: the first N-1 switches have successfully finished probing (doing nothing but having allocated persistent memory in the tree's dst->ports, and maybe dst->rtable), and switch N failed to probe, ending the tree setup process before anything is tangible from the user's PoV. If switch N fails to probe, its memory (ports) will be freed and removed from dst->ports. However, the dst->rtable elements pointing to its ports, as created by dsa_link_touch(), will remain there, and will lead to use-after-free if dereferenced. If dsa_tree_setup_switches() returns -EPROBE_DEFER, which is entirely possible because that is where ds->ops->setup() is, we get a kasan report like this: ================================================================== BUG: KASAN: slab-use-after-free in mv88e6xxx_setup_upstream_port+0x240/0x568 Read of size 8 at addr ffff000004f56020 by task kworker/u8:3/42 Call trace: __asan_report_load8_noabort+0x20/0x30 mv88e6xxx_setup_upstream_port+0x240/0x568 mv88e6xxx_setup+0xebc/0x1eb0 dsa_register_switch+0x1af4/0x2ae0 mv88e6xxx_register_switch+0x1b8/0x2a8 mv88e6xxx_probe+0xc4c/0xf60 mdio_probe+0x78/0xb8 really_probe+0x2b8/0x5a8 __driver_probe_device+0x164/0x298 driver_probe_device+0x78/0x258 __device_attach_driver+0x274/0x350 Allocated by task 42: __kasan_kmalloc+0x84/0xa0 __kmalloc_cache_noprof+0x298/0x490 dsa_switch_touch_ports+0x174/0x3d8 dsa_register_switch+0x800/0x2ae0 mv88e6xxx_register_switch+0x1b8/0x2a8 mv88e6xxx_probe+0xc4c/0xf60 mdio_probe+0x78/0xb8 really_probe+0x2b8/0x5a8 __driver_probe_device+0x164/0x298 driver_probe_device+0x78/0x258 __device_attach_driver+0x274/0x350 Freed by task 42: __kasan_slab_free+0x48/0x68 kfree+0x138/0x418 dsa_register_switch+0x2694/0x2ae0 mv88e6xxx_register_switch+0x1b8/0x2a8 mv88e6xxx_probe+0xc4c/0xf60 mdio_probe+0x78/0xb8 really_probe+0x2b8/0x5a8 __driver_probe_device+0x164/0x298 driver_probe_device+0x78/0x258 __device_attach_driver+0x274/0x350 The simplest way to fix the bug is to delete the routing table in its entirety. dsa_tree_setup_routing_table() has no problem in regenerating it even if we deleted links between ports other than those of switch N, because dsa_link_touch() first checks whether the port pair already exists in dst->rtable, allocating if not. The deletion of the routing table in its entirety already exists in dsa_tree_teardown(), so refactor that into a function that can also be called from the tree setup error path. In my analysis of the commit to blame, it is the one which added dsa_link elements to dst->rtable. Prior to that, each switch had its own ds->rtable which is freed when the switch fails to probe. But the tree is potentially persistent memory. Fixes: c5f51765a1f6 ("net: dsa: list DSA links in the fabric") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250414213001.2957964-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index e7e32956070a..436a7e1b412a 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -862,6 +862,16 @@ static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst) kfree(dst->lags); } +static void dsa_tree_teardown_routing_table(struct dsa_switch_tree *dst) +{ + struct dsa_link *dl, *next; + + list_for_each_entry_safe(dl, next, &dst->rtable, list) { + list_del(&dl->list); + kfree(dl); + } +} + static int dsa_tree_setup(struct dsa_switch_tree *dst) { bool complete; @@ -879,7 +889,7 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) err = dsa_tree_setup_cpu_ports(dst); if (err) - return err; + goto teardown_rtable; err = dsa_tree_setup_switches(dst); if (err) @@ -911,14 +921,14 @@ teardown_switches: dsa_tree_teardown_switches(dst); teardown_cpu_ports: dsa_tree_teardown_cpu_ports(dst); +teardown_rtable: + dsa_tree_teardown_routing_table(dst); return err; } static void dsa_tree_teardown(struct dsa_switch_tree *dst) { - struct dsa_link *dl, *next; - if (!dst->setup) return; @@ -932,10 +942,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_cpu_ports(dst); - list_for_each_entry_safe(dl, next, &dst->rtable, list) { - list_del(&dl->list); - kfree(dl); - } + dsa_tree_teardown_routing_table(dst); pr_info("DSA: tree %d torn down\n", dst->index); -- 2.51.0 From 514eff7b0aa1c5eb645ddbb8676ef3e2d88a8b99 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Apr 2025 00:30:20 +0300 Subject: [PATCH 05/16] net: dsa: avoid refcount warnings when ds->ops->tag_8021q_vlan_del() fails This is very similar to the problem and solution from commit 232deb3f9567 ("net: dsa: avoid refcount warnings when ->port_{fdb,mdb}_del returns error"), except for the dsa_port_do_tag_8021q_vlan_del() operation. Fixes: c64b9c05045a ("net: dsa: tag_8021q: add proper cross-chip notifier support") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250414213020.2959021-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_8021q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 3ee53e28ec2e..53e03fd8071b 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -197,7 +197,7 @@ static int dsa_port_do_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid) err = ds->ops->tag_8021q_vlan_del(ds, port, vid); if (err) { - refcount_inc(&v->refcount); + refcount_set(&v->refcount, 1); return err; } -- 2.51.0 From 2a5970d5aaff8f3e33ce3bfaa403ae88c40de40d Mon Sep 17 00:00:00 2001 From: Sagi Maimon Date: Tue, 15 Apr 2025 08:31:31 +0300 Subject: [PATCH 06/16] ptp: ocp: fix start time alignment in ptp_ocp_signal_set In ptp_ocp_signal_set, the start time for periodic signals is not aligned to the next period boundary. The current code rounds up the start time and divides by the period but fails to multiply back by the period, causing misaligned signal starts. Fix this by multiplying the rounded-up value by the period to ensure the start time is the closest next period. Fixes: 4bd46bb037f8e ("ptp: ocp: Use DIV64_U64_ROUND_UP for rounding.") Signed-off-by: Sagi Maimon Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250415053131.129413-1-maimon.sagi@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 7945c6be1f7c..faf6e027f89a 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -2067,6 +2067,7 @@ ptp_ocp_signal_set(struct ptp_ocp *bp, int gen, struct ptp_ocp_signal *s) if (!s->start) { /* roundup() does not work on 32-bit systems */ s->start = DIV64_U64_ROUND_UP(start_ns, s->period); + s->start *= s->period; s->start = ktime_add(s->start, s->phase); } -- 2.51.0 From 4798cfa2097f0833d54d8f5ce20ef14631917839 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 15 Apr 2025 08:15:52 -0700 Subject: [PATCH 07/16] net: don't try to ops lock uninitialized devs We need to be careful when operating on dev while in rtnl_create_link(). Some devices (vxlan) initialize netdev_ops in ->newlink, so later on. Avoid using netdev_lock_ops(), the device isn't registered so we cannot legally call its ops or generate any notifications for it. netdev_ops_assert_locked_or_invisible() is safe to use, it checks registration status first. Reported-by: syzbot+de1c7d68a10e3f123bdd@syzkaller.appspotmail.com Fixes: 04efcee6ef8d ("net: hold instance lock during NETDEV_CHANGE") Acked-by: Stanislav Fomichev Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250415151552.768373-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 2 ++ net/core/rtnetlink.c | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 5fcbc66d865e..1be7cb73a602 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1520,6 +1520,8 @@ EXPORT_SYMBOL(netdev_features_change); void netif_state_change(struct net_device *dev) { + netdev_ops_assert_locked_or_invisible(dev); + if (dev->flags & IFF_UP) { struct netdev_notifier_change_info change_info = { .info.dev = dev, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 39a5b72e861f..c5a7f41982a5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3676,11 +3676,8 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, nla_len(tb[IFLA_BROADCAST])); if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - if (tb[IFLA_OPERSTATE]) { - netdev_lock_ops(dev); + if (tb[IFLA_OPERSTATE]) set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); - netdev_unlock_ops(dev); - } if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); if (tb[IFLA_GROUP]) -- 2.51.0 From c7b67ddc3c999aa2f8d77be7ef1913298fe78f0e Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Wed, 9 Apr 2025 12:39:56 +0000 Subject: [PATCH 08/16] xfs: document zoned rt specifics in admin-guide Document the lifetime, nolifetime and max_open_zones mount options added for zoned rt file systems. Also add documentation describing the max_open_zones sysfs attribute exposed in /sys/fs/xfs//zoned/ Fixes: 4e4d52075577 ("xfs: add the zoned space allocator") Signed-off-by: Hans Holmberg Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- Documentation/admin-guide/xfs.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst index 7b0811d650f9..3e76276bd488 100644 --- a/Documentation/admin-guide/xfs.rst +++ b/Documentation/admin-guide/xfs.rst @@ -124,6 +124,14 @@ When mounting an XFS filesystem, the following options are accepted. controls the size of each buffer and so is also relevant to this case. + lifetime (default) or nolifetime + Enable data placement based on write life time hints provided + by the user. This turns on co-allocation of data of similar + life times when statistically favorable to reduce garbage + collection cost. + + These options are only available for zoned rt file systems. + logbsize=value Set the size of each in-memory log buffer. The size may be specified in bytes, or in kilobytes with a "k" suffix. @@ -143,6 +151,14 @@ When mounting an XFS filesystem, the following options are accepted. optional, and the log section can be separate from the data section or contained within it. + max_open_zones=value + Specify the max number of zones to keep open for writing on a + zoned rt device. Many open zones aids file data separation + but may impact performance on HDDs. + + If ``max_open_zones`` is not specified, the value is determined + by the capabilities and the size of the zoned rt device. + noalign Data allocations will not be aligned at stripe unit boundaries. This is only relevant to filesystems created @@ -546,6 +562,19 @@ The interesting knobs for XFS workqueues are as follows: Zoned Filesystems ================= +For zoned file systems, the following attribute is exposed in: + + /sys/fs/xfs//zoned/ + + max_open_zones (Min: 1 Default: Varies Max: UINTMAX) + This read-only attribute exposes the maximum number of open zones + available for data placement. The value is determined at mount time and + is limited by the capabilities of the backing zoned device, file system + size and the max_open_zones mount option. + +Zoned Filesystems +================= + For zoned file systems, the following attributes are exposed in: /sys/fs/xfs//zoned/ -- 2.51.0 From d2d31ea8cd80b9830cdab624e94f9d41178fc99d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Apr 2025 15:53:48 +0200 Subject: [PATCH 09/16] netfilter: conntrack: fix erronous removal of offload bit The blamed commit exposes a possible issue with flow_offload_teardown(): We might remove the offload bit of a conntrack entry that has been offloaded again. 1. conntrack entry c1 is offloaded via flow f1 (f1->ct == c1). 2. f1 times out and is pushed back to slowpath, c1 offload bit is removed. Due to bug, f1 is not unlinked from rhashtable right away. 3. a new packet arrives for the flow and re-offload is triggered, i.e. f2->ct == c1. This is because lookup in flowtable skip entries with teardown bit set. 4. Next flowtable gc cycle finds f1 again 5. flow_offload_teardown() is called again for f1 and c1 offload bit is removed again, even though we have f2 referencing the same entry. This is harmless, but clearly not correct. Fix the bug that exposes this: set 'teardown = true' to have the gc callback unlink the flowtable entry from the table right away instead of the unintentional defer to the next round. Also prevent flow_offload_teardown() from fixing up the ct state more than once: We could also be called from the data path or a notifier, not only from the flowtable gc callback. NF_FLOW_TEARDOWN can never be unset, so we can use it as synchronization point: if we observe did not see a 0 -> 1 transition, then another CPU is already doing the ct state fixups for us. Fixes: 03428ca5cee9 ("netfilter: conntrack: rework offload nf_conn timeout extension logic") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 9d8361526f82..9441ac3d8c1a 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -383,8 +383,8 @@ static void flow_offload_del(struct nf_flowtable *flow_table, void flow_offload_teardown(struct flow_offload *flow) { clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); - set_bit(NF_FLOW_TEARDOWN, &flow->flags); - flow_offload_fixup_ct(flow); + if (!test_and_set_bit(NF_FLOW_TEARDOWN, &flow->flags)) + flow_offload_fixup_ct(flow); } EXPORT_SYMBOL_GPL(flow_offload_teardown); @@ -558,10 +558,12 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || - nf_flow_custom_gc(flow_table, flow)) + nf_flow_custom_gc(flow_table, flow)) { flow_offload_teardown(flow); - else if (!teardown) + teardown = true; + } else if (!teardown) { nf_flow_table_extend_ct_timeout(flow->ct); + } if (teardown) { if (test_bit(NF_FLOW_HW, &flow->flags)) { -- 2.51.0 From 75bc744466444ef417b5f709f72b91c83301bcd1 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 15 Apr 2025 14:35:41 +0530 Subject: [PATCH 10/16] net: ti: icssg-prueth: Fix kernel warning while bringing down network interface During network interface initialization, the NIC driver needs to register its Rx queue with the XDP, to ensure the incoming XDP buffer carries a pointer reference to this info and is stored inside xdp_rxq_info. While this struct isn't tied to XDP prog, if there are any changes in Rx queue, the NIC driver needs to stop the Rx queue by unregistering with XDP before purging and reallocating memory. Drop page_pool destroy during Rx channel reset as this is already handled by XDP during xdp_rxq_info_unreg (Rx queue unregister), failing to do will cause the following warning: warning logs: https://gist.github.com/MeghanaMalladiTI/eb627e5dc8de24e42d7d46572c13e576 Fixes: 46eeb90f03e0 ("net: ti: icssg-prueth: Use page_pool API for RX buffer allocation") Signed-off-by: Meghana Malladi Reviewed-by: Simon Horman Reviewed-by: Roger Quadros Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250415090543.717991-2-m-malladi@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_common.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 14002b026452..ec643fb69d30 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -1215,9 +1215,6 @@ void prueth_reset_rx_chan(struct prueth_rx_chn *chn, prueth_rx_cleanup); if (disable) k3_udma_glue_disable_rx_chn(chn->rx_chn); - - page_pool_destroy(chn->pg_pool); - chn->pg_pool = NULL; } EXPORT_SYMBOL_GPL(prueth_reset_rx_chan); -- 2.51.0 From 8ed2fa661350f0b49edb765d18173b5c766c3686 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 15 Apr 2025 14:35:42 +0530 Subject: [PATCH 11/16] net: ti: icssg-prueth: Fix possible NULL pointer dereference inside emac_xmit_xdp_frame() There is an error check inside emac_xmit_xdp_frame() function which is called when the driver wants to transmit XDP frame, to check if the allocated tx descriptor is NULL, if true to exit and return ICSSG_XDP_CONSUMED implying failure in transmission. In this case trying to free a descriptor which is NULL will result in kernel crash due to NULL pointer dereference. Fix this error handling and increase netdev tx_dropped stats in the caller of this function if the function returns ICSSG_XDP_CONSUMED. Fixes: 62aa3246f462 ("net: ti: icssg-prueth: Add XDP support") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/70d8dd76-0c76-42fc-8611-9884937c82f5@stanley.mountain/ Signed-off-by: Meghana Malladi Reviewed-by: Simon Horman Reviewed-by: Roger Quadros Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250415090543.717991-3-m-malladi@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index ec643fb69d30..b4be76e13a2f 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -583,7 +583,7 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); if (!first_desc) { netdev_dbg(ndev, "xdp tx: failed to allocate descriptor\n"); - goto drop_free_descs; /* drop */ + return ICSSG_XDP_CONSUMED; /* drop */ } if (page) { /* already DMA mapped by page_pool */ @@ -671,8 +671,10 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, q_idx = smp_processor_id() % emac->tx_ch_num; result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx); - if (result == ICSSG_XDP_CONSUMED) + if (result == ICSSG_XDP_CONSUMED) { + ndev->stats.tx_dropped++; goto drop; + } dev_sw_netstats_rx_add(ndev, xdpf->len); return result; -- 2.51.0 From 7349c9e9979333abfce42da5f9025598083b59c9 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 15 Apr 2025 14:35:43 +0530 Subject: [PATCH 12/16] net: ti: icss-iep: Fix possible NULL pointer dereference for perout request The ICSS IEP driver tracks perout and pps enable state with flags. Currently when disabling pps and perout signals during icss_iep_exit(), results in NULL pointer dereference for perout. To fix the null pointer dereference issue, the icss_iep_perout_enable_hw function can be modified to directly clear the IEP CMP registers when disabling PPS or PEROUT, without referencing the ptp_perout_request structure, as its contents are irrelevant in this case. Fixes: 9b115361248d ("net: ti: icssg-prueth: Fix clearing of IEP_CMP_CFG registers during iep_init") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/7b1c7c36-363a-4085-b26c-4f210bee1df6@stanley.mountain/ Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250415090543.717991-4-m-malladi@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icss_iep.c | 121 +++++++++++------------ 1 file changed, 58 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index b4a34c57b7b4..2a1c43316f46 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -412,6 +412,22 @@ static int icss_iep_perout_enable_hw(struct icss_iep *iep, int ret; u64 cmp; + if (!on) { + /* Disable CMP 1 */ + regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, + IEP_CMP_CFG_CMP_EN(1), 0); + + /* clear CMP regs */ + regmap_write(iep->map, ICSS_IEP_CMP1_REG0, 0); + if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) + regmap_write(iep->map, ICSS_IEP_CMP1_REG1, 0); + + /* Disable sync */ + regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); + + return 0; + } + /* Calculate width of the signal for PPS/PEROUT handling */ ts.tv_sec = req->on.sec; ts.tv_nsec = req->on.nsec; @@ -430,64 +446,39 @@ static int icss_iep_perout_enable_hw(struct icss_iep *iep, if (ret) return ret; - if (on) { - /* Configure CMP */ - regmap_write(iep->map, ICSS_IEP_CMP1_REG0, lower_32_bits(cmp)); - if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) - regmap_write(iep->map, ICSS_IEP_CMP1_REG1, upper_32_bits(cmp)); - /* Configure SYNC, based on req on width */ - regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, - div_u64(ns_width, iep->def_inc)); - regmap_write(iep->map, ICSS_IEP_SYNC0_PERIOD_REG, 0); - regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, - div_u64(ns_start, iep->def_inc)); - regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); /* one-shot mode */ - /* Enable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); - } else { - /* Disable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), 0); - - /* clear regs */ - regmap_write(iep->map, ICSS_IEP_CMP1_REG0, 0); - if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) - regmap_write(iep->map, ICSS_IEP_CMP1_REG1, 0); - } + /* Configure CMP */ + regmap_write(iep->map, ICSS_IEP_CMP1_REG0, lower_32_bits(cmp)); + if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) + regmap_write(iep->map, ICSS_IEP_CMP1_REG1, upper_32_bits(cmp)); + /* Configure SYNC, based on req on width */ + regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, + div_u64(ns_width, iep->def_inc)); + regmap_write(iep->map, ICSS_IEP_SYNC0_PERIOD_REG, 0); + regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, + div_u64(ns_start, iep->def_inc)); + regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); /* one-shot mode */ + /* Enable CMP 1 */ + regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, + IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); } else { - if (on) { - u64 start_ns; - - iep->period = ((u64)req->period.sec * NSEC_PER_SEC) + - req->period.nsec; - start_ns = ((u64)req->period.sec * NSEC_PER_SEC) - + req->period.nsec; - icss_iep_update_to_next_boundary(iep, start_ns); - - regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, - div_u64(ns_width, iep->def_inc)); - regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, - div_u64(ns_start, iep->def_inc)); - /* Enable Sync in single shot mode */ - regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, - IEP_SYNC_CTRL_SYNC_N_EN(0) | IEP_SYNC_CTRL_SYNC_EN); - /* Enable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); - } else { - /* Disable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), 0); - - /* clear CMP regs */ - regmap_write(iep->map, ICSS_IEP_CMP1_REG0, 0); - if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) - regmap_write(iep->map, ICSS_IEP_CMP1_REG1, 0); - - /* Disable sync */ - regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); - } + u64 start_ns; + + iep->period = ((u64)req->period.sec * NSEC_PER_SEC) + + req->period.nsec; + start_ns = ((u64)req->period.sec * NSEC_PER_SEC) + + req->period.nsec; + icss_iep_update_to_next_boundary(iep, start_ns); + + regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, + div_u64(ns_width, iep->def_inc)); + regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, + div_u64(ns_start, iep->def_inc)); + /* Enable Sync in single shot mode */ + regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, + IEP_SYNC_CTRL_SYNC_N_EN(0) | IEP_SYNC_CTRL_SYNC_EN); + /* Enable CMP 1 */ + regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, + IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); } return 0; @@ -498,11 +489,21 @@ static int icss_iep_perout_enable(struct icss_iep *iep, { int ret = 0; + if (!on) + goto disable; + /* Reject requests with unsupported flags */ if (req->flags & ~(PTP_PEROUT_DUTY_CYCLE | PTP_PEROUT_PHASE)) return -EOPNOTSUPP; + /* Set default "on" time (1ms) for the signal if not passed by the app */ + if (!(req->flags & PTP_PEROUT_DUTY_CYCLE)) { + req->on.sec = 0; + req->on.nsec = NSEC_PER_MSEC; + } + +disable: mutex_lock(&iep->ptp_clk_mutex); if (iep->pps_enabled) { @@ -513,12 +514,6 @@ static int icss_iep_perout_enable(struct icss_iep *iep, if (iep->perout_enabled == !!on) goto exit; - /* Set default "on" time (1ms) for the signal if not passed by the app */ - if (!(req->flags & PTP_PEROUT_DUTY_CYCLE)) { - req->on.sec = 0; - req->on.nsec = NSEC_PER_MSEC; - } - ret = icss_iep_perout_enable_hw(iep, req, on); if (!ret) iep->perout_enabled = !!on; -- 2.51.0 From 6bc2b6c6f16d8e60de518d26da1bc6bc436cf71d Mon Sep 17 00:00:00 2001 From: Bo-Cun Chen Date: Wed, 16 Apr 2025 01:50:46 +0100 Subject: [PATCH 13/16] net: ethernet: mtk_eth_soc: reapply mdc divider on reset In the current method, the MDC divider was reset to the default setting of 2.5MHz after the NETSYS SER. Therefore, we need to reapply the MDC divider configuration function in mtk_hw_init() after reset. Fixes: c0a440031d431 ("net: ethernet: mtk_eth_soc: set MDIO bus clock frequency") Signed-off-by: Bo-Cun Chen Signed-off-by: Daniel Golle Link: https://patch.msgid.link/8ab7381447e6cdcb317d5b5a6ddd90a1734efcb0.1744764277.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 39 +++++++++++++-------- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 + 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 43197b28b3e7..1a235283b0e9 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -871,9 +871,25 @@ static const struct phylink_mac_ops mtk_phylink_ops = { .mac_enable_tx_lpi = mtk_mac_enable_tx_lpi, }; +static void mtk_mdio_config(struct mtk_eth *eth) +{ + u32 val; + + /* Configure MDC Divider */ + val = FIELD_PREP(PPSC_MDC_CFG, eth->mdc_divider); + + /* Configure MDC Turbo Mode */ + if (mtk_is_netsys_v3_or_greater(eth)) + mtk_m32(eth, 0, MISC_MDC_TURBO, MTK_MAC_MISC_V3); + else + val |= PPSC_MDC_TURBO; + + mtk_m32(eth, PPSC_MDC_CFG, val, MTK_PPSC); +} + static int mtk_mdio_init(struct mtk_eth *eth) { - unsigned int max_clk = 2500000, divider; + unsigned int max_clk = 2500000; struct device_node *mii_np; int ret; u32 val; @@ -908,20 +924,9 @@ static int mtk_mdio_init(struct mtk_eth *eth) } max_clk = val; } - divider = min_t(unsigned int, DIV_ROUND_UP(MDC_MAX_FREQ, max_clk), 63); - - /* Configure MDC Turbo Mode */ - if (mtk_is_netsys_v3_or_greater(eth)) - mtk_m32(eth, 0, MISC_MDC_TURBO, MTK_MAC_MISC_V3); - - /* Configure MDC Divider */ - val = FIELD_PREP(PPSC_MDC_CFG, divider); - if (!mtk_is_netsys_v3_or_greater(eth)) - val |= PPSC_MDC_TURBO; - mtk_m32(eth, PPSC_MDC_CFG, val, MTK_PPSC); - - dev_dbg(eth->dev, "MDC is running on %d Hz\n", MDC_MAX_FREQ / divider); - + eth->mdc_divider = min_t(unsigned int, DIV_ROUND_UP(MDC_MAX_FREQ, max_clk), 63); + mtk_mdio_config(eth); + dev_dbg(eth->dev, "MDC is running on %d Hz\n", MDC_MAX_FREQ / eth->mdc_divider); ret = of_mdiobus_register(eth->mii_bus, mii_np); err_put_node: @@ -3974,6 +3979,10 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset) else mtk_hw_reset(eth); + /* No MT7628/88 support yet */ + if (reset && !MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) + mtk_mdio_config(eth); + if (mtk_is_netsys_v3_or_greater(eth)) { /* Set FE to PDMAv2 if necessary */ val = mtk_r32(eth, MTK_FE_GLO_MISC); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 90a377ab4359..39709649ea8d 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -1271,6 +1271,7 @@ struct mtk_eth { struct clk *clks[MTK_CLK_MAX]; struct mii_bus *mii_bus; + unsigned int mdc_divider; struct work_struct pending_work; unsigned long state; -- 2.51.0 From 6b02eb372c6776c9abb8bc81cf63f96039c24664 Mon Sep 17 00:00:00 2001 From: Bo-Cun Chen Date: Wed, 16 Apr 2025 01:51:07 +0100 Subject: [PATCH 14/16] net: ethernet: mtk_eth_soc: correct the max weight of the queue limit for 100Mbps Without this patch, the maximum weight of the queue limit will be incorrect when linked at 100Mbps due to an apparent typo. Fixes: f63959c7eec31 ("net: ethernet: mtk_eth_soc: implement multi-queue support for per-port queues") Signed-off-by: Bo-Cun Chen Signed-off-by: Daniel Golle Link: https://patch.msgid.link/74111ba0bdb13743313999ed467ce564e8189006.1744764277.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1a235283b0e9..5a3cfb8908a1 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -734,7 +734,7 @@ static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx, case SPEED_100: val |= MTK_QTX_SCH_MAX_RATE_EN | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 103) | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 3); + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 3) | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1); break; case SPEED_1000: @@ -757,7 +757,7 @@ static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx, case SPEED_100: val |= MTK_QTX_SCH_MAX_RATE_EN | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 1) | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5); + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5) | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1); break; case SPEED_1000: -- 2.51.0 From 1b66124135f5f8640bd540fadda4b20cdd23114b Mon Sep 17 00:00:00 2001 From: Bo-Cun Chen Date: Wed, 16 Apr 2025 01:51:25 +0100 Subject: [PATCH 15/16] net: ethernet: mtk_eth_soc: revise QDMA packet scheduler settings The QDMA packet scheduler suffers from a performance issue. Fix this by picking up changes from MediaTek's SDK which change to use Token Bucket instead of Leaky Bucket and fix the SPEED_1000 configuration. Fixes: 160d3a9b1929 ("net: ethernet: mtk_eth_soc: introduce MTK_NETSYS_V2 support") Signed-off-by: Bo-Cun Chen Signed-off-by: Daniel Golle Link: https://patch.msgid.link/18040f60f9e2f5855036b75b28c4332a2d2ebdd8.1744764277.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 5a3cfb8908a1..bdb98c9d8b1c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -762,8 +762,8 @@ static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx, break; case SPEED_1000: val |= MTK_QTX_SCH_MAX_RATE_EN | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 10) | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 1) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 6) | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 10); break; default: @@ -3320,7 +3320,7 @@ static int mtk_start_dma(struct mtk_eth *eth) if (mtk_is_netsys_v2_or_greater(eth)) val |= MTK_MUTLI_CNT | MTK_RESV_BUF | MTK_WCOMP_EN | MTK_DMAD_WR_WDONE | - MTK_CHK_DDONE_EN | MTK_LEAKY_BUCKET_EN; + MTK_CHK_DDONE_EN; else val |= MTK_RX_BT_32DWORDS; mtk_w32(eth, val, reg_map->qdma.glo_cfg); -- 2.51.0 From ad748a73675cce9f95ece7b11ec233c95ff56c31 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 21 Mar 2025 16:42:56 +0300 Subject: [PATCH 16/16] wifi: rt2x00: remove weird self-assignment in rt2800_loft_search() Remove weird self-assignment in 'rt2800_loft_search()' assuming that it was just a typo. Compile tested only. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Dmitry Antipov Acked-by: Stanislaw Gruszka Link: https://patch.msgid.link/20250321134256.821596-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg --- drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index e5f553a1ea24..b7ea606bda08 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -9393,7 +9393,7 @@ static void rt2800_loft_search(struct rt2x00_dev *rt2x00dev, u8 ch_idx, p0, p1, pf, idx0, idx1, ibit); if (bidx != 5 && pf <= p0 && pf < p1) { - idxf[iorq] = idxf[iorq]; + /* no need to adjust idxf[] */; } else if (p0 < p1) { pf = p0; idxf[iorq] = idx0 & 0x3F; -- 2.51.0