From 5a1ccffd30a08f5a2428cd5fbb3ab03e8eb6c66d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 7 Feb 2025 16:24:58 +0900 Subject: [PATCH 01/16] ip: fib_rules: Fetch net from fib_rule in fib[46]_rule_configure(). The following patch will not set skb->sk from VRF path. Let's fetch net from fib_rule->fr_net instead of sock_net(skb->sk) in fib[46]_rule_configure(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/20250207072502.87775-5-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_rules.c | 4 ++-- net/ipv6/fib6_rules.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 9517b8667e00..041c46787d94 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -245,9 +245,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct nlattr **tb, struct netlink_ext_ack *extack) { - struct net *net = sock_net(skb->sk); + struct fib4_rule *rule4 = (struct fib4_rule *)rule; + struct net *net = rule->fr_net; int err = -EINVAL; - struct fib4_rule *rule4 = (struct fib4_rule *) rule; if (tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) { NL_SET_ERR_MSG(extack, diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 67d39114d9a6..40af8fd6efa7 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -399,9 +399,9 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct nlattr **tb, struct netlink_ext_ack *extack) { + struct fib6_rule *rule6 = (struct fib6_rule *)rule; + struct net *net = rule->fr_net; int err = -EINVAL; - struct net *net = sock_net(skb->sk); - struct fib6_rule *rule6 = (struct fib6_rule *) rule; if (!inet_validate_dscp(frh->tos)) { NL_SET_ERR_MSG(extack, -- 2.51.0 From a0596c2c63fc9d9ad16d701044293e11d8f97953 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 7 Feb 2025 16:24:59 +0900 Subject: [PATCH 02/16] net: fib_rules: Factorise fib_newrule() and fib_delrule(). fib_nl_newrule() / fib_nl_delrule() is the doit() handler for RTM_NEWRULE / RTM_DELRULE but also called from vrf_newlink(). Currently, we hold RTNL on both paths but will not on the former. Also, we set dev_net(dev)->rtnl to skb->sk in vrf_fib_rule() because fib_nl_newrule() / fib_nl_delrule() fetch net as sock_net(skb->sk). Let's Factorise the two functions and pass net and rtnl_held flag. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/20250207072502.87775-6-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 6 ++---- include/net/fib_rules.h | 8 ++++---- net/core/fib_rules.c | 36 +++++++++++++++++++++++------------- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index ca81b212a246..5f21ce1013c4 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1537,14 +1537,12 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) nlmsg_end(skb, nlh); - /* fib_nl_{new,del}rule handling looks for net from skb->sk */ - skb->sk = dev_net(dev)->rtnl; if (add_it) { - err = fib_nl_newrule(skb, nlh, NULL); + err = fib_newrule(dev_net(dev), skb, nlh, NULL, true); if (err == -EEXIST) err = 0; } else { - err = fib_nl_delrule(skb, nlh, NULL); + err = fib_delrule(dev_net(dev), skb, nlh, NULL, true); if (err == -ENOENT) err = 0; } diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 04383d90a1e3..710caacad9da 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -178,10 +178,10 @@ int fib_rules_dump(struct net *net, struct notifier_block *nb, int family, struct netlink_ext_ack *extack); unsigned int fib_rules_seq_read(const struct net *net, int family); -int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack); -int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack); +int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, bool rtnl_held); +int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, bool rtnl_held); INDIRECT_CALLABLE_DECLARE(int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 694a8c2884a8..d68332d9cac6 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -783,15 +783,14 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = { [FRA_FLOWLABEL_MASK] = { .type = NLA_BE32 }, }; -int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, bool rtnl_held) { - struct net *net = sock_net(skb->sk); + struct fib_rule *rule = NULL, *r, *last = NULL; struct fib_rule_hdr *frh = nlmsg_data(nlh); + int err = -EINVAL, unresolved = 0; struct fib_rules_ops *ops = NULL; - struct fib_rule *rule = NULL, *r, *last = NULL; struct nlattr *tb[FRA_MAX + 1]; - int err = -EINVAL, unresolved = 0; bool user_priority = false; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { @@ -893,18 +892,23 @@ errout: rules_ops_put(ops); return err; } -EXPORT_SYMBOL_GPL(fib_nl_newrule); +EXPORT_SYMBOL_GPL(fib_newrule); -int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) { - struct net *net = sock_net(skb->sk); + return fib_newrule(sock_net(skb->sk), skb, nlh, extack, true); +} + +int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack, bool rtnl_held) +{ + struct fib_rule *rule = NULL, *nlrule = NULL; struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; - struct fib_rule *rule = NULL, *r, *nlrule = NULL; struct nlattr *tb[FRA_MAX+1]; - int err = -EINVAL; bool user_priority = false; + int err = -EINVAL; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) { NL_SET_ERR_MSG(extack, "Invalid msg length"); @@ -969,7 +973,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, * current if it is goto rule, have actually been added. */ if (ops->nr_goto_rules > 0) { - struct fib_rule *n; + struct fib_rule *n, *r; n = list_next_entry(rule, list); if (&n->list == &ops->rules_list || n->pref != rule->pref) @@ -998,7 +1002,13 @@ errout: rules_ops_put(ops); return err; } -EXPORT_SYMBOL_GPL(fib_nl_delrule); +EXPORT_SYMBOL_GPL(fib_delrule); + +static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + return fib_delrule(sock_net(skb->sk), skb, nlh, extack, true); +} static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, struct fib_rule *rule) -- 2.51.0 From 98d3a6f681caad8e89e365be00a11d55176bc328 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 7 Feb 2025 16:25:00 +0900 Subject: [PATCH 03/16] net: fib_rules: Convert RTM_NEWRULE to per-netns RTNL. fib_nl_newrule() is the doit() handler for RTM_NEWRULE but also called from vrf_newlink(). In the latter case, RTNL is already held and the 4th arg is true. Let's hold per-netns RTNL in fib_newrule() if rtnl_held is false. Note that we call fib_rule_get() before releasing per-netns RTNL to call notify_rule_change() without RTNL and prevent freeing the new rule. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/20250207072502.87775-7-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/fib_rules.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index d68332d9cac6..d0995ea9d852 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -816,6 +816,9 @@ int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, if (err) goto errout; + if (!rtnl_held) + rtnl_net_lock(net); + err = fib_nl2rule_rtnl(rule, ops, tb, extack); if (err) goto errout_free; @@ -881,12 +884,20 @@ int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, if (rule->tun_id) ip_tunnel_need_metadata(); + fib_rule_get(rule); + + if (!rtnl_held) + rtnl_net_unlock(net); + notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); + fib_rule_put(rule); flush_route_cache(ops); rules_ops_put(ops); return 0; errout_free: + if (!rtnl_held) + rtnl_net_unlock(net); kfree(rule); errout: rules_ops_put(ops); @@ -897,7 +908,7 @@ EXPORT_SYMBOL_GPL(fib_newrule); static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - return fib_newrule(sock_net(skb->sk), skb, nlh, extack, true); + return fib_newrule(sock_net(skb->sk), skb, nlh, extack, false); } int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1320,7 +1331,8 @@ static struct pernet_operations fib_rules_net_ops = { }; static const struct rtnl_msg_handler fib_rules_rtnl_msg_handlers[] __initconst = { - {.msgtype = RTM_NEWRULE, .doit = fib_nl_newrule}, + {.msgtype = RTM_NEWRULE, .doit = fib_nl_newrule, + .flags = RTNL_FLAG_DOIT_PERNET}, {.msgtype = RTM_DELRULE, .doit = fib_nl_delrule}, {.msgtype = RTM_GETRULE, .dumpit = fib_nl_dumprule, .flags = RTNL_FLAG_DUMP_UNLOCKED}, -- 2.51.0 From 1cf770da01120c0a84d53c4dddc57a62d1ca4f96 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 7 Feb 2025 16:25:01 +0900 Subject: [PATCH 04/16] net: fib_rules: Add error_free label in fib_delrule(). We will hold RTNL just before calling fib_nl2rule_rtnl() in fib_delrule() and release it before kfree(nlrule). Let's add a new rule to make the following change cleaner. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/20250207072502.87775-8-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/fib_rules.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index d0995ea9d852..10a7336b8d44 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -946,23 +946,23 @@ int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, err = fib_nl2rule_rtnl(nlrule, ops, tb, extack); if (err) - goto errout; + goto errout_free; rule = rule_find(ops, frh, tb, nlrule, user_priority); if (!rule) { err = -ENOENT; - goto errout; + goto errout_free; } if (rule->flags & FIB_RULE_PERMANENT) { err = -EPERM; - goto errout; + goto errout_free; } if (ops->delete) { err = ops->delete(rule); if (err) - goto errout; + goto errout_free; } if (rule->tun_id) @@ -1008,8 +1008,9 @@ int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, kfree(nlrule); return 0; -errout: +errout_free: kfree(nlrule); +errout: rules_ops_put(ops); return err; } -- 2.51.0 From 88b9cfca8d7735b0e8c809241365577f07c52cb2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 7 Feb 2025 16:25:02 +0900 Subject: [PATCH 05/16] net: fib_rules: Convert RTM_DELRULE to per-netns RTNL. fib_nl_delrule() is the doit() handler for RTM_DELRULE but also called from vrf_newlink() in case something fails in vrf_add_fib_rules(). In the latter case, RTNL is already held and the 4th arg is true. Let's hold per-netns RTNL in fib_delrule() if rtnl_held is false. Now we can place ASSERT_RTNL_NET() in call_fib_rule_notifiers(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://patch.msgid.link/20250207072502.87775-9-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/core/fib_rules.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 10a7336b8d44..5045affeac78 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -371,7 +371,8 @@ static int call_fib_rule_notifiers(struct net *net, .rule = rule, }; - ASSERT_RTNL(); + ASSERT_RTNL_NET(net); + /* Paired with READ_ONCE() in fib_rules_seq() */ WRITE_ONCE(ops->fib_rules_seq, ops->fib_rules_seq + 1); return call_fib_notifiers(net, event_type, &info.info); @@ -944,6 +945,9 @@ int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, if (err) goto errout; + if (!rtnl_held) + rtnl_net_lock(net); + err = fib_nl2rule_rtnl(nlrule, ops, tb, extack); if (err) goto errout_free; @@ -998,10 +1002,12 @@ int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, } } - call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, - NULL); - notify_rule_change(RTM_DELRULE, rule, ops, nlh, - NETLINK_CB(skb).portid); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, NULL); + + if (!rtnl_held) + rtnl_net_unlock(net); + + notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid); fib_rule_put(rule); flush_route_cache(ops); rules_ops_put(ops); @@ -1009,6 +1015,8 @@ int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, return 0; errout_free: + if (!rtnl_held) + rtnl_net_unlock(net); kfree(nlrule); errout: rules_ops_put(ops); @@ -1019,7 +1027,7 @@ EXPORT_SYMBOL_GPL(fib_delrule); static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - return fib_delrule(sock_net(skb->sk), skb, nlh, extack, true); + return fib_delrule(sock_net(skb->sk), skb, nlh, extack, false); } static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, @@ -1334,7 +1342,8 @@ static struct pernet_operations fib_rules_net_ops = { static const struct rtnl_msg_handler fib_rules_rtnl_msg_handlers[] __initconst = { {.msgtype = RTM_NEWRULE, .doit = fib_nl_newrule, .flags = RTNL_FLAG_DOIT_PERNET}, - {.msgtype = RTM_DELRULE, .doit = fib_nl_delrule}, + {.msgtype = RTM_DELRULE, .doit = fib_nl_delrule, + .flags = RTNL_FLAG_DOIT_PERNET}, {.msgtype = RTM_GETRULE, .dumpit = fib_nl_dumprule, .flags = RTNL_FLAG_DUMP_UNLOCKED}, }; -- 2.51.0 From a980da54b6a457e8d9ceb02d7c2ba6a47fc3d502 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 7 Feb 2025 10:31:19 -0800 Subject: [PATCH 06/16] selftests: drv-net: remove an unnecessary libmnl include ncdevmem doesn't need libmnl, remove the unnecessary include. Since YNL doesn't depend on libmnl either, any more, it's actually possible to build selftests without having libmnl installed. Reviewed-by: Mina Almasry Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250207183119.1721424-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/ncdevmem.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 19a6969643f4..2bf14ac2b8c6 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include -- 2.51.0 From 29604bc2aaed5a8f5264018b29d73cb4a7a34960 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 7 Feb 2025 10:41:39 -0800 Subject: [PATCH 07/16] selftests: drv-net: factor out a DrvEnv base class We have separate Env classes for local tests and tests with a remote endpoint. Make it easier to share the code by creating a base class. Make env loading a method of this class. Reviewed-by: Petr Machata Link: https://patch.msgid.link/20250207184140.1730466-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/lib/py/env.py | 63 ++++++++++--------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 987e452d3a45..2f17880e411d 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -10,38 +10,46 @@ from lib.py import NetNS, NetdevSimDev from .remote import Remote -def _load_env_file(src_path): - env = os.environ.copy() - - src_dir = Path(src_path).parent.resolve() - if not (src_dir / "net.config").exists(): +class NetDrvEnvBase: + """ + Base class for a NIC / host envirnoments + """ + def __init__(self, src_path): + self.src_path = src_path + self.env = self._load_env_file() + + def _load_env_file(self): + env = os.environ.copy() + + src_dir = Path(self.src_path).parent.resolve() + if not (src_dir / "net.config").exists(): + return ksft_setup(env) + + with open((src_dir / "net.config").as_posix(), 'r') as fp: + for line in fp.readlines(): + full_file = line + # Strip comments + pos = line.find("#") + if pos >= 0: + line = line[:pos] + line = line.strip() + if not line: + continue + pair = line.split('=', maxsplit=1) + if len(pair) != 2: + raise Exception("Can't parse configuration line:", full_file) + env[pair[0]] = pair[1] return ksft_setup(env) - with open((src_dir / "net.config").as_posix(), 'r') as fp: - for line in fp.readlines(): - full_file = line - # Strip comments - pos = line.find("#") - if pos >= 0: - line = line[:pos] - line = line.strip() - if not line: - continue - pair = line.split('=', maxsplit=1) - if len(pair) != 2: - raise Exception("Can't parse configuration line:", full_file) - env[pair[0]] = pair[1] - return ksft_setup(env) - - -class NetDrvEnv: + +class NetDrvEnv(NetDrvEnvBase): """ Class for a single NIC / host env, with no remote end """ def __init__(self, src_path, **kwargs): - self._ns = None + super().__init__(src_path) - self.env = _load_env_file(src_path) + self._ns = None if 'NETIF' in self.env: self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0] @@ -68,7 +76,7 @@ class NetDrvEnv: self._ns = None -class NetDrvEpEnv: +class NetDrvEpEnv(NetDrvEnvBase): """ Class for an environment with a local device and "remote endpoint" which can be used to send traffic in. @@ -82,8 +90,7 @@ class NetDrvEpEnv: nsim_v6_pfx = "2001:db8::" def __init__(self, src_path, nsim_test=None): - - self.env = _load_env_file(src_path) + super().__init__(src_path) self._stats_settle_time = None -- 2.51.0 From 3337064f4204572a97f701b936436336e37b99d9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 7 Feb 2025 10:41:40 -0800 Subject: [PATCH 08/16] selftests: drv-net: add helper for path resolution Refering to C binaries from Python code is going to be a common need. Add a helper to convert from path in relation to the test. Meaning, if the test is in the same directory as the binary, the call would be simply: cfg.rpath("binary"). The helper name "rpath" is not great. I can't think of a better name that would be accurate yet concise. Reviewed-by: Petr Machata Link: https://patch.msgid.link/20250207184140.1730466-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/csum.py | 2 +- tools/testing/selftests/drivers/net/lib/py/env.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py index cb40497faee4..cd477f3440ca 100755 --- a/tools/testing/selftests/drivers/net/hw/csum.py +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -100,7 +100,7 @@ def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: check_nic_features(cfg) - cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../net/lib/csum") + cfg.bin_local = cfg.rpath("../../../net/lib/csum") cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) cases = [] diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 2f17880e411d..886b4904613c 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -18,6 +18,18 @@ class NetDrvEnvBase: self.src_path = src_path self.env = self._load_env_file() + def rpath(self, path): + """ + Get an absolute path to a file based on a path relative to the directory + containing the test which constructed env. + + For example, if the test.py is in the same directory as + a binary (built from helper.c), the test can use env.rpath("helper") + to get the absolute path to the binary + """ + src_dir = Path(self.src_path).parent.resolve() + return (src_dir / path).as_posix() + def _load_env_file(self): env = os.environ.copy() -- 2.51.0 From 907dd32b4a8aa7fedc2ef1867a6c3a1033738bcb Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 7 Feb 2025 19:00:44 +0100 Subject: [PATCH 09/16] mlxsw: Enable Tx checksum offload The device is able to checksum plain TCP / UDP packets over IPv4 / IPv6 when the 'ipcs' bit in the send descriptor is set. Advertise support for the 'NETIF_F_IP{,6}_CSUM' features in net devices registered by the driver and VLAN uppers and set the 'ipcs' bit when the stack requests Tx checksum offload. Note that the device also calculates the IPv4 checksum, but it first zeroes the current checksum so there should not be any difference compared to the checksum calculated by the kernel. On SN5600 (Spectrum-4) there is about 10% improvement in Tx packet rate with 1400 byte packets when using pktgen. Tested on Spectrum-{1,2,3,4} with all the combinations of IPv4 / IPv6, TCP / UDP, with and without VLAN. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://patch.msgid.link/8dc86c95474ce10572a0fa83b8adb0259558e982.1738950446.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 2 ++ drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 5 +++++ drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 6 ++++-- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 5b44c931b660..058dcabfaa2e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -2214,6 +2214,8 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) mlxsw_pci_wqe_byte_count_set(wqe, i, 0); + mlxsw_pci_wqe_ipcs_set(wqe, skb->ip_summed == CHECKSUM_PARTIAL); + /* Everything is set up, ring producer doorbell to get HW going */ q->producer_counter++; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 6bed495dcf0f..7fa94e5828de 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -90,6 +90,11 @@ MLXSW_ITEM32(pci, wqe, lp, 0x00, 30, 1); */ MLXSW_ITEM32(pci, wqe, type, 0x00, 23, 4); +/* pci_wqe_ipcs + * Calculate IPv4 and TCP / UDP checksums. + */ +MLXSW_ITEM32(pci, wqe, ipcs, 0x00, 14, 1); + /* pci_wqe_byte_count * Size of i-th scatter/gather entry, 0 if entry is unused. */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index d714311fd884..1f8362788c75 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1574,8 +1574,10 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port, netif_carrier_off(dev); dev->features |= NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_FILTER | - NETIF_F_HW_TC; - dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK; + NETIF_F_HW_TC | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK | + NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + dev->vlan_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; dev->lltx = true; dev->netns_local = true; -- 2.51.0 From 67800d296191d0a9bde0a7776f99ca1ddfa0fc26 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Cs=C3=B3k=C3=A1s=2C=20Bence?= Date: Fri, 7 Feb 2025 13:12:55 +0100 Subject: [PATCH 10/16] net: fec: Refactor MAC reset to function MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The core is reset both in `fec_restart()` (called on link-up) and `fec_stop()` (going to sleep, driver remove etc.). These two functions had their separate implementations, which was at first only a register write and a `udelay()` (and the accompanying block comment). However, since then we got soft-reset (MAC disable) and Wake-on-LAN support, which meant that these implementations diverged, often causing bugs. For instance, as of now, `fec_stop()` does not check for `FEC_QUIRK_NO_HARD_RESET`, meaning the MII/RMII mode is cleared on eg. a PM power-down event; and `fec_restart()` missed the refactor renaming the "magic" constant `1` to `FEC_ECR_RESET`. To harmonize current implementations, and eliminate this source of potential future bugs, refactor implementation to a common function. Reviewed-by: Michal Swiatkowski Reviewed-by: Jacob Keller Reviewed-by: Simon Horman Signed-off-by: Csókás, Bence Link: https://patch.msgid.link/20250207121255.161146-2-csokas.bence@prolan.hu Signed-off-by: Paolo Abeni --- drivers/net/ethernet/freescale/fec_main.c | 52 +++++++++++------------ 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index f7c4ce8e9a26..a86cfebedaa8 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1093,6 +1093,29 @@ static void fec_enet_enable_ring(struct net_device *ndev) } } +/* Whack a reset. We should wait for this. + * For i.MX6SX SOC, enet use AXI bus, we use disable MAC + * instead of reset MAC itself. + */ +static void fec_ctrl_reset(struct fec_enet_private *fep, bool allow_wol) +{ + u32 val; + + if (!allow_wol || !(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) { + if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES || + ((fep->quirks & FEC_QUIRK_NO_HARD_RESET) && fep->link)) { + writel(0, fep->hwp + FEC_ECNTRL); + } else { + writel(FEC_ECR_RESET, fep->hwp + FEC_ECNTRL); + udelay(10); + } + } else { + val = readl(fep->hwp + FEC_ECNTRL); + val |= (FEC_ECR_MAGICEN | FEC_ECR_SLEEP); + writel(val, fep->hwp + FEC_ECNTRL); + } +} + /* * This function is called to start or restart the FEC during a link * change, transmit timeout, or to reconfigure the FEC. The network @@ -1109,17 +1132,7 @@ fec_restart(struct net_device *ndev) if (fep->bufdesc_ex) fec_ptp_save_state(fep); - /* Whack a reset. We should wait for this. - * For i.MX6SX SOC, enet use AXI bus, we use disable MAC - * instead of reset MAC itself. - */ - if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES || - ((fep->quirks & FEC_QUIRK_NO_HARD_RESET) && fep->link)) { - writel(0, fep->hwp + FEC_ECNTRL); - } else { - writel(1, fep->hwp + FEC_ECNTRL); - udelay(10); - } + fec_ctrl_reset(fep, false); /* * enet-mac reset will reset mac address registers too, @@ -1373,22 +1386,7 @@ fec_stop(struct net_device *ndev) if (fep->bufdesc_ex) fec_ptp_save_state(fep); - /* Whack a reset. We should wait for this. - * For i.MX6SX SOC, enet use AXI bus, we use disable MAC - * instead of reset MAC itself. - */ - if (!(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) { - if (fep->quirks & FEC_QUIRK_HAS_MULTI_QUEUES) { - writel(0, fep->hwp + FEC_ECNTRL); - } else { - writel(FEC_ECR_RESET, fep->hwp + FEC_ECNTRL); - udelay(10); - } - } else { - val = readl(fep->hwp + FEC_ECNTRL); - val |= (FEC_ECR_MAGICEN | FEC_ECR_SLEEP); - writel(val, fep->hwp + FEC_ECNTRL); - } + fec_ctrl_reset(fep, true); writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); -- 2.51.0 From eb4e17a1d915d3c550e5a58c4bf370659dbe0dc8 Mon Sep 17 00:00:00 2001 From: Yuyang Huang Date: Fri, 7 Feb 2025 20:08:35 +0900 Subject: [PATCH 11/16] netlink: support dumping IPv4 multicast addresses MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Extended RTM_GETMULTICAST to support dumping joined IPv4 multicast addresses, in addition to the existing IPv6 functionality. This allows userspace applications to retrieve both IPv4 and IPv6 multicast addresses through similar netlink command and then monitor future changes by registering to RTNLGRP_IPV4_MCADDR and RTNLGRP_IPV6_MCADDR. Cc: Maciej Żenczykowski Cc: Lorenzo Colitti Reviewed-by: Eric Dumazet Signed-off-by: Yuyang Huang Link: https://patch.msgid.link/20250207110836.2407224-1-yuyanghuang@google.com Signed-off-by: Paolo Abeni --- net/ipv4/devinet.c | 77 ++++++++++++++++++++++++++++++++-------- net/ipv4/igmp.c | 14 +++++--- net/ipv4/igmp_internal.h | 17 +++++++++ 3 files changed, 90 insertions(+), 18 deletions(-) create mode 100644 net/ipv4/igmp_internal.h diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c8b3cf5fba4c..f51e026d7a84 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -46,6 +46,7 @@ #include #include #include +#include "igmp_internal.h" #include #include #ifdef CONFIG_SYSCTL @@ -107,15 +108,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_PROTO] = { .type = NLA_U8 }, }; -struct inet_fill_args { - u32 portid; - u32 seq; - int event; - unsigned int flags; - int netnsid; - int ifindex; -}; - #define IN4_ADDR_HSIZE_SHIFT 8 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT) @@ -1846,9 +1838,38 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, return 0; } -static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, - struct netlink_callback *cb, int *s_ip_idx, - struct inet_fill_args *fillargs) +static int in_dev_dump_ifmcaddr(struct in_device *in_dev, struct sk_buff *skb, + struct netlink_callback *cb, int *s_ip_idx, + struct inet_fill_args *fillargs) +{ + struct ip_mc_list *im; + int ip_idx = 0; + int err; + + for (im = rcu_dereference(in_dev->mc_list); + im; + im = rcu_dereference(im->next_rcu)) { + if (ip_idx < *s_ip_idx) { + ip_idx++; + continue; + } + err = inet_fill_ifmcaddr(skb, in_dev->dev, im, fillargs); + if (err < 0) + goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + ip_idx++; + } + err = 0; + ip_idx = 0; +done: + *s_ip_idx = ip_idx; + return err; +} + +static int in_dev_dump_ifaddr(struct in_device *in_dev, struct sk_buff *skb, + struct netlink_callback *cb, int *s_ip_idx, + struct inet_fill_args *fillargs) { struct in_ifaddr *ifa; int ip_idx = 0; @@ -1874,6 +1895,21 @@ done: return err; } +static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, + struct netlink_callback *cb, int *s_ip_idx, + struct inet_fill_args *fillargs) +{ + switch (fillargs->event) { + case RTM_NEWADDR: + return in_dev_dump_ifaddr(in_dev, skb, cb, s_ip_idx, fillargs); + case RTM_GETMULTICAST: + return in_dev_dump_ifmcaddr(in_dev, skb, cb, s_ip_idx, + fillargs); + default: + return -EINVAL; + } +} + /* Combine dev_addr_genid and dev_base_seq to detect changes. */ static u32 inet_base_seq(const struct net *net) @@ -1889,13 +1925,14 @@ static u32 inet_base_seq(const struct net *net) return res; } -static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) +static int inet_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, + int event) { const struct nlmsghdr *nlh = cb->nlh; struct inet_fill_args fillargs = { .portid = NETLINK_CB(cb->skb).portid, .seq = nlh->nlmsg_seq, - .event = RTM_NEWADDR, + .event = event, .flags = NLM_F_MULTI, .netnsid = -1, }; @@ -1949,6 +1986,16 @@ done: return err; } +static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) +{ + return inet_dump_addr(skb, cb, RTM_NEWADDR); +} + +static int inet_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) +{ + return inet_dump_addr(skb, cb, RTM_GETMULTICAST); +} + static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, u32 portid) { @@ -2845,6 +2892,8 @@ static const struct rtnl_msg_handler devinet_rtnl_msg_handlers[] __initconst = { {.protocol = PF_INET, .msgtype = RTM_GETNETCONF, .doit = inet_netconf_get_devconf, .dumpit = inet_netconf_dump_devconf, .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, + {.owner = THIS_MODULE, .protocol = PF_INET, .msgtype = RTM_GETMULTICAST, + .dumpit = inet_dump_ifmcaddr, .flags = RTNL_FLAG_DUMP_UNLOCKED}, }; void __init devinet_init(void) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 3da126cea884..2c394c364cb9 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -81,6 +81,7 @@ #include #include #include +#include "igmp_internal.h" #include #include #include @@ -1432,14 +1433,16 @@ static void ip_mc_hash_remove(struct in_device *in_dev, *mc_hash = im->next_hash; } -static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, - const struct ip_mc_list *im, int event) +int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, + const struct ip_mc_list *im, + struct inet_fill_args *args) { struct ifa_cacheinfo ci; struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0); + nlh = nlmsg_put(skb, args->portid, args->seq, args->event, + sizeof(struct ifaddrmsg), args->flags); if (!nlh) return -EMSGSIZE; @@ -1468,6 +1471,9 @@ static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, static void inet_ifmcaddr_notify(struct net_device *dev, const struct ip_mc_list *im, int event) { + struct inet_fill_args fillargs = { + .event = event, + }; struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOMEM; @@ -1479,7 +1485,7 @@ static void inet_ifmcaddr_notify(struct net_device *dev, if (!skb) goto error; - err = inet_fill_ifmcaddr(skb, dev, im, event); + err = inet_fill_ifmcaddr(skb, dev, im, &fillargs); if (err < 0) { WARN_ON_ONCE(err == -EMSGSIZE); nlmsg_free(skb); diff --git a/net/ipv4/igmp_internal.h b/net/ipv4/igmp_internal.h new file mode 100644 index 000000000000..0a1bcc8ec8e1 --- /dev/null +++ b/net/ipv4/igmp_internal.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_IGMP_INTERNAL_H +#define _LINUX_IGMP_INTERNAL_H + +struct inet_fill_args { + u32 portid; + u32 seq; + int event; + unsigned int flags; + int netnsid; + int ifindex; +}; + +int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, + const struct ip_mc_list *im, + struct inet_fill_args *args); +#endif -- 2.51.0 From 4f280376e5318f17f7388999f9a0098ccaae931d Mon Sep 17 00:00:00 2001 From: Yuyang Huang Date: Fri, 7 Feb 2025 20:08:36 +0900 Subject: [PATCH 12/16] selftests/net: Add selftest for IPv4 RTM_GETMULTICAST support MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This change introduces a new selftest case to verify the functionality of dumping IPv4 multicast addresses using the RTM_GETMULTICAST netlink message. The test utilizes the ynl library to interact with the netlink interface and validate that the kernel correctly reports the joined IPv4 multicast addresses. To run the test, execute the following command: $ vng -v --user root --cpus 16 -- \ make -C tools/testing/selftests TARGETS=net \ TEST_PROGS=rtnetlink.py TEST_GEN_PROGS="" run_tests Cc: Maciej Żenczykowski Cc: Lorenzo Colitti Signed-off-by: Yuyang Huang Link: https://patch.msgid.link/20250207110836.2407224-2-yuyanghuang@google.com Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/rt_addr.yaml | 23 ++++++++++++++ tools/testing/selftests/net/Makefile | 1 + .../testing/selftests/net/lib/py/__init__.py | 2 +- tools/testing/selftests/net/lib/py/ynl.py | 4 +++ tools/testing/selftests/net/rtnetlink.py | 30 +++++++++++++++++++ 5 files changed, 59 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/net/rtnetlink.py diff --git a/Documentation/netlink/specs/rt_addr.yaml b/Documentation/netlink/specs/rt_addr.yaml index cbee1cedb177..5dd5469044c7 100644 --- a/Documentation/netlink/specs/rt_addr.yaml +++ b/Documentation/netlink/specs/rt_addr.yaml @@ -168,6 +168,29 @@ operations: reply: value: 20 attributes: *ifaddr-all + - + name: getmaddrs + doc: Get / dump IPv4/IPv6 multicast addresses. + attribute-set: addr-attrs + fixed-header: ifaddrmsg + do: + request: + value: 58 + attributes: + - ifa-family + - ifa-index + reply: + value: 58 + attributes: &mcaddr-attrs + - ifa-multicast + - ifa-cacheinfo + dump: + request: + value: 58 + - ifa-family + reply: + value: 58 + attributes: *mcaddr-attrs mcast-groups: list: diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 73ee88d6b043..e2f03211f9b3 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -36,6 +36,7 @@ TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += netns-name.sh TEST_PROGS += nl_netdev.py +TEST_PROGS += rtnetlink.py TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 54d8f5eba810..729457859316 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -5,5 +5,5 @@ from .ksft import * from .netns import NetNS from .nsim import * from .utils import * -from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily +from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily from .ynl import NetshaperFamily diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index ad1e36baee2a..8986c584cb37 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -42,6 +42,10 @@ class RtnlFamily(YnlFamily): super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(), schema='', recv_size=recv_size) +class RtnlAddrFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('rt_addr.yaml')).as_posix(), + schema='', recv_size=recv_size) class NetdevFamily(YnlFamily): def __init__(self, recv_size=0): diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py new file mode 100755 index 000000000000..80950888800b --- /dev/null +++ b/tools/testing/selftests/net/rtnetlink.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_exit, ksft_run, ksft_ge, RtnlAddrFamily +import socket + +IPV4_ALL_HOSTS_MULTICAST = b'\xe0\x00\x00\x01' + +def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None: + """ + Verify that at least one interface has the IPv4 all-hosts multicast address. + At least the loopback interface should have this address. + """ + + addresses = rtnl.getmaddrs({"ifa-family": socket.AF_INET}, dump=True) + + all_host_multicasts = [ + addr for addr in addresses if addr['ifa-multicast'] == IPV4_ALL_HOSTS_MULTICAST + ] + + ksft_ge(len(all_host_multicasts), 1, + "No interface found with the IPv4 all-hosts multicast address") + +def main() -> None: + rtnl = RtnlAddrFamily() + ksft_run([dump_mcaddr_check], args=(rtnl, )) + ksft_exit() + +if __name__ == "__main__": + main() -- 2.51.0 From a9d71b5de76ccc50318d13b828f1a753d8e6a63f Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 7 Feb 2025 14:59:19 +0100 Subject: [PATCH 13/16] mptcp: pm: drop info of userspace_pm_remove_id_zero_address The only use of 'info' parameter of userspace_pm_remove_id_zero_address() is to set an error message into it. Plus, this helper will only fail when it cannot find any subflows with a local address ID 0. This patch drops this parameter and sets the error message where this function is called in mptcp_pm_nl_remove_doit(). Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- net/mptcp/pm_userspace.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index a3d477059b11..4de38bc03ab8 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -253,8 +253,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) return err; } -static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, - struct genl_info *info) +static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk) { struct mptcp_rm_list list = { .nr = 0 }; struct mptcp_subflow_context *subflow; @@ -269,10 +268,8 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, break; } } - if (!has_id_0) { - GENL_SET_ERR_MSG(info, "address with id 0 not found"); + if (!has_id_0) goto remove_err; - } list.ids[list.nr++] = 0; @@ -330,7 +327,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) sk = (struct sock *)msk; if (id_val == 0) { - err = mptcp_userspace_pm_remove_id_zero_address(msk, info); + err = mptcp_userspace_pm_remove_id_zero_address(msk); goto out; } @@ -339,7 +336,6 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&msk->pm.lock); match = mptcp_userspace_pm_lookup_addr_by_id(msk, id_val); if (!match) { - GENL_SET_ERR_MSG(info, "address with specified id not found"); spin_unlock_bh(&msk->pm.lock); release_sock(sk); goto out; @@ -356,6 +352,11 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) err = 0; out: + if (err) + GENL_SET_ERR_MSG_FMT(info, + "address with id %u not found", + id_val); + sock_put(sk); return err; } -- 2.51.0 From 58b21309f97b08b6b9814d1ee1419249eba9ef08 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 7 Feb 2025 14:59:20 +0100 Subject: [PATCH 14/16] mptcp: pm: userspace: flags: clearer msg if no remote addr Since its introduction in commit 892f396c8e68 ("mptcp: netlink: issue MP_PRIO signals from userspace PMs"), it was mandatory to specify the remote address, because of the 'if (rem->addr.family == AF_UNSPEC)' check done later one. In theory, this attribute can be optional, but it sounds better to be precise to avoid sending the MP_PRIO on the wrong subflow, e.g. if there are multiple subflows attached to the same local ID. This can be relaxed later on if there is a need to act on multiple subflows with one command. For the moment, the check to see if attr_rem is NULL can be removed, because mptcp_pm_parse_entry() will do this check as well, no need to do that differently here. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- net/mptcp/pm_userspace.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 4de38bc03ab8..b6cf8ea1161d 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -580,11 +580,9 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) if (ret < 0) goto set_flags_err; - if (attr_rem) { - ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem); - if (ret < 0) - goto set_flags_err; - } + ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem); + if (ret < 0) + goto set_flags_err; if (loc.addr.family == AF_UNSPEC || rem.addr.family == AF_UNSPEC) { -- 2.51.0 From 891a87f7a76c77f8afde876e08b55a2af9819709 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 7 Feb 2025 14:59:21 +0100 Subject: [PATCH 15/16] mptcp: pm: more precise error messages Some errors reported by the userspace PM were vague: "this or that is invalid". It is easier for the userspace to know which part is wrong, instead of having to guess that. While at it, in mptcp_userspace_pm_set_flags() move the parsing after the check linked to the local attribute. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- net/mptcp/pm_userspace.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index b6cf8ea1161d..cdc83fabb7c2 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -223,8 +223,14 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) goto announce_err; } - if (addr_val.addr.id == 0 || !(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { - GENL_SET_ERR_MSG(info, "invalid addr id or flags"); + if (addr_val.addr.id == 0) { + GENL_SET_ERR_MSG(info, "invalid addr id"); + err = -EINVAL; + goto announce_err; + } + + if (!(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { + GENL_SET_ERR_MSG(info, "invalid addr flags"); err = -EINVAL; goto announce_err; } @@ -531,8 +537,14 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info goto destroy_err; } - if (!addr_l.addr.port || !addr_r.port) { - GENL_SET_ERR_MSG(info, "missing local or remote port"); + if (!addr_l.addr.port) { + GENL_SET_ERR_MSG(info, "missing local port"); + err = -EINVAL; + goto destroy_err; + } + + if (!addr_r.port) { + GENL_SET_ERR_MSG(info, "missing remote port"); err = -EINVAL; goto destroy_err; } @@ -580,13 +592,18 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) if (ret < 0) goto set_flags_err; + if (loc.addr.family == AF_UNSPEC) { + GENL_SET_ERR_MSG(info, "invalid local address family"); + ret = -EINVAL; + goto set_flags_err; + } + ret = mptcp_pm_parse_entry(attr_rem, info, false, &rem); if (ret < 0) goto set_flags_err; - if (loc.addr.family == AF_UNSPEC || - rem.addr.family == AF_UNSPEC) { - GENL_SET_ERR_MSG(info, "invalid address families"); + if (rem.addr.family == AF_UNSPEC) { + GENL_SET_ERR_MSG(info, "invalid remote address family"); ret = -EINVAL; goto set_flags_err; } -- 2.51.0 From b2bdec19beecf9351ff9f7ca3131f5f61aacfc9d Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 7 Feb 2025 14:59:22 +0100 Subject: [PATCH 16/16] mptcp: pm: improve error messages Some error messages were: - too generic: "missing input", "invalid request" - not precise enough: "limit greater than maximum" but what's the max? - missing: subflow not found, or connect error. This can be easily improved by being more precise, or adding new error messages. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- net/mptcp/pm_netlink.c | 6 ++++-- net/mptcp/pm_userspace.c | 10 +++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 572d160edca3..1afa2bd89862 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1875,7 +1875,9 @@ static int parse_limit(struct genl_info *info, int id, unsigned int *limit) *limit = nla_get_u32(attr); if (*limit > MPTCP_PM_ADDR_MAX) { - GENL_SET_ERR_MSG(info, "limit greater than maximum"); + NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr, + "limit greater than maximum (%u)", + MPTCP_PM_ADDR_MAX); return -EINVAL; } return 0; @@ -2003,7 +2005,7 @@ int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info) if (addr.addr.family == AF_UNSPEC) { lookup_by_id = 1; if (!addr.addr.id) { - GENL_SET_ERR_MSG(info, "missing required inputs"); + GENL_SET_ERR_MSG(info, "missing address ID"); return -EOPNOTSUPP; } } diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index cdc83fabb7c2..e350d6cc23bf 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -190,7 +190,7 @@ static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *in } if (!mptcp_pm_is_userspace(msk)) { - GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + GENL_SET_ERR_MSG(info, "userspace PM not selected"); sock_put((struct sock *)msk); return NULL; } @@ -428,6 +428,9 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) err = __mptcp_subflow_connect(sk, &local, &addr_r); release_sock(sk); + if (err) + GENL_SET_ERR_MSG_FMT(info, "connect error: %d", err); + spin_lock_bh(&msk->pm.lock); if (err) mptcp_userspace_pm_delete_local_addr(msk, &entry); @@ -552,6 +555,7 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info lock_sock(sk); ssk = mptcp_nl_find_ssk(msk, &addr_l.addr, &addr_r); if (!ssk) { + GENL_SET_ERR_MSG(info, "subflow not found"); err = -ESRCH; goto release_sock; } @@ -625,6 +629,10 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc.addr, &rem.addr, bkup); release_sock(sk); + /* mptcp_pm_nl_mp_prio_send_ack() only fails in one case */ + if (ret < 0) + GENL_SET_ERR_MSG(info, "subflow not found"); + set_flags_err: sock_put(sk); return ret; -- 2.51.0