From d5d715207e2911b84b92f10420d4a8d7653aa98d Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Fri, 7 Mar 2025 01:12:12 +0000 Subject: [PATCH 01/16] virtio_net: Use persistent NAPI config Use persistent NAPI config so that NAPI IDs are not renumbered as queue counts change. $ sudo ethtool -l ens4 | tail -5 | egrep -i '(current|combined)' Current hardware settings: Combined: 4 $ ./tools/net/ynl/pyynl/cli.py \ --spec Documentation/netlink/specs/netdev.yaml \ --dump queue-get --json='{"ifindex": 2}' [{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'rx'}, {'id': 1, 'ifindex': 2, 'napi-id': 8194, 'type': 'rx'}, {'id': 2, 'ifindex': 2, 'napi-id': 8195, 'type': 'rx'}, {'id': 3, 'ifindex': 2, 'napi-id': 8196, 'type': 'rx'}, {'id': 0, 'ifindex': 2, 'type': 'tx'}, {'id': 1, 'ifindex': 2, 'type': 'tx'}, {'id': 2, 'ifindex': 2, 'type': 'tx'}, {'id': 3, 'ifindex': 2, 'type': 'tx'}] Now adjust the queue count, note that the NAPI IDs are not renumbered: $ sudo ethtool -L ens4 combined 1 $ ./tools/net/ynl/pyynl/cli.py \ --spec Documentation/netlink/specs/netdev.yaml \ --dump queue-get --json='{"ifindex": 2}' [{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'rx'}, {'id': 0, 'ifindex': 2, 'type': 'tx'}] $ sudo ethtool -L ens4 combined 8 $ ./tools/net/ynl/pyynl/cli.py \ --spec Documentation/netlink/specs/netdev.yaml \ --dump queue-get --json='{"ifindex": 2}' [{'id': 0, 'ifindex': 2, 'napi-id': 8193, 'type': 'rx'}, {'id': 1, 'ifindex': 2, 'napi-id': 8194, 'type': 'rx'}, {'id': 2, 'ifindex': 2, 'napi-id': 8195, 'type': 'rx'}, {'id': 3, 'ifindex': 2, 'napi-id': 8196, 'type': 'rx'}, {'id': 4, 'ifindex': 2, 'napi-id': 8197, 'type': 'rx'}, {'id': 5, 'ifindex': 2, 'napi-id': 8198, 'type': 'rx'}, {'id': 6, 'ifindex': 2, 'napi-id': 8199, 'type': 'rx'}, {'id': 7, 'ifindex': 2, 'napi-id': 8200, 'type': 'rx'}, [...] Signed-off-by: Joe Damato Reviewed-by: Gerhard Engleder Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Tested-by: Lei Yang Reviewed-by: Xuan Zhuo Link: https://patch.msgid.link/20250307011215.266806-5-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7bd63a677123..34cec2b11b74 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -6455,8 +6455,9 @@ static int virtnet_alloc_queues(struct virtnet_info *vi) INIT_DELAYED_WORK(&vi->refill, refill_work); for (i = 0; i < vi->max_queue_pairs; i++) { vi->rq[i].pages = NULL; - netif_napi_add_weight(vi->dev, &vi->rq[i].napi, virtnet_poll, - napi_weight); + netif_napi_add_config(vi->dev, &vi->rq[i].napi, virtnet_poll, + i); + vi->rq[i].napi.weight = napi_weight; netif_napi_add_tx_weight(vi->dev, &vi->sq[i].napi, virtnet_poll_tx, napi_tx ? napi_weight : 0); -- 2.50.1 From 54580ccdd8a9c6821fd6f72171d435480867e4c3 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 6 Mar 2025 22:34:08 -0500 Subject: [PATCH 02/16] ipv6: remove leftover ip6 cookie initializer As of the blamed commit ipc6.dontfrag is always initialized at the start of udpv6_sendmsg, by ipcm6_init_sk, to either 0 or 1. Later checks against -1 are no longer needed and the branches are now dead code. The blamed commit had removed those branches. But I had overlooked this one case. UDP has both a lockless fast path and a slower path for corked requests. This branch remained in the fast path. Fixes: 096208592b09 ("ipv6: replace ipcm6_init calls with ipcm6_init_sk") Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250307033620.411611-2-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d577bf2f3053..d91da522c34e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -2054,8 +2054,6 @@ struct sk_buff *ip6_make_skb(struct sock *sk, ip6_cork_release(cork, &v6_cork); return ERR_PTR(err); } - if (ipc6->dontfrag < 0) - ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk); err = __ip6_append_data(sk, &queue, cork, &v6_cork, ¤t->task_frag, getfrag, from, -- 2.50.1 From a18dfa9925b9ef6107ea3aa5814ca3c704d34a8a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 6 Mar 2025 22:34:09 -0500 Subject: [PATCH 03/16] ipv6: save dontfrag in cork When spanning datagram construction over multiple send calls using MSG_MORE, per datagram settings are configured on the first send. That is when ip(6)_setup_cork stores these settings for subsequent use in __ip(6)_append_data and others. The only flag that escaped this was dontfrag. As a result, a datagram could be constructed with df=0 on the first sendmsg, but df=1 on a next. Which is what cmsg_ip.sh does in an upcoming MSG_MORE test in the "diff" scenario. Changing datagram conditions in the middle of constructing an skb makes this already complex code path even more convoluted. It is here unintentional. Bring this flag in line with expected sockopt/cmsg behavior. And stop passing ipc6 to __ip6_append_data, to avoid such issues in the future. This is already the case for __ip_append_data. inet6_cork had a 6 byte hole, so the 1B flag has no impact. Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250307033620.411611-3-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ipv6.h | 1 + net/ipv6/ip6_output.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a6e2aadbb91b..5aeeed22f35b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -207,6 +207,7 @@ struct inet6_cork { struct ipv6_txoptions *opt; u8 hop_limit; u8 tclass; + u8 dontfrag:1; }; /* struct ipv6_pinfo - ipv6 private area */ diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d91da522c34e..581bc6289081 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1386,6 +1386,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, } v6_cork->hop_limit = ipc6->hlimit; v6_cork->tclass = ipc6->tclass; + v6_cork->dontfrag = ipc6->dontfrag; if (rt->dst.flags & DST_XFRM_TUNNEL) mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); @@ -1421,7 +1422,7 @@ static int __ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, size_t length, int transhdrlen, - unsigned int flags, struct ipcm6_cookie *ipc6) + unsigned int flags) { struct sk_buff *skb, *skb_prev = NULL; struct inet_cork *cork = &cork_full->base; @@ -1475,7 +1476,7 @@ static int __ip6_append_data(struct sock *sk, if (headersize + transhdrlen > mtu) goto emsgsize; - if (cork->length + length > mtu - headersize && ipc6->dontfrag && + if (cork->length + length > mtu - headersize && v6_cork->dontfrag && (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_ICMPV6 || sk->sk_protocol == IPPROTO_RAW)) { @@ -1855,7 +1856,7 @@ int ip6_append_data(struct sock *sk, return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork, &np->cork, sk_page_frag(sk), getfrag, - from, length, transhdrlen, flags, ipc6); + from, length, transhdrlen, flags); } EXPORT_SYMBOL_GPL(ip6_append_data); @@ -2058,7 +2059,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, err = __ip6_append_data(sk, &queue, cork, &v6_cork, ¤t->task_frag, getfrag, from, length + exthdrlen, transhdrlen + exthdrlen, - flags, ipc6); + flags); if (err) { __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork); return ERR_PTR(err); -- 2.50.1 From 0922cb68edfde9e3920bb3aedea203d333af9f10 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 6 Mar 2025 22:34:10 -0500 Subject: [PATCH 04/16] selftests/net: expand cmsg_ip with MSG_MORE UDP send with MSG_MORE takes a slightly different path than the lockless fast path. For completeness, add coverage to this case too. Pass MSG_MORE on the initial sendmsg, then follow up with a zero byte write to unplug the cork. Unrelated: also add two missing endlines in usage(). Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250307033620.411611-4-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/cmsg_ip.sh | 11 +++++++---- tools/testing/selftests/net/cmsg_sender.c | 24 ++++++++++++++++++----- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/net/cmsg_ip.sh b/tools/testing/selftests/net/cmsg_ip.sh index 2a52520aca32..b55680e081ad 100755 --- a/tools/testing/selftests/net/cmsg_ip.sh +++ b/tools/testing/selftests/net/cmsg_ip.sh @@ -50,8 +50,9 @@ check_result() { # IPV6_DONTFRAG for ovr in setsock cmsg both diff; do for df in 0 1; do - for p in u i r; do + for p in u U i r; do [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP [ $p == "i" ] && prot=ICMP [ $p == "r" ] && prot=RAW @@ -81,8 +82,9 @@ test_dscp() { ip $IPVER -netns $NS route add table 300 prohibit any for ovr in setsock cmsg both diff; do - for p in u i r; do + for p in u U i r; do [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP [ $p == "i" ] && prot=ICMP [ $p == "r" ] && prot=RAW @@ -134,8 +136,9 @@ test_ttl_hoplimit() { local -r LIM=4 for ovr in setsock cmsg both diff; do - for p in u i r; do + for p in u U i r; do [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP [ $p == "i" ] && prot=ICMP [ $p == "r" ] && prot=RAW @@ -166,7 +169,7 @@ test_ttl_hoplimit -4 $TGT4 ttl test_ttl_hoplimit -6 $TGT6 hlim # IPV6 exthdr -for p in u i r; do +for p in u U i r; do # Very basic "does it crash" test for h in h d r; do $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234 diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 19bd8499031b..a825e628aee7 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -33,6 +33,7 @@ enum { ERN_RECVERR, ERN_CMSG_RD, ERN_CMSG_RCV, + ERN_SEND_MORE, }; struct option_cmsg_u32 { @@ -46,6 +47,7 @@ struct options { const char *service; unsigned int size; unsigned int num_pkt; + bool msg_more; struct { unsigned int mark; unsigned int dontfrag; @@ -94,7 +96,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) "\t\t-S send() size\n" "\t\t-4/-6 Force IPv4 / IPv6 only\n" "\t\t-p prot Socket protocol\n" - "\t\t (u = UDP (default); i = ICMP; r = RAW)\n" + "\t\t (u = UDP (default); i = ICMP; r = RAW;\n" + "\t\t U = UDP with MSG_MORE)\n" "\n" "\t\t-m val Set SO_MARK with given value\n" "\t\t-M val Set SO_MARK via setsockopt\n" @@ -109,8 +112,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) "\t\t-l val Set TTL/HOPLIMIT via cmsg\n" "\t\t-L val Set TTL/HOPLIMIT via setsockopt\n" "\t\t-H type Add an IPv6 header option\n" - "\t\t (h = HOP; d = DST; r = RTDST)" - ""); + "\t\t (h = HOP; d = DST; r = RTDST)\n" + "\n"); exit(ERN_HELP); } @@ -133,8 +136,11 @@ static void cs_parse_args(int argc, char *argv[]) opt.sock.family = AF_INET6; break; case 'p': - if (*optarg == 'u' || *optarg == 'U') { + if (*optarg == 'u') { opt.sock.proto = IPPROTO_UDP; + } else if (*optarg == 'U') { + opt.sock.proto = IPPROTO_UDP; + opt.msg_more = true; } else if (*optarg == 'i' || *optarg == 'I') { opt.sock.proto = IPPROTO_ICMP; } else if (*optarg == 'r') { @@ -531,7 +537,7 @@ int main(int argc, char *argv[]) cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf)); for (i = 0; i < opt.num_pkt; i++) { - err = sendmsg(fd, &msg, 0); + err = sendmsg(fd, &msg, opt.msg_more ? MSG_MORE : 0); if (err < 0) { if (!opt.silent_send) fprintf(stderr, "send failed: %s\n", strerror(errno)); @@ -542,6 +548,14 @@ int main(int argc, char *argv[]) err = ERN_SEND_SHORT; goto err_out; } + if (opt.msg_more) { + err = write(fd, NULL, 0); + if (err < 0) { + fprintf(stderr, "send more: %s\n", strerror(errno)); + err = ERN_SEND_MORE; + goto err_out; + } + } } err = ERN_SUCCESS; -- 2.50.1 From 473367a5ffe1607a61be481e2feda684eb5faea9 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 7 Mar 2025 08:29:47 +0100 Subject: [PATCH 05/16] r8169: increase max jumbo packet size on RTL8125/RTL8126 Realtek confirmed that all RTL8125/RTL8126 chip versions support up to 16K jumbo packets. Reflect this in the driver. Tested by Rui on RTL8125B with 12K jumbo packets. Suggested-by: Rui Salvaterra Tested-by: Rui Salvaterra Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Link: https://patch.msgid.link/396762ad-cc65-4e60-b01e-8847db89e98b@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index fa339bd8c775..b18daeeda40d 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -89,6 +89,7 @@ #define JUMBO_6K (6 * SZ_1K - VLAN_ETH_HLEN - ETH_FCS_LEN) #define JUMBO_7K (7 * SZ_1K - VLAN_ETH_HLEN - ETH_FCS_LEN) #define JUMBO_9K (9 * SZ_1K - VLAN_ETH_HLEN - ETH_FCS_LEN) +#define JUMBO_16K (SZ_16K - VLAN_ETH_HLEN - ETH_FCS_LEN) static const struct { const char *name; @@ -5360,6 +5361,9 @@ static int rtl_jumbo_max(struct rtl8169_private *tp) /* RTL8168c */ case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_24: return JUMBO_6K; + /* RTL8125/8126 */ + case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_71: + return JUMBO_16K; default: return JUMBO_9K; } -- 2.50.1 From 991a1b09920bc15c66f64c1e7d15cdabd3816c46 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 7 Mar 2025 12:28:48 +0300 Subject: [PATCH 06/16] eth: fbnic: fix memory corruption in fbnic_tlv_attr_get_string() This code is trying to ensure that the last byte of the buffer is a NUL terminator. However, the problem is that attr->value[] is an array of __le32, not char, so it zeroes out 4 bytes way beyond the end of the buffer. Cast the buffer to char to address this. Fixes: e5cf5107c9e4 ("eth: fbnic: Update fbnic_tlv_attr_get_string() to work like nla_strscpy()") Signed-off-by: Dan Carpenter Reviewed-by: Lee Trager Link: https://patch.msgid.link/2791d4be-ade4-4e50-9b12-33307d8410f6@stanley.mountain Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_tlv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_tlv.c b/drivers/net/ethernet/meta/fbnic/fbnic_tlv.c index d558d176e0df..517ed8b2f1cb 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_tlv.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_tlv.c @@ -261,7 +261,7 @@ ssize_t fbnic_tlv_attr_get_string(struct fbnic_tlv_msg *attr, char *dst, return -E2BIG; srclen = le16_to_cpu(attr->hdr.len) - sizeof(*attr); - if (srclen > 0 && attr->value[srclen - 1] == '\0') + if (srclen > 0 && ((char *)attr->value)[srclen - 1] == '\0') srclen--; if (srclen >= dstsize) { -- 2.50.1 From 7462fe22cc74321eb663768848976d42eba3ddbb Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 7 Mar 2025 12:21:45 +0100 Subject: [PATCH 07/16] mptcp: pm: use addr entry for get_local_id The following code in mptcp_userspace_pm_get_local_id() that assigns "skc" to "new_entry" is not allowed in BPF if we use the same code to implement the get_local_id() interface of a BFP path manager: memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry)); new_entry.addr = *skc; new_entry.addr.id = 0; new_entry.flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; To solve the issue, this patch moves this assignment to "new_entry" forward to mptcp_pm_get_local_id(), and then passing "new_entry" as a parameter to both mptcp_pm_nl_get_local_id() and mptcp_userspace_pm_get_local_id(). No behavioural changes intended. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250307-net-next-mptcp-pm-reorg-v1-1-abef20ada03b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 9 ++++++--- net/mptcp/pm_netlink.c | 11 ++++------- net/mptcp/pm_userspace.c | 17 ++++++----------- net/mptcp/protocol.h | 6 ++++-- 4 files changed, 20 insertions(+), 23 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 6c8cadf84f31..f6030ce04efd 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -406,7 +406,7 @@ out_unlock: int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) { - struct mptcp_addr_info skc_local; + struct mptcp_pm_addr_entry skc_local = { 0 }; struct mptcp_addr_info msk_local; if (WARN_ON_ONCE(!msk)) @@ -416,10 +416,13 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) * addr */ mptcp_local_address((struct sock_common *)msk, &msk_local); - mptcp_local_address((struct sock_common *)skc, &skc_local); - if (mptcp_addresses_equal(&msk_local, &skc_local, false)) + mptcp_local_address((struct sock_common *)skc, &skc_local.addr); + if (mptcp_addresses_equal(&msk_local, &skc_local.addr, false)) return 0; + skc_local.addr.id = 0; + skc_local.flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; + if (mptcp_pm_is_userspace(msk)) return mptcp_userspace_pm_get_local_id(msk, &skc_local); return mptcp_pm_nl_get_local_id(msk, &skc_local); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index a6387bcf848b..23c28e37ab8f 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1150,7 +1150,8 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, return err; } -int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) +int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *skc) { struct mptcp_pm_addr_entry *entry; struct pm_nl_pernet *pernet; @@ -1159,7 +1160,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc pernet = pm_nl_get_pernet_from_msk(msk); rcu_read_lock(); - entry = __lookup_addr(pernet, skc); + entry = __lookup_addr(pernet, &skc->addr); ret = entry ? entry->addr.id : -1; rcu_read_unlock(); if (ret >= 0) @@ -1170,12 +1171,8 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc if (!entry) return -ENOMEM; - entry->addr = *skc; - entry->addr.id = 0; + *entry = *skc; entry->addr.port = 0; - entry->ifindex = 0; - entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; - entry->lsk = NULL; ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); if (ret < 0) kfree(entry); diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 7e7d01bef5d4..8c45eebe9bbc 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -130,27 +130,22 @@ mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id) } int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, - struct mptcp_addr_info *skc) + struct mptcp_pm_addr_entry *skc) { - struct mptcp_pm_addr_entry *entry = NULL, new_entry; __be16 msk_sport = ((struct inet_sock *) inet_sk((struct sock *)msk))->inet_sport; + struct mptcp_pm_addr_entry *entry; spin_lock_bh(&msk->pm.lock); - entry = mptcp_userspace_pm_lookup_addr(msk, skc); + entry = mptcp_userspace_pm_lookup_addr(msk, &skc->addr); spin_unlock_bh(&msk->pm.lock); if (entry) return entry->addr.id; - memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry)); - new_entry.addr = *skc; - new_entry.addr.id = 0; - new_entry.flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; - - if (new_entry.addr.port == msk_sport) - new_entry.addr.port = 0; + if (skc->addr.port == msk_sport) + skc->addr.port = 0; - return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); + return mptcp_userspace_pm_append_new_local_addr(msk, skc, true); } bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7b74dedc7936..333d20a018b4 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1121,8 +1121,10 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); -int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); -int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *skc); +int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *skc); bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc); bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); -- 2.50.1 From fac7a6ddc75740ee3d24c7fa054921f9c495d8c2 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 7 Mar 2025 12:21:46 +0100 Subject: [PATCH 08/16] mptcp: pm: remove '_nl' from mptcp_pm_nl_addr_send_ack Currently, in-kernel PM specific helpers are prefixed with 'mptcp_pm_nl_'. But here 'mptcp_pm_nl_addr_send_ack()' is not specific to this PM: it is used by both the in-kernel and userspace PMs. To avoid confusions, the '_nl' bit has been removed from the name. No behavioural changes intended. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250307-net-next-mptcp-pm-reorg-v1-2-abef20ada03b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 2 +- net/mptcp/pm_netlink.c | 8 ++++---- net/mptcp/pm_userspace.c | 2 +- net/mptcp/protocol.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index f6030ce04efd..ece706e8ed22 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -57,7 +57,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ msk->pm.rm_list_tx = *rm_list; rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL); WRITE_ONCE(msk->pm.addr_signal, rm_addr); - mptcp_pm_nl_addr_send_ack(msk); + mptcp_pm_addr_send_ack(msk); return 0; } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 23c28e37ab8f..a70a688eae84 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -606,7 +606,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) local.addr.id = 0; mptcp_pm_announce_addr(msk, &local.addr, false); - mptcp_pm_nl_addr_send_ack(msk); + mptcp_pm_addr_send_ack(msk); if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) signal_and_subflow = true; @@ -740,7 +740,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) remote = msk->pm.remote; mptcp_pm_announce_addr(msk, &remote, true); - mptcp_pm_nl_addr_send_ack(msk); + mptcp_pm_addr_send_ack(msk); if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) return; @@ -781,7 +781,7 @@ bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, return mptcp_addresses_equal(&mpc_remote, remote, remote->port); } -void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) +void mptcp_pm_addr_send_ack(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow, *alt = NULL; @@ -942,7 +942,7 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk) } if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); - mptcp_pm_nl_addr_send_ack(msk); + mptcp_pm_addr_send_ack(msk); } if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 8c45eebe9bbc..b41e1aaa1d1c 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -234,7 +234,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) { msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &addr_val.addr, false); - mptcp_pm_nl_addr_send_ack(msk); + mptcp_pm_addr_send_ack(msk); } spin_unlock_bh(&msk->pm.lock); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 333d20a018b4..2a3eb2392b3b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1008,7 +1008,7 @@ void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *remote); -void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk); +void mptcp_pm_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); -- 2.50.1 From d1734987992c977f1515a5208688aced653c1869 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 7 Mar 2025 12:21:47 +0100 Subject: [PATCH 09/16] mptcp: pm: remove '_nl' from mptcp_pm_nl_mp_prio_send_ack Currently, in-kernel PM specific helpers are prefixed with 'mptcp_pm_nl_'. But here 'mptcp_pm_nl_mp_prio_send_ack()' is not specific to this PM: it is used by both the in-kernel and userspace PMs. To avoid confusions, the '_nl' bit has been removed from the name. No behavioural changes intended. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250307-net-next-mptcp-pm-reorg-v1-3-abef20ada03b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 10 +++++----- net/mptcp/pm_userspace.c | 4 ++-- net/mptcp/protocol.h | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index a70a688eae84..5494b5b409dc 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -808,10 +808,10 @@ void mptcp_pm_addr_send_ack(struct mptcp_sock *msk) mptcp_pm_send_ack(msk, alt, false, false); } -int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, - struct mptcp_addr_info *addr, - struct mptcp_addr_info *rem, - u8 bkup) +int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + struct mptcp_addr_info *rem, + u8 bkup) { struct mptcp_subflow_context *subflow; @@ -1936,7 +1936,7 @@ static void mptcp_nl_set_flags(struct net *net, lock_sock(sk); if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) - mptcp_pm_nl_mp_prio_send_ack(msk, &local->addr, NULL, bkup); + mptcp_pm_mp_prio_send_ack(msk, &local->addr, NULL, bkup); /* Subflows will only be recreated if the SUBFLOW flag is set */ if (is_subflow && (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)) mptcp_pm_nl_fullmesh(msk, &local->addr); diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index b41e1aaa1d1c..2626b2b092d4 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -605,10 +605,10 @@ int mptcp_userspace_pm_set_flags(struct mptcp_pm_addr_entry *local, spin_unlock_bh(&msk->pm.lock); lock_sock(sk); - ret = mptcp_pm_nl_mp_prio_send_ack(msk, &local->addr, &rem, bkup); + ret = mptcp_pm_mp_prio_send_ack(msk, &local->addr, &rem, bkup); release_sock(sk); - /* mptcp_pm_nl_mp_prio_send_ack() only fails in one case */ + /* mptcp_pm_mp_prio_send_ack() only fails in one case */ if (ret < 0) GENL_SET_ERR_MSG(info, "subflow not found"); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 2a3eb2392b3b..5508343f2c69 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1013,10 +1013,10 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq); -int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, - struct mptcp_addr_info *addr, - struct mptcp_addr_info *rem, - u8 bkup); +int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + struct mptcp_addr_info *rem, + u8 bkup); bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); -- 2.50.1 From 551a9ad7879df1c7d604b27272fe84032a040074 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 7 Mar 2025 12:21:48 +0100 Subject: [PATCH 10/16] mptcp: pm: remove '_nl' from mptcp_pm_nl_work Currently, in-kernel PM specific helpers are prefixed with 'mptcp_pm_nl_'. But here 'mptcp_pm_nl_work' is not specific to this PM: it is called from the core to call helpers, some of them needed by both the in-kernel and userspace PMs. To avoid confusions, the '_nl' bit has been removed from the name. Also used 'worker' instead of 'work', similar to protocol.c's worker. No behavioural changes intended. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250307-net-next-mptcp-pm-reorg-v1-4-abef20ada03b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 2 +- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 5494b5b409dc..f6f7ea25640b 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -924,7 +924,7 @@ static void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); } -void mptcp_pm_nl_work(struct mptcp_sock *msk) +void mptcp_pm_worker(struct mptcp_sock *msk) { struct mptcp_pm_data *pm = &msk->pm; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index ec23e65ef0f1..ac946263ec64 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2681,7 +2681,7 @@ static void mptcp_worker(struct work_struct *work) mptcp_check_fastclose(msk); - mptcp_pm_nl_work(msk); + mptcp_pm_worker(msk); mptcp_check_send_data_fin(sk); mptcp_check_data_fin_ack(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 5508343f2c69..f29f4dd28fc5 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1147,7 +1147,7 @@ static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflo } void __init mptcp_pm_nl_init(void); -void mptcp_pm_nl_work(struct mptcp_sock *msk); +void mptcp_pm_worker(struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); -- 2.50.1 From 63611391850850bf27f81afb0d0b6d1237a34006 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 7 Mar 2025 12:21:49 +0100 Subject: [PATCH 11/16] mptcp: pm: remove '_nl' from mptcp_pm_nl_rm_addr_received Currently, in-kernel PM specific helpers are prefixed with 'mptcp_pm_nl_'. But here 'mptcp_pm_nl_rm_addr_received' is not specific to this PM: it is called from the PM worker, and used by both the in-kernel and userspace PMs. The helper has been renamed to 'mptcp_pm_rm_addr_recv' instead of '_received' to avoid confusions with the one from pm.c. mptcp_pm_nl_rm_addr_or_subflow', and 'mptcp_pm_nl_rm_subflow_received' have been updated too for the same reason. To avoid confusions, the '_nl' bit has been removed from the name. While at it, the in-kernel PM specific code has been move from mptcp_pm_rm_addr_or_subflow to a new dedicated helper, clearer. No behavioural changes intended. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250307-net-next-mptcp-pm-reorg-v1-5-abef20ada03b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 55 +++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index f6f7ea25640b..09ef3aa025e7 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -838,9 +838,20 @@ int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, return -EINVAL; } -static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list, - enum linux_mptcp_mib_field rm_type) +static void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id) +{ + if (rm_id && WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { + /* Note: if the subflow has been closed before, this + * add_addr_accepted counter will not be decremented. + */ + if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) + WRITE_ONCE(msk->pm.accept_addr, true); + } +} + +static void mptcp_pm_rm_addr_or_subflow(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list, + enum linux_mptcp_mib_field rm_type) { struct mptcp_subflow_context *subflow, *tmp; struct sock *sk = (struct sock *)msk; @@ -893,35 +904,23 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, __MPTCP_INC_STATS(sock_net(sk), rm_type); } - if (rm_type == MPTCP_MIB_RMADDR) + if (rm_type == MPTCP_MIB_RMADDR) { __MPTCP_INC_STATS(sock_net(sk), rm_type); - - if (!removed) - continue; - - if (!mptcp_pm_is_kernel(msk)) - continue; - - if (rm_type == MPTCP_MIB_RMADDR && rm_id && - !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { - /* Note: if the subflow has been closed before, this - * add_addr_accepted counter will not be decremented. - */ - if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) - WRITE_ONCE(msk->pm.accept_addr, true); + if (removed && mptcp_pm_is_kernel(msk)) + mptcp_pm_nl_rm_addr(msk, rm_id); } } } -static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) +static void mptcp_pm_rm_addr_recv(struct mptcp_sock *msk) { - mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); + mptcp_pm_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); } -static void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list) +static void mptcp_pm_rm_subflow(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list) { - mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); + mptcp_pm_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); } void mptcp_pm_worker(struct mptcp_sock *msk) @@ -946,7 +945,7 @@ void mptcp_pm_worker(struct mptcp_sock *msk) } if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); - mptcp_pm_nl_rm_addr_received(msk); + mptcp_pm_rm_addr_recv(msk); } if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); @@ -1538,7 +1537,7 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, list.ids[0] = mptcp_endp_get_local_id(msk, addr); if (remove_subflow) { spin_lock_bh(&msk->pm.lock); - mptcp_pm_nl_rm_subflow_received(msk, &list); + mptcp_pm_rm_subflow(msk, &list); spin_unlock_bh(&msk->pm.lock); } @@ -1583,7 +1582,7 @@ static int mptcp_nl_remove_id_zero_address(struct net *net, lock_sock(sk); spin_lock_bh(&msk->pm.lock); mptcp_pm_remove_addr(msk, &list); - mptcp_pm_nl_rm_subflow_received(msk, &list); + mptcp_pm_rm_subflow(msk, &list); __mark_subflow_endp_available(msk, 0); spin_unlock_bh(&msk->pm.lock); release_sock(sk); @@ -1670,7 +1669,7 @@ static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk, mptcp_pm_remove_addr(msk, &alist); } if (slist.nr) - mptcp_pm_nl_rm_subflow_received(msk, &slist); + mptcp_pm_rm_subflow(msk, &slist); /* Reset counters: maybe some subflows have been removed before */ bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); msk->pm.local_addr_used = 0; @@ -1910,7 +1909,7 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); spin_lock_bh(&msk->pm.lock); - mptcp_pm_nl_rm_subflow_received(msk, &list); + mptcp_pm_rm_subflow(msk, &list); __mark_subflow_endp_available(msk, list.ids[0]); mptcp_pm_create_subflow_or_signal_addr(msk); spin_unlock_bh(&msk->pm.lock); -- 2.50.1 From 550c50bbc2b7950dd4ea1082c016f84469509eab Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 7 Mar 2025 12:21:50 +0100 Subject: [PATCH 12/16] mptcp: pm: remove '_nl' from mptcp_pm_nl_subflow_chk_stale() Currently, in-kernel PM specific helpers are prefixed with 'mptcp_pm_nl_'. But here 'mptcp_pm_nl_subflow_chk_stale' is not specific to this PM: it is called from pm.c for both the in-kernel and userspace PMs. To avoid confusions, the '_nl' bit has been removed from the name. No behavioural changes intended. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250307-net-next-mptcp-pm-reorg-v1-6-abef20ada03b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 2 +- net/mptcp/pm_netlink.c | 2 +- net/mptcp/protocol.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index ece706e8ed22..14c7ff5c606c 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -567,7 +567,7 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) } else if (subflow->stale_rcv_tstamp == rcv_tstamp) { if (subflow->stale_count < U8_MAX) subflow->stale_count++; - mptcp_pm_nl_subflow_chk_stale(msk, ssk); + mptcp_pm_subflows_chk_stale(msk, ssk); } else { subflow->stale_count = 0; mptcp_subflow_set_active(subflow); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 09ef3aa025e7..43667ad4c4ae 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1203,7 +1203,7 @@ static const struct genl_multicast_group mptcp_pm_mcgrps[] = { }, }; -void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) +void mptcp_pm_subflows_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) { struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = (struct sock *)msk; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f29f4dd28fc5..a5db1a297fbc 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -992,7 +992,7 @@ bool mptcp_pm_addr_families_match(const struct sock *sk, const struct mptcp_addr_info *loc, const struct mptcp_addr_info *rem); void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); -void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); +void mptcp_pm_subflows_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); -- 2.50.1 From 498d7d8b75f16a5b6e4f6499f1e72e9296f2d0cd Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 7 Mar 2025 12:21:51 +0100 Subject: [PATCH 13/16] mptcp: pm: remove '_nl' from mptcp_pm_nl_is_init_remote_addr Currently, in-kernel PM specific helpers are prefixed with 'mptcp_pm_nl_'. But here 'mptcp_pm_nl_is_init_remote_addr' is not specific to this PM: it is called from pm.c for both the in-kernel and userspace PMs. To avoid confusions, the '_nl' bit has been removed from the name. No behavioural changes intended. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250307-net-next-mptcp-pm-reorg-v1-7-abef20ada03b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 2 +- net/mptcp/pm_netlink.c | 4 ++-- net/mptcp/protocol.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 14c7ff5c606c..ab443b9f9c5f 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -231,7 +231,7 @@ void mptcp_pm_add_addr_received(const struct sock *ssk, __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP); } /* id0 should not have a different address */ - } else if ((addr->id == 0 && !mptcp_pm_nl_is_init_remote_addr(msk, addr)) || + } else if ((addr->id == 0 && !mptcp_pm_is_init_remote_addr(msk, addr)) || (addr->id > 0 && !READ_ONCE(pm->accept_addr))) { mptcp_pm_announce_addr(msk, addr, true); mptcp_pm_add_addr_send_ack(msk); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 43667ad4c4ae..029a74162b0b 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -772,8 +772,8 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) } } -bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, - const struct mptcp_addr_info *remote) +bool mptcp_pm_is_init_remote_addr(struct mptcp_sock *msk, + const struct mptcp_addr_info *remote) { struct mptcp_addr_info mpc_remote; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a5db1a297fbc..39bcad1def6b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1006,8 +1006,8 @@ void mptcp_pm_add_addr_received(const struct sock *ssk, void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); -bool mptcp_pm_nl_is_init_remote_addr(struct mptcp_sock *msk, - const struct mptcp_addr_info *remote); +bool mptcp_pm_is_init_remote_addr(struct mptcp_sock *msk, + const struct mptcp_addr_info *remote); void mptcp_pm_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); -- 2.50.1 From 40aa7409d30df2b610f8e590cbb183faa2ac6f1f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 7 Mar 2025 12:21:52 +0100 Subject: [PATCH 14/16] mptcp: pm: kernel: add '_pm' to mptcp_nl_set_flags Currently, in-kernel PM specific helpers are prefixed with 'mptcp_pm_nl_'. Here, '_pm' was missing from 'mptcp_nl_set_flags'. Add '_pm' to be similar to others, and add '_all' to avoid confusions witih the global 'mptcp_pm_nl_set_flags'. No behavioural changes intended. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250307-net-next-mptcp-pm-reorg-v1-8-abef20ada03b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 029a74162b0b..781831c50691 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1915,9 +1915,9 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, spin_unlock_bh(&msk->pm.lock); } -static void mptcp_nl_set_flags(struct net *net, - struct mptcp_pm_addr_entry *local, - u8 changed) +static void mptcp_pm_nl_set_flags_all(struct net *net, + struct mptcp_pm_addr_entry *local, + u8 changed) { u8 is_subflow = !!(local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW); u8 bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP); @@ -1992,7 +1992,7 @@ int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, *local = *entry; spin_unlock_bh(&pernet->lock); - mptcp_nl_set_flags(net, local, changed); + mptcp_pm_nl_set_flags_all(net, local, changed); return 0; } -- 2.50.1 From a17336b2b2e0339b8107b9d4d0fd6b7cf51172e4 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 7 Mar 2025 12:21:53 +0100 Subject: [PATCH 15/16] mptcp: pm: avoid calling PM specific code from core When destroying an MPTCP socket, some userspace PM specific code was called from mptcp_destroy_common() in protocol.c. That feels wrong, and it is the only case. Instead, the core now calls mptcp_pm_destroy() from pm.c which is now in charge of cleaning the announced addresses list, and ask the different PMs to do extra cleaning if needed, e.g. the userspace PM, if used, will clean the local addresses list. While at it, the userspace PM specific helper has been prefixed with 'mptcp_userspace_pm_' like the other ones. No behavioural changes intended. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250307-net-next-mptcp-pm-reorg-v1-9-abef20ada03b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 8 ++++++++ net/mptcp/pm_userspace.c | 5 +---- net/mptcp/protocol.c | 3 +-- net/mptcp/protocol.h | 3 ++- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index ab443b9f9c5f..17f99924dfa0 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -599,6 +599,14 @@ bool mptcp_pm_addr_families_match(const struct sock *sk, #endif } +void mptcp_pm_destroy(struct mptcp_sock *msk) +{ + mptcp_pm_free_anno_list(msk); + + if (mptcp_pm_is_userspace(msk)) + mptcp_userspace_pm_free_local_addr_list(msk); +} + void mptcp_pm_data_reset(struct mptcp_sock *msk) { u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk)); diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 2626b2b092d4..13856df22673 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -12,15 +12,12 @@ list_for_each_entry(__entry, \ &((__msk)->pm.userspace_pm_local_addr_list), list) -void mptcp_free_local_addr_list(struct mptcp_sock *msk) +void mptcp_userspace_pm_free_local_addr_list(struct mptcp_sock *msk) { struct mptcp_pm_addr_entry *entry, *tmp; struct sock *sk = (struct sock *)msk; LIST_HEAD(free_list); - if (!mptcp_pm_is_userspace(msk)) - return; - spin_lock_bh(&msk->pm.lock); list_splice_init(&msk->pm.userspace_pm_local_addr_list, &free_list); spin_unlock_bh(&msk->pm.lock); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index ac946263ec64..ad780ae1d30d 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3302,8 +3302,7 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) * inet_sock_destruct() will dispose it */ mptcp_token_destroy(msk); - mptcp_pm_free_anno_list(msk); - mptcp_free_local_addr_list(msk); + mptcp_pm_destroy(msk); } static void mptcp_destroy(struct sock *sk) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 39bcad1def6b..0013b68a2731 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -983,6 +983,7 @@ __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum su void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); void mptcp_pm_data_reset(struct mptcp_sock *msk); +void mptcp_pm_destroy(struct mptcp_sock *msk); int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, struct mptcp_addr_info *addr); int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, @@ -1042,7 +1043,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry); -void mptcp_free_local_addr_list(struct mptcp_sock *msk); +void mptcp_userspace_pm_free_local_addr_list(struct mptcp_sock *msk); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); -- 2.50.1 From a49eb8ae95b8ce24f30a027f286baf8cac08a0ae Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 7 Mar 2025 12:21:54 +0100 Subject: [PATCH 16/16] mptcp: pm: worker: split in-kernel and common tasks To make it clear what actions are in-kernel PM specific and which ones are not and done for all PMs, e.g. sending ADD_ADDR and close associated subflows when a RM_ADDR is received. The behavioural is changed a bit: MPTCP_PM_ADD_ADDR_RECEIVED is now treated after MPTCP_PM_ADD_ADDR_SEND_ACK and MPTCP_PM_RM_ADDR_RECEIVED, but that should not change anything in practice. Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250307-net-next-mptcp-pm-reorg-v1-10-abef20ada03b@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 25 +++++++++++++++++++++++++ net/mptcp/pm_netlink.c | 23 +++-------------------- net/mptcp/protocol.h | 2 ++ 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 17f99924dfa0..ddf9d0dc6274 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -599,6 +599,31 @@ bool mptcp_pm_addr_families_match(const struct sock *sk, #endif } +void mptcp_pm_worker(struct mptcp_sock *msk) +{ + struct mptcp_pm_data *pm = &msk->pm; + + msk_owned_by_me(msk); + + if (!(pm->status & MPTCP_PM_WORK_MASK)) + return; + + spin_lock_bh(&msk->pm.lock); + + pr_debug("msk=%p status=%x\n", msk, pm->status); + if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { + pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); + mptcp_pm_addr_send_ack(msk); + } + if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { + pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); + mptcp_pm_rm_addr_recv(msk); + } + __mptcp_pm_kernel_worker(msk); + + spin_unlock_bh(&msk->pm.lock); +} + void mptcp_pm_destroy(struct mptcp_sock *msk) { mptcp_pm_free_anno_list(msk); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 781831c50691..37986208b9c0 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -912,7 +912,7 @@ static void mptcp_pm_rm_addr_or_subflow(struct mptcp_sock *msk, } } -static void mptcp_pm_rm_addr_recv(struct mptcp_sock *msk) +void mptcp_pm_rm_addr_recv(struct mptcp_sock *msk) { mptcp_pm_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); } @@ -923,30 +923,15 @@ static void mptcp_pm_rm_subflow(struct mptcp_sock *msk, mptcp_pm_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); } -void mptcp_pm_worker(struct mptcp_sock *msk) +/* Called under PM lock */ +void __mptcp_pm_kernel_worker(struct mptcp_sock *msk) { struct mptcp_pm_data *pm = &msk->pm; - msk_owned_by_me(msk); - - if (!(pm->status & MPTCP_PM_WORK_MASK)) - return; - - spin_lock_bh(&msk->pm.lock); - - pr_debug("msk=%p status=%x\n", msk, pm->status); if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); mptcp_pm_nl_add_addr_received(msk); } - if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { - pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); - mptcp_pm_addr_send_ack(msk); - } - if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { - pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); - mptcp_pm_rm_addr_recv(msk); - } if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); mptcp_pm_nl_fully_established(msk); @@ -955,8 +940,6 @@ void mptcp_pm_worker(struct mptcp_sock *msk) pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); mptcp_pm_nl_subflow_established(msk); } - - spin_unlock_bh(&msk->pm.lock); } static bool address_use_port(struct mptcp_pm_addr_entry *entry) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 0013b68a2731..d4725b32aa56 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1010,6 +1010,7 @@ void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); bool mptcp_pm_is_init_remote_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *remote); void mptcp_pm_addr_send_ack(struct mptcp_sock *msk); +void mptcp_pm_rm_addr_recv(struct mptcp_sock *msk); void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); @@ -1149,6 +1150,7 @@ static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflo void __init mptcp_pm_nl_init(void); void mptcp_pm_worker(struct mptcp_sock *msk); +void __mptcp_pm_kernel_worker(struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); -- 2.50.1