From c183165f87a486d5879f782c05a23c179c3794ab Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 28 Mar 2025 15:27:18 +0100 Subject: [PATCH 01/16] selftests: mptcp: close fd_in before returning in main_loop The file descriptor 'fd_in' is opened when cfg_input is configured, but not closed in main_loop(), this patch fixes it. Fixes: 05be5e273c84 ("selftests: mptcp: add disconnect tests") Cc: stable@vger.kernel.org Co-developed-by: Cong Liu Signed-off-by: Cong Liu Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250328-net-mptcp-misc-fixes-6-15-v1-3-34161a482a7f@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 893dc36b12f6..c83a8b47bbdf 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1299,7 +1299,7 @@ again: ret = copyfd_io(fd_in, fd, 1, 0, &winfo); if (ret) - return ret; + goto out; if (cfg_truncate > 0) { shutdown(fd, SHUT_WR); @@ -1320,7 +1320,10 @@ again: close(fd); } - return 0; +out: + if (cfg_input) + close(fd_in); + return ret; } int parse_proto(const char *proto) -- 2.51.0 From b44a4c28228fc50b0af05b5d15b44c2172f112a0 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 28 Mar 2025 15:27:19 +0100 Subject: [PATCH 02/16] selftests: mptcp: ignore mptcp_diag binary A new binary is now generated by the MPTCP selftests: mptcp_diag. Like the other binaries from this directory, there is no need to track this in Git, it should then be ignored. 
Fixes: 00f5e338cf7e ("selftests: mptcp: Add a tool to get specific msk_info") Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250328-net-mptcp-misc-fixes-6-15-v1-4-34161a482a7f@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore index 49daae73c41e..833279fb34e2 100644 --- a/tools/testing/selftests/net/mptcp/.gitignore +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only mptcp_connect +mptcp_diag mptcp_inq mptcp_sockopt pm_nl_ctl -- 2.51.0 From 9e3267cf02c240065fddfbe1a58cdb99d0b00531 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 28 Mar 2025 09:47:42 -0700 Subject: [PATCH 03/16] eth: gve: add missing netdev locks on reset and shutdown paths All the misc entry points end up calling into either gve_open() or gve_close(), they take rtnl_lock today but since the recent instance locking changes should also take the instance lock. Found by code inspection and untested. 
Fixes: cae03e5bdd9e ("net: hold netdev instance lock during queue operations") Acked-by: Stanislav Fomichev Reviewed-by: Harshitha Ramamurthy Link: https://patch.msgid.link/20250328164742.1268069-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index cb2f9978f45e..f9a73c956861 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -2077,7 +2077,9 @@ static void gve_handle_reset(struct gve_priv *priv) if (gve_get_do_reset(priv)) { rtnl_lock(); + netdev_lock(priv->dev); gve_reset(priv, false); + netdev_unlock(priv->dev); rtnl_unlock(); } } @@ -2714,6 +2716,7 @@ static void gve_shutdown(struct pci_dev *pdev) bool was_up = netif_running(priv->dev); rtnl_lock(); + netdev_lock(netdev); if (was_up && gve_close(priv->dev)) { /* If the dev was up, attempt to close, if close fails, reset */ gve_reset_and_teardown(priv, was_up); @@ -2721,6 +2724,7 @@ static void gve_shutdown(struct pci_dev *pdev) /* If the dev wasn't up or close worked, finish tearing down */ gve_teardown_priv_resources(priv); } + netdev_unlock(netdev); rtnl_unlock(); } -- 2.51.0 From dd07df9ff3d148aee87fcbab99ff14f0727752f4 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 28 Mar 2025 10:42:16 -0700 Subject: [PATCH 04/16] bnxt_en: bring back rtnl lock in bnxt_shutdown Taehee reports missing rtnl from bnxt_shutdown path: inetdev_event (./include/linux/inetdevice.h:256 net/ipv4/devinet.c:1585) notifier_call_chain (kernel/notifier.c:85) __dev_close_many (net/core/dev.c:1732 (discriminator 3)) kernel/locking/mutex.c:713 kernel/locking/mutex.c:732) dev_close_many (net/core/dev.c:1786) netif_close (./include/linux/list.h:124 ./include/linux/list.h:215 bnxt_shutdown (drivers/net/ethernet/broadcom/bnxt/bnxt.c:16707) bnxt_en pci_device_shutdown 
(drivers/pci/pci-driver.c:511) device_shutdown (drivers/base/core.c:4820) kernel_restart (kernel/reboot.c:271 kernel/reboot.c:285) Bring back the rtnl lock. Link: https://lore.kernel.org/netdev/CAMArcTV4P8PFsc6O2tSgzRno050DzafgqkLA2b7t=Fv_SY=brw@mail.gmail.com/ Fixes: 004b5008016a ("eth: bnxt: remove most dependencies on RTNL") Reported-by: Taehee Yoo Signed-off-by: Stanislav Fomichev Tested-by: Taehee Yoo Tested-by: Breno Leitao Link: https://patch.msgid.link/20250328174216.3513079-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 934ba9425857..1a70605fad38 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -16698,6 +16698,7 @@ static void bnxt_shutdown(struct pci_dev *pdev) if (!dev) return; + rtnl_lock(); netdev_lock(dev); bp = netdev_priv(dev); if (!bp) @@ -16717,6 +16718,7 @@ static void bnxt_shutdown(struct pci_dev *pdev) shutdown_exit: netdev_unlock(dev); + rtnl_unlock(); } #ifdef CONFIG_PM_SLEEP -- 2.51.0 From f278b6d5bb465c7fd66f3d103812947e55b376ed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 Mar 2025 07:59:46 +0000 Subject: [PATCH 05/16] Revert "tcp: avoid atomic operations on sk->sk_rmem_alloc" This reverts commit 0de2a5c4b824da2205658ebebb99a55c43cdf60f. I forgot that a TCP socket could receive messages in its error queue. sock_queue_err_skb() can be called without socket lock being held, and changes sk->sk_rmem_alloc. The fact that skbs in error queue are limited by sk->sk_rcvbuf means that error messages can be dropped if socket receive queues are full, which is an orthogonal issue. In future kernels, we could use a separate sk->sk_error_mem_alloc counter specifically for the error queue. 
Fixes: 0de2a5c4b824 ("tcp: avoid atomic operations on sk->sk_rmem_alloc") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250331075946.31960-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 15 --------------- net/ipv4/tcp.c | 18 ++---------------- net/ipv4/tcp_fastopen.c | 2 +- net/ipv4/tcp_input.c | 6 +++--- 4 files changed, 6 insertions(+), 35 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index df04dc09c519..4450c384ef17 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -779,7 +779,6 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) /* tcp.c */ void tcp_get_info(struct sock *, struct tcp_info *); -void tcp_sock_rfree(struct sk_buff *skb); /* Read 'sendfile()'-style from a TCP socket */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, @@ -2899,18 +2898,4 @@ enum skb_drop_reason tcp_inbound_hash(struct sock *sk, const void *saddr, const void *daddr, int family, int dif, int sdif); -/* version of skb_set_owner_r() avoiding one atomic_add() */ -static inline void tcp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) -{ - skb_orphan(skb); - skb->sk = sk; - skb->destructor = tcp_sock_rfree; - - sock_owned_by_me(sk); - atomic_set(&sk->sk_rmem_alloc, - atomic_read(&sk->sk_rmem_alloc) + skb->truesize); - - sk_forward_alloc_add(sk, -skb->truesize); -} - #endif /* _TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ea8de00f669d..6edc441b3702 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1525,25 +1525,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) __tcp_cleanup_rbuf(sk, copied); } -/* private version of sock_rfree() avoiding one atomic_sub() */ -void tcp_sock_rfree(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - unsigned int len = skb->truesize; - - sock_owned_by_me(sk); - atomic_set(&sk->sk_rmem_alloc, - atomic_read(&sk->sk_rmem_alloc) - len); - - sk_forward_alloc_add(sk, len); - sk_mem_reclaim(sk); -} - static void 
tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); - if (likely(skb->destructor == tcp_sock_rfree)) { - tcp_sock_rfree(skb); + if (likely(skb->destructor == sock_rfree)) { + sock_rfree(skb); skb->destructor = NULL; skb->sk = NULL; return skb_attempt_defer_free(skb); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index ca40665145c6..1a6b1bc54245 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -189,7 +189,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) tcp_segs_in(tp, skb); __skb_pull(skb, tcp_hdrlen(skb)); sk_forced_mem_schedule(sk, skb->truesize); - tcp_skb_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); TCP_SKB_CB(skb)->seq++; TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e1f952fbac48..a35018e2d0ba 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5171,7 +5171,7 @@ end: if (tcp_is_sack(tp)) tcp_grow_window(sk, skb, false); skb_condense(skb); - tcp_skb_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); } } @@ -5187,7 +5187,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { tcp_add_receive_queue(sk, skb); - tcp_skb_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); } return eaten; } @@ -5504,7 +5504,7 @@ skip_this: __skb_queue_before(list, skb, nskb); else __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */ - tcp_skb_set_owner_r(nskb, sk); + skb_set_owner_r(nskb, sk); mptcp_skb_ext_move(nskb, skb); /* Copy data, releasing collapsed skbs. */ -- 2.51.0 From 212120a164d59fd534148d315f13db3d296efb0f Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Tue, 1 Apr 2025 12:58:23 +0100 Subject: [PATCH 06/16] Documentation/EDAC: Fix warning document isn't included in any toctree Fix the build (htmldocs) warning: Documentation/edac/index.rst: WARNING: document isn't included in any toctree. 
Fixes: db99ea5f2c03 ("EDAC: Add support for EDAC device features control") Closes: https://lore.kernel.org/all/20250228185102.15842f8b@canb.auug.org.au/ Reported-by: Stephen Rothwell Signed-off-by: Shiju Jose Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20250401115823.573-1-shiju.jose@huawei.com --- Documentation/subsystem-apis.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/subsystem-apis.rst b/Documentation/subsystem-apis.rst index b52ad5b969d4..ff4fe8c936c8 100644 --- a/Documentation/subsystem-apis.rst +++ b/Documentation/subsystem-apis.rst @@ -71,6 +71,7 @@ Other subsystems accounting/index cpu-freq/index + edac/index fpga/index i2c/index iio/index -- 2.51.0 From 078aabd567de3d63d37d7673f714e309d369e6e2 Mon Sep 17 00:00:00 2001 From: Debin Zhu Date: Tue, 1 Apr 2025 20:40:18 +0800 Subject: [PATCH 07/16] netlabel: Fix NULL pointer exception caused by CALIPSO on IPv4 sockets When calling netlbl_conn_setattr(), addr->sa_family is used to determine the function behavior. If sk is an IPv4 socket, but the connect function is called with an IPv6 address, the function calipso_sock_setattr() is triggered. Inside this function, the following code is executed: sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL; Since sk is an IPv4 socket, pinet6 is NULL, leading to a null pointer dereference. This patch fixes the issue by checking if inet6_sk(sk) returns a NULL pointer before accessing pinet6. 
Signed-off-by: Debin Zhu Signed-off-by: Bitao Ouyang <1985755126@qq.com> Acked-by: Paul Moore Fixes: ceba1832b1b2 ("calipso: Set the calipso socket label to match the secattr.") Link: https://patch.msgid.link/20250401124018.4763-1-mowenroot@163.com Signed-off-by: Jakub Kicinski --- net/ipv6/calipso.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index dbcea9fee626..62618a058b8f 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -1072,8 +1072,13 @@ static int calipso_sock_getattr(struct sock *sk, struct ipv6_opt_hdr *hop; int opt_len, len, ret_val = -ENOMSG, offset; unsigned char *opt; - struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); + struct ipv6_pinfo *pinfo = inet6_sk(sk); + struct ipv6_txoptions *txopts; + + if (!pinfo) + return -EAFNOSUPPORT; + txopts = txopt_get(pinfo); if (!txopts || !txopts->hopopt) goto done; @@ -1125,8 +1130,13 @@ static int calipso_sock_setattr(struct sock *sk, { int ret_val; struct ipv6_opt_hdr *old, *new; - struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); + struct ipv6_pinfo *pinfo = inet6_sk(sk); + struct ipv6_txoptions *txopts; + + if (!pinfo) + return -EAFNOSUPPORT; + txopts = txopt_get(pinfo); old = NULL; if (txopts) old = txopts->hopopt; @@ -1153,8 +1163,13 @@ static int calipso_sock_setattr(struct sock *sk, static void calipso_sock_delattr(struct sock *sk) { struct ipv6_opt_hdr *new_hop; - struct ipv6_txoptions *txopts = txopt_get(inet6_sk(sk)); + struct ipv6_pinfo *pinfo = inet6_sk(sk); + struct ipv6_txoptions *txopts; + + if (!pinfo) + return; + txopts = txopt_get(pinfo); if (!txopts || !txopts->hopopt) goto done; -- 2.51.0 From ce8fe975fd99b49c29c42e50f2441ba53112b2e8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Mar 2025 15:25:35 -0700 Subject: [PATCH 08/16] net_sched: skbprio: Remove overly strict queue assertions In the current implementation, skbprio enqueue/dequeue contains an assertion that fails under 
certain conditions when SKBPRIO is used as a child qdisc under TBF with specific parameters. The failure occurs because TBF sometimes peeks at packets in the child qdisc without actually dequeuing them when tokens are unavailable. This peek operation creates a discrepancy between the parent and child qdisc queue length counters. When TBF later receives a high-priority packet, SKBPRIO's queue length may show a different value than what's reflected in its internal priority queue tracking, triggering the assertion. The fix removes these overly strict assertions in SKBPRIO; they are not necessary at all. Reported-by: syzbot+a3422a19b05ea96bee18@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=a3422a19b05ea96bee18 Fixes: aea5f654e6b7 ("net/sched: add skbprio scheduler") Cc: Nishanth Devarajan Signed-off-by: Cong Wang Acked-by: Paolo Abeni Link: https://patch.msgid.link/20250329222536.696204-2-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_skbprio.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 20ff7386b74b..f485f62ab721 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -123,8 +123,6 @@ static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Check to update highest and lowest priorities. */ if (skb_queue_empty(lp_qdisc)) { if (q->lowest_prio == q->highest_prio) { - /* The incoming packet is the only packet in queue. */ - BUG_ON(sch->q.qlen != 1); q->lowest_prio = prio; q->highest_prio = prio; } else { @@ -156,7 +154,6 @@ static struct sk_buff *skbprio_dequeue(struct Qdisc *sch) /* Update highest priority field. 
*/ if (skb_queue_empty(hpq)) { if (q->lowest_prio == q->highest_prio) { - BUG_ON(sch->q.qlen); q->highest_prio = 0; q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1; } else { -- 2.51.0 From 076c700988938e02d51c018095d33b339cdb7ffd Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Mar 2025 15:25:36 -0700 Subject: [PATCH 09/16] selftests: tc-testing: Add TBF with SKBPRIO queue length corner case test Add a test case to validate the interaction between TBF and SKBPRIO queueing disciplines, specifically targeting queue length accounting corner cases. This test complements the fix for the queue length accounting issue in the SKBPRIO qdisc. This is still best-effort, as timing and manipulating enqueue and dequeue from user-space is very hard. Cc: Pedro Tammela Signed-off-by: Cong Wang Acked-by: Paolo Abeni Link: https://patch.msgid.link/20250329222536.696204-3-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 9044ac054167..25454fd95537 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -126,5 +126,37 @@ "$TC qdisc del dev $DUMMY root handle 1: drr", "$IP addr del 10.10.10.10/24 dev $DUMMY" ] - } + }, + { + "id": "c024", + "name": "Test TBF with SKBPRIO - catch qlen corner cases", + "category": [ + "qdisc", + "tbf", + "skbprio" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root tbf rate 100bit burst 2000 limit 1000", + "$TC qdisc add dev $DUMMY parent 1: handle 10: skbprio limit 1", + "ping -c 1 -W 0.1 -Q 0x00 -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true", + "ping -c 1 
-W 0.1 -Q 0x1c -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true", + "ping -c 1 -W 0.1 -Q 0x00 -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true", + "ping -c 1 -W 0.1 -Q 0x1c -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.5" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc skbprio'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + } ] -- 2.51.0 From 10206302af856791fbcc27a33ed3c3eb09b2793d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 31 Mar 2025 09:15:32 +0000 Subject: [PATCH 10/16] sctp: add mutual exclusion in proc_sctp_do_udp_port() We must serialize calls to sctp_udp_sock_stop() and sctp_udp_sock_start() or risk a crash as syzbot reported: Oops: general protection fault, probably for non-canonical address 0xdffffc000000000d: 0000 [#1] SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000068-0x000000000000006f] CPU: 1 UID: 0 PID: 6551 Comm: syz.1.44 Not tainted 6.14.0-syzkaller-g7f2ff7b62617 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 RIP: 0010:kernel_sock_shutdown+0x47/0x70 net/socket.c:3653 Call Trace: udp_tunnel_sock_release+0x68/0x80 net/ipv4/udp_tunnel_core.c:181 sctp_udp_sock_stop+0x71/0x160 net/sctp/protocol.c:930 proc_sctp_do_udp_port+0x264/0x450 net/sctp/sysctl.c:553 proc_sys_call_handler+0x3d0/0x5b0 fs/proc/proc_sysctl.c:601 iter_file_splice_write+0x91c/0x1150 fs/splice.c:738 do_splice_from fs/splice.c:935 [inline] direct_splice_actor+0x18f/0x6c0 fs/splice.c:1158 splice_direct_to_actor+0x342/0xa30 fs/splice.c:1102 do_splice_direct_actor fs/splice.c:1201 [inline] do_splice_direct+0x174/0x240 fs/splice.c:1227 do_sendfile+0xafd/0xe50 fs/read_write.c:1368 __do_sys_sendfile64 fs/read_write.c:1429 [inline] __se_sys_sendfile64 fs/read_write.c:1415 
[inline] __x64_sys_sendfile64+0x1d8/0x220 fs/read_write.c:1415 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] Fixes: 046c052b475e ("sctp: enable udp tunneling socks") Reported-by: syzbot+fae49d997eb56fa7c74d@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/67ea5c01.050a0220.1547ec.012b.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Marcelo Ricardo Leitner Acked-by: Xin Long Link: https://patch.msgid.link/20250331091532.224982-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sctp/sysctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 8e1e97be4df7..ee3eac338a9d 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -525,6 +525,8 @@ static int proc_sctp_do_auth(const struct ctl_table *ctl, int write, return ret; } +static DEFINE_MUTEX(sctp_sysctl_mutex); + static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -549,6 +551,7 @@ static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, if (new_value > max || new_value < min) return -EINVAL; + mutex_lock(&sctp_sysctl_mutex); net->sctp.udp_port = new_value; sctp_udp_sock_stop(net); if (new_value) { @@ -561,6 +564,7 @@ static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write, lock_sock(sk); sctp_sk(sk)->udp_port = htons(net->sctp.udp_port); release_sock(sk); + mutex_unlock(&sctp_sysctl_mutex); } return ret; -- 2.51.0 From 57b290d97c6150774bf929117ca737a26d8fc33d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 31 Mar 2025 08:52:53 +0200 Subject: [PATCH 11/16] net: airoha: Fix qid report in airoha_tc_get_htb_get_leaf_queue() Fix the following kernel warning deleting HTB offloaded leafs and/or root HTB qdisc in airoha_eth driver properly reporting qid in airoha_tc_get_htb_get_leaf_queue routine. 
$tc qdisc replace dev eth1 root handle 10: htb offload $tc class add dev eth1 parent 10: classid 10:4 htb rate 100mbit ceil 100mbit $tc qdisc replace dev eth1 parent 10:4 handle 4: ets bands 8 \ quanta 1514 3028 4542 6056 7570 9084 10598 12112 $tc qdisc del dev eth1 root [ 55.827864] ------------[ cut here ]------------ [ 55.832493] WARNING: CPU: 3 PID: 2678 at 0xffffffc0798695a4 [ 55.956510] CPU: 3 PID: 2678 Comm: tc Tainted: G O 6.6.71 #0 [ 55.963557] Hardware name: Airoha AN7581 Evaluation Board (DT) [ 55.969383] pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 55.976344] pc : 0xffffffc0798695a4 [ 55.979851] lr : 0xffffffc079869a20 [ 55.983358] sp : ffffffc0850536a0 [ 55.986665] x29: ffffffc0850536a0 x28: 0000000000000024 x27: 0000000000000001 [ 55.993800] x26: 0000000000000000 x25: ffffff8008b19000 x24: ffffff800222e800 [ 56.000935] x23: 0000000000000001 x22: 0000000000000000 x21: ffffff8008b19000 [ 56.008071] x20: ffffff8002225800 x19: ffffff800379d000 x18: 0000000000000000 [ 56.015206] x17: ffffffbf9ea59000 x16: ffffffc080018000 x15: 0000000000000000 [ 56.022342] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000001 [ 56.029478] x11: ffffffc081471008 x10: ffffffc081575a98 x9 : 0000000000000000 [ 56.036614] x8 : ffffffc08167fd40 x7 : ffffffc08069e104 x6 : ffffff8007f86000 [ 56.043748] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000001 [ 56.050884] x2 : 0000000000000000 x1 : 0000000000000250 x0 : ffffff800222c000 [ 56.058020] Call trace: [ 56.060459] 0xffffffc0798695a4 [ 56.063618] 0xffffffc079869a20 [ 56.066777] __qdisc_destroy+0x40/0xa0 [ 56.070528] qdisc_put+0x54/0x6c [ 56.073748] qdisc_graft+0x41c/0x648 [ 56.077324] tc_get_qdisc+0x168/0x2f8 [ 56.080978] rtnetlink_rcv_msg+0x230/0x330 [ 56.085076] netlink_rcv_skb+0x5c/0x128 [ 56.088913] rtnetlink_rcv+0x14/0x1c [ 56.092490] netlink_unicast+0x1e0/0x2c8 [ 56.096413] netlink_sendmsg+0x198/0x3c8 [ 56.100337] ____sys_sendmsg+0x1c4/0x274 [ 56.104261] 
___sys_sendmsg+0x7c/0xc0 [ 56.107924] __sys_sendmsg+0x44/0x98 [ 56.111492] __arm64_sys_sendmsg+0x20/0x28 [ 56.115580] invoke_syscall.constprop.0+0x58/0xfc [ 56.120285] do_el0_svc+0x3c/0xbc [ 56.123592] el0_svc+0x18/0x4c [ 56.126647] el0t_64_sync_handler+0x118/0x124 [ 56.131005] el0t_64_sync+0x150/0x154 [ 56.134660] ---[ end trace 0000000000000000 ]--- Fixes: ef1ca9271313b ("net: airoha: Add sched HTB offload support") Signed-off-by: Lorenzo Bianconi Acked-by: Paolo Abeni Link: https://patch.msgid.link/20250331-airoha-htb-qdisc-offload-del-fix-v1-1-4ea429c2c968@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index c0a642568ac1..20a96cafc748 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2358,7 +2358,7 @@ static int airoha_tc_get_htb_get_leaf_queue(struct airoha_gdm_port *port, return -EINVAL; } - opt->qid = channel; + opt->qid = AIROHA_NUM_TX_RING + channel; return 0; } -- 2.51.0 From 367579274f60cb23c570ae5348966ab51e1509a4 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 31 Mar 2025 18:17:31 +0200 Subject: [PATCH 12/16] net: airoha: Fix ETS priomap validation ETS Qdisc schedules SP bands in a priority order assigning band-0 the highest priority (band-0 > band-1 > .. > band-n) while EN7581 arranges SP bands in a priority order assigning band-7 the highest priority (band-7 > band-6, .. > band-n). Fix priomap check in airoha_qdma_set_tx_ets_sched routine in order to align ETS Qdisc and airoha_eth driver SP priority ordering. 
Fixes: b56e4d660a96 ("net: airoha: Enforce ETS Qdisc priomap") Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20250331-airoha-ets-validate-priomap-v1-1-60a524488672@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 20a96cafc748..69e523dd4186 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -2028,7 +2028,7 @@ static int airoha_qdma_set_tx_ets_sched(struct airoha_gdm_port *port, struct tc_ets_qopt_offload_replace_params *p = &opt->replace_params; enum tx_sched_mode mode = TC_SCH_SP; u16 w[AIROHA_NUM_QOS_QUEUES] = {}; - int i, nstrict = 0, nwrr, qidx; + int i, nstrict = 0; if (p->bands > AIROHA_NUM_QOS_QUEUES) return -EINVAL; @@ -2046,17 +2046,17 @@ static int airoha_qdma_set_tx_ets_sched(struct airoha_gdm_port *port, * lowest priorities with respect to SP ones. * e.g: WRR0, WRR1, .., WRRm, SP0, SP1, .., SPn */ - nwrr = p->bands - nstrict; - qidx = nstrict && nwrr ? nstrict : 0; - for (i = 1; i <= p->bands; i++) { - if (p->priomap[i % AIROHA_NUM_QOS_QUEUES] != qidx) + for (i = 0; i < nstrict; i++) { + if (p->priomap[p->bands - i - 1] != i) return -EINVAL; - - qidx = i == nwrr ? 0 : qidx + 1; } - for (i = 0; i < nwrr; i++) + for (i = 0; i < p->bands - nstrict; i++) { + if (p->priomap[i] != nstrict + i) + return -EINVAL; + w[i] = p->weights[nstrict + i]; + } if (!nstrict) mode = TC_SCH_WRR8; -- 2.51.0 From d996e412b2dfc079bd44bff5b3bc743fdb6d7c90 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 31 Mar 2025 07:28:14 -0700 Subject: [PATCH 13/16] bpf: add missing ops lock around dev_xdp_attach_link Syzkaller points out that create_link path doesn't grab ops lock, add it. 
Reported-by: syzbot+08936936fe8132f91f1a@syzkaller.appspotmail.com Closes: https://lore.kernel.org/bpf/67e6b3e8.050a0220.2f068f.0079.GAE@google.com/ Fixes: 97246d6d21c2 ("net: hold netdev instance lock during ndo_bpf") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250331142814.1887506-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index be17e0660144..5d20ff226d5e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10284,7 +10284,9 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) goto unlock; } + netdev_lock_ops(dev); err = dev_xdp_attach_link(dev, &extack, link); + netdev_unlock_ops(dev); rtnl_unlock(); if (err) { -- 2.51.0 From 5bbcb5902e1c7193b5ffee13251ec92878aae0e5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Mar 2025 17:15:20 -0700 Subject: [PATCH 14/16] MAINTAINERS: update Open vSwitch maintainers Pravin has not been active for a while, missingmaints reports: Subsystem OPENVSWITCH Changes 138 / 253 (54%) (No activity) Top reviewers: [41]: aconole@redhat.com [31]: horms@kernel.org [23]: echaudro@redhat.com [8]: fw@strlen.de [6]: i.maximets@ovn.org INACTIVE MAINTAINER Pravin B Shelar Let's elevate Aaron, Eelco and Ilya to the status of maintainers. Acked-by: Aaron Conole Acked-by: Ilya Maximets Acked-by: Eelco Chaudron Acked-by: Simon Horman Link: https://patch.msgid.link/20250401001520.2080231-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 660d5e67af7d..0dabce0f03f0 100644 --- a/CREDITS +++ b/CREDITS @@ -3666,6 +3666,10 @@ S: 149 Union St. 
S: Kingston, Ontario S: Canada K7L 2P4 +N: Pravin B Shelar +E: pshelar@ovn.org +D: Open vSwitch maintenance and contributions + N: John Shifflett E: john@geolog.com E: jshiffle@netcom.com diff --git a/MAINTAINERS b/MAINTAINERS index e0045ac4327b..ff882416c445 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18097,7 +18097,9 @@ F: drivers/irqchip/irq-ompic.c F: drivers/irqchip/irq-or1k-* OPENVSWITCH -M: Pravin B Shelar +M: Aaron Conole +M: Eelco Chaudron +M: Ilya Maximets L: netdev@vger.kernel.org L: dev@openvswitch.org S: Maintained -- 2.51.0 From d3210dabda8dd0477f3a6301dcaf9ed44aeccd3c Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Mon, 31 Mar 2025 18:53:15 -0700 Subject: [PATCH 15/16] eth: mlx4: select PAGE_POOL With commit 8533b14b3d65 ("eth: mlx4: create a page pool for Rx") mlx4 started using functions guarded by PAGE_POOL. This change introduced build errors when CONFIG_MLX4_EN is set but CONFIG_PAGE_POOL is not: ld: vmlinux.o: in function `mlx4_en_alloc_frags': en_rx.c:(.text+0xa5eaf9): undefined reference to `page_pool_alloc_pages' ld: vmlinux.o: in function `mlx4_en_create_rx_ring': (.text+0xa5ee91): undefined reference to `page_pool_create' Make MLX4_EN select PAGE_POOL to fix the mlx4 build errors. 
Fixes: 8533b14b3d65 ("eth: mlx4: create a page pool for Rx") Signed-off-by: Greg Thelen Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250401015315.2306092-1-gthelen@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx4/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index 825e05fb8607..0b1cb340206f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -7,6 +7,7 @@ config MLX4_EN tristate "Mellanox Technologies 1/10/40Gbit Ethernet support" depends on PCI && NETDEVICES && ETHERNET && INET depends on PTP_1588_CLOCK_OPTIONAL + select PAGE_POOL select MLX4_CORE help This driver supports Mellanox Technologies ConnectX Ethernet -- 2.51.0 From 96844075226b49af25a69a1d084b648ec2d9b08d Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Tue, 1 Apr 2025 08:58:04 +0200 Subject: [PATCH 16/16] net: mvpp2: Prevent parser TCAM memory corruption Protect the parser TCAM/SRAM memory, and the cached (shadow) SRAM information, from concurrent modifications. Both the TCAM and SRAM tables are indirectly accessed by configuring an index register that selects the row to read or write to. This means that operations must be atomic in order to, e.g., avoid spreading writes across multiple rows. Since the shadow SRAM array is used to find free rows in the hardware table, it must also be protected in order to avoid TOCTOU errors where multiple cores allocate the same row. This issue was detected in a situation where `mvpp2_set_rx_mode()` ran concurrently on two CPUs. In this particular case the MVPP2_PE_MAC_UC_PROMISCUOUS entry was corrupted, causing the classifier unit to drop all incoming unicast - indicated by the `rx_classifier_drops` counter. 
Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Tobias Waldekranz Reviewed-by: Maxime Chevallier Tested-by: Maxime Chevallier Link: https://patch.msgid.link/20250401065855.3113635-1-tobias@waldekranz.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 3 + .../net/ethernet/marvell/mvpp2/mvpp2_main.c | 3 +- .../net/ethernet/marvell/mvpp2/mvpp2_prs.c | 201 ++++++++++++------ 3 files changed, 140 insertions(+), 67 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index 44fe9b68d1c2..061fcd444d50 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -1113,6 +1113,9 @@ struct mvpp2 { /* Spinlocks for CM3 shared memory configuration */ spinlock_t mss_spinlock; + + /* Spinlock for shared PRS parser memory and shadow table */ + spinlock_t prs_spinlock; }; struct mvpp2_pcpu_stats { diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 566c12c89520..416a926a8281 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -7723,8 +7723,9 @@ static int mvpp2_probe(struct platform_device *pdev) if (mvpp2_read(priv, MVPP2_VER_ID_REG) == MVPP2_VER_PP23) priv->hw_version = MVPP23; - /* Init mss lock */ + /* Init locks for shared packet processor resources */ spin_lock_init(&priv->mss_spinlock); + spin_lock_init(&priv->prs_spinlock); /* Initialize network controller */ err = mvpp2_init(pdev, priv); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index 9af22f497a40..93e978bdf303 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -23,6 +23,8 @@ static int mvpp2_prs_hw_write(struct mvpp2 *priv, struct mvpp2_prs_entry *pe) { int i; + 
lockdep_assert_held(&priv->prs_spinlock); + if (pe->index > MVPP2_PRS_TCAM_SRAM_SIZE - 1) return -EINVAL; @@ -43,11 +45,13 @@ static int mvpp2_prs_hw_write(struct mvpp2 *priv, struct mvpp2_prs_entry *pe) } /* Initialize tcam entry from hw */ -int mvpp2_prs_init_from_hw(struct mvpp2 *priv, struct mvpp2_prs_entry *pe, - int tid) +static int __mvpp2_prs_init_from_hw(struct mvpp2 *priv, + struct mvpp2_prs_entry *pe, int tid) { int i; + lockdep_assert_held(&priv->prs_spinlock); + if (tid > MVPP2_PRS_TCAM_SRAM_SIZE - 1) return -EINVAL; @@ -73,6 +77,18 @@ int mvpp2_prs_init_from_hw(struct mvpp2 *priv, struct mvpp2_prs_entry *pe, return 0; } +int mvpp2_prs_init_from_hw(struct mvpp2 *priv, struct mvpp2_prs_entry *pe, + int tid) +{ + int err; + + spin_lock_bh(&priv->prs_spinlock); + err = __mvpp2_prs_init_from_hw(priv, pe, tid); + spin_unlock_bh(&priv->prs_spinlock); + + return err; +} + /* Invalidate tcam hw entry */ static void mvpp2_prs_hw_inv(struct mvpp2 *priv, int index) { @@ -374,7 +390,7 @@ static int mvpp2_prs_flow_find(struct mvpp2 *priv, int flow) priv->prs_shadow[tid].lu != MVPP2_PRS_LU_FLOWS) continue; - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); bits = mvpp2_prs_sram_ai_get(&pe); /* Sram store classification lookup ID in AI bits [5:0] */ @@ -441,7 +457,7 @@ static void mvpp2_prs_mac_drop_all_set(struct mvpp2 *priv, int port, bool add) if (priv->prs_shadow[MVPP2_PE_DROP_ALL].valid) { /* Entry exist - update port only */ - mvpp2_prs_init_from_hw(priv, &pe, MVPP2_PE_DROP_ALL); + __mvpp2_prs_init_from_hw(priv, &pe, MVPP2_PE_DROP_ALL); } else { /* Entry doesn't exist - create new */ memset(&pe, 0, sizeof(pe)); @@ -469,14 +485,17 @@ static void mvpp2_prs_mac_drop_all_set(struct mvpp2 *priv, int port, bool add) } /* Set port to unicast or multicast promiscuous mode */ -void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port, - enum mvpp2_prs_l2_cast l2_cast, bool add) +static void __mvpp2_prs_mac_promisc_set(struct mvpp2 
*priv, int port, + enum mvpp2_prs_l2_cast l2_cast, + bool add) { struct mvpp2_prs_entry pe; unsigned char cast_match; unsigned int ri; int tid; + lockdep_assert_held(&priv->prs_spinlock); + if (l2_cast == MVPP2_PRS_L2_UNI_CAST) { cast_match = MVPP2_PRS_UCAST_VAL; tid = MVPP2_PE_MAC_UC_PROMISCUOUS; @@ -489,7 +508,7 @@ void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port, /* promiscuous mode - Accept unknown unicast or multicast packets */ if (priv->prs_shadow[tid].valid) { - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); } else { memset(&pe, 0, sizeof(pe)); mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC); @@ -522,6 +541,14 @@ void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port, mvpp2_prs_hw_write(priv, &pe); } +void mvpp2_prs_mac_promisc_set(struct mvpp2 *priv, int port, + enum mvpp2_prs_l2_cast l2_cast, bool add) +{ + spin_lock_bh(&priv->prs_spinlock); + __mvpp2_prs_mac_promisc_set(priv, port, l2_cast, add); + spin_unlock_bh(&priv->prs_spinlock); +} + /* Set entry for dsa packets */ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add, bool tagged, bool extend) @@ -539,7 +566,7 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add, if (priv->prs_shadow[tid].valid) { /* Entry exist - update port only */ - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); } else { /* Entry doesn't exist - create new */ memset(&pe, 0, sizeof(pe)); @@ -610,7 +637,7 @@ static void mvpp2_prs_dsa_tag_ethertype_set(struct mvpp2 *priv, int port, if (priv->prs_shadow[tid].valid) { /* Entry exist - update port only */ - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); } else { /* Entry doesn't exist - create new */ memset(&pe, 0, sizeof(pe)); @@ -673,7 +700,7 @@ static int mvpp2_prs_vlan_find(struct mvpp2 *priv, unsigned short tpid, int ai) priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VLAN) continue; - mvpp2_prs_init_from_hw(priv, &pe, 
tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); match = mvpp2_prs_tcam_data_cmp(&pe, 0, tpid); if (!match) continue; @@ -726,7 +753,7 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai, priv->prs_shadow[tid_aux].lu != MVPP2_PRS_LU_VLAN) continue; - mvpp2_prs_init_from_hw(priv, &pe, tid_aux); + __mvpp2_prs_init_from_hw(priv, &pe, tid_aux); ri_bits = mvpp2_prs_sram_ri_get(&pe); if ((ri_bits & MVPP2_PRS_RI_VLAN_MASK) == MVPP2_PRS_RI_VLAN_DOUBLE) @@ -760,7 +787,7 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai, mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VLAN); } else { - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); } /* Update ports' mask */ mvpp2_prs_tcam_port_map_set(&pe, port_map); @@ -800,7 +827,7 @@ static int mvpp2_prs_double_vlan_find(struct mvpp2 *priv, unsigned short tpid1, priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VLAN) continue; - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); match = mvpp2_prs_tcam_data_cmp(&pe, 0, tpid1) && mvpp2_prs_tcam_data_cmp(&pe, 4, tpid2); @@ -849,7 +876,7 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1, priv->prs_shadow[tid_aux].lu != MVPP2_PRS_LU_VLAN) continue; - mvpp2_prs_init_from_hw(priv, &pe, tid_aux); + __mvpp2_prs_init_from_hw(priv, &pe, tid_aux); ri_bits = mvpp2_prs_sram_ri_get(&pe); ri_bits &= MVPP2_PRS_RI_VLAN_MASK; if (ri_bits == MVPP2_PRS_RI_VLAN_SINGLE || @@ -880,7 +907,7 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1, mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VLAN); } else { - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); } /* Update ports' mask */ @@ -1213,8 +1240,8 @@ static void mvpp2_prs_mac_init(struct mvpp2 *priv) /* Create dummy entries for drop all and promiscuous modes */ mvpp2_prs_drop_fc(priv); mvpp2_prs_mac_drop_all_set(priv, 0, false); - 
mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_UNI_CAST, false); - mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_MULTI_CAST, false); + __mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_UNI_CAST, false); + __mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_MULTI_CAST, false); } /* Set default entries for various types of dsa packets */ @@ -1533,12 +1560,6 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv) struct mvpp2_prs_entry pe; int err; - priv->prs_double_vlans = devm_kcalloc(&pdev->dev, sizeof(bool), - MVPP2_PRS_DBL_VLANS_MAX, - GFP_KERNEL); - if (!priv->prs_double_vlans) - return -ENOMEM; - /* Double VLAN: 0x88A8, 0x8100 */ err = mvpp2_prs_double_vlan_add(priv, ETH_P_8021AD, ETH_P_8021Q, MVPP2_PRS_PORT_MASK); @@ -1941,7 +1962,7 @@ static int mvpp2_prs_vid_range_find(struct mvpp2_port *port, u16 vid, u16 mask) port->priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID) continue; - mvpp2_prs_init_from_hw(port->priv, &pe, tid); + __mvpp2_prs_init_from_hw(port->priv, &pe, tid); mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]); mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]); @@ -1970,6 +1991,8 @@ int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid) memset(&pe, 0, sizeof(pe)); + spin_lock_bh(&priv->prs_spinlock); + /* Scan TCAM and see if entry with this already exist */ tid = mvpp2_prs_vid_range_find(port, vid, mask); @@ -1988,8 +2011,10 @@ int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid) MVPP2_PRS_VLAN_FILT_MAX_ENTRY); /* There isn't room for a new VID filter */ - if (tid < 0) + if (tid < 0) { + spin_unlock_bh(&priv->prs_spinlock); return tid; + } mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID); pe.index = tid; @@ -1997,7 +2022,7 @@ int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid) /* Mask all ports */ mvpp2_prs_tcam_port_map_set(&pe, 0); } else { - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); } /* Enable the current port */ @@ -2019,6 +2044,7 @@ 
int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid) mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID); mvpp2_prs_hw_write(priv, &pe); + spin_unlock_bh(&priv->prs_spinlock); return 0; } @@ -2028,15 +2054,16 @@ void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid) struct mvpp2 *priv = port->priv; int tid; - /* Scan TCAM and see if entry with this already exist */ - tid = mvpp2_prs_vid_range_find(port, vid, 0xfff); + spin_lock_bh(&priv->prs_spinlock); - /* No such entry */ - if (tid < 0) - return; + /* Invalidate TCAM entry with this , if it exists */ + tid = mvpp2_prs_vid_range_find(port, vid, 0xfff); + if (tid >= 0) { + mvpp2_prs_hw_inv(priv, tid); + priv->prs_shadow[tid].valid = false; + } - mvpp2_prs_hw_inv(priv, tid); - priv->prs_shadow[tid].valid = false; + spin_unlock_bh(&priv->prs_spinlock); } /* Remove all existing VID filters on this port */ @@ -2045,6 +2072,8 @@ void mvpp2_prs_vid_remove_all(struct mvpp2_port *port) struct mvpp2 *priv = port->priv; int tid; + spin_lock_bh(&priv->prs_spinlock); + for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id); tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) { if (priv->prs_shadow[tid].valid) { @@ -2052,6 +2081,8 @@ void mvpp2_prs_vid_remove_all(struct mvpp2_port *port) priv->prs_shadow[tid].valid = false; } } + + spin_unlock_bh(&priv->prs_spinlock); } /* Remove VID filering entry for this port */ @@ -2060,10 +2091,14 @@ void mvpp2_prs_vid_disable_filtering(struct mvpp2_port *port) unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id); struct mvpp2 *priv = port->priv; + spin_lock_bh(&priv->prs_spinlock); + /* Invalidate the guard entry */ mvpp2_prs_hw_inv(priv, tid); priv->prs_shadow[tid].valid = false; + + spin_unlock_bh(&priv->prs_spinlock); } /* Add guard entry that drops packets when no VID is matched on this port */ @@ -2079,6 +2114,8 @@ void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port) memset(&pe, 0, sizeof(pe)); + spin_lock_bh(&priv->prs_spinlock); + pe.index = tid; reg_val = 
mvpp2_read(priv, MVPP2_MH_REG(port->id)); @@ -2111,6 +2148,8 @@ void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port) /* Update shadow table */ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID); mvpp2_prs_hw_write(priv, &pe); + + spin_unlock_bh(&priv->prs_spinlock); } /* Parser default initialization */ @@ -2118,6 +2157,20 @@ int mvpp2_prs_default_init(struct platform_device *pdev, struct mvpp2 *priv) { int err, index, i; + priv->prs_shadow = devm_kcalloc(&pdev->dev, MVPP2_PRS_TCAM_SRAM_SIZE, + sizeof(*priv->prs_shadow), + GFP_KERNEL); + if (!priv->prs_shadow) + return -ENOMEM; + + priv->prs_double_vlans = devm_kcalloc(&pdev->dev, sizeof(bool), + MVPP2_PRS_DBL_VLANS_MAX, + GFP_KERNEL); + if (!priv->prs_double_vlans) + return -ENOMEM; + + spin_lock_bh(&priv->prs_spinlock); + /* Enable tcam table */ mvpp2_write(priv, MVPP2_PRS_TCAM_CTRL_REG, MVPP2_PRS_TCAM_EN_MASK); @@ -2136,12 +2189,6 @@ int mvpp2_prs_default_init(struct platform_device *pdev, struct mvpp2 *priv) for (index = 0; index < MVPP2_PRS_TCAM_SRAM_SIZE; index++) mvpp2_prs_hw_inv(priv, index); - priv->prs_shadow = devm_kcalloc(&pdev->dev, MVPP2_PRS_TCAM_SRAM_SIZE, - sizeof(*priv->prs_shadow), - GFP_KERNEL); - if (!priv->prs_shadow) - return -ENOMEM; - /* Always start from lookup = 0 */ for (index = 0; index < MVPP2_MAX_PORTS; index++) mvpp2_prs_hw_port_init(priv, index, MVPP2_PRS_LU_MH, @@ -2158,26 +2205,13 @@ int mvpp2_prs_default_init(struct platform_device *pdev, struct mvpp2 *priv) mvpp2_prs_vid_init(priv); err = mvpp2_prs_etype_init(priv); - if (err) - return err; - - err = mvpp2_prs_vlan_init(pdev, priv); - if (err) - return err; - - err = mvpp2_prs_pppoe_init(priv); - if (err) - return err; - - err = mvpp2_prs_ip6_init(priv); - if (err) - return err; - - err = mvpp2_prs_ip4_init(priv); - if (err) - return err; + err = err ? : mvpp2_prs_vlan_init(pdev, priv); + err = err ? : mvpp2_prs_pppoe_init(priv); + err = err ? : mvpp2_prs_ip6_init(priv); + err = err ? 
: mvpp2_prs_ip4_init(priv); - return 0; + spin_unlock_bh(&priv->prs_spinlock); + return err; } /* Compare MAC DA with tcam entry data */ @@ -2217,7 +2251,7 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da, (priv->prs_shadow[tid].udf != udf_type)) continue; - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); entry_pmap = mvpp2_prs_tcam_port_map_get(&pe); if (mvpp2_prs_mac_range_equals(&pe, da, mask) && @@ -2229,7 +2263,8 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da, } /* Update parser's mac da entry */ -int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da, bool add) +static int __mvpp2_prs_mac_da_accept(struct mvpp2_port *port, + const u8 *da, bool add) { unsigned char mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; struct mvpp2 *priv = port->priv; @@ -2261,7 +2296,7 @@ int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da, bool add) /* Mask all ports */ mvpp2_prs_tcam_port_map_set(&pe, 0); } else { - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); } mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC); @@ -2317,6 +2352,17 @@ int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da, bool add) return 0; } +int mvpp2_prs_mac_da_accept(struct mvpp2_port *port, const u8 *da, bool add) +{ + int err; + + spin_lock_bh(&port->priv->prs_spinlock); + err = __mvpp2_prs_mac_da_accept(port, da, add); + spin_unlock_bh(&port->priv->prs_spinlock); + + return err; +} + int mvpp2_prs_update_mac_da(struct net_device *dev, const u8 *da) { struct mvpp2_port *port = netdev_priv(dev); @@ -2345,6 +2391,8 @@ void mvpp2_prs_mac_del_all(struct mvpp2_port *port) unsigned long pmap; int index, tid; + spin_lock_bh(&priv->prs_spinlock); + for (tid = MVPP2_PE_MAC_RANGE_START; tid <= MVPP2_PE_MAC_RANGE_END; tid++) { unsigned char da[ETH_ALEN], da_mask[ETH_ALEN]; @@ -2354,7 +2402,7 @@ void mvpp2_prs_mac_del_all(struct mvpp2_port *port) 
(priv->prs_shadow[tid].udf != MVPP2_PRS_UDF_MAC_DEF)) continue; - mvpp2_prs_init_from_hw(priv, &pe, tid); + __mvpp2_prs_init_from_hw(priv, &pe, tid); pmap = mvpp2_prs_tcam_port_map_get(&pe); @@ -2375,14 +2423,17 @@ void mvpp2_prs_mac_del_all(struct mvpp2_port *port) continue; /* Remove entry from TCAM */ - mvpp2_prs_mac_da_accept(port, da, false); + __mvpp2_prs_mac_da_accept(port, da, false); } + + spin_unlock_bh(&priv->prs_spinlock); } int mvpp2_prs_tag_mode_set(struct mvpp2 *priv, int port, int type) { switch (type) { case MVPP2_TAG_TYPE_EDSA: + spin_lock_bh(&priv->prs_spinlock); /* Add port to EDSA entries */ mvpp2_prs_dsa_tag_set(priv, port, true, MVPP2_PRS_TAGGED, MVPP2_PRS_EDSA); @@ -2393,9 +2444,11 @@ int mvpp2_prs_tag_mode_set(struct mvpp2 *priv, int port, int type) MVPP2_PRS_TAGGED, MVPP2_PRS_DSA); mvpp2_prs_dsa_tag_set(priv, port, false, MVPP2_PRS_UNTAGGED, MVPP2_PRS_DSA); + spin_unlock_bh(&priv->prs_spinlock); break; case MVPP2_TAG_TYPE_DSA: + spin_lock_bh(&priv->prs_spinlock); /* Add port to DSA entries */ mvpp2_prs_dsa_tag_set(priv, port, true, MVPP2_PRS_TAGGED, MVPP2_PRS_DSA); @@ -2406,10 +2459,12 @@ int mvpp2_prs_tag_mode_set(struct mvpp2 *priv, int port, int type) MVPP2_PRS_TAGGED, MVPP2_PRS_EDSA); mvpp2_prs_dsa_tag_set(priv, port, false, MVPP2_PRS_UNTAGGED, MVPP2_PRS_EDSA); + spin_unlock_bh(&priv->prs_spinlock); break; case MVPP2_TAG_TYPE_MH: case MVPP2_TAG_TYPE_NONE: + spin_lock_bh(&priv->prs_spinlock); /* Remove port form EDSA and DSA entries */ mvpp2_prs_dsa_tag_set(priv, port, false, MVPP2_PRS_TAGGED, MVPP2_PRS_DSA); @@ -2419,6 +2474,7 @@ int mvpp2_prs_tag_mode_set(struct mvpp2 *priv, int port, int type) MVPP2_PRS_TAGGED, MVPP2_PRS_EDSA); mvpp2_prs_dsa_tag_set(priv, port, false, MVPP2_PRS_UNTAGGED, MVPP2_PRS_EDSA); + spin_unlock_bh(&priv->prs_spinlock); break; default: @@ -2437,11 +2493,15 @@ int mvpp2_prs_add_flow(struct mvpp2 *priv, int flow, u32 ri, u32 ri_mask) memset(&pe, 0, sizeof(pe)); + spin_lock_bh(&priv->prs_spinlock); + tid = 
mvpp2_prs_tcam_first_free(priv, MVPP2_PE_LAST_FREE_TID, MVPP2_PE_FIRST_FREE_TID); - if (tid < 0) + if (tid < 0) { + spin_unlock_bh(&priv->prs_spinlock); return tid; + } pe.index = tid; @@ -2461,6 +2521,7 @@ int mvpp2_prs_add_flow(struct mvpp2 *priv, int flow, u32 ri, u32 ri_mask) mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); mvpp2_prs_hw_write(priv, &pe); + spin_unlock_bh(&priv->prs_spinlock); return 0; } @@ -2472,6 +2533,8 @@ int mvpp2_prs_def_flow(struct mvpp2_port *port) memset(&pe, 0, sizeof(pe)); + spin_lock_bh(&port->priv->prs_spinlock); + tid = mvpp2_prs_flow_find(port->priv, port->id); /* Such entry not exist */ @@ -2480,8 +2543,10 @@ int mvpp2_prs_def_flow(struct mvpp2_port *port) tid = mvpp2_prs_tcam_first_free(port->priv, MVPP2_PE_LAST_FREE_TID, MVPP2_PE_FIRST_FREE_TID); - if (tid < 0) + if (tid < 0) { + spin_unlock_bh(&port->priv->prs_spinlock); return tid; + } pe.index = tid; @@ -2492,13 +2557,14 @@ int mvpp2_prs_def_flow(struct mvpp2_port *port) /* Update shadow table */ mvpp2_prs_shadow_set(port->priv, pe.index, MVPP2_PRS_LU_FLOWS); } else { - mvpp2_prs_init_from_hw(port->priv, &pe, tid); + __mvpp2_prs_init_from_hw(port->priv, &pe, tid); } mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_FLOWS); mvpp2_prs_tcam_port_map_set(&pe, (1 << port->id)); mvpp2_prs_hw_write(port->priv, &pe); + spin_unlock_bh(&port->priv->prs_spinlock); return 0; } @@ -2509,11 +2575,14 @@ int mvpp2_prs_hits(struct mvpp2 *priv, int index) if (index > MVPP2_PRS_TCAM_SRAM_SIZE) return -EINVAL; + spin_lock_bh(&priv->prs_spinlock); + mvpp2_write(priv, MVPP2_PRS_TCAM_HIT_IDX_REG, index); val = mvpp2_read(priv, MVPP2_PRS_TCAM_HIT_CNT_REG); val &= MVPP2_PRS_TCAM_HIT_CNT_MASK; + spin_unlock_bh(&priv->prs_spinlock); return val; } -- 2.51.0