From: Eric Dumazet <edumazet@google.com> Date: Mon, 31 Mar 2025 07:59:46 +0000 (+0000) Subject: Revert "tcp: avoid atomic operations on sk->sk_rmem_alloc" X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=f278b6d5bb465c7fd66f3d103812947e55b376ed;p=users%2Fdwmw2%2Flinux.git Revert "tcp: avoid atomic operations on sk->sk_rmem_alloc" This reverts commit 0de2a5c4b824da2205658ebebb99a55c43cdf60f. I forgot that a TCP socket could receive messages in its error queue. sock_queue_err_skb() can be called without socket lock being held, and changes sk->sk_rmem_alloc. The fact that skbs in error queue are limited by sk->sk_rcvbuf means that error messages can be dropped if socket receive queues are full, which is an orthogonal issue. In future kernels, we could use a separate sk->sk_error_mem_alloc counter specifically for the error queue. Fixes: 0de2a5c4b824 ("tcp: avoid atomic operations on sk->sk_rmem_alloc") Signed-off-by: Eric Dumazet <edumazet@google.com> Link: https://patch.msgid.link/20250331075946.31960-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- diff --git a/include/net/tcp.h b/include/net/tcp.h index df04dc09c519d..4450c384ef178 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -779,7 +779,6 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) /* tcp.c */ void tcp_get_info(struct sock *, struct tcp_info *); -void tcp_sock_rfree(struct sk_buff *skb); /* Read 'sendfile()'-style from a TCP socket */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, @@ -2899,18 +2898,4 @@ enum skb_drop_reason tcp_inbound_hash(struct sock *sk, const void *saddr, const void *daddr, int family, int dif, int sdif); -/* version of skb_set_owner_r() avoiding one atomic_add() */ -static inline void tcp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) -{ - skb_orphan(skb); - skb->sk = sk; - skb->destructor = tcp_sock_rfree; - - sock_owned_by_me(sk); - atomic_set(&sk->sk_rmem_alloc, - atomic_read(&sk->sk_rmem_alloc) + skb->truesize); - - sk_forward_alloc_add(sk, -skb->truesize); -} - #endif /* _TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ea8de00f669d0..6edc441b37023 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1525,25 +1525,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) __tcp_cleanup_rbuf(sk, copied); } -/* private version of sock_rfree() avoiding one atomic_sub() */ -void tcp_sock_rfree(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - unsigned int len = skb->truesize; - - sock_owned_by_me(sk); - atomic_set(&sk->sk_rmem_alloc, - atomic_read(&sk->sk_rmem_alloc) - len); - - sk_forward_alloc_add(sk, len); - sk_mem_reclaim(sk); -} - static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb) { __skb_unlink(skb, &sk->sk_receive_queue); - if (likely(skb->destructor == tcp_sock_rfree)) { - tcp_sock_rfree(skb); + if (likely(skb->destructor == sock_rfree)) { + sock_rfree(skb); skb->destructor = NULL; skb->sk = NULL; return skb_attempt_defer_free(skb); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index ca40665145c69..1a6b1bc542451 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -189,7 +189,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) tcp_segs_in(tp, skb); __skb_pull(skb, tcp_hdrlen(skb)); sk_forced_mem_schedule(sk, skb->truesize); - tcp_skb_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); TCP_SKB_CB(skb)->seq++; TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e1f952fbac48d..a35018e2d0ba2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5171,7 +5171,7 @@ end: if (tcp_is_sack(tp)) tcp_grow_window(sk, skb, false); skb_condense(skb); - tcp_skb_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); } } @@ -5187,7 +5187,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { tcp_add_receive_queue(sk, skb); - tcp_skb_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); } return eaten; } @@ -5504,7 +5504,7 @@ skip_this: __skb_queue_before(list, skb, nskb); else __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */ - tcp_skb_set_owner_r(nskb, sk); + skb_set_owner_r(nskb, sk); mptcp_skb_ext_move(nskb, skb); /* Copy data, releasing collapsed skbs. */