]> www.infradead.org Git - users/willy/xarray.git/commitdiff
tcp: avoid too many retransmit packets
authorEric Dumazet <edumazet@google.com>
Wed, 10 Jul 2024 00:14:01 +0000 (00:14 +0000)
committerJakub Kicinski <kuba@kernel.org>
Thu, 11 Jul 2024 02:05:27 +0000 (19:05 -0700)
If a TCP socket is using TCP_USER_TIMEOUT, and the other peer
retracted its window to zero, tcp_retransmit_timer() can
retransmit a packet every two jiffies (2 ms for HZ=1000),
for about 4 minutes after TCP_USER_TIMEOUT has 'expired'.

The fix is to make sure tcp_rtx_probe0_timed_out() takes
icsk->icsk_user_timeout into account.

Before blamed commit, the socket would not timeout after
icsk->icsk_user_timeout, but would use standard exponential
backoff for the retransmits.

Also worth noting that before commit e89688e3e978 ("net: tcp:
fix unexcepted socket die when snd_wnd is 0"), the issue
would last 2 minutes instead of 4.

Fixes: b701a99e431d ("tcp: Add tcp_clamp_rto_to_user_timeout() helper to improve accuracy")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Jon Maxwell <jmaxwell37@gmail.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20240710001402.2758273-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/ipv4/tcp_timer.c

index db9d826560e57caf8274d1b7253c7af4dd7821a0..892c86657fbc243ce53a939157b77f1fe0410097 100644 (file)
@@ -483,15 +483,26 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
                                     const struct sk_buff *skb,
                                     u32 rtx_delta)
 {
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
        const struct tcp_sock *tp = tcp_sk(sk);
-       const int timeout = TCP_RTO_MAX * 2;
+       int timeout = TCP_RTO_MAX * 2;
        s32 rcv_delta;
 
+       if (user_timeout) {
+               /* If user application specified a TCP_USER_TIMEOUT,
+                * it does not want win 0 packets to 'reset the timer'
+                * while retransmits are not making progress.
+                */
+               if (rtx_delta > user_timeout)
+                       return true;
+               timeout = min_t(u32, timeout, msecs_to_jiffies(user_timeout));
+       }
        /* Note: timer interrupt might have been delayed by at least one jiffy,
         * and tp->rcv_tstamp might very well have been written recently.
         * rcv_delta can thus be negative.
         */
-       rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp;
+       rcv_delta = icsk->icsk_timeout - tp->rcv_tstamp;
        if (rcv_delta <= timeout)
                return false;