Yong-Hao Zou mentioned that Linux was not as strict as other OSes in the
3WHS (three-way handshake), for flows using the TCP TS option (RFC 7323).
As hinted by an old comment in tcp_check_req(),
we can check that the TSEcr value in the incoming packet corresponds
to one of the SYNACK TSval values we have sent.
In this patch, I record the oldest and most recent values
that SYNACK packets have used.
Send a challenge ACK if we receive a TSEcr outside
of this range, and increase a new SNMP counter.
nstat -az | grep TSEcrRejected
TcpExtTSEcrRejected            0                  0.0
Due to the TCP fastopen implementation, do not apply these checks
to fastopen flows yet.
v2: No longer use req->num_timeout, but treq->snt_tsval_first
    to detect when first SYNACK is prepared. This means
    we make sure to not send an initial zero TSval.
    Make sure MPTCP and TCP selftests are passing.
    Change MIB name to TcpExtTSEcrRejected
v1: https://lore.kernel.org/netdev/CADVnQykD8i4ArpSZaPKaoNxLJ2if2ts9m4As+=Jvdkrgx1qMHw@mail.gmail.com/T/
Reported-by: Yong-Hao Zou <yonghaoz1994@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250225171048.3105061-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
 unsigned_long  LINUX_MIB_TIMEWAITKILLED
 unsigned_long  LINUX_MIB_PAWSACTIVEREJECTED
 unsigned_long  LINUX_MIB_PAWSESTABREJECTED
+unsigned_long  LINUX_MIB_TSECRREJECTED
 unsigned_long  LINUX_MIB_DELAYEDACKLOST
 unsigned_long  LINUX_MIB_LISTENOVERFLOWS
 unsigned_long  LINUX_MIB_LISTENDROPS
 
        u32                             rcv_isn;
        u32                             snt_isn;
        u32                             ts_off;
+       u32                             snt_tsval_first;
+       u32                             snt_tsval_last;
        u32                             last_oow_ack_time; /* last SYNACK */
        u32                             rcv_nxt; /* the ack # by SYNACK. For
                                                  * FastOpen it's the seq#
 
        LINUX_MIB_TIMEWAITKILLED,               /* TimeWaitKilled */
        LINUX_MIB_PAWSACTIVEREJECTED,           /* PAWSActiveRejected */
        LINUX_MIB_PAWSESTABREJECTED,            /* PAWSEstabRejected */
+       LINUX_MIB_TSECRREJECTED,                /* TSEcrRejected */
        LINUX_MIB_PAWS_OLD_ACK,                 /* PAWSOldAck */
        LINUX_MIB_DELAYEDACKS,                  /* DelayedACKs */
        LINUX_MIB_DELAYEDACKLOCKED,             /* DelayedACKLocked */
 
        SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED),
        SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED),
        SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
+       SNMP_MIB_ITEM("TSEcrRejected", LINUX_MIB_TSECRREJECTED),
        SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK),
        SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS),
        SNMP_MIB_ITEM("DelayedACKLocked", LINUX_MIB_DELAYEDACKLOCKED),
 
                ireq->smc_ok = 0;
 
        treq->snt_synack = 0;
+       treq->snt_tsval_first = 0;
        treq->tfo_listener = false;
        treq->txhash = net_tx_rndhash();
        treq->rcv_isn = ntohl(th->seq) - 1;
 
        tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
        tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
        tcp_rsk(req)->snt_synack = 0;
+       tcp_rsk(req)->snt_tsval_first = 0;
        tcp_rsk(req)->last_oow_ack_time = 0;
        req->mss = rx_opt->mss_clamp;
        req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
 
        struct sock *child;
        const struct tcphdr *th = tcp_hdr(skb);
        __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
+       bool tsecr_reject = false;
        bool paws_reject = false;
        bool own_req;
 
 
                if (tmp_opt.saw_tstamp) {
                        tmp_opt.ts_recent = READ_ONCE(req->ts_recent);
-                       if (tmp_opt.rcv_tsecr)
+                       if (tmp_opt.rcv_tsecr) {
+                               if (inet_rsk(req)->tstamp_ok && !fastopen)
+                                       tsecr_reject = !between(tmp_opt.rcv_tsecr,
+                                                       tcp_rsk(req)->snt_tsval_first,
+                                                       READ_ONCE(tcp_rsk(req)->snt_tsval_last));
                                tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
+                       }
                        /* We do not store true stamp, but it is not required,
                         * it can be estimated (approximately)
                         * from another data.
             tcp_rsk(req)->snt_isn + 1))
                return sk;
 
-       /* Also, it would be not so bad idea to check rcv_tsecr, which
-        * is essentially ACK extension and too early or too late values
-        * should cause reset in unsynchronized states.
-        */
-
        /* RFC793: "first check sequence number". */
 
-       if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq,
-                                         TCP_SKB_CB(skb)->end_seq,
-                                         tcp_rsk(req)->rcv_nxt,
-                                         tcp_rsk(req)->rcv_nxt +
-                                         tcp_synack_window(req))) {
+       if (paws_reject || tsecr_reject ||
+           !tcp_in_window(TCP_SKB_CB(skb)->seq,
+                          TCP_SKB_CB(skb)->end_seq,
+                          tcp_rsk(req)->rcv_nxt,
+                          tcp_rsk(req)->rcv_nxt +
+                          tcp_synack_window(req))) {
                /* Out of window: send ACK and drop. */
                if (!(flg & TCP_FLAG_RST) &&
                    !tcp_oow_rate_limited(sock_net(sk), skb,
                        req->rsk_ops->send_ack(sk, skb, req);
                if (paws_reject)
                        NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
+               else if (tsecr_reject)
+                       NET_INC_STATS(sock_net(sk), LINUX_MIB_TSECRREJECTED);
                return NULL;
        }
 
 
                opts->options |= OPTION_TS;
                opts->tsval = tcp_skb_timestamp_ts(tcp_rsk(req)->req_usec_ts, skb) +
                              tcp_rsk(req)->ts_off;
+               if (!tcp_rsk(req)->snt_tsval_first) {
+                       if (!opts->tsval)
+                               opts->tsval = ~0U;
+                       tcp_rsk(req)->snt_tsval_first = opts->tsval;
+               }
+               WRITE_ONCE(tcp_rsk(req)->snt_tsval_last, opts->tsval);
                opts->tsecr = READ_ONCE(req->ts_recent);
                remaining -= TCPOLEN_TSTAMP_ALIGNED;
        }