Add validation points and state propagation to support PSP key
exchange inline, on TCP connections. The expectation is that
application will use some well established mechanism like TLS
handshake to establish a secure channel over the connection and
if both endpoints are PSP-capable - exchange and install PSP keys.
Because the connection can existing in PSP-unsecured and PSP-secured
state we need to make sure that there are no race conditions or
retransmission leaks.
On Tx - mark packets with the skb->decrypted bit when PSP key
is at the enqueue time. Drivers should only encrypt packets with
this bit set. This prevents retransmissions getting encrypted when
original transmission was not. Similarly to TLS, we'll use
sk->sk_validate_xmit_skb to make sure PSP skbs can't "escape"
via a PSP-unaware device without being encrypted.
On Rx - validation is done under socket lock. This moves the validation
point later than xfrm, for example. Please see the documentation patch
for more details on the flow of securing a connection, but for
the purpose of this patch what's important is that we want to
enforce the invariant that once connection is secured any skb
in the receive queue has been encrypted with PSP.
Add GRO and coalescing checks to prevent PSP authenticated data from
being combined with cleartext data, or data with non-matching PSP
state. On Rx, check skb's with psp_skb_coalesce_diff() at points
before psp_sk_rx_policy_check(). After skb's are policy checked and on
the socket receive queue, skb_cmp_decrypted() is sufficient for
checking for coalescable PSP state. On Tx, tcp_write_collapse_fence()
should be called when transitioning a socket into PSP Tx state to
prevent data sent as cleartext from being coalesced with PSP
encapsulated data.
This change only adds the validation points, for ease of review.
Subsequent change will add the ability to install keys, and flesh
the enforcement logic out
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Co-developed-by: Daniel Zahka <daniel.zahka@gmail.com>
Signed-off-by: Daniel Zahka <daniel.zahka@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20250917000954.859376-5-daniel.zahka@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
FN(CANXL_RX_INVALID_FRAME) \
FN(PFMEMALLOC) \
FN(DUALPI2_STEP_DROP) \
+ FN(PSP_INPUT) \
+ FN(PSP_OUTPUT) \
FNe(MAX)
/**
* threshold of DualPI2 qdisc.
*/
SKB_DROP_REASON_DUALPI2_STEP_DROP,
+ /** @SKB_DROP_REASON_PSP_INPUT: PSP input checks failed */
+ SKB_DROP_REASON_PSP_INPUT,
+ /** @SKB_DROP_REASON_PSP_OUTPUT: PSP output checks failed */
+ SKB_DROP_REASON_PSP_OUTPUT,
/**
* @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
* shouldn't be used as a real 'reason' - only for tracing code gen
#ifndef __NET_PSP_HELPERS_H
#define __NET_PSP_HELPERS_H
+#include <linux/skbuff.h>
+#include <net/sock.h>
#include <net/psp/types.h>
struct inet_timewait_sock;
void psp_dev_unregister(struct psp_dev *psd);
/* Kernel-facing API */
+#if IS_ENABLED(CONFIG_INET_PSP)
static inline void psp_sk_assoc_free(struct sock *sk) { }
+static inline void
+psp_twsk_init(struct inet_timewait_sock *tw, const struct sock *sk) { }
static inline void psp_twsk_assoc_free(struct inet_timewait_sock *tw) { }
+static inline void
+psp_reply_set_decrypted(struct sk_buff *skb) { }
+
+static inline void
+psp_enqueue_set_decrypted(struct sock *sk, struct sk_buff *skb)
+{
+}
+
+static inline unsigned long
+__psp_skb_coalesce_diff(const struct sk_buff *one, const struct sk_buff *two,
+ unsigned long diffs)
+{
+ return diffs;
+}
+
+static inline enum skb_drop_reason
+psp_sk_rx_policy_check(struct sock *sk, struct sk_buff *skb)
+{
+ return 0;
+}
+
+static inline enum skb_drop_reason
+psp_twsk_rx_policy_check(struct inet_timewait_sock *tw, struct sk_buff *skb)
+{
+ return 0;
+}
+
+static inline struct psp_assoc *psp_skb_get_assoc_rcu(struct sk_buff *skb)
+{
+ return NULL;
+}
+#else
+static inline void psp_sk_assoc_free(struct sock *sk) { }
+static inline void
+psp_twsk_init(struct inet_timewait_sock *tw, const struct sock *sk) { }
+static inline void psp_twsk_assoc_free(struct inet_timewait_sock *tw) { }
+static inline void
+psp_reply_set_decrypted(struct sk_buff *skb) { }
+
+static inline void
+psp_enqueue_set_decrypted(struct sock *sk, struct sk_buff *skb) { }
+
+static inline unsigned long
+__psp_skb_coalesce_diff(const struct sk_buff *one, const struct sk_buff *two,
+ unsigned long diffs)
+{
+ return diffs;
+}
+
+static inline enum skb_drop_reason
+psp_sk_rx_policy_check(struct sock *sk, struct sk_buff *skb)
+{
+ return 0;
+}
+
+static inline enum skb_drop_reason
+psp_twsk_rx_policy_check(struct inet_timewait_sock *tw, struct sk_buff *skb)
+{
+ return 0;
+}
+
+static inline struct psp_assoc *psp_skb_get_assoc_rcu(struct sk_buff *skb)
+{
+ return NULL;
+}
+#endif
+
+static inline unsigned long
+psp_skb_coalesce_diff(const struct sk_buff *one, const struct sk_buff *two)
+{
+ return __psp_skb_coalesce_diff(one, two, 0);
+}
#endif /* __NET_PSP_HELPERS_H */
// SPDX-License-Identifier: GPL-2.0-or-later
+#include <net/psp.h>
#include <net/gro.h>
#include <net/dst_metadata.h>
#include <net/busy_poll.h>
diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
diffs |= gro_list_prepare_tc_ext(skb, p, diffs);
+ diffs |= __psp_skb_coalesce_diff(skb, p, diffs);
}
NAPI_GRO_CB(p)->same_flow = !diffs;
#include <net/inet_timewait_sock.h>
#include <net/ip.h>
#include <net/tcp.h>
+#include <net/psp.h>
/**
* inet_twsk_bind_unhash - unhash a timewait socket from bind hash
refcount_set(&tw->tw_refcnt, 0);
__module_get(tw->tw_prot->owner);
+ psp_twsk_init(tw, sk);
}
return tw;
#include <linux/netfilter_bridge.h>
#include <linux/netlink.h>
#include <linux/tcp.h>
+#include <net/psp.h>
static int
ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
arg->csumoffset) = csum_fold(csum_add(nskb->csum,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
- if (orig_sk)
+ if (orig_sk) {
skb_set_owner_edemux(nskb, (struct sock *)orig_sk);
+ psp_reply_set_decrypted(nskb);
+ }
if (transmit_time)
nskb->tstamp_type = SKB_CLOCK_MONOTONIC;
if (txhash)
#include <net/proto_memory.h>
#include <net/xfrm.h>
#include <net/ip.h>
+#include <net/psp.h>
#include <net/sock.h>
#include <net/rstreason.h>
tcb->seq = tcb->end_seq = tp->write_seq;
tcb->tcp_flags = TCPHDR_ACK;
__skb_header_release(skb);
+ psp_enqueue_set_decrypted(sk, skb);
tcp_add_write_queue_tail(sk, skb);
sk_wmem_queued_add(sk, skb->truesize);
sk_mem_charge(sk, skb->truesize);
#include <net/secure_seq.h>
#include <net/busy_poll.h>
#include <net/rstreason.h>
+#include <net/psp.h>
#include <linux/inet.h>
#include <linux/ipv6.h>
enum skb_drop_reason reason;
struct sock *rsk;
+ reason = psp_sk_rx_policy_check(sk, skb);
+ if (reason)
+ goto err_discard;
+
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
struct dst_entry *dst;
reason = SKB_DROP_REASON_TCP_CSUM;
trace_tcp_bad_csum(skb);
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
+err_discard:
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
(TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)) ||
!tcp_skb_can_collapse_rx(tail, skb) ||
thtail->doff != th->doff ||
- memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
+ memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)) ||
+ /* prior to PSP Rx policy check, retain exact PSP metadata */
+ psp_skb_coalesce_diff(tail, skb))
goto no_coalesce;
__skb_pull(skb, hdrlen);
__this_cpu_write(tcp_tw_isn, isn);
goto process;
}
+
+ drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb);
+ if (drop_reason)
+ break;
}
/* to ACK */
fallthrough;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
u32 rcv_nxt = READ_ONCE(tcptw->tw_rcv_nxt);
struct tcp_options_received tmp_opt;
+ enum skb_drop_reason psp_drop;
bool paws_reject = false;
int ts_recent_stamp;
+ /* Instead of dropping immediately, wait to see what value is
+ * returned. We will accept a non psp-encapsulated syn in the
+ * case where TCP_TW_SYN is returned.
+ */
+ psp_drop = psp_twsk_rx_policy_check(tw, skb);
+
tmp_opt.saw_tstamp = 0;
ts_recent_stamp = READ_ONCE(tcptw->tw_ts_recent_stamp);
if (th->doff > (sizeof(*th) >> 2) && ts_recent_stamp) {
if (READ_ONCE(tw->tw_substate) == TCP_FIN_WAIT2) {
/* Just repeat all the checks of tcp_rcv_state_process() */
+ if (psp_drop)
+ goto out_put;
+
/* Out of window, send ACK */
if (paws_reject ||
!tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
(TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) {
/* In window segment, it may be only reset or bare ack. */
+ if (psp_drop)
+ goto out_put;
+
if (th->rst) {
/* This is TIME_WAIT assassination, in two flavors.
* Oh well... nobody has a sufficient solution to this
return TCP_TW_SYN;
}
+ if (psp_drop)
+ goto out_put;
+
if (paws_reject) {
*drop_reason = SKB_DROP_REASON_TCP_RFC7323_TW_PAWS;
__NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWS_TW_REJECTED);
return tcp_timewait_check_oow_rate_limit(
tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT);
}
+
+out_put:
inet_twsk_put(tw);
return TCP_TW_SUCCESS;
}
#include <net/tcp_ecn.h>
#include <net/mptcp.h>
#include <net/proto_memory.h>
+#include <net/psp.h>
#include <linux/compiler.h>
#include <linux/gfp.h>
/* Constructs common control bits of non-data skb. If SYN/FIN is present,
* auto increment end seqno.
*/
-static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u16 flags)
+static void tcp_init_nondata_skb(struct sk_buff *skb, struct sock *sk,
+ u32 seq, u16 flags)
{
skb->ip_summed = CHECKSUM_PARTIAL;
TCP_SKB_CB(skb)->tcp_flags = flags;
tcp_skb_pcount_set(skb, 1);
+ psp_enqueue_set_decrypted(sk, skb);
TCP_SKB_CB(skb)->seq = seq;
if (flags & (TCPHDR_SYN | TCPHDR_FIN))
/* Advance write_seq and place onto the write_queue. */
WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq);
__skb_header_release(skb);
+ psp_enqueue_set_decrypted(sk, skb);
tcp_add_write_queue_tail(sk, skb);
sk_wmem_queued_add(sk, skb->truesize);
sk_mem_charge(sk, skb->truesize);
skb_reserve(skb, MAX_TCP_HEADER);
sk_forced_mem_schedule(sk, skb->truesize);
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
- tcp_init_nondata_skb(skb, tp->write_seq,
+ tcp_init_nondata_skb(skb, sk, tp->write_seq,
TCPHDR_ACK | TCPHDR_FIN);
tcp_queue_skb(sk, skb);
}
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, MAX_TCP_HEADER);
- tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
+ tcp_init_nondata_skb(skb, sk, tcp_acceptable_seq(sk),
TCPHDR_ACK | TCPHDR_RST);
tcp_mstamp_refresh(tcp_sk(sk));
/* Send it off. */
/* SYN eats a sequence byte, write_seq updated by
* tcp_connect_queue_skb().
*/
- tcp_init_nondata_skb(buff, tp->write_seq, TCPHDR_SYN);
+ tcp_init_nondata_skb(buff, sk, tp->write_seq, TCPHDR_SYN);
tcp_mstamp_refresh(tp);
tp->retrans_stamp = tcp_time_stamp_ts(tp);
tcp_connect_queue_skb(sk, buff);
/* Reserve space for headers and prepare control bits. */
skb_reserve(buff, MAX_TCP_HEADER);
- tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK | flags);
+ tcp_init_nondata_skb(buff, sk,
+ tcp_acceptable_seq(sk), TCPHDR_ACK | flags);
/* We do not want pure acks influencing TCP Small Queues or fq/pacing
* too much.
* end to send an ack. Don't queue or clone SKB, just
* send it.
*/
- tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
+ tcp_init_nondata_skb(skb, sk, tp->snd_una - !urgent, TCPHDR_ACK);
NET_INC_STATS(sock_net(sk), mib);
return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0);
}
#include <net/hotdata.h>
#include <net/busy_poll.h>
#include <net/rstreason.h>
+#include <net/psp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
if (sk) {
/* unconstify the socket only to attach it to buff with care. */
skb_set_owner_edemux(buff, (struct sock *)sk);
+ psp_reply_set_decrypted(buff);
if (sk->sk_state == TCP_TIME_WAIT)
mark = inet_twsk(sk)->tw_mark;
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_do_rcv(sk, skb);
+ reason = psp_sk_rx_policy_check(sk, skb);
+ if (reason)
+ goto err_discard;
+
/*
* socket locking is here for SMP purposes as backlog rcv
* is currently called with bh processing disabled.
reason = SKB_DROP_REASON_TCP_CSUM;
trace_tcp_bad_csum(skb);
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
+err_discard:
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
__this_cpu_write(tcp_tw_isn, isn);
goto process;
}
+
+ drop_reason = psp_twsk_rx_policy_check(inet_twsk(sk), skb);
+ if (drop_reason)
+ break;
}
/* to ACK */
fallthrough;
config INET_PSP
bool "PSP Security Protocol support"
depends on INET
+ select SKB_DECRYPTED
help
Enable kernel support for the PSP protocol.
For more information see: