From bb2b6be8283dfe91704024128f971be7903a8474 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Wed, 12 Apr 2017 22:34:56 +0100
Subject: [PATCH] DTrace tcp/udp provider probes
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This patch adds DTrace SDT probes for the TCP and UDP protocols.  For tcp
the following probes are added:

tcp:::send		Fires when a tcp segment is transmitted
tcp:::receive		Fires when a tcp segment is received
tcp:::state-change	Fires when a tcp connection changes state
tcp:::connect-request	Fires when a SYN segment is sent
tcp:::connect-refused	Fires when a RST is received for connection attempt
tcp:::connect-established
			Fires when three-way handshake completes for
			initiator

tcp:::accept-refused	Fires when a RST is sent refusing connection attempt
tcp:::accept-established
			Fires when three-way handshake succeeds for acceptor

Arguments for all of these probes are:

arg0	struct sk_buff *; to be translated into pktinfo_t * containing
	implementation-independent packet data
arg1	struct sock *; to be translated into csinfo_t * containing
	implementation-independent connection data
arg2	__dtrace_tcp_void_ip_t *; to be translated into ipinfo_t * containing
	implementation-independent IP information.  Custom type is used as
	this gives DTrace a hint that we can source IP information from other
	arguments if the IP header is not available.
arg3	struct tcp_sock *; to be translated into tcpsinfo_t * containing
	implementation-independent TCP connection data
arg4	struct tcphdr *; to be translated into a tcpinfo_t * containing
	implementation-independent TCP header data
arg5	int representing previous state; to be translated into a
	tcplsinfo_t * which contains the previous state.  Differs from
	current state (arg6) for state-change probes only.
arg6	int representing current state.  Cannot be sourced from struct
	tcp_sock as we sometimes need to probe before state change is
	reflected there
arg7	int representing direction of traffic for probe; values are
	DTRACE_NET_PROBE_INBOUND for receipt of data and
	DTRACE_NET_PROBE_OUTBOUND for transmission.

For udp the following probes are added:

udp:::send		Fires when a udp datagram is sent
udp:::receive		Fires when a udp datagram is received

Arguments for these probes are:

arg0	struct sk_buff *; to be translated into pktinfo_t * containing
        implementation-independent packet data
arg1    struct sock *; to be translated into csinfo_t * containing
        implementation-independent connection data
arg2    void_ip_t *; to be translated into ipinfo_t * containing
        implementation-independent IP information.
arg3	struct udp_sock *; to be translated into a udpsinfo_t * containing
	implementation-independent UDP connection data
arg4	struct udphdr *; to be translated into a udpinfo_t * containing
	implementation-independent UDP header information.

Orabug: 25815197

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Reviewed-by: Rao Shoaib <rao.shoaib@oracle.com>
Acked-by: Håkon Bugge <haakon.bugge@oracle.com>
---
 include/linux/sdt.h      |  34 +++++++++++-
 net/ipv4/tcp.c           |  26 +++++++++
 net/ipv4/tcp_input.c     |  44 +++++++++++++++
 net/ipv4/tcp_ipv4.c      | 115 ++++++++++++++++++++++++++++++++++++++-
 net/ipv4/tcp_minisocks.c |  15 +++++
 net/ipv4/tcp_output.c    |  29 ++++++++++
 net/ipv4/udp.c           |  26 ++++++++-
 net/ipv6/tcp_ipv6.c      | 106 +++++++++++++++++++++++++++++++++++-
 net/ipv6/udp.c           |  23 ++++++++
 9 files changed, 414 insertions(+), 4 deletions(-)

diff --git a/include/linux/sdt.h b/include/linux/sdt.h
index bab41ce09951..d585ed612874 100644
--- a/include/linux/sdt.h
+++ b/include/linux/sdt.h
@@ -113,14 +113,46 @@ typedef struct sdt_probedesc {
 #define	DTRACE_SMB(name, ...)						\
 	DTRACE_PROBE(__smb_##name, ## __VA_ARGS__);
 
+/*
+ * These definitions are used at probe points to specify the traffic direction;
+ * this helps simplify argument translation.
+ */
+#define	DTRACE_NET_PROBE_OUTBOUND	0x0
+#define	DTRACE_NET_PROBE_INBOUND	0x1
+
 #define	DTRACE_IP(name, ...)						\
 	DTRACE_PROBE(__ip_##name, ## __VA_ARGS__);
 
+/*
+ * Default DTRACE_TCP() and DTRACE_UDP() provider definitions specify the
+ * probe point within an is-enabled predicate.  This is to avoid the overhead
+ * incurred during argument dereferencing (e.g. calls to ip_hdr(skb)), along
+ * with any conditional evaluation (which would require branching) when the
+ * probe is disabled.
+ *
+ * Because some TCP probe points require additional argument preparation,
+ * we also define the is-enabled predicate directly as
+ * DTRACE_TCP_ENABLED(probename) along with a probe point which does not
+ * wrap the probe in an is-enabled predicate; this allows us to handle cases
+ * such as this:
+ *
+ * if (DTRACE_TCP_ENABLED(state__change)) {
+ *      ...argument preparation...
+ *      DTRACE_TCP_NOCHECK(state__change, ...);
+ * }
+ */
+
 #define	DTRACE_TCP(name, ...)						\
+	do { if (DTRACE_PROBE_ENABLED(__tcp_##name))			\
+		DTRACE_PROBE(__tcp_##name, ## __VA_ARGS__); } while (0)
+#define	DTRACE_TCP_ENABLED(name)					\
+	DTRACE_PROBE_ENABLED(__tcp_##name)
+#define	DTRACE_TCP_NOCHECK(name, ...)					\
 	DTRACE_PROBE(__tcp_##name, ## __VA_ARGS__);
 
 #define	DTRACE_UDP(name, ...)						\
-	DTRACE_PROBE(__udp_##name, ## __VA_ARGS__);
+	do { if (DTRACE_PROBE_ENABLED(__udp_##name))			\
+		DTRACE_PROBE(__udp_##name, ## __VA_ARGS__); } while (0)
 
 #define	DTRACE_SYSEVENT(name, ...)					\
 	DTRACE_PROBE(__sysevent_##name, ## __VA_ARGS__);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b5f4f5cb452e..8bdd39ba2950 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -269,6 +269,7 @@
 #include <linux/crypto.h>
 #include <linux/time.h>
 #include <linux/slab.h>
+#include <linux/sdt.h>
 
 #include <net/icmp.h>
 #include <net/inet_common.h>
@@ -1874,6 +1875,19 @@ recv_sndq:
 }
 EXPORT_SYMBOL(tcp_recvmsg);
 
+/* We wish to avoid instrumenting TCP state transitions to SYN_SENT as we trace
+ * those state changes later once the destination address is committed to the
+ * sk.  We also need to deal with the fact that separate timewait sockets are
+ * used to handle the TIME_WAIT state.  We do not want to trace direct
+ * transitions from CLOSING/FIN_WAIT2 -> CLOSE since they do not represent
+ * connection close, rather a transition to using the timewait socket.
+ * Accordingly skip instrumentation of CLOSING/FIN_WAIT2 -> CLOSE transitions.
+ */
+#define	REAL_STATE_CHANGE(old, new)					\
+	((old) != (new) && (new) != TCP_SYN_SENT &&			\
+	 (((old) != TCP_CLOSING && (old) != TCP_FIN_WAIT2) ||		\
+	  (new) != TCP_CLOSE))
+
 void tcp_set_state(struct sock *sk, int state)
 {
 	int oldstate = sk->sk_state;
@@ -1903,6 +1917,18 @@ void tcp_set_state(struct sock *sk, int state)
 	 */
 	sk->sk_state = state;
 
+	if (DTRACE_TCP_ENABLED(state__change) &&
+	    REAL_STATE_CHANGE(oldstate, state))
+		DTRACE_TCP_NOCHECK(state__change,
+				   struct sk_buff * : pktinfo_t *, NULL,
+				   struct sock * : csinfo_t *, sk,
+				   __dtrace_tcp_void_ip_t * : ipinfo_t *, NULL,
+				   struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk),
+				   struct tcphdr * : tcpinfo_t *, NULL,
+				   int : tcplsinfo_t *, oldstate,
+				   int : int, state,
+				   int : int, DTRACE_NET_PROBE_OUTBOUND);
+
 #ifdef STATE_TRACE
 	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
 #endif
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7b1fc955b6b6..76d24c510c66 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -75,6 +75,7 @@
 #include <linux/ipsec.h>
 #include <asm/unaligned.h>
 #include <linux/errqueue.h>
+#include <linux/sdt.h>
 
 int sysctl_tcp_timestamps __read_mostly = 1;
 int sysctl_tcp_window_scaling __read_mostly = 1;
@@ -5377,6 +5378,14 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
+	DTRACE_TCP(connect__established,
+		   struct sk_buff * :  pktinfo_t *, skb,
+		   struct sock * : csinfo_t *, sk,
+		   __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb),
+		   struct tcp_sock * : tcpsinfo_t *, tp,
+		   struct tcphdr * : tcpinfo_t *, tcp_hdr(skb),
+		   int : tcplsinfo_t *, TCP_ESTABLISHED,
+		   int, TCP_ESTABLISHED, int, DTRACE_NET_PROBE_INBOUND);
 	tcp_set_state(sk, TCP_ESTABLISHED);
 
 	if (skb) {
@@ -5507,6 +5516,17 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 */
 
 		if (th->rst) {
+			DTRACE_TCP(connect__refused,
+				   struct sk_buff * : pktinfo_t *, skb,
+				   struct sock * : csinfo_t *, sk,
+				   __dtrace_tcp_void_ip_t * : ipinfo_t *,
+				   ip_hdr(skb),
+				   struct tcp_sock * : tcpsinfo_t *, tp,
+				   struct tcphdr * : tcpinfo_t *, th,
+				   int : tcplsinfo_t *,
+				   sk ? sk->sk_state : TCP_CLOSE,
+				   int, sk ? sk->sk_state : TCP_CLOSE,
+				   int, DTRACE_NET_PROBE_INBOUND);
 			tcp_reset(sk);
 			goto discard;
 		}
@@ -5798,6 +5818,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			tcp_init_buffer_space(sk);
 		}
 		smp_mb();
+
+		DTRACE_TCP(accept__established,
+			   struct sk_buff * :  pktinfo_t *, skb,
+			   struct sock * : csinfo_t *, sk,
+			   __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb),
+			   struct tcp_sock * : tcpsinfo_t *, tp,
+			   struct tcphdr * : tcpinfo_t *, tcp_hdr(skb),
+			   int : tcplsinfo_t *, TCP_ESTABLISHED,
+			   int, TCP_ESTABLISHED,
+			   int, DTRACE_NET_PROBE_INBOUND);
 		tcp_set_state(sk, TCP_ESTABLISHED);
 		sk->sk_state_change(sk);
 
@@ -6208,6 +6238,20 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		   tcp_try_fastopen(sk, skb, req, &foc, dst);
 	err = af_ops->send_synack(sk, dst, &fl, req,
 				  skb_get_queue_mapping(skb), &foc);
+
+	/* Do not pass in tcp sock as ports/addresses are not yet
+	 * populated - instead translators will fill them in from
+	 * skb data.
+	 */
+	DTRACE_TCP(state__change,
+		   struct sk_buff * : pktinfo_t *, skb,
+		   struct sock * : csinfo_t *, sk,
+		   __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb),
+		   struct tcp_sock * : tcpsinfo_t *, NULL,
+		   struct tcphdr * : tcpinfo_t *, tcp_hdr(skb),
+		   int : tcplsinfo_t *, TCP_LISTEN,
+		   int, TCP_SYN_RECV, int, DTRACE_NET_PROBE_INBOUND);
+
 	if (!fastopen) {
 		if (err || want_cookie)
 			goto drop_and_free;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index aa4d525bbce8..f8ecc36ce190 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -84,6 +84,7 @@
 
 #include <linux/crypto.h>
 #include <linux/scatterlist.h>
+#include <linux/sdt.h>
 
 int sysctl_tcp_tw_reuse __read_mostly;
 int sysctl_tcp_low_latency __read_mostly;
@@ -563,6 +564,21 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_v4_send_check);
 
+/* Since we want to trace send events in TCP prior to pushing the segment to
+ * IP - where the IP header is added - we need to construct an argument
+ * containing relevant IP info so that TCP probe consumers can utilize it.
+ * Unset header fields are zeroed so no stale stack data reaches consumers.
+ * NOTE(review): tot_len = 5 mirrors ihl, not a byte length - confirm intent.
+ */
+static inline void dtrace_tcp_build_iphdr(__be32 saddr, __be32 daddr,
+					  struct iphdr *iph)
+{
+	*iph = (struct iphdr) {
+		.version = 4, .ihl = 5, .tot_len = 5,
+		.protocol = IPPROTO_TCP, .saddr = saddr, .daddr = daddr,
+	};
+}
+
 /*
  *	This routine will send an RST to the other tcp.
  *
@@ -684,6 +700,39 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 		arg.bound_dev_if = sk->sk_bound_dev_if;
 
 	arg.tos = ip_hdr(skb)->tos;
+
+	if (DTRACE_TCP_ENABLED(send) ||
+	    DTRACE_TCP_ENABLED(accept__refused)) {
+		struct iphdr iph;
+
+		dtrace_tcp_build_iphdr(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr,
+				       &iph);
+
+		DTRACE_TCP_NOCHECK(send,
+				   struct sk_buff * : pktinfo_t *, NULL,
+				   struct sock * : csinfo_t *, NULL,
+				   __dtrace_tcp_void_ip_t * : ipinfo_t *, &iph,
+				   struct tcp_sock * : tcpsinfo_t *, NULL,
+				   struct tcphdr * : tcpinfo_t *, &rep.th,
+				   int : tcplsinfo_t *, TCP_CLOSE,
+				   int : int, TCP_CLOSE,
+				   int : int, DTRACE_NET_PROBE_OUTBOUND);
+		if (th->syn && rep.th.seq == 0)
+			DTRACE_TCP_NOCHECK(accept__refused,
+					   struct sk_buff * : pktinfo_t *, NULL,
+					   struct sock * : csinfo_t *, NULL,
+					   __dtrace_tcp_void_ip_t * :
+					   ipinfo_t *, &iph,
+					   struct tcp_sock * : tcpsinfo_t *,
+					   NULL,
+					   struct tcphdr * : tcpinfo_t *,
+					   &rep.th,
+					   int : tcplsinfo_t *, TCP_CLOSE,
+					   int : int, TCP_CLOSE,
+					   int : int,
+					   DTRACE_NET_PROBE_OUTBOUND);
+	}
+
 	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
@@ -769,6 +818,24 @@ static void tcp_v4_send_ack(struct net *net,
 	if (oif)
 		arg.bound_dev_if = oif;
 	arg.tos = tos;
+
+	if (DTRACE_TCP_ENABLED(send)) {
+		struct iphdr iph;
+
+		dtrace_tcp_build_iphdr(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr,
+				       &iph);
+
+		DTRACE_TCP_NOCHECK(send,
+				   struct sk_buff * : pktinfo_t *, NULL,
+				   struct sock * : csinfo_t *, NULL,
+				   __dtrace_tcp_void_ip_t * : ipinfo_t *, &iph,
+				   struct tcp_sock * : tcpsinfo_t *, NULL,
+				   struct tcphdr * : tcpinfo_t *, &rep,
+				   int : tcplsinfo_t *, TCP_CLOSE,
+				   int : int, TCP_CLOSE,
+				   int : int, DTRACE_NET_PROBE_OUTBOUND);
+	}
+
 	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
@@ -842,6 +909,31 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
 
 		skb_set_queue_mapping(skb, queue_mapping);
+
+		if (DTRACE_TCP_ENABLED(send)) {
+			struct iphdr iph;
+
+			dtrace_tcp_build_iphdr(ireq->ir_loc_addr,
+					       ireq->ir_rmt_addr, &iph);
+
+			/* Do not supply tcp sk - addresses/ports are not
+			 * committed yet - instead translators will fill them
+			 * in from skb/IP info.
+			 */
+			DTRACE_TCP_NOCHECK(send,
+					   struct sk_buff * :  pktinfo_t *, skb,
+					   struct sock * : csinfo_t *, sk,
+					   __dtrace_tcp_void_ip_t * :
+					   ipinfo_t *, &iph,
+					   struct tcp_sock * : tcpsinfo_t *,
+					   NULL,
+					   struct tcphdr * : tcpinfo_t *,
+					   tcp_hdr(skb),
+					   int : tcplsinfo_t *, TCP_LISTEN,
+					   int, TCP_LISTEN,
+					   int, DTRACE_NET_PROBE_OUTBOUND);
+		}
+
 		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
 					    ireq->ir_rmt_addr,
 					    ireq->opt);
@@ -1547,7 +1639,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 {
 	const struct iphdr *iph;
 	const struct tcphdr *th;
-	struct sock *sk;
+	struct sock *sk = NULL;
 	int ret;
 	struct net *net = dev_net(skb->dev);
 
@@ -1597,6 +1689,15 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	if (!sk)
 		goto no_tcp_socket;
 
+	DTRACE_TCP(receive,
+		   struct sk_buff * :  pktinfo_t *, skb,
+		   struct sock * : csinfo_t *, sk,
+		   __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb),
+		   struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk),
+		   struct tcphdr * : tcpinfo_t *, tcp_hdr(skb),
+		   int : tcplsinfo_t *, sk ? sk->sk_state : TCP_CLOSE,
+		   int, sk ? sk->sk_state : TCP_CLOSE,
+		   int, DTRACE_NET_PROBE_INBOUND);
 process:
 	if (sk->sk_state == TCP_TIME_WAIT)
 		goto do_time_wait;
@@ -1660,6 +1761,18 @@ bad_packet:
 
 discard_it:
 	/* Discard frame. */
+	if (DTRACE_TCP_ENABLED(receive) && skb->pkt_type == PACKET_HOST)
+		DTRACE_TCP_NOCHECK(receive,
+				   struct sk_buff * :  pktinfo_t *, skb,
+				   struct sock * : csinfo_t *, sk,
+				   __dtrace_tcp_void_ip_t * : ipinfo_t *,
+				   ip_hdr(skb),
+				   struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk),
+				   struct tcphdr * : tcpinfo_t *, tcp_hdr(skb),
+				   int : tcplsinfo_t *,
+				   sk ? sk->sk_state : TCP_CLOSE,
+				   int, sk ? sk->sk_state : TCP_CLOSE,
+				   int, DTRACE_NET_PROBE_INBOUND);
 	kfree_skb(skb);
 	return 0;
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index fec2907b85e8..00ba8e607eff 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -23,6 +23,7 @@
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 #include <linux/workqueue.h>
+#include <linux/sdt.h>
 #include <net/tcp.h>
 #include <net/inet_common.h>
 #include <net/xfrm.h>
@@ -340,6 +341,20 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		/* Linkage updates. */
 		__inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
 		inet_twsk_put(tw);
+
+		if (DTRACE_TCP_ENABLED(state__change) &&
+		    state != sk->sk_state)
+			DTRACE_TCP_NOCHECK(state__change,
+					   struct sk_buff * : pktinfo_t *, NULL,
+					   struct sock * : csinfo_t *, sk,
+					   __dtrace_tcp_void_ip_t * :
+					   ipinfo_t *, NULL,
+					   struct tcp_sock * : tcpsinfo_t *,
+					   tcp_sk(sk),
+					   struct tcphdr * : tcpinfo_t *, NULL,
+					   int : tcplsinfo_t *, sk->sk_state,
+					   int, state,
+					   int, DTRACE_NET_PROBE_OUTBOUND);
 	} else {
 		/* Sorry, if we're out of memory, just CLOSE this
 		 * socket up.  We've got bigger problems than
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1ea4322c3b0c..841fa0fd36fc 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -41,6 +41,7 @@
 #include <linux/compiler.h>
 #include <linux/gfp.h>
 #include <linux/module.h>
+#include <linux/sdt.h>
 
 /* People can turn this off for buggy TCP's found in printers etc. */
 int sysctl_tcp_retrans_collapse __read_mostly = 1;
@@ -1014,6 +1015,27 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (skb->len != tcp_header_size)
 		tcp_event_data_sent(tp, sk);
 
+	DTRACE_TCP(send,
+		   struct sk_buff * :  pktinfo_t *, skb,
+		   struct sock * : csinfo_t *, sk,
+		   __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb),
+		   struct tcp_sock * : tcpsinfo_t *, tp,
+		   struct tcphdr * : tcpinfo_t *, tcp_hdr(skb),
+		   int : tcplsinfo_t *, sk->sk_state, int, sk->sk_state,
+		   int, DTRACE_NET_PROBE_OUTBOUND);
+	if (DTRACE_TCP_ENABLED(connect__request) && th->syn &&
+	    th->ack_seq == 0)
+		DTRACE_TCP_NOCHECK(connect__request,
+				   struct sk_buff * : pktinfo_t *, skb,
+				   struct sock * : csinfo_t *, sk,
+				   __dtrace_tcp_void_ip_t * : ipinfo_t *,
+				   ip_hdr(skb),
+				   struct tcp_sock * : tcpsinfo_t *, tp,
+				   struct tcphdr * : tcpinfo_t *, th,
+				   int : tcplsinfo_t *, sk->sk_state,
+				   int, sk->sk_state,
+				   int, DTRACE_NET_PROBE_OUTBOUND);
+
 	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
 		TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
 			      tcp_skb_pcount(skb));
@@ -3251,6 +3273,13 @@ int tcp_connect(struct sock *sk)
 	tp->retrans_stamp = tcp_time_stamp;
 	tcp_connect_queue_skb(sk, buff);
 	tcp_ecn_send_syn(sk, buff);
+	DTRACE_TCP(state__change, struct sk_buff * : pktinfo_t *, NULL,
+		   struct sock * : csinfo_t *, sk,
+		   __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(buff),
+		   struct tcp_sock * : tcpsinfo_t *, tp,
+		   struct tcphdr * : tcpinfo_t *, tcp_hdr(buff),
+		   int : tcplsinfo_t *, TCP_CLOSE,
+		   int, sk->sk_state, int, DTRACE_NET_PROBE_OUTBOUND);
 
 	/* Send off SYN; include data in Fast Open. */
 	err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index a390174b96de..1f9bce3fd8f6 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -111,6 +111,7 @@
 #include <net/xfrm.h>
 #include <trace/events/udp.h>
 #include <linux/static_key.h>
+#include <linux/sdt.h>
 #include <trace/events/skb.h>
 #include <net/busy_poll.h>
 #include "udp_impl.h"
@@ -836,6 +837,13 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
 		uh->check = CSUM_MANGLED_0;
 
 send:
+	DTRACE_UDP(send,
+		   struct sk_buff * :  pktinfo_t *, skb,
+		   struct sock * : csinfo_t *, sk,
+		   void_ip_t * : ipinfo_t *, ip_hdr(skb),
+		   struct udp_sock * : udpsinfo_t *, udp_sk(sk),
+		   struct udphdr * : udpinfo_t *, uh);
+
 	err = ip_send_skb(sock_net(sk), skb);
 	if (err) {
 		if (err == -ENOBUFS && !inet->recverr) {
@@ -1314,9 +1322,16 @@ try_again:
 		goto out_free;
 	}
 
-	if (!peeked)
+	if (!peeked) {
+		DTRACE_UDP(receive,
+			   struct sk_buff * :  pktinfo_t *, skb,
+			   struct sock * : csinfo_t *, sk,
+			   void_ip_t * : ipinfo_t *, ip_hdr(skb),
+			   struct udp_sock * : udpsinfo_t *, udp_sk(sk),
+			   struct udphdr * : udpinfo_t *, udp_hdr(skb));
 		UDP_INC_STATS_USER(sock_net(sk),
 				UDP_MIB_INDATAGRAMS, is_udplite);
+	}
 
 	sock_recv_ts_and_drops(msg, sk, skb);
 
@@ -1529,6 +1544,15 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
 			ret = encap_rcv(sk, skb);
 			if (ret <= 0) {
+				DTRACE_UDP(receive,
+					   struct sk_buff * :  pktinfo_t *, skb,
+					   struct sock * : csinfo_t *, sk,
+					   void_ip_t * : ipinfo_t *,
+					   ip_hdr(skb),
+					   struct udp_sock * : udpsinfo_t *,
+					   udp_sk(sk),
+					   struct udphdr * : udpinfo_t *,
+					   udp_hdr(skb));
 				UDP_INC_STATS_BH(sock_net(sk),
 						 UDP_MIB_INDATAGRAMS,
 						 is_udplite);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c1147acbc8c4..b9ddb6665aa3 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -69,6 +69,7 @@
 
 #include <linux/crypto.h>
 #include <linux/scatterlist.h>
+#include <linux/sdt.h>
 
 static void	tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
 static void	tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
@@ -437,6 +438,20 @@ out:
 	sock_put(sk);
 }
 
+/* Since we want to trace send events in TCP prior to pushing the segment to
+ * IP - where the IP header is added - we need to construct an argument
+ * containing relevant IP info so that TCP probe consumers can utilize it.
+ * Unset header fields are zeroed so no stale stack data reaches consumers.
+ */
+static inline void dtrace_tcp_build_ipv6hdr(struct in6_addr *saddr,
+					    struct in6_addr *daddr,
+					    struct ipv6hdr *ip6h)
+{
+	*ip6h = (struct ipv6hdr) {
+		.version = 6, .payload_len = 0, .nexthdr = IPPROTO_TCP,
+		.saddr = *saddr, .daddr = *daddr,
+	};
+}
 
 static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct flowi *fl,
@@ -466,6 +481,32 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 
 		skb_set_queue_mapping(skb, queue_mapping);
 		rcu_read_lock();
+
+		if (DTRACE_TCP_ENABLED(send)) {
+			struct ipv6hdr ip6h;
+
+			dtrace_tcp_build_ipv6hdr(&ireq->ir_v6_loc_addr,
+						 &ireq->ir_v6_rmt_addr, &ip6h);
+
+			/* Do not supply tcp sk - addresses/ports are not
+			 * committed yet - instead translators will fill them
+			 * in from IP/TCP data.
+			 */
+			DTRACE_TCP_NOCHECK(send,
+					   struct sk_buff * :  pktinfo_t *,
+					   NULL,
+					   struct sock * : csinfo_t *, sk,
+					   __dtrace_tcp_void_ip_t * :
+					   ipinfo_t *, &ip6h,
+					   struct tcp_sock * : tcpsinfo_t *,
+					   NULL,
+					   struct tcphdr * : tcpinfo_t *,
+					   tcp_hdr(skb),
+					   int : tcplsinfo_t *, TCP_LISTEN,
+					   int, TCP_LISTEN,
+					   int, DTRACE_NET_PROBE_OUTBOUND);
+		}
+
 		err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
 			       np->tclass);
 		rcu_read_unlock();
@@ -819,6 +860,48 @@ static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq,
 	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
 	if (!IS_ERR(dst)) {
 		skb_dst_set(buff, dst);
+		if (DTRACE_TCP_ENABLED(send) ||
+		    DTRACE_TCP_ENABLED(accept__refused)) {
+			struct ipv6hdr ip6h;
+
+			dtrace_tcp_build_ipv6hdr(&fl6.saddr, &fl6.daddr,
+						 &ip6h);
+
+			/* Do not supply tcp sk - addresses/ports are not
+			 * committed yet - instead translators will fill them
+			 * in from IP/TCP data.
+			 */
+			DTRACE_TCP_NOCHECK(send,
+					   struct sk_buff * :  pktinfo_t *,
+					   NULL,
+					   struct sock * : csinfo_t *, NULL,
+					   __dtrace_tcp_void_ip_t * :
+					   ipinfo_t *, &ip6h,
+					   struct tcp_sock * : tcpsinfo_t *,
+					   NULL,
+					   struct tcphdr * : tcpinfo_t *, t1,
+					   int : tcplsinfo_t *, TCP_CLOSE,
+					   int, TCP_CLOSE,
+					   int, DTRACE_NET_PROBE_OUTBOUND);
+			if (rst && th->syn && th->ack == 0)
+				DTRACE_TCP_NOCHECK(accept__refused,
+						   struct sk_buff * :
+						   pktinfo_t *, NULL,
+						   struct sock * : csinfo_t *,
+						   NULL,
+						   __dtrace_tcp_void_ip_t * :
+						   ipinfo_t *, &ip6h,
+						   struct tcp_sock * :
+						   tcpsinfo_t *, NULL,
+						   struct tcphdr * :
+						   tcpinfo_t *, t1,
+						   int : tcplsinfo_t *,
+						   TCP_CLOSE,
+						   int, TCP_CLOSE,
+						   int,
+						   DTRACE_NET_PROBE_OUTBOUND);
+		}
+
 		ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
 		TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 		if (rst)
@@ -1369,7 +1452,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 {
 	const struct tcphdr *th;
 	const struct ipv6hdr *hdr;
-	struct sock *sk;
+	struct sock *sk = NULL;
 	int ret;
 	struct net *net = dev_net(skb->dev);
 
@@ -1402,6 +1485,15 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 	if (!sk)
 		goto no_tcp_socket;
 
+	DTRACE_TCP(receive,
+		   struct sk_buff * :  pktinfo_t *, skb,
+		   struct sock * : csinfo_t *, sk,
+		   __dtrace_tcp_void_ip_t * : ipinfo_t *, hdr,
+		   struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk),
+		   struct tcphdr * : tcpinfo_t *, th,
+		   int : tcplsinfo_t *, sk ? sk->sk_state : TCP_CLOSE,
+		   int, sk ? sk->sk_state : TCP_CLOSE,
+		   int, DTRACE_NET_PROBE_INBOUND);
 process:
 	if (sk->sk_state == TCP_TIME_WAIT)
 		goto do_time_wait;
@@ -1459,6 +1551,18 @@ bad_packet:
 	}
 
 discard_it:
+	if (DTRACE_TCP_ENABLED(receive) && skb->pkt_type == PACKET_HOST)
+		DTRACE_TCP_NOCHECK(receive,
+				   struct sk_buff * :  pktinfo_t *, skb,
+				   struct sock * : csinfo_t *, sk,
+				   __dtrace_tcp_void_ip_t * : ipinfo_t *,
+				   ipv6_hdr(skb),
+				   struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk),
+				   struct tcphdr * : tcpinfo_t *, tcp_hdr(skb),
+				   int : tcplsinfo_t *,
+				   sk ? sk->sk_state : TCP_CLOSE,
+				   int, sk ? sk->sk_state : TCP_CLOSE,
+				   int, DTRACE_NET_PROBE_INBOUND);
 	kfree_skb(skb);
 	return 0;
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7333f3575fc5..8deb823ada20 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -50,6 +50,7 @@
 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/sdt.h>
 #include <trace/events/skb.h>
 #include "udp_impl.h"
 
@@ -458,6 +459,12 @@ try_again:
 		goto out_free;
 	}
 	if (!peeked) {
+		DTRACE_UDP(receive,
+			   struct sk_buff * :  pktinfo_t *, skb,
+			   struct sock * : csinfo_t *, sk,
+			   void_ip_t * : ipinfo_t *, ip_hdr(skb),
+			   struct udp_sock * : udpsinfo_t *, udp_sk(sk),
+			   struct udphdr * : udpinfo_t *, udp_hdr(skb));
 		if (is_udp4)
 			UDP_INC_STATS_USER(sock_net(sk),
 					UDP_MIB_INDATAGRAMS, is_udplite);
@@ -653,6 +660,15 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
 			ret = encap_rcv(sk, skb);
 			if (ret <= 0) {
+				DTRACE_UDP(receive,
+					   struct sk_buff * :  pktinfo_t *, skb,
+					   struct sock * : csinfo_t *, sk,
+					   void_ip_t * : ipinfo_t *,
+					   ip_hdr(skb),
+					   struct udp_sock * : udpsinfo_t *,
+					   udp_sk(sk),
+					   struct udphdr * : udpinfo_t *,
+					   udp_hdr(skb));
 				UDP_INC_STATS_BH(sock_net(sk),
 						 UDP_MIB_INDATAGRAMS,
 						 is_udplite);
@@ -1058,6 +1074,13 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
 		uh->check = CSUM_MANGLED_0;
 
 send:
+	DTRACE_UDP(send,
+		   struct sk_buff * :  pktinfo_t *, skb,
+		   struct sock * : csinfo_t *, sk,
+		   void_ip_t * : ipinfo_t *, ip_hdr(skb),
+		   struct udp_sock * : udpsinfo_t *, udp_sk(sk),
+		   struct udphdr * : udpinfo_t *, uh);
+
 	err = ip6_send_skb(skb);
 	if (err) {
 		if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
-- 
2.50.1