DTrace tcp/udp provider probes

author Alan Maguire <alan.maguire@oracle.com>

Wed, 12 Apr 2017 21:34:56 +0000 (22:34 +0100)

committer Nick Alcock <nick.alcock@oracle.com>

Fri, 26 May 2017 00:06:04 +0000 (01:06 +0100)
author Alan Maguire <alan.maguire@oracle.com>
Wed, 12 Apr 2017 21:34:56 +0000 (22:34 +0100)
committer Nick Alcock <nick.alcock@oracle.com>
Fri, 26 May 2017 00:06:04 +0000 (01:06 +0100)
diff --git a/include/linux/sdt.h b/include/linux/sdt.h

index bab41ce09951a71a78dbf6d896682d38180ecd0d..d585ed612874ec40f2187c932ccbac8f92e9db17 100644 (file)
--- a/include/linux/sdt.h
+++ b/include/linux/sdt.h
@@ -113,14 +113,46 @@ typedef struct sdt_probedesc {
  #define        DTRACE_SMB(name, ...)                                           \
         DTRACE_PROBE(__smb_##name, ## __VA_ARGS__);
  
+/*
+ * These definitions are used at probe points to specify the traffic direction;
+ * this helps simplify argument translation.
+ */
+#define        DTRACE_NET_PROBE_OUTBOUND       0x0
+#define        DTRACE_NET_PROBE_INBOUND        0x1
+
  #define        DTRACE_IP(name, ...)                                            \
         DTRACE_PROBE(__ip_##name, ## __VA_ARGS__);
  
+/*
+ * Default DTRACE_TCP() and DTRACE_UDP() provider definitions specify the
+ * probe point within an is-enabled predicate.  This is to avoid the overhead
+ * incurred during argument dereferencing (e.g. calls to ip_hdr(skb)), along
+ * with any conditional evaluation (which would require branching) when the
+ * probe is disabled.
+ *
+ * Because some TCP probe points require additional argument preparation,
+ * we also define the is-enabled predicate directly as
+ * DTRACE_TCP_ENABLED(probename), along with DTRACE_TCP_NOCHECK(), a probe
+ * point which does not wrap the probe in an is-enabled predicate; this
+ * allows us to handle cases such as this:
+ *
+ * if (DTRACE_TCP_ENABLED(state__change)) {
+ *      ...argument preparation...
+ *      DTRACE_TCP_NOCHECK(state__change, ...);
+ * }
+ */
+
  #define        DTRACE_TCP(name, ...)                                           \
+       if (DTRACE_PROBE_ENABLED(__tcp_##name))                         \
+               DTRACE_PROBE(__tcp_##name, ## __VA_ARGS__)
+#define        DTRACE_TCP_ENABLED(name)                                        \
+       DTRACE_PROBE_ENABLED(__tcp_##name)
+#define        DTRACE_TCP_NOCHECK(name, ...)                                   \
         DTRACE_PROBE(__tcp_##name, ## __VA_ARGS__);
  
  #define        DTRACE_UDP(name, ...)                                           \
-       DTRACE_PROBE(__udp_##name, ## __VA_ARGS__);
+       if (DTRACE_PROBE_ENABLED(__udp_##name))                         \
+               DTRACE_PROBE(__udp_##name, ## __VA_ARGS__);
  
  #define        DTRACE_SYSEVENT(name, ...)                                      \
         DTRACE_PROBE(__sysevent_##name, ## __VA_ARGS__);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c

index b5f4f5cb452e8aa85b1e75541df37d9eb510e964..8bdd39ba2950a6af0602916f1ee79e31f4b1bbbf 100644 (file)
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -269,6 +269,7 @@
  #include <linux/crypto.h>
  #include <linux/time.h>
  #include <linux/slab.h>
+#include <linux/sdt.h>
  
  #include <net/icmp.h>
  #include <net/inet_common.h>
@@ -1874,6 +1875,19 @@ recv_sndq:
  }
  EXPORT_SYMBOL(tcp_recvmsg);
  
+/* We wish to avoid instrumenting TCP state transitions to SYN_SENT as we trace
+ * those state changes later once the destination address is committed to the
+ * sk.  We also need to deal with the fact that separate timewait sockets are
+ * used to handle the TIME_WAIT state.  We do not want to trace direct
+ * transitions from CLOSING/FIN_WAIT2 -> CLOSE since they do not represent
+ * connection close, rather a transition to using the timewait socket.
+ * Accordingly skip instrumentation of transitions from CLOSING/FIN_WAIT2 to
+ * CLOSE.
+ *
+ * Evaluates to non-zero when the old -> new transition should be traced.
+ * Both arguments are expanded more than once, so callers must pass
+ * side-effect-free expressions.
+ */
+#define        REAL_STATE_CHANGE(old, new)                                     \
+       ((old) != (new) && (new) != TCP_SYN_SENT &&                     \
+        (((old) != TCP_CLOSING && (old) != TCP_FIN_WAIT2) ||           \
+         (new) != TCP_CLOSE))
+
  void tcp_set_state(struct sock *sk, int state)
  {
         int oldstate = sk->sk_state;
@@ -1903,6 +1917,18 @@ void tcp_set_state(struct sock *sk, int state)
          */
         sk->sk_state = state;
  
+       if (DTRACE_TCP_ENABLED(state__change) &&
+           REAL_STATE_CHANGE(oldstate, state))
+               DTRACE_TCP_NOCHECK(state__change,
+                                  struct sk_buff * : pktinfo_t *, NULL,
+                                  struct sock * : csinfo_t *, sk,
+                                  __dtrace_tcp_void_ip_t * : ipinfo_t *, NULL,
+                                  struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk),
+                                  struct tcphdr * : tcpinfo_t *, NULL,
+                                  int : tcplsinfo_t *, oldstate,
+                                  int : int, state,
+                                  int : int, DTRACE_NET_PROBE_OUTBOUND);
+
  #ifdef STATE_TRACE
         SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
  #endif
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index 7b1fc955b6b6a9930e49efca379cd5abdd1f37de..76d24c510c6634d915b2457528193a181086839c 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -75,6 +75,7 @@
  #include <linux/ipsec.h>
  #include <asm/unaligned.h>
  #include <linux/errqueue.h>
+#include <linux/sdt.h>
  
  int sysctl_tcp_timestamps __read_mostly = 1;
  int sysctl_tcp_window_scaling __read_mostly = 1;
@@ -5377,6 +5378,14 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
         struct tcp_sock *tp = tcp_sk(sk);
         struct inet_connection_sock *icsk = inet_csk(sk);
  
+       DTRACE_TCP(connect__established,
+                  struct sk_buff * :  pktinfo_t *, skb,
+                  struct sock * : csinfo_t *, sk,
+                  __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb),
+                  struct tcp_sock * : tcpsinfo_t *, tp,
+                  struct tcphdr * : tcpinfo_t *, tcp_hdr(skb),
+                  int : tcplsinfo_t *, TCP_ESTABLISHED,
+                  int, TCP_ESTABLISHED, int, DTRACE_NET_PROBE_INBOUND);
         tcp_set_state(sk, TCP_ESTABLISHED);
  
         if (skb) {
@@ -5507,6 +5516,17 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                  */
  
                 if (th->rst) {
+                       DTRACE_TCP(connect__refused,
+                                  struct sk_buff * : pktinfo_t *, skb,
+                                  struct sock * : csinfo_t *, sk,
+                                  __dtrace_tcp_void_ip_t * : ipinfo_t *,
+                                  ip_hdr(skb),
+                                  struct tcp_sock * : tcpsinfo_t *, tp,
+                                  struct tcphdr * : tcpinfo_t *, th,
+                                  int : tcplsinfo_t *,
+                                  sk ? sk->sk_state : TCP_CLOSE,
+                                  int, sk ? sk->sk_state : TCP_CLOSE,
+                                  int, DTRACE_NET_PROBE_INBOUND);
                         tcp_reset(sk);
                         goto discard;
                 }
@@ -5798,6 +5818,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                         tcp_init_buffer_space(sk);
                 }
                 smp_mb();
+
+               DTRACE_TCP(accept__established,
+                          struct sk_buff * :  pktinfo_t *, skb,
+                          struct sock * : csinfo_t *, sk,
+                          __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb),
+                          struct tcp_sock * : tcpsinfo_t *, tp,
+                          struct tcphdr * : tcpinfo_t *, tcp_hdr(skb),
+                          int : tcplsinfo_t *, TCP_ESTABLISHED,
+                          int, TCP_ESTABLISHED,
+                          int, DTRACE_NET_PROBE_INBOUND);
                 tcp_set_state(sk, TCP_ESTABLISHED);
                 sk->sk_state_change(sk);
  
@@ -6208,6 +6238,20 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                    tcp_try_fastopen(sk, skb, req, &foc, dst);
         err = af_ops->send_synack(sk, dst, &fl, req,
                                   skb_get_queue_mapping(skb), &foc);
+
+       /* Do not pass in tcp sock as ports/addresses are not yet
+        * populated - instead translators will fill them in from
+        * skb data.
+        */
+       DTRACE_TCP(state__change,
+                  struct sk_buff * : pktinfo_t *, skb,
+                  struct sock * : csinfo_t *, sk,
+                  __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb),
+                  struct tcp_sock * : tcpsinfo_t *, NULL,
+                  struct tcphdr * : tcpinfo_t *, tcp_hdr(skb),
+                  int : tcplsinfo_t *, TCP_LISTEN,
+                  int, TCP_SYN_RECV, int, DTRACE_NET_PROBE_INBOUND);
+
         if (!fastopen) {
                 if (err || want_cookie)
                         goto drop_and_free;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c

index aa4d525bbce8538acaad6fe908bfab97508af023..f8ecc36ce190d667477db74db06b208d4b470fd1 100644 (file)
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -84,6 +84,7 @@
  
  #include <linux/crypto.h>
  #include <linux/scatterlist.h>
+#include <linux/sdt.h>
  
  int sysctl_tcp_tw_reuse __read_mostly;
  int sysctl_tcp_low_latency __read_mostly;
@@ -563,6 +564,21 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
  }
  EXPORT_SYMBOL(tcp_v4_send_check);
  
+/* Since we want to trace send events in TCP prior to pushing the segment to
+ * IP - where the IP header is added - we need to construct an argument
+ * containing relevant IP info so that TCP probe consumers can utilize it.
+ *
+ * @saddr: source address to store in the header
+ * @daddr: destination address to store in the header
+ * @iph:   caller-provided header to fill in.  Callers hand in an
+ *         uninitialised on-stack struct, so every field not set explicitly
+ *         below is zeroed rather than left holding stale stack contents.
+ */
+static inline void dtrace_tcp_build_iphdr(__be32 saddr, __be32 daddr,
+                                         struct iphdr *iph)
+{
+       /* NOTE(review): tot_len keeps its original value of 5, which is
+        * neither byte-swapped nor a plausible total length in bytes -
+        * confirm the intended value against the probe translators.
+        */
+       *iph = (struct iphdr) {
+               .version  = 4,
+               .ihl      = 5,
+               .tot_len  = 5,
+               .protocol = IPPROTO_TCP,
+               .saddr    = saddr,
+               .daddr    = daddr,
+       };
+}
+
  /*
   *     This routine will send an RST to the other tcp.
   *
@@ -684,6 +700,39 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
                 arg.bound_dev_if = sk->sk_bound_dev_if;
  
         arg.tos = ip_hdr(skb)->tos;
+
+       if (DTRACE_TCP_ENABLED(send) ||
+           DTRACE_TCP_ENABLED(accept__refused)) {
+               struct iphdr iph;
+
+               dtrace_tcp_build_iphdr(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr,
+                                      &iph);
+
+               DTRACE_TCP_NOCHECK(send,
+                                  struct sk_buff * : pktinfo_t *, NULL,
+                                  struct sock * : csinfo_t *, NULL,
+                                  __dtrace_tcp_void_ip_t * : ipinfo_t *, &iph,
+                                  struct tcp_sock * : tcpsinfo_t *, NULL,
+                                  struct tcphdr * : tcpinfo_t *, &rep.th,
+                                  int : tcplsinfo_t *, TCP_CLOSE,
+                                  int : int, TCP_CLOSE,
+                                  int : int, DTRACE_NET_PROBE_OUTBOUND);
+               if (th->syn && rep.th.seq == 0)
+                       DTRACE_TCP_NOCHECK(accept__refused,
+                                          struct sk_buff * : pktinfo_t *, NULL,
+                                          struct sock * : csinfo_t *, NULL,
+                                          __dtrace_tcp_void_ip_t * :
+                                          ipinfo_t *, &iph,
+                                          struct tcp_sock * : tcpsinfo_t *,
+                                          NULL,
+                                          struct tcphdr * : tcpinfo_t *,
+                                          &rep.th,
+                                          int : tcplsinfo_t *, TCP_CLOSE,
+                                          int : int, TCP_CLOSE,
+                                          int : int,
+                                          DTRACE_NET_PROBE_OUTBOUND);
+       }
+
         ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
                               skb, &TCP_SKB_CB(skb)->header.h4.opt,
                               ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
@@ -769,6 +818,24 @@ static void tcp_v4_send_ack(struct net *net,
         if (oif)
                 arg.bound_dev_if = oif;
         arg.tos = tos;
+
+       if (DTRACE_TCP_ENABLED(send)) {
+               struct iphdr iph;
+
+               dtrace_tcp_build_iphdr(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr,
+                                      &iph);
+
+               DTRACE_TCP_NOCHECK(send,
+                                  struct sk_buff * : pktinfo_t *, NULL,
+                                  struct sock * : csinfo_t *, NULL,
+                                  __dtrace_tcp_void_ip_t * : ipinfo_t *, &iph,
+                                  struct tcp_sock * : tcpsinfo_t *, NULL,
+                                  struct tcphdr * : tcpinfo_t *, &rep,
+                                  int : tcplsinfo_t *, TCP_CLOSE,
+                                  int : int, TCP_CLOSE,
+                                  int : int, DTRACE_NET_PROBE_OUTBOUND);
+       }
+
         ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
                               skb, &TCP_SKB_CB(skb)->header.h4.opt,
                               ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
@@ -842,6 +909,31 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
                 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
  
                 skb_set_queue_mapping(skb, queue_mapping);
+
+               if (DTRACE_TCP_ENABLED(send)) {
+                       struct iphdr iph;
+
+                       dtrace_tcp_build_iphdr(ireq->ir_loc_addr,
+                                              ireq->ir_rmt_addr, &iph);
+
+                       /* Do not supply tcp sk - addresses/ports are not
+                        * committed yet - instead translators will fill them
+                        * in from skb/IP info.
+                        */
+                       DTRACE_TCP_NOCHECK(send,
+                                          struct sk_buff * :  pktinfo_t *, skb,
+                                          struct sock * : csinfo_t *, sk,
+                                          __dtrace_tcp_void_ip_t * :
+                                          ipinfo_t *, &iph,
+                                          struct tcp_sock * : tcpsinfo_t *,
+                                          NULL,
+                                          struct tcphdr * : tcpinfo_t *,
+                                          tcp_hdr(skb),
+                                          int : tcplsinfo_t *, TCP_LISTEN,
+                                          int, TCP_LISTEN,
+                                          int, DTRACE_NET_PROBE_OUTBOUND);
+               }
+
                 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
                                             ireq->ir_rmt_addr,
                                             ireq->opt);
@@ -1547,7 +1639,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
  {
         const struct iphdr *iph;
         const struct tcphdr *th;
-       struct sock *sk;
+       struct sock *sk = NULL;
         int ret;
         struct net *net = dev_net(skb->dev);
  
@@ -1597,6 +1689,15 @@ int tcp_v4_rcv(struct sk_buff *skb)
         if (!sk)
                 goto no_tcp_socket;
  
+       DTRACE_TCP(receive,
+                  struct sk_buff * :  pktinfo_t *, skb,
+                  struct sock * : csinfo_t *, sk,
+                  __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb),
+                  struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk),
+                  struct tcphdr * : tcpinfo_t *, tcp_hdr(skb),
+                  int : tcplsinfo_t *, sk ? sk->sk_state : TCP_CLOSE,
+                  int, sk ? sk->sk_state : TCP_CLOSE,
+                  int, DTRACE_NET_PROBE_INBOUND);
  process:
         if (sk->sk_state == TCP_TIME_WAIT)
                 goto do_time_wait;
@@ -1660,6 +1761,18 @@ bad_packet:
  
  discard_it:
         /* Discard frame. */
+       if (DTRACE_TCP_ENABLED(receive) && skb->pkt_type == PACKET_HOST)
+               DTRACE_TCP_NOCHECK(receive,
+                                  struct sk_buff * :  pktinfo_t *, skb,
+                                  struct sock * : csinfo_t *, sk,
+                                  __dtrace_tcp_void_ip_t * : ipinfo_t *,
+                                  ip_hdr(skb),
+                                  struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk),
+                                  struct tcphdr * : tcpinfo_t *, tcp_hdr(skb),
+                                  int : tcplsinfo_t *,
+                                  sk ? sk->sk_state : TCP_CLOSE,
+                                  int, sk ? sk->sk_state : TCP_CLOSE,
+                                  int, DTRACE_NET_PROBE_INBOUND);
         kfree_skb(skb);
         return 0;
  
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c

index fec2907b85e81e0096b6362d87269c380115c46c..00ba8e607effd257e0762ecd94ee61cafb6bb0b6 100644 (file)
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -23,6 +23,7 @@
  #include <linux/slab.h>
  #include <linux/sysctl.h>
  #include <linux/workqueue.h>
+#include <linux/sdt.h>
  #include <net/tcp.h>
  #include <net/inet_common.h>
  #include <net/xfrm.h>
@@ -340,6 +341,20 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
                 /* Linkage updates. */
                 __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
                 inet_twsk_put(tw);
+
+               if (DTRACE_TCP_ENABLED(state__change) &&
+                   state != sk->sk_state)
+                       DTRACE_TCP_NOCHECK(state__change,
+                                          struct sk_buff * : pktinfo_t *, NULL,
+                                          struct sock * : csinfo_t *, sk,
+                                          __dtrace_tcp_void_ip_t * :
+                                          ipinfo_t *, NULL,
+                                          struct tcp_sock * : tcpsinfo_t *,
+                                          tcp_sk(sk),
+                                          struct tcphdr * : tcpinfo_t *, NULL,
+                                          int : tcplsinfo_t *, sk->sk_state,
+                                          int, state,
+                                          int, DTRACE_NET_PROBE_OUTBOUND);
         } else {
                 /* Sorry, if we're out of memory, just CLOSE this
                  * socket up.  We've got bigger problems than
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c

index 1ea4322c3b0c9a70ea30738953f787346ebed54a..841fa0fd36fc2082f3a3510e9dda974a31c500fc 100644 (file)
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -41,6 +41,7 @@
  #include <linux/compiler.h>
  #include <linux/gfp.h>
  #include <linux/module.h>
+#include <linux/sdt.h>
  
  /* People can turn this off for buggy TCP's found in printers etc. */
  int sysctl_tcp_retrans_collapse __read_mostly = 1;
@@ -1014,6 +1015,27 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
         if (skb->len != tcp_header_size)
                 tcp_event_data_sent(tp, sk);
  
+       DTRACE_TCP(send,
+                  struct sk_buff * :  pktinfo_t *, skb,
+                  struct sock * : csinfo_t *, sk,
+                  __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb),
+                  struct tcp_sock * : tcpsinfo_t *, tp,
+                  struct tcphdr * : tcpinfo_t *, tcp_hdr(skb),
+                  int : tcplsinfo_t *, sk->sk_state, int, sk->sk_state,
+                  int, DTRACE_NET_PROBE_OUTBOUND);
+       if (DTRACE_TCP_ENABLED(connect__request) && th->syn &&
+           th->ack_seq == 0)
+               DTRACE_TCP_NOCHECK(connect__request,
+                                  struct sk_buff * : pktinfo_t *, skb,
+                                  struct sock * : csinfo_t *, sk,
+                                  __dtrace_tcp_void_ip_t * : ipinfo_t *,
+                                  ip_hdr(skb),
+                                  struct tcp_sock * : tcpsinfo_t *, tp,
+                                  struct tcphdr * : tcpinfo_t *, th,
+                                  int : tcplsinfo_t *, sk->sk_state,
+                                  int, sk->sk_state,
+                                  int, DTRACE_NET_PROBE_OUTBOUND);
+
         if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
                 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
                               tcp_skb_pcount(skb));
@@ -3251,6 +3273,13 @@ int tcp_connect(struct sock *sk)
         tp->retrans_stamp = tcp_time_stamp;
         tcp_connect_queue_skb(sk, buff);
         tcp_ecn_send_syn(sk, buff);
+       DTRACE_TCP(state__change, struct sk_buff * : pktinfo_t *, NULL,
+                  struct sock * : csinfo_t *, sk,
+                  __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(buff),
+                  struct tcp_sock * : tcpsinfo_t *, tp,
+                  struct tcphdr * : tcpinfo_t *, tcp_hdr(buff),
+                  int : tcplsinfo_t *, TCP_CLOSE,
+                  int, sk->sk_state, int, DTRACE_NET_PROBE_OUTBOUND);
  
         /* Send off SYN; include data in Fast Open. */
         err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c

index a390174b96de6b09a9b49f26ea83e69b91e63cfb..1f9bce3fd8f6f6044c4df4c3a7884c772198f5b8 100644 (file)
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -111,6 +111,7 @@
  #include <net/xfrm.h>
  #include <trace/events/udp.h>
  #include <linux/static_key.h>
+#include <linux/sdt.h>
  #include <trace/events/skb.h>
  #include <net/busy_poll.h>
  #include "udp_impl.h"
@@ -836,6 +837,13 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
                 uh->check = CSUM_MANGLED_0;
  
  send:
+       DTRACE_UDP(send,
+                  struct sk_buff * :  pktinfo_t *, skb,
+                  struct sock * : csinfo_t *, sk,
+                  void_ip_t * : ipinfo_t *, ip_hdr(skb),
+                  struct udp_sock * : udpsinfo_t *, udp_sk(sk),
+                  struct udphdr * : udpinfo_t *, uh);
+
         err = ip_send_skb(sock_net(sk), skb);
         if (err) {
                 if (err == -ENOBUFS && !inet->recverr) {
@@ -1314,9 +1322,16 @@ try_again:
                 goto out_free;
         }
  
-       if (!peeked)
+       if (!peeked) {
+               DTRACE_UDP(receive,
+                          struct sk_buff * :  pktinfo_t *, skb,
+                          struct sock * : csinfo_t *, sk,
+                          void_ip_t * : ipinfo_t *, ip_hdr(skb),
+                          struct udp_sock * : udpsinfo_t *, udp_sk(sk),
+                          struct udphdr * : udpinfo_t *, udp_hdr(skb));
                 UDP_INC_STATS_USER(sock_net(sk),
                                 UDP_MIB_INDATAGRAMS, is_udplite);
+       }
  
         sock_recv_ts_and_drops(msg, sk, skb);
  
@@ -1529,6 +1544,15 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
  
                         ret = encap_rcv(sk, skb);
                         if (ret <= 0) {
+                               DTRACE_UDP(receive,
+                                          struct sk_buff * :  pktinfo_t *, skb,
+                                          struct sock * : csinfo_t *, sk,
+                                          void_ip_t * : ipinfo_t *,
+                                          ip_hdr(skb),
+                                          struct udp_sock * : udpsinfo_t *,
+                                          udp_sk(sk),
+                                          struct udphdr * : udpinfo_t *,
+                                          udp_hdr(skb));
                                 UDP_INC_STATS_BH(sock_net(sk),
                                                  UDP_MIB_INDATAGRAMS,
                                                  is_udplite);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c

index c1147acbc8c49f6c7995672f00930366fd6b1713..b9ddb6665aa3ae97622c7ccd8bcbe7beeddd87d7 100644 (file)
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -69,6 +69,7 @@
  
  #include <linux/crypto.h>
  #include <linux/scatterlist.h>
+#include <linux/sdt.h>
  
  static void    tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
  static void    tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
@@ -437,6 +438,20 @@ out:
         sock_put(sk);
  }
  
+/* Since we want to trace send events in TCP prior to pushing the segment to
+ * IP - where the IP header is added - we need to construct an argument
+ * containing relevant IP info so that TCP probe consumers can utilize it.
+ *
+ * @saddr: source address to copy into the header (read only)
+ * @daddr: destination address to copy into the header (read only)
+ * @ip6h:  caller-provided header to fill in.  Callers hand in an
+ *         uninitialised on-stack struct, so every field not set explicitly
+ *         below is zeroed rather than left holding stale stack contents.
+ */
+static inline void dtrace_tcp_build_ipv6hdr(const struct in6_addr *saddr,
+                                           const struct in6_addr *daddr,
+                                           struct ipv6hdr *ip6h)
+{
+       *ip6h = (struct ipv6hdr) {
+               .version     = 6,
+               .payload_len = 0,
+               .nexthdr     = IPPROTO_TCP,
+               .saddr       = *saddr,
+               .daddr       = *daddr,
+       };
+}
  
  static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
                               struct flowi *fl,
@@ -466,6 +481,32 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
  
                 skb_set_queue_mapping(skb, queue_mapping);
                 rcu_read_lock();
+
+               if (DTRACE_TCP_ENABLED(send)) {
+                       struct ipv6hdr ip6h;
+
+                       dtrace_tcp_build_ipv6hdr(&ireq->ir_v6_loc_addr,
+                                                &ireq->ir_v6_rmt_addr, &ip6h);
+
+                       /* Do not supply tcp sk - addresses/ports are not
+                        * committed yet - instead translators will fill them
+                        * in from IP/TCP data.
+                        */
+                       DTRACE_TCP_NOCHECK(send,
+                                          struct sk_buff * :  pktinfo_t *,
+                                          NULL,
+                                          struct sock * : csinfo_t *, sk,
+                                          __dtrace_tcp_void_ip_t * :
+                                          ipinfo_t *, &ip6h,
+                                          struct tcp_sock * : tcpsinfo_t *,
+                                          NULL,
+                                          struct tcphdr * : tcpinfo_t *,
+                                          tcp_hdr(skb),
+                                          int : tcplsinfo_t *, TCP_LISTEN,
+                                          int, TCP_LISTEN,
+                                          int, DTRACE_NET_PROBE_OUTBOUND);
+               }
+
                 err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
                                np->tclass);
                 rcu_read_unlock();
@@ -819,6 +860,48 @@ static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq,
         dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
         if (!IS_ERR(dst)) {
                 skb_dst_set(buff, dst);
+               if (DTRACE_TCP_ENABLED(send) ||
+                   DTRACE_TCP_ENABLED(accept__refused)) {
+                       struct ipv6hdr ip6h;
+
+                       dtrace_tcp_build_ipv6hdr(&fl6.saddr, &fl6.daddr,
+                                                &ip6h);
+
+                       /* Do not supply tcp sk - addresses/ports are not
+                        * committed yet - instead translators will fill them
+                        * in from IP/TCP data.
+                        */
+                       DTRACE_TCP_NOCHECK(send,
+                                          struct sk_buff * :  pktinfo_t *,
+                                          NULL,
+                                          struct sock * : csinfo_t *, NULL,
+                                          __dtrace_tcp_void_ip_t * :
+                                          ipinfo_t *, &ip6h,
+                                          struct tcp_sock * : tcpsinfo_t *,
+                                          NULL,
+                                          struct tcphdr * : tcpinfo_t *, t1,
+                                          int : tcplsinfo_t *, TCP_CLOSE,
+                                          int, TCP_CLOSE,
+                                          int, DTRACE_NET_PROBE_OUTBOUND);
+                       if (rst && th->syn && th->ack == 0)
+                               DTRACE_TCP_NOCHECK(accept__refused,
+                                                  struct sk_buff * :
+                                                  pktinfo_t *, NULL,
+                                                  struct sock * : csinfo_t *,
+                                                  NULL,
+                                                  __dtrace_tcp_void_ip_t * :
+                                                  ipinfo_t *, &ip6h,
+                                                  struct tcp_sock * :
+                                                  tcpsinfo_t *, NULL,
+                                                  struct tcphdr * :
+                                                  tcpinfo_t *, t1,
+                                                  int : tcplsinfo_t *,
+                                                  TCP_CLOSE,
+                                                  int, TCP_CLOSE,
+                                                  int,
+                                                  DTRACE_NET_PROBE_OUTBOUND);
+               }
+
                 ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
                 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
                 if (rst)
@@ -1369,7 +1452,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
  {
         const struct tcphdr *th;
         const struct ipv6hdr *hdr;
-       struct sock *sk;
+       struct sock *sk = NULL;
         int ret;
         struct net *net = dev_net(skb->dev);
  
@@ -1402,6 +1485,15 @@ static int tcp_v6_rcv(struct sk_buff *skb)
         if (!sk)
                 goto no_tcp_socket;
  
+       DTRACE_TCP(receive,
+                  struct sk_buff * :  pktinfo_t *, skb,
+                  struct sock * : csinfo_t *, sk,
+                  __dtrace_tcp_void_ip_t * : ipinfo_t *, hdr,
+                  struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk),
+                  struct tcphdr * : tcpinfo_t *, th,
+                  int : tcplsinfo_t *, sk ? sk->sk_state : TCP_CLOSE,
+                  int, sk ? sk->sk_state : TCP_CLOSE,
+                  int, DTRACE_NET_PROBE_INBOUND);
  process:
         if (sk->sk_state == TCP_TIME_WAIT)
                 goto do_time_wait;
@@ -1459,6 +1551,18 @@ bad_packet:
         }
  
  discard_it:
+       if (DTRACE_TCP_ENABLED(receive) && skb->pkt_type == PACKET_HOST)
+               DTRACE_TCP_NOCHECK(receive,
+                                  struct sk_buff * :  pktinfo_t *, skb,
+                                  struct sock * : csinfo_t *, sk,
+                                  __dtrace_tcp_void_ip_t * : ipinfo_t *,
+                                  ipv6_hdr(skb),
+                                  struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk),
+                                  struct tcphdr * : tcpinfo_t *, tcp_hdr(skb),
+                                  int : tcplsinfo_t *,
+                                  sk ? sk->sk_state : TCP_CLOSE,
+                                  int, sk ? sk->sk_state : TCP_CLOSE,
+                                  int, DTRACE_NET_PROBE_INBOUND);
         kfree_skb(skb);
         return 0;
  
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c

index 7333f3575fc54813c617c47a883d09d7b5f2f6e3..8deb823ada2035435e4c7b4bf785f8ee8b563946 100644 (file)
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -50,6 +50,7 @@
  
  #include <linux/proc_fs.h>
  #include <linux/seq_file.h>
+#include <linux/sdt.h>
  #include <trace/events/skb.h>
  #include "udp_impl.h"
  
@@ -458,6 +459,12 @@ try_again:
                 goto out_free;
         }
         if (!peeked) {
+               DTRACE_UDP(receive,
+                          struct sk_buff * :  pktinfo_t *, skb,
+                          struct sock * : csinfo_t *, sk,
+                          void_ip_t * : ipinfo_t *, ip_hdr(skb),
+                          struct udp_sock * : udpsinfo_t *, udp_sk(sk),
+                          struct udphdr * : udpinfo_t *, udp_hdr(skb));
                 if (is_udp4)
                         UDP_INC_STATS_USER(sock_net(sk),
                                         UDP_MIB_INDATAGRAMS, is_udplite);
@@ -653,6 +660,15 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
  
                         ret = encap_rcv(sk, skb);
                         if (ret <= 0) {
+                               DTRACE_UDP(receive,
+                                          struct sk_buff * :  pktinfo_t *, skb,
+                                          struct sock * : csinfo_t *, sk,
+                                          void_ip_t * : ipinfo_t *,
+                                          ip_hdr(skb),
+                                          struct udp_sock * : udpsinfo_t *,
+                                          udp_sk(sk),
+                                          struct udphdr * : udpinfo_t *,
+                                          udp_hdr(skb));
                                 UDP_INC_STATS_BH(sock_net(sk),
                                                  UDP_MIB_INDATAGRAMS,
                                                  is_udplite);
@@ -1058,6 +1074,13 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6)
                 uh->check = CSUM_MANGLED_0;
  
  send:
+       DTRACE_UDP(send,
+                  struct sk_buff * :  pktinfo_t *, skb,
+                  struct sock * : csinfo_t *, sk,
+                  void_ip_t * : ipinfo_t *, ip_hdr(skb),
+                  struct udp_sock * : udpsinfo_t *, udp_sk(sk),
+                  struct udphdr * : udpinfo_t *, uh);
+
         err = ip6_send_skb(skb);
         if (err) {
                 if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
author	Alan Maguire <alan.maguire@oracle.com>
	Wed, 12 Apr 2017 21:34:56 +0000 (22:34 +0100)
committer	Nick Alcock <nick.alcock@oracle.com>
	Fri, 26 May 2017 00:06:04 +0000 (01:06 +0100)
include/linux/sdt.h		patch \| blob \| history
net/ipv4/tcp.c		patch \| blob \| history
net/ipv4/tcp_input.c		patch \| blob \| history
net/ipv4/tcp_ipv4.c		patch \| blob \| history
net/ipv4/tcp_minisocks.c		patch \| blob \| history
net/ipv4/tcp_output.c		patch \| blob \| history
net/ipv4/udp.c		patch \| blob \| history
net/ipv6/tcp_ipv6.c		patch \| blob \| history
net/ipv6/udp.c		patch \| blob \| history