From bb2b6be8283dfe91704024128f971be7903a8474 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 12 Apr 2017 22:34:56 +0100 Subject: [PATCH] DTrace tcp/udp provider probes MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This patch adds DTrace SDT probes for the TCP and UDP protocols. For tcp the following probes are added: tcp:::send Fires when a tcp segment is transmitted tcp:::receive Fires when a tcp segment is received tcp:::state-change Fires when a tcp connection changes state tcp:::connect-request Fires when a SYN segment is sent tcp:::connect-refused Fires when a RST is received for connection attempt tcp:::connect-established Fires when three-way handshake completes for initiator tcp:::accept-refused Fires when a RST is sent refusing connection attempt tcp:::accept-established Fires when three-way handshake succeeds for acceptor Arguments for all of these probes are: arg0 struct sk_buff *; to be translated into pktinfo_t * containing implementation-independent packet data arg1 struct sock *; to be translated into csinfo_t * containing implementation-independent connection data arg2 __dtrace_tcp_void_ip_t *; to be translated into ipinfo_t * containing implementation-independent IP information. Custom type is used as this gives DTrace a hint that we can source IP information from other arguments if the IP header is not available. arg3 struct tcp_sock *; to be translated into tcpsinfo_t * containing implementation-independent TCP connection data arg4 struct tcphdr *; to be translated into a tcpinfo_t * containing implementation-independent TCP header data arg5 int representing previous state; to be translated into a tcplsinfo_t * which contains the previous state. Differs from current state (arg6) for state-change probes only. arg6 int representing current state. 
Cannot be sourced from struct tcp_sock as we sometimes need to probe before state change is reflected there arg7 int representing direction of traffic for probe; values are DTRACE_NET_PROBE_INBOUND for receipt of data and DTRACE_NET_PROBE_OUTBOUND for transmission. For udp the following probes are added: udp:::send Fires when a udp datagram is sent udp:::receive Fires when a udp datagram is received Arguments for these probes are: arg0 struct sk_buff *; to be translated into pktinfo_t * containing implementation-independent packet data arg1 struct sock *; to be translated into csinfo_t * containing implementation-independent connection data arg2 void_ip_t *; to be translated into ipinfo_t * containing implementation-independent IP information. arg3 struct udp_sock *; to be translated into a udpsinfo_t * containing implementation-independent UDP connection data arg4 struct udphdr *; to be translated into a udpinfo_t * containing implementation-independent UDP header information. Orabug: 25815197 Signed-off-by: Alan Maguire Reviewed-by: Rao Shoaib Acked-by: Håkon Bugge --- include/linux/sdt.h | 34 +++++++++++- net/ipv4/tcp.c | 26 +++++++++ net/ipv4/tcp_input.c | 44 +++++++++++++++ net/ipv4/tcp_ipv4.c | 115 ++++++++++++++++++++++++++++++++++++++- net/ipv4/tcp_minisocks.c | 15 +++++ net/ipv4/tcp_output.c | 29 ++++++++++ net/ipv4/udp.c | 26 ++++++++- net/ipv6/tcp_ipv6.c | 106 +++++++++++++++++++++++++++++++++++- net/ipv6/udp.c | 23 ++++++++ 9 files changed, 414 insertions(+), 4 deletions(-) diff --git a/include/linux/sdt.h b/include/linux/sdt.h index bab41ce09951..d585ed612874 100644 --- a/include/linux/sdt.h +++ b/include/linux/sdt.h @@ -113,14 +113,46 @@ typedef struct sdt_probedesc { #define DTRACE_SMB(name, ...) \ DTRACE_PROBE(__smb_##name, ## __VA_ARGS__); +/* + * These definitions are used at probe points to specify the traffic direction; + * this helps simplify argument translation. 
+ */ +#define DTRACE_NET_PROBE_OUTBOUND 0x0 +#define DTRACE_NET_PROBE_INBOUND 0x1 + #define DTRACE_IP(name, ...) \ DTRACE_PROBE(__ip_##name, ## __VA_ARGS__); +/* + * Default DTRACE_TCP() and DTRACE_UDP() provider definitions specify the + * probe point within an is-enabled predicate. This is to avoid the overhead + * incurred during argument dereferencing (e.g. calls to ip_hdr(skb)), along + * with any conditional evaluation (which would require branching) when the + * probe is disabled. Each expands to a single do/while (0) statement. + * + * Because some TCP probe points require additional argument preparation, + * we also define the is-enabled predicate directly as + * DTRACE_TCP_ENABLED(probename) along with a probe point which does not wrap + * the probe in an is-enabled predicate; this allows us to handle cases such + * as this: + * + * if (DTRACE_TCP_ENABLED(state__change)) { + * ...argument preparation... + * DTRACE_TCP_NOCHECK(state__change, ...); + * } + */ + #define DTRACE_TCP(name, ...) \ + do { if (DTRACE_PROBE_ENABLED(__tcp_##name)) \ + DTRACE_PROBE(__tcp_##name, ## __VA_ARGS__); } while (0) +#define DTRACE_TCP_ENABLED(name) \ + DTRACE_PROBE_ENABLED(__tcp_##name) +#define DTRACE_TCP_NOCHECK(name, ...) \ DTRACE_PROBE(__tcp_##name, ## __VA_ARGS__); #define DTRACE_UDP(name, ...) \ - DTRACE_PROBE(__udp_##name, ## __VA_ARGS__); + do { if (DTRACE_PROBE_ENABLED(__udp_##name)) \ + DTRACE_PROBE(__udp_##name, ## __VA_ARGS__); } while (0) #define DTRACE_SYSEVENT(name, ...) \ DTRACE_PROBE(__sysevent_##name, ## __VA_ARGS__); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b5f4f5cb452e..8bdd39ba2950 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -269,6 +269,7 @@ #include #include #include +#include #include #include @@ -1874,6 +1875,19 @@ recv_sndq: } EXPORT_SYMBOL(tcp_recvmsg); +/* We wish to avoid instrumenting TCP state transitions to SYN_SENT as we trace + * those state changes later once the destination address is committed to the + * sk. 
We also need to deal with the fact that separate timewait sockets are + * used to handle the TIME_WAIT state. We do not want to trace direct + * transitions from CLOSING/FIN_WAIT2 -> CLOSE since they do not represent + * connection close, rather a transition to using the timewait socket. + * Accordingly skip instrumentation of transitions from CLOSING/FIN_WAIT2 to + * CLOSE. + */ +#define REAL_STATE_CHANGE(old, new) \ + ((old) != (new) && (new) != TCP_SYN_SENT && \ + (((old) != TCP_CLOSING && (old) != TCP_FIN_WAIT2) || (new) != TCP_CLOSE)) + void tcp_set_state(struct sock *sk, int state) { int oldstate = sk->sk_state; @@ -1903,6 +1917,18 @@ void tcp_set_state(struct sock *sk, int state) */ sk->sk_state = state; + if (DTRACE_TCP_ENABLED(state__change) && + REAL_STATE_CHANGE(oldstate, state)) + DTRACE_TCP_NOCHECK(state__change, + struct sk_buff * : pktinfo_t *, NULL, + struct sock * : csinfo_t *, sk, + __dtrace_tcp_void_ip_t * : ipinfo_t *, NULL, + struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk), + struct tcphdr * : tcpinfo_t *, NULL, + int : tcplsinfo_t *, oldstate, + int : int, state, + int : int, DTRACE_NET_PROBE_OUTBOUND); + #ifdef STATE_TRACE SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]); #endif diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7b1fc955b6b6..76d24c510c66 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -75,6 +75,7 @@ #include #include #include +#include int sysctl_tcp_timestamps __read_mostly = 1; int sysctl_tcp_window_scaling __read_mostly = 1; @@ -5377,6 +5378,14 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + DTRACE_TCP(connect__established, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + __dtrace_tcp_void_ip_t * : ipinfo_t *, skb ? ip_hdr(skb) : NULL, + struct tcp_sock * : tcpsinfo_t *, tp, + struct tcphdr * : tcpinfo_t *, skb ? tcp_hdr(skb) : NULL, + int : tcplsinfo_t *, 
TCP_ESTABLISHED, + int, TCP_ESTABLISHED, int, DTRACE_NET_PROBE_INBOUND); tcp_set_state(sk, TCP_ESTABLISHED); if (skb) { @@ -5507,6 +5516,17 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, */ if (th->rst) { + DTRACE_TCP(connect__refused, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + __dtrace_tcp_void_ip_t * : ipinfo_t *, + ip_hdr(skb), + struct tcp_sock * : tcpsinfo_t *, tp, + struct tcphdr * : tcpinfo_t *, th, + int : tcplsinfo_t *, + sk ? sk->sk_state : TCP_CLOSE, + int, sk ? sk->sk_state : TCP_CLOSE, + int, DTRACE_NET_PROBE_INBOUND); tcp_reset(sk); goto discard; } @@ -5798,6 +5818,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tcp_init_buffer_space(sk); } smp_mb(); + + DTRACE_TCP(accept__established, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb), + struct tcp_sock * : tcpsinfo_t *, tp, + struct tcphdr * : tcpinfo_t *, tcp_hdr(skb), + int : tcplsinfo_t *, TCP_ESTABLISHED, + int, TCP_ESTABLISHED, + int, DTRACE_NET_PROBE_INBOUND); tcp_set_state(sk, TCP_ESTABLISHED); sk->sk_state_change(sk); @@ -6208,6 +6238,20 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_try_fastopen(sk, skb, req, &foc, dst); err = af_ops->send_synack(sk, dst, &fl, req, skb_get_queue_mapping(skb), &foc); + + /* Do not pass in tcp sock as ports/addresses are not yet + * populated - instead translators will fill them in from + * skb data. 
+ */ + DTRACE_TCP(state__change, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb), + struct tcp_sock * : tcpsinfo_t *, NULL, + struct tcphdr * : tcpinfo_t *, tcp_hdr(skb), + int : tcplsinfo_t *, TCP_LISTEN, + int, TCP_SYN_RECV, int, DTRACE_NET_PROBE_INBOUND); + if (!fastopen) { if (err || want_cookie) goto drop_and_free; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index aa4d525bbce8..f8ecc36ce190 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -84,6 +84,7 @@ #include #include +#include int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; @@ -563,6 +564,21 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_v4_send_check); +/* Since we want to trace send events in TCP prior to pushing the segment to + * IP - where the IP header is added - we need to construct an argument + * containing IP info for TCP probe consumers; unset fields are zeroed. + */ +static inline void dtrace_tcp_build_iphdr(__be32 saddr, __be32 daddr, + struct iphdr *iph) +{ + *iph = (struct iphdr) { + .version = 4, .ihl = 5, .tot_len = 5, + .protocol = IPPROTO_TCP, + .saddr = saddr, + .daddr = daddr, + }; +} + /* * This routine will send an RST to the other tcp. 
* @@ -684,6 +700,39 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.bound_dev_if = sk->sk_bound_dev_if; arg.tos = ip_hdr(skb)->tos; + + if (DTRACE_TCP_ENABLED(send) || + DTRACE_TCP_ENABLED(accept__refused)) { + struct iphdr iph; + + dtrace_tcp_build_iphdr(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, + &iph); + + DTRACE_TCP_NOCHECK(send, + struct sk_buff * : pktinfo_t *, NULL, + struct sock * : csinfo_t *, NULL, + __dtrace_tcp_void_ip_t * : ipinfo_t *, &iph, + struct tcp_sock * : tcpsinfo_t *, NULL, + struct tcphdr * : tcpinfo_t *, &rep.th, + int : tcplsinfo_t *, TCP_CLOSE, + int : int, TCP_CLOSE, + int : int, DTRACE_NET_PROBE_OUTBOUND); + if (th->syn && rep.th.seq == 0) + DTRACE_TCP_NOCHECK(accept__refused, + struct sk_buff * : pktinfo_t *, NULL, + struct sock * : csinfo_t *, NULL, + __dtrace_tcp_void_ip_t * : + ipinfo_t *, &iph, + struct tcp_sock * : tcpsinfo_t *, + NULL, + struct tcphdr * : tcpinfo_t *, + &rep.th, + int : tcplsinfo_t *, TCP_CLOSE, + int : int, TCP_CLOSE, + int : int, + DTRACE_NET_PROBE_OUTBOUND); + } + ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, @@ -769,6 +818,24 @@ static void tcp_v4_send_ack(struct net *net, if (oif) arg.bound_dev_if = oif; arg.tos = tos; + + if (DTRACE_TCP_ENABLED(send)) { + struct iphdr iph; + + dtrace_tcp_build_iphdr(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, + &iph); + + DTRACE_TCP_NOCHECK(send, + struct sk_buff * : pktinfo_t *, NULL, + struct sock * : csinfo_t *, NULL, + __dtrace_tcp_void_ip_t * : ipinfo_t *, &iph, + struct tcp_sock * : tcpsinfo_t *, NULL, + struct tcphdr * : tcpinfo_t *, &rep, + int : tcplsinfo_t *, TCP_CLOSE, + int : int, TCP_CLOSE, + int : int, DTRACE_NET_PROBE_OUTBOUND); + } + ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, @@ -842,6 +909,31 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, 
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); skb_set_queue_mapping(skb, queue_mapping); + + if (DTRACE_TCP_ENABLED(send)) { + struct iphdr iph; + + dtrace_tcp_build_iphdr(ireq->ir_loc_addr, + ireq->ir_rmt_addr, &iph); + + /* Do not supply tcp sk - addresses/ports are not + * committed yet - instead translators will fill them + * in from skb/IP info. + */ + DTRACE_TCP_NOCHECK(send, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + __dtrace_tcp_void_ip_t * : + ipinfo_t *, &iph, + struct tcp_sock * : tcpsinfo_t *, + NULL, + struct tcphdr * : tcpinfo_t *, + tcp_hdr(skb), + int : tcplsinfo_t *, TCP_LISTEN, + int, TCP_LISTEN, + int, DTRACE_NET_PROBE_OUTBOUND); + } + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, ireq->opt); @@ -1547,7 +1639,7 @@ int tcp_v4_rcv(struct sk_buff *skb) { const struct iphdr *iph; const struct tcphdr *th; - struct sock *sk; + struct sock *sk = NULL; int ret; struct net *net = dev_net(skb->dev); @@ -1597,6 +1689,15 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!sk) goto no_tcp_socket; + DTRACE_TCP(receive, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb), + struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk), + struct tcphdr * : tcpinfo_t *, tcp_hdr(skb), + int : tcplsinfo_t *, sk ? sk->sk_state : TCP_CLOSE, + int, sk ? sk->sk_state : TCP_CLOSE, + int, DTRACE_NET_PROBE_INBOUND); process: if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; @@ -1660,6 +1761,18 @@ bad_packet: discard_it: /* Discard frame. */ + if (DTRACE_TCP_ENABLED(receive) && skb->pkt_type == PACKET_HOST) + DTRACE_TCP_NOCHECK(receive, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + __dtrace_tcp_void_ip_t * : ipinfo_t *, + ip_hdr(skb), + struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk), + struct tcphdr * : tcpinfo_t *, tcp_hdr(skb), + int : tcplsinfo_t *, + sk ? sk->sk_state : TCP_CLOSE, + int, sk ? 
sk->sk_state : TCP_CLOSE, + int, DTRACE_NET_PROBE_INBOUND); kfree_skb(skb); return 0; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index fec2907b85e8..00ba8e607eff 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -340,6 +341,20 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) /* Linkage updates. */ __inet_twsk_hashdance(tw, sk, &tcp_hashinfo); inet_twsk_put(tw); + + if (DTRACE_TCP_ENABLED(state__change) && + state != sk->sk_state) + DTRACE_TCP_NOCHECK(state__change, + struct sk_buff * : pktinfo_t *, NULL, + struct sock * : csinfo_t *, sk, + __dtrace_tcp_void_ip_t * : + ipinfo_t *, NULL, + struct tcp_sock * : tcpsinfo_t *, + tcp_sk(sk), + struct tcphdr * : tcpinfo_t *, NULL, + int : tcplsinfo_t *, sk->sk_state, + int, state, + int, DTRACE_NET_PROBE_OUTBOUND); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1ea4322c3b0c..841fa0fd36fc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -41,6 +41,7 @@ #include #include #include +#include /* People can turn this off for buggy TCP's found in printers etc. 
*/ int sysctl_tcp_retrans_collapse __read_mostly = 1; @@ -1014,6 +1015,27 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (skb->len != tcp_header_size) tcp_event_data_sent(tp, sk); + DTRACE_TCP(send, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(skb), + struct tcp_sock * : tcpsinfo_t *, tp, + struct tcphdr * : tcpinfo_t *, tcp_hdr(skb), + int : tcplsinfo_t *, sk->sk_state, int, sk->sk_state, + int, DTRACE_NET_PROBE_OUTBOUND); + if (DTRACE_TCP_ENABLED(connect__request) && th->syn && + th->ack_seq == 0) + DTRACE_TCP_NOCHECK(connect__request, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + __dtrace_tcp_void_ip_t * : ipinfo_t *, + ip_hdr(skb), + struct tcp_sock * : tcpsinfo_t *, tp, + struct tcphdr * : tcpinfo_t *, th, + int : tcplsinfo_t *, sk->sk_state, + int, sk->sk_state, + int, DTRACE_NET_PROBE_OUTBOUND); + if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); @@ -3251,6 +3273,13 @@ int tcp_connect(struct sock *sk) tp->retrans_stamp = tcp_time_stamp; tcp_connect_queue_skb(sk, buff); tcp_ecn_send_syn(sk, buff); + DTRACE_TCP(state__change, struct sk_buff * : pktinfo_t *, NULL, + struct sock * : csinfo_t *, sk, + __dtrace_tcp_void_ip_t * : ipinfo_t *, ip_hdr(buff), + struct tcp_sock * : tcpsinfo_t *, tp, + struct tcphdr * : tcpinfo_t *, tcp_hdr(buff), + int : tcplsinfo_t *, TCP_CLOSE, + int, sk->sk_state, int, DTRACE_NET_PROBE_OUTBOUND); /* Send off SYN; include data in Fast Open. */ err = tp->fastopen_req ? 
tcp_send_syn_data(sk, buff) : diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a390174b96de..1f9bce3fd8f6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -111,6 +111,7 @@ #include #include #include +#include #include #include #include "udp_impl.h" @@ -836,6 +837,13 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) uh->check = CSUM_MANGLED_0; send: + DTRACE_UDP(send, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + void_ip_t * : ipinfo_t *, ip_hdr(skb), + struct udp_sock * : udpsinfo_t *, udp_sk(sk), + struct udphdr * : udpinfo_t *, uh); + err = ip_send_skb(sock_net(sk), skb); if (err) { if (err == -ENOBUFS && !inet->recverr) { @@ -1314,9 +1322,16 @@ try_again: goto out_free; } - if (!peeked) + if (!peeked) { + DTRACE_UDP(receive, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + void_ip_t * : ipinfo_t *, ip_hdr(skb), + struct udp_sock * : udpsinfo_t *, udp_sk(sk), + struct udphdr * : udpinfo_t *, udp_hdr(skb)); UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INDATAGRAMS, is_udplite); + } sock_recv_ts_and_drops(msg, sk, skb); @@ -1529,6 +1544,15 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) ret = encap_rcv(sk, skb); if (ret <= 0) { + DTRACE_UDP(receive, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + void_ip_t * : ipinfo_t *, + ip_hdr(skb), + struct udp_sock * : udpsinfo_t *, + udp_sk(sk), + struct udphdr * : udpinfo_t *, + udp_hdr(skb)); UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INDATAGRAMS, is_udplite); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c1147acbc8c4..b9ddb6665aa3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -69,6 +69,7 @@ #include #include +#include static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, @@ -437,6 +438,20 @@ out: sock_put(sk); } +/* Since we want to trace send events in TCP prior to pushing the segment to + * IP - 
where the IP header is added - we need to construct an argument + * containing IP info for TCP probe consumers; unset fields are zeroed. + */ +static inline void dtrace_tcp_build_ipv6hdr(struct in6_addr *saddr, + struct in6_addr *daddr, + struct ipv6hdr *ip6h) +{ + *ip6h = (struct ipv6hdr) { + .version = 6, .payload_len = 0, + .nexthdr = IPPROTO_TCP, + .saddr = *saddr, .daddr = *daddr, + }; +} static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, struct flowi *fl, @@ -466,6 +481,32 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, skb_set_queue_mapping(skb, queue_mapping); rcu_read_lock(); + + if (DTRACE_TCP_ENABLED(send)) { + struct ipv6hdr ip6h; + + dtrace_tcp_build_ipv6hdr(&ireq->ir_v6_loc_addr, + &ireq->ir_v6_rmt_addr, &ip6h); + + /* Do not supply tcp sk - addresses/ports are not + * committed yet - instead translators will fill them + * in from IP/TCP data. + */ + DTRACE_TCP_NOCHECK(send, + struct sk_buff * : pktinfo_t *, + NULL, + struct sock * : csinfo_t *, sk, + __dtrace_tcp_void_ip_t * : + ipinfo_t *, &ip6h, + struct tcp_sock * : tcpsinfo_t *, + NULL, + struct tcphdr * : tcpinfo_t *, + tcp_hdr(skb), + int : tcplsinfo_t *, TCP_LISTEN, + int, TCP_LISTEN, + int, DTRACE_NET_PROBE_OUTBOUND); + } + err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass); rcu_read_unlock(); @@ -819,6 +860,48 @@ static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq, dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(buff, dst); + if (DTRACE_TCP_ENABLED(send) || + DTRACE_TCP_ENABLED(accept__refused)) { + struct ipv6hdr ip6h; + + dtrace_tcp_build_ipv6hdr(&fl6.saddr, &fl6.daddr, + &ip6h); + + /* Do not supply tcp sk - addresses/ports are not + * committed yet - instead translators will fill them + * in from IP/TCP data. 
+ */ + DTRACE_TCP_NOCHECK(send, + struct sk_buff * : pktinfo_t *, + NULL, + struct sock * : csinfo_t *, NULL, + __dtrace_tcp_void_ip_t * : + ipinfo_t *, &ip6h, + struct tcp_sock * : tcpsinfo_t *, + NULL, + struct tcphdr * : tcpinfo_t *, t1, + int : tcplsinfo_t *, TCP_CLOSE, + int, TCP_CLOSE, + int, DTRACE_NET_PROBE_OUTBOUND); + if (rst && th->syn && th->ack == 0) + DTRACE_TCP_NOCHECK(accept__refused, + struct sk_buff * : + pktinfo_t *, NULL, + struct sock * : csinfo_t *, + NULL, + __dtrace_tcp_void_ip_t * : + ipinfo_t *, &ip6h, + struct tcp_sock * : + tcpsinfo_t *, NULL, + struct tcphdr * : + tcpinfo_t *, t1, + int : tcplsinfo_t *, + TCP_CLOSE, + int, TCP_CLOSE, + int, + DTRACE_NET_PROBE_OUTBOUND); + } + ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); if (rst) @@ -1369,7 +1452,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) { const struct tcphdr *th; const struct ipv6hdr *hdr; - struct sock *sk; + struct sock *sk = NULL; int ret; struct net *net = dev_net(skb->dev); @@ -1402,6 +1485,15 @@ static int tcp_v6_rcv(struct sk_buff *skb) if (!sk) goto no_tcp_socket; + DTRACE_TCP(receive, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + __dtrace_tcp_void_ip_t * : ipinfo_t *, hdr, + struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk), + struct tcphdr * : tcpinfo_t *, th, + int : tcplsinfo_t *, sk ? sk->sk_state : TCP_CLOSE, + int, sk ? sk->sk_state : TCP_CLOSE, + int, DTRACE_NET_PROBE_INBOUND); process: if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; @@ -1459,6 +1551,18 @@ bad_packet: } discard_it: + if (DTRACE_TCP_ENABLED(receive) && skb->pkt_type == PACKET_HOST) + DTRACE_TCP_NOCHECK(receive, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + __dtrace_tcp_void_ip_t * : ipinfo_t *, + ipv6_hdr(skb), + struct tcp_sock * : tcpsinfo_t *, tcp_sk(sk), + struct tcphdr * : tcpinfo_t *, tcp_hdr(skb), + int : tcplsinfo_t *, + sk ? sk->sk_state : TCP_CLOSE, + int, sk ? 
sk->sk_state : TCP_CLOSE, + int, DTRACE_NET_PROBE_INBOUND); kfree_skb(skb); return 0; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7333f3575fc5..8deb823ada20 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -50,6 +50,7 @@ #include #include +#include #include #include "udp_impl.h" @@ -458,6 +459,12 @@ try_again: goto out_free; } if (!peeked) { + DTRACE_UDP(receive, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + void_ip_t * : ipinfo_t *, ip_hdr(skb), + struct udp_sock * : udpsinfo_t *, udp_sk(sk), + struct udphdr * : udpinfo_t *, udp_hdr(skb)); if (is_udp4) UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INDATAGRAMS, is_udplite); @@ -653,6 +660,15 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) ret = encap_rcv(sk, skb); if (ret <= 0) { + DTRACE_UDP(receive, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + void_ip_t * : ipinfo_t *, + ip_hdr(skb), + struct udp_sock * : udpsinfo_t *, + udp_sk(sk), + struct udphdr * : udpinfo_t *, + udp_hdr(skb)); UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INDATAGRAMS, is_udplite); @@ -1058,6 +1074,13 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6) uh->check = CSUM_MANGLED_0; send: + DTRACE_UDP(send, + struct sk_buff * : pktinfo_t *, skb, + struct sock * : csinfo_t *, sk, + void_ip_t * : ipinfo_t *, ip_hdr(skb), + struct udp_sock * : udpsinfo_t *, udp_sk(sk), + struct udphdr * : udpinfo_t *, uh); + err = ip6_send_skb(skb); if (err) { if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { -- 2.50.1