From: Santosh Shilimkar
Date: Fri, 11 Dec 2015 20:01:56 +0000 (-0800)
Subject: RDS: Add interface for receive MSG latency trace
X-Git-Tag: v4.1.12-92~194^2
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=67fb74441f1bf4645987c366d3e60b0c26305269;p=users%2Fjedix%2Flinux-maple.git

RDS: Add interface for receive MSG latency trace

Socket option to tap receive path latency.
SO_RDS: SO_RDS_MSG_RXPATH_LATENCY with parameter,
struct rds_rx_trace_so {
	u8 rx_traces;
	u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
}

CMSG:
RDS_CMSG_RXPATH_LATENCY(recvmsg)
Returns rds message latencies in various stages of receive path in nS.
It is set per socket using SO_RDS_MSG_RXPATH_LATENCY socket option.
Legitimate points are defined in enum rds_message_rxpath_latency.
More points can be added in future.

CMSG format:
struct rds_cmsg_rx_trace {
	u8 rx_traces;
	u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
	u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
}

Receive MSG trace points:
RDS message Receive Path Latency points
enum rds_message_rxpath_latency {
	RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
	RDS_MSG_RX_DGRAM_REASSEMBLE,
	RDS_MSG_RX_DGRAM_DELIVERED,
	RDS_MSG_RX_DGRAM_TRACE_MAX
}

Tested-by: Namrata Jampani
Reviewed-by: Ajaykumar Hotchandani
Reviewed-by: Sowmini Varadhan
Orabug: 22630180
Signed-off-by: Santosh Shilimkar
---
Review notes (not part of the commit message):
 - rds_recv_track_latency() now rejects rx_traces larger than
   RDS_MSG_RX_DGRAM_TRACE_MAX and trace positions equal to
   RDS_MSG_RX_DGRAM_TRACE_MAX; both previously allowed out-of-bounds
   array accesses.
 - rds_cmsg_recv() zeroes the control message before filling it and
   stores results in slot i (request order) rather than slot j.
 - Whitespace of context lines was reconstructed and may need to be
   refreshed against the target tree before applying.

diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index 40090ebf54333..4667926a17cb0 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -75,6 +75,12 @@
 #define RDS_TRANS_COUNT 3
 #define RDS_TRANS_NONE (~0)
 
+/* Socket option to tap receive path latency
+ * SO_RDS: SO_RDS_MSG_RXPATH_LATENCY
+ * Format used: struct rds_rx_trace_so
+ */
+#define SO_RDS_MSG_RXPATH_LATENCY 10
+
 /*
  * ioctl commands for SOL_RDS
  */
@@ -86,6 +92,25 @@
 
 typedef u_int8_t rds_tos_t;
 
+/* RDS message Receive Path Latency points */
+enum rds_message_rxpath_latency {
+	RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
+	RDS_MSG_RX_DGRAM_REASSEMBLE,
+	RDS_MSG_RX_DGRAM_DELIVERED,
+	RDS_MSG_RX_DGRAM_TRACE_MAX
+};
+
+struct rds_rx_trace_so {
+	u8 rx_traces;
+	u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
+};
+
+struct rds_cmsg_rx_trace {
+	u8 rx_traces;
+	u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
+	u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
+};
+
 /*
  * Control message types for SOL_RDS.
  *
@@ -104,6 +129,12 @@ typedef u_int8_t rds_tos_t;
  *	the same as for the GET_MR setsockopt.
  * RDS_CMSG_RDMA_SEND_STATUS (recvmsg)
  *	Returns the status of a completed RDMA/async send operation.
+ * RDS_CMSG_RXPATH_LATENCY(recvmsg)
+ *	Returns rds message latencies in various stages of receive
+ *	path in nS. It is set per socket using SO_RDS_MSG_RXPATH_LATENCY
+ *	socket option. Legitimate points are defined in
+ *	enum rds_message_rxpath_latency. More points can be added in
+ *	future. CMSG format is struct rds_cmsg_rx_trace.
  */
 #define RDS_CMSG_RDMA_ARGS 1
 #define RDS_CMSG_RDMA_DEST 2
@@ -115,6 +146,7 @@ typedef u_int8_t rds_tos_t;
 #define RDS_CMSG_MASKED_ATOMIC_FADD 8
 #define RDS_CMSG_MASKED_ATOMIC_CSWP 9
 #define RDS_CMSG_ASYNC_SEND 10
+#define RDS_CMSG_RXPATH_LATENCY 11
 
 #define RDS_INFO_FIRST 10000
 #define RDS_INFO_COUNTERS 10000
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 8d131d3343609..53e808296ebb3 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -383,6 +383,44 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
 	return 0;
 }
 
+/*
+ * SO_RDS_MSG_RXPATH_LATENCY handler: copy a struct rds_rx_trace_so from
+ * user space and record which receive path trace points this socket
+ * wants reported through RDS_CMSG_RXPATH_LATENCY.
+ *
+ * Returns 0 on success, or -EFAULT if optlen is not the size of the
+ * structure, the copy fails, or the request names more than
+ * RDS_MSG_RX_DGRAM_TRACE_MAX points or an unknown point.
+ */
+static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
+				  int optlen)
+{
+	struct rds_rx_trace_so trace;
+	int i;
+
+	if (optlen != sizeof(struct rds_rx_trace_so))
+		return -EFAULT;
+
+	if (copy_from_user(&trace, optval, sizeof(trace)))
+		return -EFAULT;
+
+	/* rx_traces is user controlled and bounds the array walks below */
+	if (trace.rx_traces > RDS_MSG_RX_DGRAM_TRACE_MAX)
+		return -EFAULT;
+
+	rs->rs_rx_traces = trace.rx_traces;
+	for (i = 0; i < rs->rs_rx_traces; i++) {
+		/* valid points are 0 .. RDS_MSG_RX_DGRAM_TRACE_MAX - 1 */
+		if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) {
+			rs->rs_rx_traces = 0;
+			return -EFAULT;
+		}
+		rs->rs_rx_trace[i] = trace.rx_trace_pos[i];
+	}
+
+	return 0;
+}
+
 static int rds_setsockopt(struct socket *sock, int level, int optname,
 			  char __user *optval, unsigned int optlen)
 {
@@ -426,6 +464,9 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
 		ret = rds_enable_recvtstamp(sock->sk, optval, optlen);
 		release_sock(sock->sk);
 		break;
+	case SO_RDS_MSG_RXPATH_LATENCY:
+		ret = rds_recv_track_latency(rs, optval, optlen);
+		break;
 	default:
 		ret = -ENOPROTOOPT;
 	}
@@ -576,6 +617,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
 	rs->rs_tos = 0;
 	rs->rs_conn = 0;
 	rs->rs_netfilter_enabled = 0;
+	rs->rs_rx_traces = 0;
 
 	if (rs->rs_bound_addr)
 		printk(KERN_CRIT "bound addr %x at create\n", rs->rs_bound_addr);
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 9343810aeb06b..2e99bcb0fe88d 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -1102,8 +1102,12 @@ static void rds_ib_process_recv(struct rds_connection *conn,
 		ic->i_ibinc = ibinc;
 
 		hdr = &ibinc->ii_inc.i_hdr;
+		ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
+				local_clock();
 		memcpy(hdr, ihdr, sizeof(*hdr));
 		ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
+		ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
+				local_clock();
 
 		rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
 			 ic->i_recv_data_rem, hdr->h_flags);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index e4a9266892c12..a26152110da46 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -270,6 +270,11 @@ struct rds_ext_header_rdma_bytes {
 };
 
 #define __RDS_EXTHDR_MAX 16 /* for now */
+#define RDS_RX_MAX_TRACES (RDS_MSG_RX_DGRAM_TRACE_MAX + 1)
+#define RDS_MSG_RX_HDR 0
+#define RDS_MSG_RX_START 1
+#define RDS_MSG_RX_END 2
+#define RDS_MSG_RX_CMSG 3
 
 struct rds_incoming {
 	atomic_t i_refcount;
@@ -285,6 +290,7 @@ struct rds_incoming {
 
 	rds_rdma_cookie_t i_rdma_cookie;
 	struct timeval i_rx_tstamp;
+	u64 i_rx_lat_trace[RDS_RX_MAX_TRACES];
 };
 
 struct rds_mr {
@@ -595,6 +601,10 @@ struct rds_sock {
 
 	int rs_netfilter_enabled;
 	u8 rs_tos;
+
+	/* Socket receive path trace points */
+	u8 rs_rx_traces;
+	u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
 };
 
 static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
diff --git a/net/rds/recv.c b/net/rds/recv.c
index ae827970e36a1..c00d28759d385 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -68,6 +68,8 @@ rds_recv_ok(struct sock *sk, struct sk_buff *skb)
 void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
 		  __be32 saddr)
 {
+	int i;
+
 	atomic_set(&inc->i_refcount, 1);
 	INIT_LIST_HEAD(&inc->i_item);
 	inc->i_conn = conn;
@@ -77,6 +79,9 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
 	inc->i_skb = NULL;
 	inc->i_rx_tstamp.tv_sec = 0;
 	inc->i_rx_tstamp.tv_usec = 0;
+
+	for (i = 0; i < RDS_RX_MAX_TRACES; i++)
+		inc->i_rx_lat_trace[i] = 0;
 }
 EXPORT_SYMBOL_GPL(rds_inc_init);
 
@@ -554,6 +559,7 @@ rds_recv_local(struct rds_connection *conn, __be32 saddr, __be32 daddr,
 			do_gettimeofday(&inc->i_rx_tstamp);
 			rds_inc_addref(inc);
 			list_add_tail(&inc->i_item, &rs->rs_recv_queue);
+			inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock();
 			__rds_wake_sk_sleep(sk);
 		}
 	} else {
@@ -726,7 +732,7 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
 		ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST,
 				sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie);
 		if (ret)
-			return ret;
+			goto out;
 	}
 
 	if ((inc->i_rx_tstamp.tv_sec != 0) &&
@@ -735,10 +741,33 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
 				sizeof(struct timeval),
 				&inc->i_rx_tstamp);
 		if (ret)
-			return ret;
+			goto out;
 	}
 
-	return 0;
+	if (rs->rs_rx_traces) {
+		struct rds_cmsg_rx_trace t;
+		int i, j;
+
+		/* t goes to user space: clear padding and unused slots */
+		memset(&t, 0, sizeof(t));
+		inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock();
+		t.rx_traces = rs->rs_rx_traces;
+		for (i = 0; i < rs->rs_rx_traces; i++) {
+			/* slot i reports requested point j: stamp[j+1] - stamp[j] */
+			j = rs->rs_rx_trace[i];
+			t.rx_trace_pos[i] = j;
+			t.rx_trace[i] = inc->i_rx_lat_trace[j + 1] -
+					inc->i_rx_lat_trace[j];
+		}
+
+		ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RXPATH_LATENCY,
+			       sizeof(t), &t);
+		if (ret)
+			goto out;
+	}
+
+out:
+	return ret;
 }
 
 int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index f941f60963f9a..4ee2145ca935d 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -178,6 +178,9 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
 			tc->t_tinc = tinc;
 			rdsdebug("alloced tinc %p\n", tinc);
 			rds_inc_init(&tinc->ti_inc, conn, conn->c_faddr);
+			tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
+					local_clock();
+
 			/*
 			 * XXX * we might be able to use the __ variants when
 			 * we've already serialized at a higher level.
@@ -202,6 +205,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
 				/* could be 0 for a 0 len message */
 				tc->t_tinc_data_rem =
 					be32_to_cpu(tinc->ti_inc.i_hdr.h_len);
+				tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
+					local_clock();
 			}
 		}
 