]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: Add interface for receive MSG latency trace
author: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Fri, 11 Dec 2015 20:01:56 +0000 (12:01 -0800)
committer: Chuck Anderson <chuck.anderson@oracle.com>
Fri, 26 Feb 2016 02:51:46 +0000 (18:51 -0800)
Socket option to tap receive path latency.
SO_RDS: SO_RDS_MSG_RXPATH_LATENCY
with parameter,
struct rds_rx_trace_so {
u8 rx_traces;
        u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
}

CMSG:
RDS_CMSG_RXPATH_LATENCY(recvmsg)
Returns rds message latencies in various stages of receive
path in ns. It is set per socket using the SO_RDS_MSG_RXPATH_LATENCY
socket option. Legitimate points are defined in
enum rds_message_rxpath_latency. More points can be added in
future.

CMSG format:
struct rds_cmsg_rx_trace {
        u8 rx_traces;
        u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
        u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
}

Receive MSG trace points: RDS message Receive Path Latency points
enum rds_message_rxpath_latency {
RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
RDS_MSG_RX_DGRAM_REASSEMBLE,
RDS_MSG_RX_DGRAM_DELIVERED,
RDS_MSG_RX_DGRAM_TRACE_MAX
}

Tested-by: Namrata Jampani <namrata.jampani@oracle.com>
Reviewed-by: Ajaykumar Hotchandani <ajaykumar.hotchandani@oracle.com>
Reviewed-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Orabug: 22630180
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
include/uapi/linux/rds.h
net/rds/af_rds.c
net/rds/ib_recv.c
net/rds/rds.h
net/rds/recv.c
net/rds/tcp_recv.c

index 40090ebf54333e40aee18486e4f4bdfd07db9f84..4667926a17cb0d6c9c7cffba4b1882f473f81f91 100644 (file)
 #define        RDS_TRANS_COUNT 3
 #define        RDS_TRANS_NONE  (~0)
 
+/* Socket option to tap receive path latency
+ *     SO_RDS: SO_RDS_MSG_RXPATH_LATENCY
+ *     Format used struct rds_rx_trace_so
+ */
+#define SO_RDS_MSG_RXPATH_LATENCY      10
+
 /*
  * ioctl commands for SOL_RDS
 */
 
 typedef u_int8_t         rds_tos_t;
 
+/* RDS message Receive Path Latency points.
+ *
+ * These values are what user space places in rx_trace_pos[] of
+ * struct rds_rx_trace_so to select which receive path stages to trace.
+ * RDS_MSG_RX_DGRAM_TRACE_MAX is not a trace point itself; it is the number
+ * of trace points and sizes the per-point arrays.
+ */
+enum rds_message_rxpath_latency {
+       RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
+       RDS_MSG_RX_DGRAM_REASSEMBLE,
+       RDS_MSG_RX_DGRAM_DELIVERED,
+       RDS_MSG_RX_DGRAM_TRACE_MAX      /* count of trace points, keep last */
+};
+
+struct rds_rx_trace_so {       /* argument of SO_RDS_MSG_RXPATH_LATENCY */
+       u8 rx_traces;   /* number of entries used in rx_trace_pos[] */
+       u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];    /* requested trace points */
+};
+
+struct rds_cmsg_rx_trace {     /* payload of RDS_CMSG_RXPATH_LATENCY */
+       u8 rx_traces;   /* number of trace points enabled on the socket */
+       u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];    /* trace point ids */
+       u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];       /* latency per trace point, in ns */
+};
+
 /*
  * Control message types for SOL_RDS.
  *
@@ -104,6 +129,12 @@ typedef u_int8_t         rds_tos_t;
  *     the same as for the GET_MR setsockopt.
  * RDS_CMSG_RDMA_SEND_STATUS (recvmsg)
  *     Returns the status of a completed RDMA/async send operation.
+ * RDS_CMSG_RXPATH_LATENCY(recvmsg)
+ *     Returns rds message latencies in various stages of receive
+ *     path in ns. It is set per socket using SO_RDS_MSG_RXPATH_LATENCY
+ *     socket option. Legitimate points are defined in
+ *     enum rds_message_rxpath_latency. More points can be added in
+ *     future. CMSG format is struct rds_cmsg_rx_trace.
  */
 #define RDS_CMSG_RDMA_ARGS             1
 #define RDS_CMSG_RDMA_DEST             2
@@ -115,6 +146,7 @@ typedef u_int8_t         rds_tos_t;
 #define RDS_CMSG_MASKED_ATOMIC_FADD     8
 #define RDS_CMSG_MASKED_ATOMIC_CSWP     9
 #define RDS_CMSG_ASYNC_SEND             10
+#define RDS_CMSG_RXPATH_LATENCY                11
 
 #define RDS_INFO_FIRST                 10000
 #define RDS_INFO_COUNTERS              10000
index 8d131d33436097ac84c2bb976a16cb7f2ae21c7c..53e808296ebb3bf96bb9762c3bd8255d0af7bf8b 100644 (file)
@@ -383,6 +383,31 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
        return 0;
 }
 
+/*
+ * Handle the SO_RDS_MSG_RXPATH_LATENCY socket option.
+ *
+ * @rs:     socket whose receive path trace configuration is updated
+ * @optval: user pointer to a struct rds_rx_trace_so
+ * @optlen: size of the user buffer, must be sizeof(struct rds_rx_trace_so)
+ *
+ * Returns 0 on success.  Returns -EFAULT when optlen is wrong, the copy from
+ * user space fails, more than RDS_MSG_RX_DGRAM_TRACE_MAX trace points are
+ * requested, or a requested trace point is not below
+ * RDS_MSG_RX_DGRAM_TRACE_MAX.  An invalid trace point also disables tracing
+ * on the socket (rs_rx_traces is reset to 0).
+ */
+static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
+                                int optlen)
+{
+       struct rds_rx_trace_so trace;
+       int i;
+
+       if (optlen != sizeof(struct rds_rx_trace_so))
+               return -EFAULT;
+
+       if (copy_from_user(&trace, optval, sizeof(trace)))
+               return -EFAULT;
+
+       /*
+        * rx_traces comes from user space and bounds the loop over the
+        * fixed-size rx_trace_pos[] / rs_rx_trace[] arrays below.
+        */
+       if (trace.rx_traces > RDS_MSG_RX_DGRAM_TRACE_MAX)
+               return -EFAULT;
+
+       rs->rs_rx_traces = trace.rx_traces;
+       for (i = 0; i < rs->rs_rx_traces; i++) {
+               /*
+                * RDS_MSG_RX_DGRAM_TRACE_MAX itself is not a trace point;
+                * accepting it would index past the per-point arrays when
+                * the stored position is used on the receive path.
+                */
+               if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) {
+                       rs->rs_rx_traces = 0;
+                       return -EFAULT;
+               }
+               rs->rs_rx_trace[i] = trace.rx_trace_pos[i];
+       }
+
+       return 0;
+}
+
+
 static int rds_setsockopt(struct socket *sock, int level, int optname,
                          char __user *optval, unsigned int optlen)
 {
@@ -426,6 +451,9 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
                ret = rds_enable_recvtstamp(sock->sk, optval, optlen);
                release_sock(sock->sk);
                break;
+       case SO_RDS_MSG_RXPATH_LATENCY:
+               ret = rds_recv_track_latency(rs, optval, optlen);
+               break;
        default:
                ret = -ENOPROTOOPT;
        }
@@ -576,6 +604,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
        rs->rs_tos = 0;
        rs->rs_conn = 0;
        rs->rs_netfilter_enabled = 0;
+       rs->rs_rx_traces = 0;
 
        if (rs->rs_bound_addr)
                printk(KERN_CRIT "bound addr %x at create\n", rs->rs_bound_addr);
index 9343810aeb06b03e9032235be080fa9884e0da4c..2e99bcb0fe88d4997f2a7bdc37c411f1cd4a2af8 100644 (file)
@@ -1102,8 +1102,12 @@ static void rds_ib_process_recv(struct rds_connection *conn,
                ic->i_ibinc = ibinc;
 
                hdr = &ibinc->ii_inc.i_hdr;
+               ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
+                               local_clock();
                memcpy(hdr, ihdr, sizeof(*hdr));
                ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
+               ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
+                               local_clock();
 
                rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
                         ic->i_recv_data_rem, hdr->h_flags);
index e4a9266892c12c0cc792631a00d8934b7f32342b..a26152110da46da918eede578c54e71d782d6222 100644 (file)
@@ -270,6 +270,11 @@ struct rds_ext_header_rdma_bytes {
 };
 
 #define __RDS_EXTHDR_MAX       16 /* for now */
+#define RDS_RX_MAX_TRACES      (RDS_MSG_RX_DGRAM_TRACE_MAX + 1) /* size of i_rx_lat_trace[] */
+#define        RDS_MSG_RX_HDR          0 /* i_rx_lat_trace[] slot: stamped before the header is copied */
+#define        RDS_MSG_RX_START        1 /* i_rx_lat_trace[] slot: stamped once h_len has been read */
+#define        RDS_MSG_RX_END          2 /* i_rx_lat_trace[] slot: stamped when queued on rs_recv_queue */
+#define        RDS_MSG_RX_CMSG         3 /* i_rx_lat_trace[] slot: stamped when the cmsg is built */
 
 struct rds_incoming {
        atomic_t                i_refcount;
@@ -285,6 +290,7 @@ struct rds_incoming {
 
        rds_rdma_cookie_t       i_rdma_cookie;
        struct timeval          i_rx_tstamp;
+       u64                     i_rx_lat_trace[RDS_RX_MAX_TRACES];
 };
 
 struct rds_mr {
@@ -595,6 +601,10 @@ struct rds_sock {
        int                     rs_netfilter_enabled;
 
        u8                      rs_tos;
+
+       /* Socket receive path trace points*/
+       u8                      rs_rx_traces;
+       u8                      rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
 };
 
 static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
index ae827970e36a1ff05bfb166966c9504a243e232c..c00d28759d385abda3cbd43cea2c83df5f433eb2 100644 (file)
@@ -68,6 +68,8 @@ rds_recv_ok(struct sock *sk, struct sk_buff *skb)
 void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
                  __be32 saddr)
 {
+       int i;
+
        atomic_set(&inc->i_refcount, 1);
        INIT_LIST_HEAD(&inc->i_item);
        inc->i_conn = conn;
@@ -77,6 +79,9 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
        inc->i_skb   = NULL;
        inc->i_rx_tstamp.tv_sec = 0;
        inc->i_rx_tstamp.tv_usec = 0;
+
+       for (i = 0; i < RDS_RX_MAX_TRACES; i++)
+               inc->i_rx_lat_trace[i] = 0;
 }
 EXPORT_SYMBOL_GPL(rds_inc_init);
 
@@ -554,6 +559,7 @@ rds_recv_local(struct rds_connection *conn, __be32 saddr, __be32 daddr,
                                do_gettimeofday(&inc->i_rx_tstamp);
                        rds_inc_addref(inc);
                        list_add_tail(&inc->i_item, &rs->rs_recv_queue);
+                       inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock();
                        __rds_wake_sk_sleep(sk);
                }
        } else {
@@ -726,7 +732,7 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
                ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST,
                                sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie);
                if (ret)
-                       return ret;
+                       goto out;
        }
 
        if ((inc->i_rx_tstamp.tv_sec != 0) &&
@@ -735,10 +741,30 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
                               sizeof(struct timeval),
                               &inc->i_rx_tstamp);
                if (ret)
-                       return ret;
+                       goto out;
        }
 
-       return 0;
+       if (rs->rs_rx_traces) {
+               struct rds_cmsg_rx_trace t;
+               int i, j;
+
+               inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock();
+               t.rx_traces =  rs->rs_rx_traces;
+               for (i = 0; i < rs->rs_rx_traces; i++) {
+                       j = rs->rs_rx_trace[i];
+                       t.rx_trace_pos[j] = j;
+                       t.rx_trace[j] = inc->i_rx_lat_trace[j + 1] -
+                                         inc->i_rx_lat_trace[j];
+               }
+
+               ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RXPATH_LATENCY,
+                               sizeof(t), &t);
+               if (ret)
+                       goto out;
+       }
+
+out:
+       return ret;
 }
 
 int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
index f941f60963f9a4ad31158b412aad5ea030b25804..4ee2145ca935d72127d07a5d69fdc0364f2de0d9 100644 (file)
@@ -178,6 +178,9 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
                        tc->t_tinc = tinc;
                        rdsdebug("alloced tinc %p\n", tinc);
                        rds_inc_init(&tinc->ti_inc, conn, conn->c_faddr);
+                       tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
+                                       local_clock();
+
                        /*
                         * XXX * we might be able to use the __ variants when
                         * we've already serialized at a higher level.
@@ -202,6 +205,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
                                /* could be 0 for a 0 len message */
                                tc->t_tinc_data_rem =
                                        be32_to_cpu(tinc->ti_inc.i_hdr.h_len);
+                               tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_START] =
+                                       local_clock();
                        }
                }