From 579ed040aad9ff380be59b234b9651e3d59e6362 Mon Sep 17 00:00:00 2001
From: Jim Mott
Date: Thu, 11 Oct 2007 09:33:09 -0700
Subject: [PATCH] SDP: Add keepalive support

Signed-off-by: Jim Mott
---
 drivers/infiniband/ulp/sdp/sdp.h       |  10 ++
 drivers/infiniband/ulp/sdp/sdp_bcopy.c |  51 +++++++++-
 drivers/infiniband/ulp/sdp/sdp_cma.c   |   8 +-
 drivers/infiniband/ulp/sdp/sdp_main.c  | 134 ++++++++++++++++++++++++-
 4 files changed, 194 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/ulp/sdp/sdp.h b/drivers/infiniband/ulp/sdp/sdp.h
index b40a3503f1cc..5366df4d6591 100644
--- a/drivers/infiniband/ulp/sdp/sdp.h
+++ b/drivers/infiniband/ulp/sdp/sdp.h
@@ -42,6 +42,7 @@ extern int sdp_data_debug_level;
 #define SDP_RESOLVE_TIMEOUT 1000
 #define SDP_ROUTE_TIMEOUT 1000
 #define SDP_RETRY_COUNT 5
+#define SDP_KEEPALIVE_TIME (120 * 60 * HZ) /* default idle time: 120 minutes, in jiffies */
 
 #define SDP_TX_SIZE 0x40
 #define SDP_RX_SIZE 0x40
@@ -51,6 +52,7 @@ extern int sdp_data_debug_level;
 #define SDP_NUM_WC 4
 
 #define SDP_OP_RECV 0x800000000LL
+#define SDP_OP_SEND 0x400000000LL /* wr_id flag OR-ed into data sends by sdp_post_send() */
 
 enum sdp_mid {
 	SDP_MID_HELLO = 0x0,
@@ -115,6 +117,12 @@ struct sdp_sock {
 
 	int time_wait;
 
+	unsigned keepalive_time; /* idle time in jiffies; 0 selects the module default */
+
+	/* tx_head/rx_head when keepalive timer started */
+	unsigned keepalive_tx_head;
+	unsigned keepalive_rx_head;
+
 	/* Data below will be reset on error */
 	/* rdma specific */
 	struct rdma_cm_id *id;
@@ -219,5 +227,7 @@ void sdp_urg(struct sdp_sock *ssk, struct sk_buff *skb);
 void sdp_add_sock(struct sdp_sock *ssk);
 void sdp_remove_sock(struct sdp_sock *ssk);
 void sdp_remove_large_sock(void);
+void sdp_post_keepalive(struct sdp_sock *ssk);
+void sdp_start_keepalive_timer(struct sock *sk);
 
 #endif
diff --git a/drivers/infiniband/ulp/sdp/sdp_bcopy.c b/drivers/infiniband/ulp/sdp/sdp_bcopy.c
index d33525df746f..0ae8ec29cbe9 100644
--- a/drivers/infiniband/ulp/sdp/sdp_bcopy.c
+++ b/drivers/infiniband/ulp/sdp/sdp_bcopy.c
@@ -60,6 +60,12 @@ static int max_large_sockets = 1000;
 module_param_named(max_large_sockets, max_large_sockets, int, 0644);
MODULE_PARM_DESC(max_large_sockets, "Max number of large sockets (32k buffers).");
 
+#define sdp_cnt(var) do { (var)++; } while (0)
+static unsigned sdp_keepalive_probes_sent = 0;
+
+module_param_named(sdp_keepalive_probes_sent, sdp_keepalive_probes_sent, uint, 0644);
+MODULE_PARM_DESC(sdp_keepalive_probes_sent, "Total number of keepalive probes sent.");
+
 static int curr_large_sockets = 0;
 atomic_t sdp_current_mem_usage;
 spinlock_t sdp_large_sockets_lock;
@@ -107,6 +113,31 @@ static void sdp_fin(struct sock *sk)
 	}
 }
 
+void sdp_post_keepalive(struct sdp_sock *ssk)
+{
+	int rc;
+	struct ib_send_wr wr, *bad_wr;
+	/* Keepalive probe: post a zero-length RDMA write on the socket's QP. */
+	sdp_dbg(&ssk->isk.sk, "%s\n", __func__);
+
+	memset(&wr, 0, sizeof(wr));
+	/* wr_id 0 carries neither SDP_OP_SEND nor SDP_OP_RECV, marking a probe. */
+	wr.next = NULL;
+	wr.wr_id = 0;
+	wr.sg_list = NULL;
+	wr.num_sge = 0;
+	wr.opcode = IB_WR_RDMA_WRITE;
+
+	rc = ib_post_send(ssk->qp, &wr, &bad_wr);
+	if (rc) {
+		sdp_dbg(&ssk->isk.sk, "ib_post_keepalive failed with status %d.\n", rc);
+		sdp_set_error(&ssk->isk.sk, -ECONNRESET);
+		wake_up(&ssk->wq);
+	} else {
+		sdp_cnt(sdp_keepalive_probes_sent); /* count successful posts only */
+	}
+}
+
 void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb, u8 mid)
 {
 	struct sdp_buf *tx_req;
@@ -158,7 +189,7 @@ void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb, u8 mid)
 	}
 
 	ssk->tx_wr.next = NULL;
-	ssk->tx_wr.wr_id = ssk->tx_head;
+	ssk->tx_wr.wr_id = ssk->tx_head | SDP_OP_SEND;
 	ssk->tx_wr.sg_list = ssk->ibsge;
 	ssk->tx_wr.num_sge = frags + 1;
 	ssk->tx_wr.opcode = IB_WR_SEND;
@@ -589,7 +620,7 @@ static void sdp_handle_wc(struct sdp_sock *ssk, struct ib_wc *wc)
 				__kfree_skb(skb);
 			}
 		}
-	} else {
+	} else if (likely(wc->wr_id & SDP_OP_SEND)) {
 		skb = sdp_send_completion(ssk, wc->wr_id);
 		if (unlikely(!skb))
 			return;
@@ -605,6 +636,22 @@ static void sdp_handle_wc(struct sdp_sock *ssk, struct ib_wc *wc)
 		}
 
 		sk_stream_write_space(&ssk->isk.sk);
+	} else {
+		/* Keepalive probe completion; already counted when posted. */
+
+		if (likely(!wc->status))
+			return;
+
+		sdp_dbg(&ssk->isk.sk, " %s consumes KEEPALIVE status %d\n",
+			__func__, wc->status);
+
+		if (wc->status
== IB_WC_WR_FLUSH_ERR)
+			return;
+
+		sdp_set_error(&ssk->isk.sk, -ECONNRESET);
+		wake_up(&ssk->wq);
+
+		return;
 	}
 
 	if (likely(!wc->status)) {
diff --git a/drivers/infiniband/ulp/sdp/sdp_cma.c b/drivers/infiniband/ulp/sdp/sdp_cma.c
index ffa90e758922..8da10298a5d3 100644
--- a/drivers/infiniband/ulp/sdp/sdp_cma.c
+++ b/drivers/infiniband/ulp/sdp/sdp_cma.c
@@ -268,8 +268,8 @@ static int sdp_response_handler(struct sock *sk, struct rdma_cm_id *id,
 
 	sk->sk_state = TCP_ESTABLISHED;
 
-	/* TODO: If SOCK_KEEPOPEN set, need to reset and start
-	   keepalive timer here */
+	if (sock_flag(sk, SOCK_KEEPOPEN))
+		sdp_start_keepalive_timer(sk);
 
 	if (sock_flag(sk, SOCK_DEAD))
 		return 0;
@@ -307,8 +307,8 @@ int sdp_connected_handler(struct sock *sk, struct rdma_cm_event *event)
 
 	sk->sk_state = TCP_ESTABLISHED;
 
-	/* TODO: If SOCK_KEEPOPEN set, need to reset and start
-	   keepalive timer here */
+	if (sock_flag(sk, SOCK_KEEPOPEN))
+		sdp_start_keepalive_timer(sk);
 
 	if (sock_flag(sk, SOCK_DEAD))
 		return 0;
diff --git a/drivers/infiniband/ulp/sdp/sdp_main.c b/drivers/infiniband/ulp/sdp/sdp_main.c
index 1570c8155c59..0cbeccba00c9 100644
--- a/drivers/infiniband/ulp/sdp/sdp_main.c
+++ b/drivers/infiniband/ulp/sdp/sdp_main.c
@@ -117,6 +117,11 @@ static int send_poll_thresh = 8192;
 module_param_named(send_poll_thresh, send_poll_thresh, int, 0644);
 MODULE_PARM_DESC(send_poll_thresh, "Send message size thresh hold over which to start polling.");
 
+static unsigned int sdp_keepalive_time = SDP_KEEPALIVE_TIME; /* jiffies, used as a timer delta */
+
+module_param_named(sdp_keepalive_time, sdp_keepalive_time, uint, 0644);
+MODULE_PARM_DESC(sdp_keepalive_time, "Default idle time in jiffies before keepalive probe sent.");
+
 struct workqueue_struct *sdp_workqueue;
 
 static struct list_head sock_list;
@@ -124,6 +129,11 @@ static spinlock_t sock_list_lock;
 
 DEFINE_RWLOCK(device_removal_lock);
 
+static inline unsigned int sdp_keepalive_time_when(const struct sdp_sock *ssk)
+{
+	return ssk->keepalive_time ?
: sdp_keepalive_time;
+}
+
 inline void sdp_add_sock(struct sdp_sock *ssk)
 {
 	spin_lock_irq(&sock_list_lock);
@@ -221,6 +231,86 @@ static void sdp_destroy_qp(struct sdp_sock *ssk)
 	kfree(ssk->tx_ring);
 }
 
+
+static void sdp_reset_keepalive_timer(struct sock *sk, unsigned long len)
+{
+	struct sdp_sock *ssk = sdp_sk(sk);
+
+	sdp_dbg(sk, "%s\n", __func__);
+	/* Snapshot the ring heads so the timer can detect an idle link. */
+	ssk->keepalive_tx_head = ssk->tx_head;
+	ssk->keepalive_rx_head = ssk->rx_head;
+
+	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
+}
+
+static void sdp_delete_keepalive_timer(struct sock *sk)
+{
+	struct sdp_sock *ssk = sdp_sk(sk);
+
+	sdp_dbg(sk, "%s\n", __func__);
+	/* Forget the snapshot and stop the timer. */
+	ssk->keepalive_tx_head = 0;
+	ssk->keepalive_rx_head = 0;
+
+	sk_stop_timer(sk, &sk->sk_timer);
+}
+
+static void sdp_keepalive_timer(unsigned long data)
+{
+	struct sock *sk = (struct sock *)data;
+	struct sdp_sock *ssk = sdp_sk(sk);
+
+	sdp_dbg(sk, "%s\n", __func__);
+	/* Only process if the socket is not in use */
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
+		/* Retry shortly; keep the idle snapshot taken at arm time */
+		sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
+		goto out;
+	}
+
+	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_LISTEN ||
+	    sk->sk_state == TCP_CLOSE)
+		goto out;
+	/* Ring heads unchanged since the timer was armed: idle, so probe */
+	if (ssk->keepalive_tx_head == ssk->tx_head &&
+	    ssk->keepalive_rx_head == ssk->rx_head)
+		sdp_post_keepalive(ssk);
+
+	sdp_reset_keepalive_timer(sk, sdp_keepalive_time_when(ssk));
+
+out:
+	bh_unlock_sock(sk);
+	sock_put(sk);
+}
+
+static void sdp_init_timer(struct sock *sk)
+{
+	init_timer(&sk->sk_timer);
+
+	sk->sk_timer.function = sdp_keepalive_timer;
+	sk->sk_timer.data = (unsigned long)sk;
+}
+
+static void sdp_set_keepalive(struct sock *sk, int val)
+{
+	sdp_dbg(sk, "%s %d\n", __func__, val);
+
+	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
+		return;
+	/* Arm only on an off->on change; SOCK_KEEPOPEN is updated by the caller */
+	if (val && !sock_flag(sk, SOCK_KEEPOPEN))
+		sdp_start_keepalive_timer(sk);
+	else if (!val)
+		sdp_delete_keepalive_timer(sk);
+}
+
+void sdp_start_keepalive_timer(struct sock *sk)
+{
+	sdp_reset_keepalive_timer(sk,
sdp_keepalive_time_when(sdp_sk(sk)));
+}
+
 void sdp_reset_sk(struct sock *sk, int rc)
 {
 	struct sdp_sock *ssk = sdp_sk(sk);
@@ -365,6 +455,8 @@ static void sdp_close(struct sock *sk, long timeout)
 
 	sdp_dbg(sk, "%s\n", __func__);
 
+	sdp_delete_keepalive_timer(sk);
+
 	sk->sk_shutdown = SHUTDOWN_MASK;
 	if (sk->sk_state == TCP_LISTEN || sk->sk_state == TCP_SYN_SENT) {
 		sdp_set_state(sk, TCP_CLOSE);
@@ -820,9 +912,6 @@ static int sdp_setsockopt(struct sock *sk, int level, int optname,
 	int err = 0;
 
 	sdp_dbg(sk, "%s\n", __func__);
-	if (level != SOL_TCP)
-		return -ENOPROTOOPT;
-
 	if (optlen < sizeof(int))
 		return -EINVAL;
 
@@ -831,6 +920,28 @@ static int sdp_setsockopt(struct sock *sk, int level, int optname,
 
 	lock_sock(sk);
 
+	/* SO_KEEPALIVE is really a SOL_SOCKET level option but there
+	 * is a problem handling it at that level. In order to start
+	 * the keepalive timer on an SDP socket, we must call an SDP
+	 * specific routine. Since sock_setsockopt() cannot be modified
+	 * to understand SDP, the application must pass that option
+	 * through to us.
Since SO_KEEPALIVE and TCP_DEFER_ACCEPT both
+	 * use the same optname, the level must not be SOL_TCP or SOL_SOCKET
+	 */
+	if (level == PF_INET_SDP && optname == SO_KEEPALIVE) {
+		sdp_set_keepalive(sk, val);
+		if (val)
+			sock_set_flag(sk, SOCK_KEEPOPEN);
+		else
+			sock_reset_flag(sk, SOCK_KEEPOPEN);
+		goto out;
+	}
+
+	if (level != SOL_TCP) {
+		err = -ENOPROTOOPT;
+		goto out;
+	}
+
 	switch (optname) {
 	case TCP_NODELAY:
 		if (val) {
@@ -869,11 +980,23 @@ static int sdp_setsockopt(struct sock *sk, int level, int optname,
 			sdp_push_pending_frames(sk);
 		}
 		break;
+	case TCP_KEEPIDLE:
+		if (val < 1 || val > MAX_TCP_KEEPIDLE)
+			err = -EINVAL;
+		else {
+			ssk->keepalive_time = val * HZ; /* seconds -> jiffies */
+
+			if (sock_flag(sk, SOCK_KEEPOPEN) &&
+			    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+				sdp_reset_keepalive_timer(sk, ssk->keepalive_time);
+		}
+		break;
 	default:
 		err = -ENOPROTOOPT;
 		break;
 	}
 
+out:
 	release_sock(sk);
 	return err;
 }
@@ -906,6 +1029,9 @@ static int sdp_getsockopt(struct sock *sk, int level, int optname,
 	case TCP_CORK:
 		val = !!(ssk->nonagle&TCP_NAGLE_CORK);
 		break;
+	case TCP_KEEPIDLE:
+		val = sdp_keepalive_time_when(ssk) / HZ; /* jiffies -> seconds */
+		break;
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -1692,6 +1818,8 @@ static int sdp_create_socket(struct socket *sock, int protocol)
 
 	sk->sk_destruct = sdp_destruct;
 
+	sdp_init_timer(sk);
+
 	sock->ops = &sdp_proto_ops;
 	sock->state = SS_UNCONNECTED;
 
-- 
2.50.1