module_param_named(max_large_sockets, max_large_sockets, int, 0644);
MODULE_PARM_DESC(max_large_sockets, "Max number of large sockets (32k buffers).");
+/* Bump a statistics counter: expands to a plain (non-atomic) post-increment of var. */
+#define sdp_cnt(var) do { (var)++; } while (0)
+/* Keepalive probe counter; incremented through sdp_cnt() and exported below as a module parameter with mode 0644. */
+static unsigned sdp_keepalive_probes_sent = 0;
+
+module_param_named(sdp_keepalive_probes_sent, sdp_keepalive_probes_sent, uint, 0644);
+MODULE_PARM_DESC(sdp_keepalive_probes_sent, "Total number of keepalive probes sent.");
+
static int curr_large_sockets = 0;
atomic_t sdp_current_mem_usage;
spinlock_t sdp_large_sockets_lock;
}
}
+/*
+ * Post a keepalive probe on the socket's queue pair.
+ *
+ * The probe is a work request that carries no data: no scatter/gather
+ * entries, opcode IB_WR_RDMA_WRITE and wr_id 0 (data sends tag their wr_id
+ * with SDP_OP_SEND, so a zero wr_id does not carry that bit).
+ *
+ * If ib_post_send() fails, the socket is put into the -ECONNRESET error
+ * state and waiters on ssk->wq are woken. The probe counter is only bumped
+ * when the work request was actually posted, so failed posts are not
+ * reported as probes sent.
+ *
+ * NOTE(review): the send-completion path also bumps
+ * sdp_keepalive_probes_sent for keepalive completions - confirm whether a
+ * probe is meant to be counted at both points.
+ */
+void sdp_post_keepalive(struct sdp_sock *ssk)
+{
+	int rc;
+	struct ib_send_wr wr, *bad_wr;
+
+	sdp_dbg(&ssk->isk.sk, "%s\n", __func__);
+
+	memset(&wr, 0, sizeof(wr));
+
+	/* Spelled out on top of the memset to document the shape of the probe. */
+	wr.next = NULL;
+	wr.wr_id = 0;
+	wr.sg_list = NULL;
+	wr.num_sge = 0;
+	wr.opcode = IB_WR_RDMA_WRITE;
+
+	rc = ib_post_send(ssk->qp, &wr, &bad_wr);
+	if (rc) {
+		sdp_dbg(&ssk->isk.sk, "ib_post_keepalive failed with status %d.\n", rc);
+		sdp_set_error(&ssk->isk.sk, -ECONNRESET);
+		wake_up(&ssk->wq);
+		/* Nothing was posted, so there is no probe to account for. */
+		return;
+	}
+
+	sdp_cnt(sdp_keepalive_probes_sent);
+}
+
void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb, u8 mid)
{
struct sdp_buf *tx_req;
}
ssk->tx_wr.next = NULL;
- ssk->tx_wr.wr_id = ssk->tx_head;
+ ssk->tx_wr.wr_id = ssk->tx_head | SDP_OP_SEND;
ssk->tx_wr.sg_list = ssk->ibsge;
ssk->tx_wr.num_sge = frags + 1;
ssk->tx_wr.opcode = IB_WR_SEND;
__kfree_skb(skb);
}
}
- } else {
+ } else if (likely(wc->wr_id & SDP_OP_SEND)) {
skb = sdp_send_completion(ssk, wc->wr_id);
if (unlikely(!skb))
return;
}
sk_stream_write_space(&ssk->isk.sk);
+ } else {
+ sdp_cnt(sdp_keepalive_probes_sent);
+
+ if (likely(!wc->status))
+ return;
+
+ sdp_dbg(&ssk->isk.sk, " %s consumes KEEPALIVE status %d\n",
+ __func__, wc->status);
+
+ if (wc->status == IB_WC_WR_FLUSH_ERR)
+ return;
+
+ sdp_set_error(&ssk->isk.sk, -ECONNRESET);
+ wake_up(&ssk->wq);
+
+ return;
}
if (likely(!wc->status)) {
module_param_named(send_poll_thresh, send_poll_thresh, int, 0644);
MODULE_PARM_DESC(send_poll_thresh, "Send message size thresh hold over which to start polling.");
+/*
+ * Default keepalive idle time, used for sockets that have no per-socket
+ * keepalive_time set. The value is handed unchanged to the keepalive timer
+ * as an offset from jiffies and is divided by HZ when reported through
+ * TCP_KEEPIDLE, i.e. it is expressed in jiffies, not seconds.
+ */
+static unsigned int sdp_keepalive_time = SDP_KEEPALIVE_TIME;
+
+module_param_named(sdp_keepalive_time, sdp_keepalive_time, uint, 0644);
+MODULE_PARM_DESC(sdp_keepalive_time, "Default idle time in jiffies before keepalive probe sent.");
+
struct workqueue_struct *sdp_workqueue;
static struct list_head sock_list;
DEFINE_RWLOCK(device_removal_lock);
+/*
+ * Idle time to use for this socket's keepalive timer: the socket's own
+ * keepalive_time when it is non-zero, otherwise the module-wide
+ * sdp_keepalive_time default.
+ */
+static inline unsigned int sdp_keepalive_time_when(const struct sdp_sock *ssk)
+{
+	if (ssk->keepalive_time)
+		return ssk->keepalive_time;
+
+	return sdp_keepalive_time;
+}
+
inline void sdp_add_sock(struct sdp_sock *ssk)
{
spin_lock_irq(&sock_list_lock);
kfree(ssk->tx_ring);
}
+
+/*
+ * (Re)arm the keepalive timer of @sk to fire @len jiffies from now.
+ *
+ * The current tx_head/rx_head values are recorded first; the timer handler
+ * compares them with the live values to decide whether the connection was
+ * idle in between.
+ */
+static void sdp_reset_keepalive_timer(struct sock *sk, unsigned long len)
+{
+	struct sdp_sock *sdp = sdp_sk(sk);
+	unsigned long expires;
+
+	sdp_dbg(sk, "%s\n", __func__);
+
+	/* Snapshot of the ring heads at arming time. */
+	sdp->keepalive_tx_head = sdp->tx_head;
+	sdp->keepalive_rx_head = sdp->rx_head;
+
+	expires = jiffies + len;
+	sk_reset_timer(sk, &sk->sk_timer, expires);
+}
+
+/*
+ * Stop the keepalive timer of @sk and clear the recorded tx/rx head
+ * snapshot.
+ */
+static void sdp_delete_keepalive_timer(struct sock *sk)
+{
+	struct sdp_sock *sdp = sdp_sk(sk);
+
+	sdp_dbg(sk, "%s\n", __func__);
+
+	/* Forget the snapshot before the timer is stopped. */
+	sdp->keepalive_tx_head = 0;
+	sdp->keepalive_rx_head = 0;
+
+	sk_stop_timer(sk, &sk->sk_timer);
+}
+
+/*
+ * Keepalive timer handler; @data is the struct sock the timer belongs to.
+ *
+ * - If the socket is currently owned by a user context, the check is
+ *   retried HZ / 20 jiffies later.
+ * - If keepalive is disabled (SOCK_KEEPOPEN clear) or the socket is in
+ *   TCP_LISTEN / TCP_CLOSE, nothing is done and the timer is not re-armed.
+ * - Otherwise a keepalive probe is posted when neither tx_head nor rx_head
+ *   moved since the snapshot taken when the timer was armed, and the timer
+ *   is re-armed for the next keepalive interval.
+ */
+static void sdp_keepalive_timer(unsigned long data)
+{
+	struct sock *sk = (struct sock *)data;
+	struct sdp_sock *ssk = sdp_sk(sk);
+
+	sdp_dbg(sk, "%s\n", __func__);
+
+	/* Only process if the socket is not in use */
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk)) {
+		/*
+		 * Retry shortly, but re-arm the timer directly instead of
+		 * going through sdp_reset_keepalive_timer(): that helper
+		 * re-snapshots tx_head/rx_head, which would shrink the idle
+		 * check to the retry window and could send a probe on a
+		 * connection that was active during the real interval.
+		 */
+		sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
+		goto out;
+	}
+
+	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_LISTEN ||
+	    sk->sk_state == TCP_CLOSE)
+		goto out;
+
+	/* No send or receive progress since the snapshot: probe the peer. */
+	if (ssk->keepalive_tx_head == ssk->tx_head &&
+	    ssk->keepalive_rx_head == ssk->rx_head)
+		sdp_post_keepalive(ssk);
+
+	sdp_reset_keepalive_timer(sk, sdp_keepalive_time_when(ssk));
+
+out:
+	bh_unlock_sock(sk);
+	/* presumably balances the reference taken when the timer was armed - confirm */
+	sock_put(sk);
+}
+
+/*
+ * Initialise the socket's sk_timer and bind it to the keepalive handler,
+ * with the socket itself as the handler argument.
+ */
+static void sdp_init_timer(struct sock *sk)
+{
+	struct timer_list *keepalive = &sk->sk_timer;
+
+	init_timer(keepalive);
+
+	keepalive->function = sdp_keepalive_timer;
+	keepalive->data = (unsigned long)sk;
+}
+
+/*
+ * Start or stop the keepalive timer of @sk according to @val.
+ *
+ * Does nothing for sockets in TCP_CLOSE or TCP_LISTEN. A non-zero @val
+ * starts the timer only if SOCK_KEEPOPEN is not already set; a zero @val
+ * stops the timer. The SOCK_KEEPOPEN flag itself is not modified here.
+ */
+static void sdp_set_keepalive(struct sock *sk, int val)
+{
+	sdp_dbg(sk, "%s %d\n", __func__, val);
+
+	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
+		return;
+
+	if (!val) {
+		sdp_delete_keepalive_timer(sk);
+		return;
+	}
+
+	/* Keepalive requested: only arm the timer if it was not enabled yet. */
+	if (!sock_flag(sk, SOCK_KEEPOPEN))
+		sdp_start_keepalive_timer(sk);
+}
+
+/*
+ * Arm the keepalive timer of @sk using the idle time that applies to this
+ * socket (per-socket value if set, module default otherwise).
+ */
+void sdp_start_keepalive_timer(struct sock *sk)
+{
+	struct sdp_sock *ssk = sdp_sk(sk);
+	unsigned long idle = sdp_keepalive_time_when(ssk);
+
+	sdp_reset_keepalive_timer(sk, idle);
+}
+
void sdp_reset_sk(struct sock *sk, int rc)
{
struct sdp_sock *ssk = sdp_sk(sk);
sdp_dbg(sk, "%s\n", __func__);
+ sdp_delete_keepalive_timer(sk);
+
sk->sk_shutdown = SHUTDOWN_MASK;
if (sk->sk_state == TCP_LISTEN || sk->sk_state == TCP_SYN_SENT) {
sdp_set_state(sk, TCP_CLOSE);
int err = 0;
sdp_dbg(sk, "%s\n", __func__);
- if (level != SOL_TCP)
- return -ENOPROTOOPT;
-
if (optlen < sizeof(int))
return -EINVAL;
lock_sock(sk);
+ /* SO_KEEPALIVE is really a SOL_SOCKET level option but there
+ * is a problem handling it at that level. In order to start
+ * the keepalive timer on an SDP socket, we must call an SDP
+ * specific routine. Since sock_setsockopt() cannot be modified
+ * to understand SDP, the application must pass that option
+ * through to us. Since SO_KEEPALIVE and TCP_DEFER_ACCEPT both
+ * use the same optname, the level must not be SOL_TCP or SOL_SOCKET
+ */
+ if (level == PF_INET_SDP && optname == SO_KEEPALIVE) {
+ sdp_set_keepalive(sk, val);
+ if (val)
+ sock_set_flag(sk, SOCK_KEEPOPEN);
+ else
+ sock_reset_flag(sk, SOCK_KEEPOPEN);
+ goto out;
+ }
+
+ if (level != SOL_TCP) {
+ err = -ENOPROTOOPT;
+ goto out;
+ }
+
switch (optname) {
case TCP_NODELAY:
if (val) {
sdp_push_pending_frames(sk);
}
break;
+ case TCP_KEEPIDLE:
+ if (val < 1 || val > MAX_TCP_KEEPIDLE)
+ err = -EINVAL;
+ else {
+ ssk->keepalive_time = val * HZ;
+
+ if (sock_flag(sk, SOCK_KEEPOPEN) &&
+ !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+ sdp_reset_keepalive_timer(sk, ssk->keepalive_time);
+ }
+ break;
default:
err = -ENOPROTOOPT;
break;
}
+out:
release_sock(sk);
return err;
}
case TCP_CORK:
val = !!(ssk->nonagle&TCP_NAGLE_CORK);
break;
+ case TCP_KEEPIDLE:
+ val = (ssk->keepalive_time ? : sdp_keepalive_time) / HZ;
+ break;
default:
return -ENOPROTOOPT;
}
sk->sk_destruct = sdp_destruct;
+ sdp_init_timer(sk);
+
sock->ops = &sdp_proto_ops;
sock->state = SS_UNCONNECTED;