www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
SDP: Add keepalive support
author Jim Mott <jim@mellanox.com>
Thu, 11 Oct 2007 16:33:09 +0000 (09:33 -0700)
committer Mukesh Kacker <mukesh.kacker@oracle.com>
Tue, 6 Oct 2015 12:04:09 +0000 (05:04 -0700)
Signed-off-by: Jim Mott <jim@mellanox.com>
drivers/infiniband/ulp/sdp/sdp.h
drivers/infiniband/ulp/sdp/sdp_bcopy.c
drivers/infiniband/ulp/sdp/sdp_cma.c
drivers/infiniband/ulp/sdp/sdp_main.c

index b40a3503f1cc225c38d3c02c5b131536fa149337..5366df4d6591bc0f3cae7367fdc8b9d45644fc60 100644 (file)
@@ -42,6 +42,7 @@ extern int sdp_data_debug_level;
 #define SDP_RESOLVE_TIMEOUT 1000
 #define SDP_ROUTE_TIMEOUT 1000
 #define SDP_RETRY_COUNT 5
+#define SDP_KEEPALIVE_TIME (120 * 60 * HZ) /* default keepalive idle period: 120 minutes, scaled by HZ */
 
 #define SDP_TX_SIZE 0x40
 #define SDP_RX_SIZE 0x40
@@ -51,6 +52,7 @@ extern int sdp_data_debug_level;
 #define SDP_NUM_WC 4
 
 #define SDP_OP_RECV 0x800000000LL
+#define SDP_OP_SEND 0x400000000LL /* OR-ed into the wr_id of sends posted by sdp_post_send() */
 
 enum sdp_mid {
        SDP_MID_HELLO = 0x0,
@@ -115,6 +117,12 @@ struct sdp_sock {
 
        int time_wait;
 
+       unsigned keepalive_time;
+
+       /* tx_head/rx_head when keepalive timer started */
+       unsigned keepalive_tx_head;
+       unsigned keepalive_rx_head;
+
        /* Data below will be reset on error */
        /* rdma specific */
        struct rdma_cm_id *id;
@@ -219,5 +227,7 @@ void sdp_urg(struct sdp_sock *ssk, struct sk_buff *skb);
 void sdp_add_sock(struct sdp_sock *ssk);
 void sdp_remove_sock(struct sdp_sock *ssk);
 void sdp_remove_large_sock(void);
+void sdp_post_keepalive(struct sdp_sock *ssk);
+void sdp_start_keepalive_timer(struct sock *sk);
 
 #endif
index d33525df746fee67cfa5875db0461e4756c285d2..0ae8ec29cbe9be24303acd38834891347ebc0108 100644 (file)
@@ -60,6 +60,12 @@ static int max_large_sockets = 1000;
 module_param_named(max_large_sockets, max_large_sockets, int, 0644);
 MODULE_PARM_DESC(max_large_sockets, "Max number of large sockets (32k buffers).");
 
+/* Bump a statistics counter; wrapped so it behaves as a single statement. */
+#define sdp_cnt(var) do { ++(var); } while (0)
+
+/* Running count of keepalive probes, exposed as a module parameter. */
+static unsigned int sdp_keepalive_probes_sent;
+
+module_param_named(sdp_keepalive_probes_sent, sdp_keepalive_probes_sent,
+                   uint, 0644);
+MODULE_PARM_DESC(sdp_keepalive_probes_sent,
+                 "Total number of keepalive probes sent.");
+
 static int curr_large_sockets = 0;
 atomic_t sdp_current_mem_usage;
 spinlock_t sdp_large_sockets_lock;
@@ -107,6 +113,31 @@ static void sdp_fin(struct sock *sk)
        }
 }
 
+/*
+ * Post a keepalive probe on the socket's queue pair.
+ *
+ * The probe is an IB_WR_RDMA_WRITE work request with no scatter/gather
+ * entries.  Its wr_id is 0, so it does not carry the SDP_OP_SEND tag that
+ * sdp_post_send() puts on regular sends.
+ *
+ * If ib_post_send() fails, the socket error is set to -ECONNRESET and
+ * sleepers on ssk->wq are woken.  The probes-sent counter is only bumped
+ * when the work request was actually accepted.
+ */
+void sdp_post_keepalive(struct sdp_sock *ssk)
+{
+       int rc;
+       struct ib_send_wr wr, *bad_wr;
+
+       sdp_dbg(&ssk->isk.sk, "%s\n", __func__);
+
+       memset(&wr, 0, sizeof(wr));
+
+       /* Already zeroed by memset(); spelled out to document the probe. */
+       wr.next    = NULL;
+       wr.wr_id   = 0;
+       wr.sg_list = NULL;
+       wr.num_sge = 0;
+       wr.opcode  = IB_WR_RDMA_WRITE;
+
+       rc = ib_post_send(ssk->qp, &wr, &bad_wr);
+       if (rc) {
+               sdp_dbg(&ssk->isk.sk, "ib_post_keepalive failed with status %d.\n", rc);
+               sdp_set_error(&ssk->isk.sk, -ECONNRESET);
+               wake_up(&ssk->wq);
+               /* Nothing went out, so do not count it as a sent probe. */
+               return;
+       }
+
+       sdp_cnt(sdp_keepalive_probes_sent);
+}
+
 void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb, u8 mid)
 {
        struct sdp_buf *tx_req;
@@ -158,7 +189,7 @@ void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb, u8 mid)
        }
 
        ssk->tx_wr.next = NULL;
-       ssk->tx_wr.wr_id = ssk->tx_head;
+       ssk->tx_wr.wr_id = ssk->tx_head | SDP_OP_SEND;
        ssk->tx_wr.sg_list = ssk->ibsge;
        ssk->tx_wr.num_sge = frags + 1;
        ssk->tx_wr.opcode = IB_WR_SEND;
@@ -589,7 +620,7 @@ static void sdp_handle_wc(struct sdp_sock *ssk, struct ib_wc *wc)
                                __kfree_skb(skb);
                        }
                }
-       } else {
+       } else if (likely(wc->wr_id & SDP_OP_SEND)) {
                skb = sdp_send_completion(ssk, wc->wr_id);
                if (unlikely(!skb))
                        return;
@@ -605,6 +636,22 @@ static void sdp_handle_wc(struct sdp_sock *ssk, struct ib_wc *wc)
                }
 
                sk_stream_write_space(&ssk->isk.sk);
+       } else {
+               sdp_cnt(sdp_keepalive_probes_sent);
+
+               if (likely(!wc->status))
+                       return;
+
+               sdp_dbg(&ssk->isk.sk, " %s consumes KEEPALIVE status %d\n",
+                       __func__, wc->status);
+
+               if (wc->status == IB_WC_WR_FLUSH_ERR)
+                       return;
+
+               sdp_set_error(&ssk->isk.sk, -ECONNRESET);
+               wake_up(&ssk->wq);
+
+               return;
        }
 
        if (likely(!wc->status)) {
index ffa90e75892290f85e069f9ca48c7a6b8201b3a8..8da10298a5d35739998a9e88a973652b89a8d15a 100644 (file)
@@ -268,8 +268,8 @@ static int sdp_response_handler(struct sock *sk, struct rdma_cm_id *id,
 
        sk->sk_state = TCP_ESTABLISHED;
 
-       /* TODO: If SOCK_KEEPOPEN set, need to reset and start
-          keepalive timer here */
+       if (sock_flag(sk, SOCK_KEEPOPEN))
+               sdp_start_keepalive_timer(sk);
 
        if (sock_flag(sk, SOCK_DEAD))
                return 0;
@@ -307,8 +307,8 @@ int sdp_connected_handler(struct sock *sk, struct rdma_cm_event *event)
 
        sk->sk_state = TCP_ESTABLISHED;
 
-       /* TODO: If SOCK_KEEPOPEN set, need to reset and start
-          keepalive timer here */
+       if (sock_flag(sk, SOCK_KEEPOPEN))
+               sdp_start_keepalive_timer(sk);
 
        if (sock_flag(sk, SOCK_DEAD))
                return 0;
index 1570c8155c59f72bf874a24d58cdb664c735ef52..0cbeccba00c9ef1d229dc1b7d0d43b0bc8d51e36 100644 (file)
@@ -117,6 +117,11 @@ static int send_poll_thresh = 8192;
 module_param_named(send_poll_thresh, send_poll_thresh, int, 0644);
 MODULE_PARM_DESC(send_poll_thresh, "Send message size thresh hold over which to start polling.");
 
+/*
+ * Default keepalive idle period, held in jiffies: SDP_KEEPALIVE_TIME is a
+ * multiple of HZ and the value is added to jiffies unscaled when the timer
+ * is armed.  Used for sockets that have no keepalive_time of their own.
+ */
+static unsigned int sdp_keepalive_time = SDP_KEEPALIVE_TIME;
+
+module_param_named(sdp_keepalive_time, sdp_keepalive_time, uint, 0644);
+MODULE_PARM_DESC(sdp_keepalive_time, "Default idle time in jiffies before keepalive probe sent.");
+
 struct workqueue_struct *sdp_workqueue;
 
 static struct list_head sock_list;
@@ -124,6 +129,11 @@ static spinlock_t sock_list_lock;
 
 DEFINE_RWLOCK(device_removal_lock);
 
+/*
+ * Idle interval to use for @ssk: its own keepalive_time when that is
+ * non-zero, otherwise the module-wide sdp_keepalive_time default.
+ */
+static inline unsigned int sdp_keepalive_time_when(const struct sdp_sock *ssk)
+{
+       unsigned int own = ssk->keepalive_time;
+
+       if (own)
+               return own;
+
+       return sdp_keepalive_time;
+}
+
 inline void sdp_add_sock(struct sdp_sock *ssk)
 {
        spin_lock_irq(&sock_list_lock);
@@ -221,6 +231,86 @@ static void sdp_destroy_qp(struct sdp_sock *ssk)
        kfree(ssk->tx_ring);
 }
 
+
+/*
+ * Record the current tx_head/rx_head as the keepalive baseline and (re)arm
+ * sk->sk_timer to expire @len jiffies from now.
+ */
+static void sdp_reset_keepalive_timer(struct sock *sk, unsigned long len)
+{
+       struct sdp_sock *sdp = sdp_sk(sk);
+
+       sdp_dbg(sk, "%s\n", __func__);
+
+       /* Baseline that the timer callback later compares against. */
+       sdp->keepalive_rx_head = sdp->rx_head;
+       sdp->keepalive_tx_head = sdp->tx_head;
+
+       sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
+}
+
+/*
+ * Clear the recorded tx_head/rx_head baseline and stop sk->sk_timer.
+ */
+static void sdp_delete_keepalive_timer(struct sock *sk)
+{
+       struct sdp_sock *sdp = sdp_sk(sk);
+
+       sdp_dbg(sk, "%s\n", __func__);
+
+       sdp->keepalive_rx_head = 0;
+       sdp->keepalive_tx_head = 0;
+
+       sk_stop_timer(sk, &sk->sk_timer);
+}
+
+/*
+ * Keepalive timer callback; @data is the struct sock that owns the timer.
+ *
+ * - If the socket is currently owned by a user context, retry in HZ / 20
+ *   jiffies without touching the recorded tx_head/rx_head baseline.
+ * - If SOCK_KEEPOPEN is clear, or the socket is in TCP_LISTEN or TCP_CLOSE,
+ *   return without re-arming the timer.
+ * - Otherwise post a keepalive probe when neither tx_head nor rx_head moved
+ *   since the baseline was taken, then re-arm for a full idle interval
+ *   (which also records a fresh baseline).
+ */
+static void sdp_keepalive_timer(unsigned long data)
+{
+       struct sock *sk = (struct sock *)data;
+       struct sdp_sock *ssk = sdp_sk(sk);
+
+       sdp_dbg(sk, "%s\n", __func__);
+
+       /* Only process if the socket is not in use */
+       bh_lock_sock(sk);
+       if (sock_owned_by_user(sk)) {
+               /*
+                * Re-arm directly rather than via sdp_reset_keepalive_timer():
+                * that helper overwrites the tx_head/rx_head baseline, which
+                * would forget traffic already seen in this interval and
+                * could trigger a needless probe on the retry.
+                */
+               sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
+               goto out;
+       }
+
+       if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_LISTEN ||
+           sk->sk_state == TCP_CLOSE)
+               goto out;
+
+       /* No send or receive progress since the baseline: connection is idle. */
+       if (ssk->keepalive_tx_head == ssk->tx_head &&
+           ssk->keepalive_rx_head == ssk->rx_head)
+               sdp_post_keepalive(ssk);
+
+       sdp_reset_keepalive_timer(sk, sdp_keepalive_time_when(ssk));
+
+out:
+       bh_unlock_sock(sk);
+       /* NOTE(review): presumably balances a reference taken when the timer
+        * was armed -- confirm against sk_reset_timer(). */
+       sock_put(sk);
+}
+
+/*
+ * Initialise sk->sk_timer and point it at sdp_keepalive_timer(), passing
+ * the socket itself as the callback argument.
+ */
+static void sdp_init_timer(struct sock *sk)
+{
+       struct timer_list *timer = &sk->sk_timer;
+
+       init_timer(timer);
+
+       timer->data = (unsigned long)sk;
+       timer->function = sdp_keepalive_timer;
+}
+
+/*
+ * Apply a keepalive on/off request (@val non-zero means on) to @sk.
+ *
+ * Does nothing for sockets in the CLOSE or LISTEN state.  Turning keepalive
+ * off stops the timer; turning it on starts the timer only when
+ * SOCK_KEEPOPEN is not already set.  The flag itself is not modified here.
+ */
+static void sdp_set_keepalive(struct sock *sk, int val)
+{
+       sdp_dbg(sk, "%s %d\n", __func__, val);
+
+       if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
+               return;
+
+       if (!val) {
+               sdp_delete_keepalive_timer(sk);
+               return;
+       }
+
+       if (!sock_flag(sk, SOCK_KEEPOPEN))
+               sdp_start_keepalive_timer(sk);
+}
+
+/*
+ * Arm the keepalive timer of @sk for one idle interval, as chosen by
+ * sdp_keepalive_time_when().
+ */
+void sdp_start_keepalive_timer(struct sock *sk)
+{
+       struct sdp_sock *ssk = sdp_sk(sk);
+       unsigned int idle = sdp_keepalive_time_when(ssk);
+
+       sdp_reset_keepalive_timer(sk, idle);
+}
+
 void sdp_reset_sk(struct sock *sk, int rc)
 {
        struct sdp_sock *ssk = sdp_sk(sk);
@@ -365,6 +455,8 @@ static void sdp_close(struct sock *sk, long timeout)
 
        sdp_dbg(sk, "%s\n", __func__);
 
+       sdp_delete_keepalive_timer(sk);
+
        sk->sk_shutdown = SHUTDOWN_MASK;
        if (sk->sk_state == TCP_LISTEN || sk->sk_state == TCP_SYN_SENT) {
                sdp_set_state(sk, TCP_CLOSE);
@@ -820,9 +912,6 @@ static int sdp_setsockopt(struct sock *sk, int level, int optname,
        int err = 0;
 
        sdp_dbg(sk, "%s\n", __func__);
-       if (level != SOL_TCP)
-               return -ENOPROTOOPT;
-
        if (optlen < sizeof(int))
                return -EINVAL;
 
@@ -831,6 +920,28 @@ static int sdp_setsockopt(struct sock *sk, int level, int optname,
 
        lock_sock(sk);
 
+       /* SO_KEEPALIVE is really a SOL_SOCKET level option but there
+        * is a problem handling it at that level.  In order to start
+        * the keepalive timer on an SDP socket, we must call an SDP
+        * specific routine.  Since sock_setsockopt() cannot be modified
+        * to understand SDP, the application must pass that option
+        * through to us.  Since SO_KEEPALIVE and TCP_DEFER_ACCEPT both
+        * use the same optname, the level must not be SOL_TCP or SOL_SOCKET.
+        */
+       if (level == PF_INET_SDP && optname == SO_KEEPALIVE) {
+               sdp_set_keepalive(sk, val);
+               if (val)
+                       sock_set_flag(sk, SOCK_KEEPOPEN);
+               else
+                       sock_reset_flag(sk, SOCK_KEEPOPEN);
+               goto out;
+       }
+
+       if (level != SOL_TCP) {
+               err = -ENOPROTOOPT;
+               goto out;
+       }
+
        switch (optname) {
        case TCP_NODELAY:
                if (val) {
@@ -869,11 +980,23 @@ static int sdp_setsockopt(struct sock *sk, int level, int optname,
                        sdp_push_pending_frames(sk);
                }
                break;
+       case TCP_KEEPIDLE:
+               if (val < 1 || val > MAX_TCP_KEEPIDLE)
+                       err = -EINVAL;
+               else {
+                       ssk->keepalive_time = val * HZ;
+
+                       if (sock_flag(sk, SOCK_KEEPOPEN) &&
+                           !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+                               sdp_reset_keepalive_timer(sk, ssk->keepalive_time);
+               }
+               break;
        default:
                err = -ENOPROTOOPT;
                break;
        }
 
+out:
        release_sock(sk);
        return err;
 }
@@ -906,6 +1029,9 @@ static int sdp_getsockopt(struct sock *sk, int level, int optname,
        case TCP_CORK:
                val = !!(ssk->nonagle&TCP_NAGLE_CORK);
                break;
+       case TCP_KEEPIDLE:
+               val = (ssk->keepalive_time ? : sdp_keepalive_time) / HZ;
+               break;
        default:
                return -ENOPROTOOPT;
        }
@@ -1692,6 +1818,8 @@ static int sdp_create_socket(struct socket *sock, int protocol)
 
        sk->sk_destruct = sdp_destruct;
 
+       sdp_init_timer(sk);
+
        sock->ops = &sdp_proto_ops;
        sock->state = SS_UNCONNECTED;