From: Amir Vadai Date: Tue, 1 Jun 2010 14:04:59 +0000 (+0300) Subject: sdp: use polling in rx X-Git-Tag: v4.1.12-92~264^2~5^2~177 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=bb22f9995ad3ceec9c89f92c6d2349919b56ed25;p=users%2Fjedix%2Flinux-maple.git sdp: use polling in rx poll on RX whenever possible, use interrupts only before sleeping, during graceful close or in zcopy. Signed-off-by: Amir Vadai --- diff --git a/drivers/infiniband/ulp/sdp/sdp.h b/drivers/infiniband/ulp/sdp/sdp.h index 99e14546e38ae..0052be0606ead 100644 --- a/drivers/infiniband/ulp/sdp/sdp.h +++ b/drivers/infiniband/ulp/sdp/sdp.h @@ -685,10 +685,22 @@ static inline int sdp_tx_ring_slots_left(struct sdp_sock *ssk) return SDP_TX_SIZE - tx_ring_posted(ssk); } +/* Return true if need to send credit update. Rules are: + * - at least half of the RX buffer is available + * - 1.5 * c < p + * - has TX credits + * - has room in tx Q + * + * p = number of posted buffers + * c = current credits count at the peer + */ static inline int credit_update_needed(struct sdp_sock *ssk) { int c; + if (rx_ring_posted(ssk) < (SDP_RX_SIZE >> 1)) + return 0; + c = remote_credits(ssk); if (likely(c > SDP_MIN_TX_CREDITS)) c += c/2; @@ -718,6 +730,7 @@ struct sdpstats { u32 send_wait_for_mem; u32 send_miss_no_credits; u32 rx_poll_miss; + u32 rx_poll_hit; u32 tx_poll_miss; u32 tx_poll_hit; u32 tx_poll_busy; @@ -814,6 +827,7 @@ void sdp_do_posts(struct sdp_sock *ssk); void sdp_rx_comp_full(struct sdp_sock *ssk); void sdp_remove_large_sock(const struct sdp_sock *ssk); void sdp_handle_disconn(struct sock *sk); +int sdp_process_rx(struct sdp_sock *ssk); /* sdp_zcopy.c */ int sdp_sendmsg_zcopy(struct kiocb *iocb, struct sock *sk, struct iovec *iov); diff --git a/drivers/infiniband/ulp/sdp/sdp_main.c b/drivers/infiniband/ulp/sdp/sdp_main.c index f8a154b825294..70679d6276933 100644 --- a/drivers/infiniband/ulp/sdp/sdp_main.c +++ b/drivers/infiniband/ulp/sdp/sdp_main.c @@ -87,9 +87,7 @@ SDP_MODPARAM_INT(sdp_data_debug_level, 0, "Enable data path debug tracing if > 0."); #endif -SDP_MODPARAM_SINT(recv_poll_hit, -1, "How many times recv poll helped."); -SDP_MODPARAM_SINT(recv_poll_miss, -1, "How many times recv poll missed."); -SDP_MODPARAM_SINT(recv_poll, 1000, "How many times to poll recv."); +SDP_MODPARAM_SINT(recv_poll, 10, "How many msec to poll recv."); SDP_MODPARAM_SINT(sdp_keepalive_time, SDP_KEEPALIVE_TIME, "Default idle time in seconds before keepalive probe sent."); static int sdp_bzcopy_thresh = 0; @@ -553,6 +551,8 @@ static void sdp_send_disconnect(struct sock *sk) sdp_sk(sk)->sdp_disconnect = 1; sdp_post_sends(sdp_sk(sk), 0); + + sdp_arm_rx_cq(sk); } /* @@ -1328,6 +1328,9 @@ static int sdp_getsockopt(struct sock *sk, int level, int optname, case TCP_KEEPIDLE: val = (ssk->keepalive_time ? : sdp_keepalive_time) / HZ; break; + case TCP_MAXSEG: + val = ssk->xmit_size_goal; + break; case SDP_ZCOPY_THRESH: val = ssk->zcopy_thresh; break; @@ -1347,14 +1350,17 @@ static int sdp_getsockopt(struct sock *sk, int level, int optname, static inline int poll_recv_cq(struct sock *sk) { - int i; - for (i = 0; i < recv_poll; ++i) { - if (!skb_queue_empty(&sk->sk_receive_queue)) { - ++recv_poll_hit; + unsigned long jiffies_end = jiffies + recv_poll * HZ / 1000; + + sdp_prf(sk, NULL, "polling recv"); + + while (jiffies <= jiffies_end) { + if (sdp_process_rx(sdp_sk(sk))) { + SDPSTATS_COUNTER_INC(rx_poll_hit); return 0; } } - ++recv_poll_miss; + SDPSTATS_COUNTER_INC(rx_poll_miss); return 1; } @@ -1796,6 +1802,9 @@ int sdp_tx_wait_memory(struct sdp_sock *ssk, long *timeo_p, int *credits_needed) posts_handler_put(ssk); + /* Before going to sleep, make sure no credit update is missed */ + sdp_arm_rx_cq(sk); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; @@ -1817,10 +1826,11 @@ int sdp_tx_wait_memory(struct sdp_sock *ssk, long *timeo_p, int *credits_needed) !(sk->sk_shutdown & SEND_SHUTDOWN) && !ssk->tx_compl_pending && sk_stream_memory_free(sk) && + tx_credits(ssk) > SDP_MIN_TX_CREDITS && vm_wait); } - sdp_prf1(sk, NULL, "Woke up"); + sdp_prf(sk, NULL, "Woke up. memfree: %d", sk_stream_memory_free(sk)); sk->sk_write_pending--; posts_handler_get(ssk); @@ -1946,12 +1956,17 @@ new_segment: * we stop sending once we run out of remote * receive credits. */ - if (bz) { - if (tx_slots_free(ssk) < bz->busy) - goto wait_for_sndbuf; - } else { - if (!sk_stream_memory_free(sk)) +#define can_not_tx(__bz) (\ + ( __bz && tx_slots_free(ssk) < __bz->busy) || \ + (!__bz && !sk_stream_memory_free(sk))) + if (unlikely(can_not_tx(bz))) { + if (!poll_recv_cq(sk)) { + sdp_do_posts(ssk); + } + if ((can_not_tx(bz))) { + sdp_arm_rx_cq(sk); goto wait_for_sndbuf; + } } skb = sdp_alloc_skb_data(sk, 0); @@ -2028,7 +2043,7 @@ new_segment: wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: - sdp_prf(sk, skb, "wait for mem"); + sdp_prf(sk, skb, "wait for mem. credits: %d", tx_credits(ssk)); SDPSTATS_COUNTER_INC(send_wait_for_mem); if (copied) sdp_push(sk, ssk, flags & ~MSG_MORE); @@ -2050,12 +2065,11 @@ out: sdp_auto_moderation(ssk); - posts_handler_put(ssk); + err = copied; - release_sock(sk); + sdp_dbg_data(sk, "copied: 0x%x\n", copied); - sdp_dbg_data(sk, "copied: 0x%x\n", copied); - return copied; + goto fin; do_fault: sdp_prf(sk, skb, "prepare fault"); @@ -2074,7 +2088,9 @@ out_err: if (bz) bz = sdp_bz_cleanup(bz); err = sk_stream_error(sk, flags, err); + sdp_dbg_data(sk, "err: %d\n", err); +fin: posts_handler_put(ssk); release_sock(sk); @@ -2293,6 +2309,9 @@ sdp_mid_data: } rc = poll_recv_cq(sk); + if (!rc) { + sdp_do_posts(ssk); + } if (copied >= target && !recv_poll) { /* Do not sleep, just process backlog. */ @@ -2300,6 +2319,8 @@ sdp_mid_data: lock_sock(sk); } else if (rc) { sdp_dbg_data(sk, "sk_wait_data %ld\n", timeo); + sdp_prf(sk, NULL, "giving up polling"); + sdp_arm_rx_cq(sk); posts_handler_put(ssk); @@ -2521,7 +2542,14 @@ static unsigned int sdp_poll(struct file *file, struct socket *socket, struct sock *sk = socket->sk; struct sdp_sock *ssk = sdp_sk(sk); - sdp_dbg_data(socket->sk, "%s\n", __func__); + sdp_dbg_data(sk, "%s\n", __func__); + + if (sk->sk_state == TCP_ESTABLISHED) { + sdp_prf(sk, NULL, "polling"); + if (poll_recv_cq(sk)) { + sdp_arm_rx_cq(sk); + } + } mask = datagram_poll(file, socket, wait); diff --git a/drivers/infiniband/ulp/sdp/sdp_proc.c b/drivers/infiniband/ulp/sdp/sdp_proc.c index 965104cb27795..d10ed6e9bf8f1 100644 --- a/drivers/infiniband/ulp/sdp/sdp_proc.c +++ b/drivers/infiniband/ulp/sdp/sdp_proc.c @@ -303,6 +303,7 @@ static int sdpstats_seq_show(struct seq_file *seq, void *v) SDPSTATS_COUNTER_GET(send_miss_no_credits)); seq_printf(seq, "rx_poll_miss \t\t: %d\n", SDPSTATS_COUNTER_GET(rx_poll_miss)); + seq_printf(seq, "rx_poll_hit \t\t: %d\n", SDPSTATS_COUNTER_GET(rx_poll_hit)); seq_printf(seq, "tx_poll_miss \t\t: %d\n", SDPSTATS_COUNTER_GET(tx_poll_miss)); seq_printf(seq, "tx_poll_busy \t\t: %d\n", SDPSTATS_COUNTER_GET(tx_poll_busy)); seq_printf(seq, "tx_poll_hit \t\t: %d\n", SDPSTATS_COUNTER_GET(tx_poll_hit)); diff --git a/drivers/infiniband/ulp/sdp/sdp_rx.c b/drivers/infiniband/ulp/sdp/sdp_rx.c index d85210532d8a4..617ec08617c64 100644 --- a/drivers/infiniband/ulp/sdp/sdp_rx.c +++ b/drivers/infiniband/ulp/sdp/sdp_rx.c @@ -406,7 +406,7 @@ static struct sk_buff *sdp_recv_completion(struct sdp_sock *ssk, int id) struct sk_buff *skb; if (unlikely(id != ring_tail(ssk->rx_ring))) { - printk(KERN_WARNING "Bogus recv completion id %d tail %d\n", + sdp_warn(&ssk->isk.sk, "Bogus recv completion id %d tail %d\n", id, ring_tail(ssk->rx_ring)); return NULL; } @@ -521,7 +521,7 @@ static int sdp_process_rx_skb(struct sdp_sock *ssk, struct sk_buff *skb) if (mseq_ack >= ssk->nagle_last_unacked) ssk->nagle_last_unacked = 0; - sdp_prf1(&ssk->isk.sk, skb, "RX %s +%d c:%d->%d mseq:%d ack:%d", + sdp_prf1(&ssk->isk.sk, skb, "RX: %s +%d c:%d->%d mseq:%d ack:%d", mid2str(h->mid), ntohs(h->bufs), credits_before, tx_credits(ssk), ntohl(h->mseq), ntohl(h->mseq_ack)); @@ -581,7 +581,6 @@ static int sdp_process_rx_skb(struct sdp_sock *ssk, struct sk_buff *skb) return 0; } -/* called only from irq */ static struct sk_buff *sdp_process_rx_wc(struct sdp_sock *ssk, struct ib_wc *wc) { @@ -670,7 +669,6 @@ static void sdp_bzcopy_write_space(struct sdp_sock *ssk) } } -/* only from interrupt. */ static int sdp_poll_rx_cq(struct sdp_sock *ssk) { struct ib_cq *cq = ssk->rx_ring.cq; @@ -742,6 +740,8 @@ void sdp_do_posts(struct sdp_sock *ssk) return; } + sdp_process_rx(sdp_sk(sk)); + while ((skb = skb_dequeue(&ssk->rx_ctl_q))) sdp_process_rx_ctl_skb(ssk, skb); @@ -790,22 +790,23 @@ static void sdp_rx_irq(struct ib_cq *cq, void *cq_context) tasklet_hi_schedule(&ssk->rx_ring.tasklet); } -static void sdp_process_rx(unsigned long data) +int sdp_process_rx(struct sdp_sock *ssk) { - struct sdp_sock *ssk = (struct sdp_sock *)data; struct sock *sk = &ssk->isk.sk; int wc_processed = 0; int credits_before; if (!rx_ring_trylock(&ssk->rx_ring)) { sdp_dbg(&ssk->isk.sk, "ring destroyed. not polling it\n"); - return; + return 0; } credits_before = tx_credits(ssk); wc_processed = sdp_poll_rx_cq(ssk); - sdp_prf(&ssk->isk.sk, NULL, "processed %d", wc_processed); + + if (wc_processed) + sdp_prf(&ssk->isk.sk, NULL, "processed %d", wc_processed); if (wc_processed) { sdp_prf(&ssk->isk.sk, NULL, "credits: %d -> %d", @@ -824,9 +825,19 @@ static void sdp_process_rx(unsigned long data) queue_work(rx_comp_wq, &ssk->rx_comp_work); } } - sdp_arm_rx_cq(sk); + + if (ssk->sdp_disconnect || ssk->tx_sa) + sdp_arm_rx_cq(sk); rx_ring_unlock(&ssk->rx_ring); + + return wc_processed; +} + +static void sdp_process_rx_tasklet(unsigned long data) +{ + struct sdp_sock *ssk = (struct sdp_sock *)data; + sdp_process_rx(ssk); } static void sdp_rx_ring_purge(struct sdp_sock *ssk) @@ -882,7 +893,7 @@ int sdp_rx_ring_create(struct sdp_sock *ssk, struct ib_device *device) sdp_sk(&ssk->isk.sk)->rx_ring.cq = rx_cq; INIT_WORK(&ssk->rx_comp_work, sdp_rx_comp_work); - tasklet_init(&ssk->rx_ring.tasklet, sdp_process_rx, + tasklet_init(&ssk->rx_ring.tasklet, sdp_process_rx_tasklet, (unsigned long) ssk); sdp_arm_rx_cq(&ssk->isk.sk); diff --git a/drivers/infiniband/ulp/sdp/sdp_tx.c b/drivers/infiniband/ulp/sdp/sdp_tx.c index fbbc2d4284641..13c77a2dda5f5 100644 --- a/drivers/infiniband/ulp/sdp/sdp_tx.c +++ b/drivers/infiniband/ulp/sdp/sdp_tx.c @@ -107,9 +107,9 @@ void sdp_post_send(struct sdp_sock *ssk, struct sk_buff *skb) h->mseq = htonl(mseq); h->mseq_ack = htonl(mseq_ack(ssk)); - sdp_prf1(&ssk->isk.sk, skb, "TX: %s bufs: %d mseq:%ld ack:%d", + sdp_prf(&ssk->isk.sk, skb, "TX: %s bufs: %d mseq:%ld ack:%d c: %d", mid2str(h->mid), rx_ring_posted(ssk), mseq, - ntohl(h->mseq_ack)); + ntohl(h->mseq_ack), tx_credits(ssk)); SDP_DUMP_PACKET(&ssk->isk.sk, "TX", skb, h); diff --git a/drivers/infiniband/ulp/sdp/sdp_zcopy.c b/drivers/infiniband/ulp/sdp/sdp_zcopy.c index 36660ed03439a..6cad48a63b631 100644 --- a/drivers/infiniband/ulp/sdp/sdp_zcopy.c +++ b/drivers/infiniband/ulp/sdp/sdp_zcopy.c @@ -205,7 +205,7 @@ static int sdp_wait_rdmardcompl(struct sdp_sock *ssk, long *timeo_p, ssk->rx_sa && (tx_sa->bytes_acked < tx_sa->bytes_sent) && vm_wait); - sdp_dbg_data(&ssk->isk.sk, "woke up sleepers\n"); + sdp_prf(&ssk->isk.sk, NULL, "woke up sleepers"); posts_handler_get(ssk); @@ -664,6 +664,8 @@ static int do_sdp_sendmsg_zcopy(struct sock *sk, struct tx_srcavail_state *tx_sa goto err_abort_send; } + sdp_arm_rx_cq(sk); + rc = sdp_wait_rdmardcompl(ssk, timeo, 0); if (unlikely(rc)) { enum tx_sa_flag f = tx_sa->abort_flags;