From: Amir Vadai
Date: Thu, 10 Jun 2010 08:32:50 +0000 (+0300)
Subject: sdp: Fix bad handling of small rcvbuf size in zcopy
X-Git-Tag: v4.1.12-92~264^2~5^2~171
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=fb8b1936b0e48a27b33706266daa425b0ebd3752;p=users%2Fjedix%2Flinux-maple.git

sdp: Fix bad handling of small rcvbuf size in zcopy

- Do not send RdmaRdComp when no buffers are available
- Same for SendSm
- Post at least 3 buffers in RX to keep the minimal number of credits
  (an illustrative sketch of the resulting posting rule follows the diff)
- Make purge_tx_ring ignore WRs used by RDMA
- Fix a typo: reschedule the tx_cq_poll timer according to the TX queue,
  not the RX queue (!)
- Allow credit updates even when less than half of the RX queue is filled

Signed-off-by: Amir Vadai
---

diff --git a/drivers/infiniband/ulp/sdp/sdp.h b/drivers/infiniband/ulp/sdp/sdp.h
index 3bfa90ab69d02..1fe68ce60f239 100644
--- a/drivers/infiniband/ulp/sdp/sdp.h
+++ b/drivers/infiniband/ulp/sdp/sdp.h
@@ -700,9 +700,6 @@ static inline int credit_update_needed(struct sdp_sock *ssk)
 {
 	int c;
 
-	if (rx_ring_posted(ssk) < (SDP_RX_SIZE >> 1))
-		return 0;
-
 	c = remote_credits(ssk);
 	if (likely(c > SDP_MIN_TX_CREDITS))
 		c += c/2;
@@ -781,6 +778,7 @@ static inline void sdp_cleanup_sdp_buf(struct sdp_sock *ssk, struct sdp_buf *sbu
 	struct ib_device *dev = ssk->ib_device;
 
 	skb = sbuf->skb;
+	sbuf->skb = NULL;
 
 	ib_dma_unmap_single(dev, sbuf->mapping[0], head_size, dir);
 
@@ -803,7 +801,7 @@ void sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk);
 void sdp_reset_sk(struct sock *sk, int rc);
 void sdp_reset(struct sock *sk);
 int sdp_tx_wait_memory(struct sdp_sock *ssk, long *timeo_p, int *credits_needed);
-void skb_entail(struct sock *sk, struct sdp_sock *ssk, struct sk_buff *skb);
+void sdp_skb_entail(struct sock *sk, struct sk_buff *skb);
 extern rwlock_t device_removal_lock;
 
 /* sdp_proc.c */
@@ -843,7 +841,7 @@ void sdp_handle_rdma_read_compl(struct sdp_sock *ssk, u32 mseq_ack,
 int sdp_handle_rdma_read_cqe(struct sdp_sock *ssk);
 int sdp_rdma_to_iovec(struct sock *sk, struct iovec *iov, struct sk_buff *skb,
 		unsigned long *used);
-int sdp_post_rdma_rd_compl(struct sdp_sock *ssk,
+int sdp_post_rdma_rd_compl(struct sock *sk,
 		struct rx_srcavail_state *rx_sa);
 int sdp_post_sendsm(struct sock *sk);
 void srcavail_cancel_timeout(struct work_struct *work);
diff --git a/drivers/infiniband/ulp/sdp/sdp_main.c b/drivers/infiniband/ulp/sdp/sdp_main.c
index fe8da8aa5fa7e..4ce51568627c0 100644
--- a/drivers/infiniband/ulp/sdp/sdp_main.c
+++ b/drivers/infiniband/ulp/sdp/sdp_main.c
@@ -1436,15 +1436,15 @@ static inline void sdp_push(struct sock *sk, struct sdp_sock *ssk, int flags)
 		sdp_do_posts(sdp_sk(sk));
 }
 
-void skb_entail(struct sock *sk, struct sdp_sock *ssk, struct sk_buff *skb)
+void sdp_skb_entail(struct sock *sk, struct sk_buff *skb)
 {
 	__skb_queue_tail(&sk->sk_write_queue, skb);
 	sk->sk_wmem_queued += skb->truesize;
 	sk_mem_charge(sk, skb->truesize);
 	if (!sk->sk_send_head)
 		sk->sk_send_head = skb;
-	if (ssk->nonagle & TCP_NAGLE_PUSH)
-		ssk->nonagle &= ~TCP_NAGLE_PUSH;
+	if (sdp_sk(sk)->nonagle & TCP_NAGLE_PUSH)
+		sdp_sk(sk)->nonagle &= ~TCP_NAGLE_PUSH;
 }
 
 static inline struct bzcopy_state *sdp_bz_cleanup(struct bzcopy_state *bz)
@@ -1985,7 +1985,7 @@ new_segment:
 						    NETIF_F_HW_CSUM))
 					skb->ip_summed = CHECKSUM_PARTIAL;
 
-				skb_entail(sk, ssk, skb);
+				sdp_skb_entail(sk, skb);
 				copy = size_goal;
 
 				sdp_dbg_data(sk, "created new skb: %p"
@@ -2408,7 +2408,7 @@ skip_copy:
 
 		if (rx_sa) {
-			rc = sdp_post_rdma_rd_compl(ssk, rx_sa);
+			rc = sdp_post_rdma_rd_compl(sk, rx_sa);
 			BUG_ON(rc);
 		}
 
diff --git a/drivers/infiniband/ulp/sdp/sdp_rx.c b/drivers/infiniband/ulp/sdp/sdp_rx.c
index 60ba587344721..2c6f80de8eab8 100644
--- a/drivers/infiniband/ulp/sdp/sdp_rx.c
+++ b/drivers/infiniband/ulp/sdp/sdp_rx.c
@@ -251,35 +251,39 @@ static int sdp_post_recv(struct sdp_sock *ssk)
 static inline int sdp_post_recvs_needed(struct sdp_sock *ssk)
 {
 	struct sock *sk = &ssk->isk.sk;
-	int scale = ssk->rcvbuf_scale;
 	int buffer_size = SDP_SKB_HEAD_SIZE + ssk->recv_frags * PAGE_SIZE;
 	unsigned long max_bytes;
+	unsigned long bytes_in_process;
+	int posted = rx_ring_posted(ssk);
 
-	if (!ssk->qp_active)
+	if (unlikely(!ssk->qp_active))
 		return 0;
 
-	if (top_mem_usage && (top_mem_usage * 0x100000) <
+	if (likely(posted >= SDP_RX_SIZE))
+		return 0;
+
+	if (unlikely(posted < SDP_MIN_TX_CREDITS))
+		return 1;
+
+	/* If rcvbuf is very small, must leave at least 1 skb for data,
+	 * in addition to SDP_MIN_TX_CREDITS */
+	max_bytes = max(sk->sk_rcvbuf, (1 + SDP_MIN_TX_CREDITS) * buffer_size);
+
+	if (!top_mem_usage || (top_mem_usage * 0x100000) >=
 			atomic_read(&sdp_current_mem_usage) * PAGE_SIZE) {
-		scale = 1;
+		max_bytes *= ssk->rcvbuf_scale;
 	}
 
-	max_bytes = sk->sk_rcvbuf * scale;
+	/* Bytes posted to HW */
+	bytes_in_process = (posted - SDP_MIN_TX_CREDITS) * buffer_size;
 
-	if (unlikely(rx_ring_posted(ssk) >= SDP_RX_SIZE))
-		return 0;
-
-	if (likely(rx_ring_posted(ssk) >= SDP_MIN_TX_CREDITS)) {
-		unsigned long bytes_in_process =
-			(rx_ring_posted(ssk) - SDP_MIN_TX_CREDITS) *
-			buffer_size;
-		bytes_in_process += rcv_nxt(ssk) - ssk->copied_seq;
+	/* Bytes waiting in socket RX queue */
+	bytes_in_process += rcv_nxt(ssk) - ssk->copied_seq;
 
-		if (bytes_in_process >= max_bytes) {
-			sdp_prf(sk, NULL,
-				"bytes_in_process:%ld > max_bytes:%ld",
+	if (bytes_in_process >= max_bytes) {
+		sdp_prf(sk, NULL, "bytes_in_process: 0x%lx > max_bytes: 0x%lx",
 				bytes_in_process, max_bytes);
-			return 0;
-		}
+		return 0;
 	}
 
 	return 1;
diff --git a/drivers/infiniband/ulp/sdp/sdp_tx.c b/drivers/infiniband/ulp/sdp/sdp_tx.c
index 13c77a2dda5f5..7dc471dd0b304 100644
--- a/drivers/infiniband/ulp/sdp/sdp_tx.c
+++ b/drivers/infiniband/ulp/sdp/sdp_tx.c
@@ -186,6 +186,8 @@ static struct sk_buff *sdp_send_completion(struct sdp_sock *ssk, int mseq)
 	dev = ssk->ib_device;
 	tx_req = &tx_ring->buffer[mseq & (SDP_TX_SIZE - 1)];
 	skb = tx_req->skb;
+	if (!skb)
+		goto skip; /* This slot was used by RDMA WR */
 	sdp_cleanup_sdp_buf(ssk, tx_req, skb->len - skb->data_len,
 			DMA_TO_DEVICE);
 
@@ -195,6 +197,7 @@ static struct sk_buff *sdp_send_completion(struct sdp_sock *ssk, int mseq)
 	if (BZCOPY_STATE(skb))
 		BZCOPY_STATE(skb)->busy--;
 
+skip:
 	atomic_inc(&tx_ring->tail);
 
 out:
@@ -384,7 +387,7 @@ static void sdp_poll_tx_timeout(unsigned long data)
 	else
 		SDPSTATS_COUNTER_INC(tx_poll_hit);
 
-	inflight = (u32) rx_ring_posted(ssk);
+	inflight = (u32) tx_ring_posted(ssk);
 
 	sdp_prf1(&ssk->isk.sk, NULL, "finished tx proccessing. inflight = %d",
 		tx_ring_posted(ssk));
diff --git a/drivers/infiniband/ulp/sdp/sdp_zcopy.c b/drivers/infiniband/ulp/sdp/sdp_zcopy.c
index 6cad48a63b631..6de390bcab609 100644
--- a/drivers/infiniband/ulp/sdp/sdp_zcopy.c
+++ b/drivers/infiniband/ulp/sdp/sdp_zcopy.c
@@ -98,7 +98,7 @@ static int sdp_post_srcavail(struct sock *sk, struct tx_srcavail_state *tx_sa)
 //	sk->sk_wmem_queued += payload_len;
 //	sk->sk_forward_alloc -= payload_len;
 
-	skb_entail(sk, ssk, skb);
+	sdp_skb_entail(sk, skb);
 
 	ssk->write_seq += payload_len;
 	SDP_SKB_CB(skb)->end_seq += payload_len;
@@ -119,7 +119,7 @@ static int sdp_post_srcavail_cancel(struct sock *sk)
 	sdp_dbg_data(&ssk->isk.sk, "Posting srcavail cancel\n");
 
 	skb = sdp_alloc_skb_srcavail_cancel(sk, 0);
-	skb_entail(sk, ssk, skb);
+	sdp_skb_entail(sk, skb);
 
 	sdp_post_sends(ssk, 0);
 
@@ -277,7 +277,7 @@ static void sdp_wait_rdma_wr_finished(struct sdp_sock *ssk)
 	sdp_dbg_data(sk, "Finished waiting\n");
 }
 
-int sdp_post_rdma_rd_compl(struct sdp_sock *ssk,
+int sdp_post_rdma_rd_compl(struct sock *sk,
 		struct rx_srcavail_state *rx_sa)
 {
 	struct sk_buff *skb;
@@ -286,12 +286,13 @@ int sdp_post_rdma_rd_compl(struct sdp_sock *ssk,
 	if (rx_sa->used <= rx_sa->reported)
 		return 0;
 
-	skb = sdp_alloc_skb_rdmardcompl(&ssk->isk.sk, copied, 0);
+	skb = sdp_alloc_skb_rdmardcompl(sk, copied, 0);
+
+	sdp_skb_entail(sk, skb);
 
 	rx_sa->reported += copied;
 
-	/* TODO: What if no tx_credits available? */
-	sdp_post_send(ssk, skb);
+	sdp_post_sends(sdp_sk(sk), 0);
 
 	return 0;
 }
@@ -300,7 +301,9 @@ int sdp_post_sendsm(struct sock *sk)
 {
 	struct sk_buff *skb = sdp_alloc_skb_sendsm(sk, 0);
 
-	sdp_post_send(sdp_sk(sk), skb);
+	sdp_skb_entail(sk, skb);
+
+	sdp_post_sends(sdp_sk(sk), 0);
 
 	return 0;
 }
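
The standalone sketch below illustrates the RX posting rule that the sdp_rx.c
hunk above introduces; it is not part of the patch. The SDP_RX_SIZE value, the
value 3 for SDP_MIN_TX_CREDITS (matching the "post at least 3 buffers" note),
and the max_ul()/post_recvs_needed() helpers are assumptions made purely for
illustration.

#include <stdio.h>

#define SDP_RX_SIZE		256	/* assumed RX ring size */
#define SDP_MIN_TX_CREDITS	3	/* assumed minimal credit count */

static unsigned long max_ul(unsigned long a, unsigned long b)
{
	return a > b ? a : b;
}

/*
 * posted:      RX buffers currently posted to the HW ring
 * buffer_size: bytes per RX buffer (head + recv_frags pages)
 * rcvbuf:      the socket's sk_rcvbuf
 * unread:      bytes received but not yet copied (rcv_nxt - copied_seq)
 */
static int post_recvs_needed(int posted, unsigned long buffer_size,
			     unsigned long rcvbuf, unsigned long unread)
{
	unsigned long max_bytes, bytes_in_process;

	if (posted >= SDP_RX_SIZE)		/* ring is full */
		return 0;

	if (posted < SDP_MIN_TX_CREDITS)	/* top up to minimal credits */
		return 1;

	/* A tiny rcvbuf still leaves one skb for data on top of the
	 * minimal credits - this is the small-rcvbuf fix. */
	max_bytes = max_ul(rcvbuf, (1 + SDP_MIN_TX_CREDITS) * buffer_size);

	bytes_in_process = (posted - SDP_MIN_TX_CREDITS) * buffer_size;
	bytes_in_process += unread;

	return bytes_in_process < max_bytes;
}

int main(void)
{
	int posted;

	/* 4 KiB buffers with a 1-byte rcvbuf: the max() floor keeps
	 * posting going well past the credit minimum instead of
	 * stalling just above it. */
	for (posted = 0; posted <= 8; posted++)
		printf("posted=%d -> post more: %d\n",
		       posted, post_recvs_needed(posted, 4096, 1, 0));
	return 0;
}

Before this change, a 1-byte sk_rcvbuf made max_bytes effectively one byte, so
posting stalled at SDP_MIN_TX_CREDITS + 1 buffers, while credit_update_needed()
refused to send a credit update below a half-full ring. The max() floor,
together with the removal of the half-ring check, avoids that stall.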