From: Jim Mott Date: Wed, 5 Dec 2007 09:02:11 +0000 (+0200) Subject: SDP: various bzcopy fixes V2 X-Git-Tag: v4.1.12-92~264^2~5^2~328 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=ef7e1b9dee420405cd31c846cfbbe9b7d27497fc;p=users%2Fjedix%2Flinux-maple.git SDP: various bzcopy fixes V2 The Mellanox regression tests posted a number of failures when multiple threads were accessing the same sockets concurrently. In addition to test failures, there were log messages of the form: sdp_sock(54386:19002): Could not reap -5 in-flight sends This fix handles all these failures and errors. The V2 is a fix to handle 2.6.22+ kernels where sk_buffs have changed. Signed-off-by: Jim Mott --- diff --git a/drivers/infiniband/ulp/sdp/sdp.h b/drivers/infiniband/ulp/sdp/sdp.h index 31966eb505551..e4d3effb53a54 100644 --- a/drivers/infiniband/ulp/sdp/sdp.h +++ b/drivers/infiniband/ulp/sdp/sdp.h @@ -172,7 +172,6 @@ struct sdp_sock { /* BZCOPY data */ int zcopy_thresh; - void *zcopy_context; struct ib_sge ibsge[SDP_MAX_SEND_SKB_FRAGS + 1]; struct ib_wc ibwc[SDP_NUM_WC]; diff --git a/drivers/infiniband/ulp/sdp/sdp_bcopy.c b/drivers/infiniband/ulp/sdp/sdp_bcopy.c index 95836110b1180..ad788f7efe0a5 100644 --- a/drivers/infiniband/ulp/sdp/sdp_bcopy.c +++ b/drivers/infiniband/ulp/sdp/sdp_bcopy.c @@ -218,6 +218,7 @@ struct sk_buff *sdp_send_completion(struct sdp_sock *ssk, int mseq) struct ib_device *dev; struct sdp_buf *tx_req; struct sk_buff *skb; + struct bzcopy_state *bz; int i, frags; if (unlikely(mseq != ssk->tx_tail)) { @@ -242,16 +243,9 @@ struct sk_buff *sdp_send_completion(struct sdp_sock *ssk, int mseq) ++ssk->tx_tail; /* TODO: AIO and real zcopy cdoe; add their context support here */ - if (ssk->zcopy_context && skb->data_len) { - struct bzcopy_state *bz; - struct sdp_bsdh *h; - - h = (struct sdp_bsdh *)skb->data; - if (h->mid == SDP_MID_DATA) { - bz = (struct bzcopy_state *)ssk->zcopy_context; - bz->busy--; - } - } + bz = *(struct bzcopy_state **)skb->cb; + if (bz) + 
bz->busy--; return skb; } @@ -751,12 +745,8 @@ int sdp_poll_cq(struct sdp_sock *ssk, struct ib_cq *cq) sdp_post_recvs(ssk); sdp_post_sends(ssk, 0); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { - if (ssk->zcopy_context) - sdp_bzcopy_write_space(ssk); - else - sk_stream_write_space(&ssk->isk.sk); - } + if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + sk_stream_write_space(&ssk->isk.sk); } return ret; diff --git a/drivers/infiniband/ulp/sdp/sdp_main.c b/drivers/infiniband/ulp/sdp/sdp_main.c index cb1bdcaaf6cd8..03af9602b6388 100644 --- a/drivers/infiniband/ulp/sdp/sdp_main.c +++ b/drivers/infiniband/ulp/sdp/sdp_main.c @@ -1205,10 +1205,24 @@ void sdp_push_one(struct sock *sk, unsigned int mss_now) static inline struct bzcopy_state *sdp_bz_cleanup(struct bzcopy_state *bz) { - int i; + int i, max_retry; struct sdp_sock *ssk = (struct sdp_sock *)bz->ssk; - ssk->zcopy_context = NULL; + /* Wait for in-flight sends; should be quick */ + if (bz->busy) { + struct sock *sk = &ssk->isk.sk; + + for (max_retry = 0; max_retry < 10000; max_retry++) { + poll_send_cq(sk); + + if (!bz->busy) + break; + } + + if (bz->busy) + sdp_warn(sk, "Could not reap %d in-flight sends\n", + bz->busy); + } if (bz->pages) { for (i = bz->cur_page; i < bz->page_cnt; i++) @@ -1282,14 +1296,14 @@ static struct bzcopy_state *sdp_bz_setup(struct sdp_sock *ssk, } up_write(&current->mm->mmap_sem); - ssk->zcopy_context = bz; return bz; out_2: up_write(&current->mm->mmap_sem); + kfree(bz->pages); out_1: - sdp_bz_cleanup(bz); + kfree(bz); return NULL; } @@ -1463,19 +1477,17 @@ static inline int slots_free(struct sdp_sock *ssk) }; /* like sk_stream_memory_free - except measures remote credits */ -static inline int sdp_bzcopy_slots_avail(struct sdp_sock *ssk) +static inline int sdp_bzcopy_slots_avail(struct sdp_sock *ssk, + struct bzcopy_state *bz) { - struct bzcopy_state *bz = (struct bzcopy_state *)ssk->zcopy_context; - - BUG_ON(!bz); return slots_free(ssk) > bz->busy; } /* like sk_stream_wait_memory - 
except waits on remote credits */ -static int sdp_bzcopy_wait_memory(struct sdp_sock *ssk, long *timeo_p) +static int sdp_bzcopy_wait_memory(struct sdp_sock *ssk, long *timeo_p, + struct bzcopy_state *bz) { struct sock *sk = &ssk->isk.sk; - struct bzcopy_state *bz = (struct bzcopy_state *)ssk->zcopy_context; int err = 0; long vm_wait = 0; long current_timeo = *timeo_p; @@ -1483,7 +1495,7 @@ static int sdp_bzcopy_wait_memory(struct sdp_sock *ssk, long *timeo_p) BUG_ON(!bz); - if (sdp_bzcopy_slots_avail(ssk)) + if (sdp_bzcopy_slots_avail(ssk, bz)) current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2; while (1) { @@ -1508,13 +1520,13 @@ static int sdp_bzcopy_wait_memory(struct sdp_sock *ssk, long *timeo_p) clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); - if (sdp_bzcopy_slots_avail(ssk)) + if (sdp_bzcopy_slots_avail(ssk, bz)) break; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; sk_wait_event(sk, &current_timeo, - sdp_bzcopy_slots_avail(ssk) && vm_wait); + sdp_bzcopy_slots_avail(ssk, bz) && vm_wait); sk->sk_write_pending--; if (vm_wait) { @@ -1605,7 +1617,8 @@ int sdp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, skb = sk->sk_write_queue.prev; if (!sk->sk_send_head || - (copy = size_goal - skb->len) <= 0) { + (copy = size_goal - skb->len) <= 0 || + bz != *(struct bzcopy_state **)skb->cb) { new_segment: /* @@ -1616,7 +1629,7 @@ new_segment: * receive credits. */ if (bz) { - if (!sdp_bzcopy_slots_avail(ssk)) + if (!sdp_bzcopy_slots_avail(ssk, bz)) goto wait_for_sndbuf; } else { if (!sk_stream_memory_free(sk)) @@ -1628,6 +1641,8 @@ new_segment: if (!skb) goto wait_for_memory; + *((struct bzcopy_state **)skb->cb) = bz; + /* * Check whether we can use HW checksum. */ @@ -1693,7 +1708,7 @@ wait_for_memory: if (copied) sdp_push(sk, ssk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); - err = (bz) ? sdp_bzcopy_wait_memory(ssk, &timeo) : + err = (bz) ? 
sdp_bzcopy_wait_memory(ssk, &timeo, bz) : sk_stream_wait_memory(sk, &timeo); if (err) goto do_error; @@ -1706,24 +1721,10 @@ wait_for_memory: out: if (copied) { sdp_push(sk, ssk, flags, mss_now, ssk->nonagle); - if (bz) { - int max_retry; - - /* Wait for in-flight sends; should be quick */ - for (max_retry = 0; max_retry < 10000; max_retry++) { - if (!bz->busy) - break; - - poll_send_cq(sk); - } - - if (bz->busy) - sdp_warn(sk, - "Could not reap %d in-flight sends\n", - bz->busy); + if (bz) bz = sdp_bz_cleanup(bz); - } else + else if (size > send_poll_thresh) poll_send_cq(sk); }