tcp: TCP Small Queues and strange attractors

author Eric Dumazet <edumazet@google.com>

Mon, 13 Oct 2014 13:27:47 +0000 (06:27 -0700)

committer David S. Miller <davem@davemloft.net>

Tue, 14 Oct 2014 21:16:26 +0000 (17:16 -0400)
author Eric Dumazet <edumazet@google.com>
Mon, 13 Oct 2014 13:27:47 +0000 (06:27 -0700)
committer David S. Miller <davem@davemloft.net>
Tue, 14 Oct 2014 21:16:26 +0000 (17:16 -0400)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c

index 0a5d97c20aa9fefa61bec7297abfffd5c65d2a65..e13d7785722576b215c2e10149bf85fefe333897 100644 (file)
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -839,26 +839,38 @@ void tcp_wfree(struct sk_buff *skb)
  {
         struct sock *sk = skb->sk;
         struct tcp_sock *tp = tcp_sk(sk);
+       int wmem;
+
+       /* Keep one reference on sk_wmem_alloc.
+        * Will be released by sk_free() from here or tcp_tasklet_func()
+        */
+       wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc);
+
+       /* If this softirq is serviced by ksoftirqd, we are likely under stress.
+        * Wait until our queues (qdisc + devices) are drained.
+        * This gives:
+        * - fewer callbacks to tcp_write_xmit(), reducing stress (batches)
+        * - chance for incoming ACK (processed by another cpu maybe)
+        *   to migrate this flow (skb->ooo_okay will eventually be set)
+        */
+       if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
+               goto out;
  
         if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
             !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
                 unsigned long flags;
                 struct tsq_tasklet *tsq;
  
-               /* Keep a ref on socket.
-                * This last ref will be released in tcp_tasklet_func()
-                */
-               atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc);
-
                 /* queue this socket to tasklet queue */
                 local_irq_save(flags);
                 tsq = &__get_cpu_var(tsq_tasklet);
                 list_add(&tp->tsq_node, &tsq->head);
                 tasklet_schedule(&tsq->tasklet);
                 local_irq_restore(flags);
-       } else {
-               sock_wfree(skb);
+               return;
         }
+out:
+       sk_free(sk);
  }
  
  /* This routine actually transmits TCP packets queued in by
author	Eric Dumazet <edumazet@google.com>
	Mon, 13 Oct 2014 13:27:47 +0000 (06:27 -0700)
committer	David S. Miller <davem@davemloft.net>
	Tue, 14 Oct 2014 21:16:26 +0000 (17:16 -0400)