From bd0c1daca868545cbe93c97f8f39776044517080 Mon Sep 17 00:00:00 2001 From: Jack Vogel Date: Wed, 25 Jan 2017 13:51:40 -0800 Subject: [PATCH] drivers/net/intel: use napi_complete_done() As per Eric Dumazet's previous patches: (see commit (24d2e4a50737) - tg3: use napi_complete_done()) Quoting verbatim: Using napi_complete_done() instead of napi_complete() allows us to use /sys/class/net/ethX/gro_flush_timeout GRO layer can aggregate more packets if the flush is delayed a bit, without having to set too big coalescing parameters that impact latencies. Tested configuration: low latency via ethtool -C ethx adaptive-rx off rx-usecs 10 adaptive-tx off tx-usecs 15 workload: streaming rx using netperf TCP_MAERTS igb: MIGRATED TCP MAERTS TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.0.0.1 () port 0 AF_INET : demo ... Interim result: 941.48 10^6bits/s over 1.000 seconds ending at 1440193171.589 Alignment Offset Bytes Bytes Recvs Bytes Sends Local Remote Local Remote Xfered Per Per Recv Send Recv Send Recv (avg) Send (avg) 8 8 0 0 1176930056 1475.36 797726 16384.00 71905 MIGRATED TCP MAERTS TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.0.0.1 () port 0 AF_INET : demo ... Interim result: 941.49 10^6bits/s over 0.997 seconds ending at 1440193142.763 Alignment Offset Bytes Bytes Recvs Bytes Sends Local Remote Local Remote Xfered Per Per Recv Send Recv Send Recv (avg) Send (avg) 8 8 0 0 1175182320 50476.00 23282 16384.00 71816 i40e: Hard to test because the traffic is incoming so fast (24Gb/s) that GRO always receives 87kB, even at the highest interrupt rate. Other drivers were only compile tested. Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Orabug: 25394529 (cherry picked from commit 32b3e08fff60494cd1d281a39b51583edfd2b18f) Signed-off-by: Jack Vogel --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index cea888649c43..a1229ee3db83 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -619,9 +619,9 @@ static void fm10k_receive_skb(struct fm10k_q_vector *q_vector, napi_gro_receive(&q_vector->napi, skb); } -static bool fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector, - struct fm10k_ring *rx_ring, - int budget) +static int fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector, + struct fm10k_ring *rx_ring, + int budget) { struct sk_buff *skb = rx_ring->skb; unsigned int total_bytes = 0, total_packets = 0; @@ -688,7 +688,7 @@ static bool fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector, q_vector->rx.total_packets += total_packets; q_vector->rx.total_bytes += total_bytes; - return total_packets < budget; + return total_packets; } #define VXLAN_HLEN (sizeof(struct udphdr) + 8) @@ -1448,7 +1448,7 @@ static int fm10k_poll(struct napi_struct *napi, int budget) struct fm10k_q_vector *q_vector = container_of(napi, struct fm10k_q_vector, napi); struct fm10k_ring *ring; - int per_ring_budget; + int per_ring_budget, work_done = 0; bool clean_complete = true; fm10k_for_each_ring(ring, q_vector->tx) @@ -1462,16 +1462,19 @@ static int fm10k_poll(struct napi_struct *napi, int budget) else per_ring_budget = budget; - fm10k_for_each_ring(ring, q_vector->rx) - clean_complete &= fm10k_clean_rx_irq(q_vector, ring, - per_ring_budget); + fm10k_for_each_ring(ring, q_vector->rx) { + int work = fm10k_clean_rx_irq(q_vector, ring, per_ring_budget); + + work_done += work; + clean_complete &= !!(work < per_ring_budget); + } /* If all work not completed, return budget and keep polling */ if (!clean_complete) return budget; /* all work done, exit the polling mode */ - napi_complete(napi); + napi_complete_done(napi, work_done); /* re-enable the q_vector */ fm10k_qv_enable(q_vector); -- 2.50.1