]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
net: more accurate skb truesize
authorEric Dumazet <eric.dumazet@gmail.com>
Thu, 13 Oct 2011 07:28:54 +0000 (07:28 +0000)
committerJoe Jin <joe.jin@oracle.com>
Wed, 16 May 2012 14:40:05 +0000 (22:40 +0800)
skb truesize currently accounts for sk_buff struct and part of skb head.
kmalloc() roundings are also ignored.

Considering that skb_shared_info is larger than sk_buff, it's time to
take it into account for better memory accounting.

This patch introduces SKB_TRUESIZE(X) macro to centralize various
assumptions into a single place.

At skb alloc phase, we put skb_shared_info struct at the exact end of
skb head, to allow a better use of memory (lowering number of
reallocations), since kmalloc() gives us power-of-two memory blocks.

Unless SLUB/SLAB debug is active, both skb->head and skb_shared_info are
aligned to cache lines, as before.

Note: This patch might trigger performance regressions because of
misconfigured protocol stacks, hitting per socket or global memory
limits that were previously not reached. But it's a necessary step for a
more accurate memory accounting.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Andi Kleen <ak@linux.intel.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 87fb4b7b533073eeeaed0b6bf7c2328995f6c075)

Signed-off-by: Joe Jin <joe.jin@oracle.com>
include/linux/skbuff.h
net/core/skbuff.c
net/core/sock.c
net/ipv4/icmp.c
net/ipv4/tcp_input.c
net/ipv6/icmp.c
net/iucv/af_iucv.c
net/sctp/protocol.c

index 6787a095f7cc1c0eb4f01bcb518dc60ee9b881d6..0ef1c5278c545914c04ceac36daff088ad26c188 100644 (file)
 #define SKB_MAX_HEAD(X)                (SKB_MAX_ORDER((X), 0))
 #define SKB_MAX_ALLOC          (SKB_MAX_ORDER(0, 2))
 
+/* return minimum truesize of one skb containing X bytes of data */
+#define SKB_TRUESIZE(X) ((X) +                                         \
+                        SKB_DATA_ALIGN(sizeof(struct sk_buff)) +       \
+                        SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
 /* A. Checksumming of received packets by device.
  *
  *     NONE: device failed to checksum this packet.
index 46cbd28f40f9698bfbfad886047223c4bba16df2..a26edfceb9b930c9f40b6a561ef9e639515d1e8c 100644 (file)
@@ -184,11 +184,20 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
                goto out;
        prefetchw(skb);
 
-       size = SKB_DATA_ALIGN(size);
-       data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
-                       gfp_mask, node);
+       /* We do our best to align skb_shared_info on a separate cache
+        * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
+        * aligned memory blocks, unless SLUB/SLAB debug is enabled.
+        * Both skb->head and skb_shared_info are cache line aligned.
+        */
+       size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+       data = kmalloc_node_track_caller(size, gfp_mask, node);
        if (!data)
                goto nodata;
+       /* kmalloc(size) might give us more room than requested.
+        * Put skb_shared_info exactly at the end of allocated zone,
+        * to allow max possible filling before reallocation.
+        */
+       size = SKB_WITH_OVERHEAD(ksize(data));
        prefetchw(data + size);
 
        /*
@@ -197,7 +206,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
         * the tail pointer in struct sk_buff!
         */
        memset(skb, 0, offsetof(struct sk_buff, tail));
-       skb->truesize = size + sizeof(struct sk_buff);
+       /* Account for allocated memory : skb + skb->head */
+       skb->truesize = SKB_TRUESIZE(size);
        atomic_set(&skb->users, 1);
        skb->head = data;
        skb->data = data;
index aebb419519b38163f1b60ef613ce6388692ac3d4..6c157b777b28e457a0d724d9991e2c78cfe67fb6 100644 (file)
@@ -205,7 +205,7 @@ static struct lock_class_key af_callback_keys[AF_MAX];
  * not depend upon such differences.
  */
 #define _SK_MEM_PACKETS                256
-#define _SK_MEM_OVERHEAD       (sizeof(struct sk_buff) + 256)
+#define _SK_MEM_OVERHEAD       SKB_TRUESIZE(256)
 #define SK_WMEM_MAX            (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 #define SK_RMEM_MAX            (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 
index 23ef31baa1afb84d3b81af7ab031c88830efe0f5..ab188ae12fd9e64635f47c92a1cc0acf1b9d3a13 100644 (file)
@@ -1152,10 +1152,9 @@ static int __net_init icmp_sk_init(struct net *net)
                net->ipv4.icmp_sk[i] = sk;
 
                /* Enough space for 2 64K ICMP packets, including
-                * sk_buff struct overhead.
+                * sk_buff/skb_shared_info struct overhead.
                 */
-               sk->sk_sndbuf =
-                       (2 * ((64 * 1024) + sizeof(struct sk_buff)));
+               sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
 
                /*
                 * Speedup sock_wfree()
index c68040fe9cd9e04b705607d51cf994aadedf7754..9c6d6636aceb600ba02bf45960ab2e80b0754f65 100644 (file)
@@ -256,8 +256,7 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
 
 static void tcp_fixup_sndbuf(struct sock *sk)
 {
-       int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 +
-                    sizeof(struct sk_buff);
+       int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER);
 
        if (sk->sk_sndbuf < 3 * sndmem) {
                sk->sk_sndbuf = 3 * sndmem;
@@ -340,7 +339,7 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
 static void tcp_fixup_rcvbuf(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
-       int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+       int rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
 
        /* Try to select rcvbuf so that 4 mss-sized segments
         * will fit to window and corresponding skbs will fit to our rcvbuf.
@@ -531,8 +530,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
                        space /= tp->advmss;
                        if (!space)
                                space = 1;
-                       rcvmem = (tp->advmss + MAX_TCP_HEADER +
-                                 16 + sizeof(struct sk_buff));
+                       rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
                        while (tcp_win_from_space(rcvmem) < tp->advmss)
                                rcvmem += 128;
                        space *= rcvmem;
@@ -4885,8 +4883,10 @@ static void tcp_new_space(struct sock *sk)
        struct tcp_sock *tp = tcp_sk(sk);
 
        if (tcp_should_expand_sndbuf(sk)) {
-               int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
-                       MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
+               int sndmem = SKB_TRUESIZE(max_t(u32,
+                                               tp->rx_opt.mss_clamp,
+                                               tp->mss_cache) +
+                                         MAX_TCP_HEADER);
                int demanded = max_t(unsigned int, tp->snd_cwnd,
                                     tp->reordering + 1);
                sndmem *= 2 * demanded;
index 11900417b1cc0daea102552858a07a1efb179e67..447039342ac3a05e91c5ed9ccef24d6b9527e494 100644 (file)
@@ -840,8 +840,7 @@ static int __net_init icmpv6_sk_init(struct net *net)
                /* Enough space for 2 64K ICMP packets, including
                 * sk_buff struct overhead.
                 */
-               sk->sk_sndbuf =
-                       (2 * ((64 * 1024) + sizeof(struct sk_buff)));
+               sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
        }
        return 0;
 
index e2013e434d03a5758ae2c269b7beafa3987c1c8f..a9fa7be285a4fb5fca08f18a108ecc93972ba200 100644 (file)
@@ -1600,7 +1600,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
                goto save_message;
 
        len = atomic_read(&sk->sk_rmem_alloc);
-       len += iucv_msg_length(msg) + sizeof(struct sk_buff);
+       len += SKB_TRUESIZE(iucv_msg_length(msg));
        if (len > sk->sk_rcvbuf)
                goto save_message;
 
index 946afd6045c370ff67e50fc2592912e6dff476e6..8f21bb14ceea488b1e9f568c97bd9819b238fccc 100644 (file)
@@ -1161,7 +1161,7 @@ SCTP_STATIC __init int sctp_init(void)
        max_share = min(4UL*1024*1024, limit);
 
        sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */
-       sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1));
+       sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1);
        sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
 
        sysctl_sctp_wmem[0] = SK_MEM_QUANTUM;