Crack open GRE packets in __skb_get_rxhash to compute 4-tuple hash on
in encapsulated packet.  Note that this is used only when the
__skb_get_rxhash is taken, in particular only when the device does
not compute provide the rxhash (ie. feature is disabled).
This was tested by creating a single GRE tunnel between two 16 core
AMD machines.  200 netperf TCP_RR streams were ran with 1 byte
request and response size.
Without patch: 157497 tps, 50/90/99% latencies 1250/1292/1364 usecs
With patch: 325896 tps, 50/90/99% latencies 603/848/1169
Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
        }
 
        switch (ip_proto) {
+       case IPPROTO_GRE:
+               if (pskb_may_pull(skb, nhoff + 16)) {
+                       u8 *h = skb->data + nhoff;
+                       __be16 flags = *(__be16 *)h;
+
+                       /*
+                        * Only look inside GRE if version zero and no
+                        * routing
+                        */
+                       if (!(flags & (GRE_VERSION|GRE_ROUTING))) {
+                               proto = *(__be16 *)(h + 2);
+                               nhoff += 4;
+                               if (flags & GRE_CSUM)
+                                       nhoff += 4;
+                               if (flags & GRE_KEY)
+                                       nhoff += 4;
+                               if (flags & GRE_SEQ)
+                                       nhoff += 4;
+                               goto again;
+                       }
+               }
+               break;
        default:
                break;
        }