www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
net: ipv4: add support for ECMP hash policy choice
author: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
Wed, 16 May 2018 21:08:50 +0000 (14:08 -0700)
committer: Brian Maly <brian.maly@oracle.com>
Mon, 21 May 2018 21:05:45 +0000 (17:05 -0400)
This patch adds support for ECMP hash policy choice via a new sysctl
called fib_multipath_hash_policy and also adds support for L4 hashes.
The current values for fib_multipath_hash_policy are:
 0 - layer 3 (default)
 1 - layer 4
If there's an skb hash already set and it matches the chosen policy then it
will be used instead of being calculated (currently only for L4).
In L3 mode we always calculate the hash due to the ICMP error special
case. The flow dissector's field consistentification should handle the
address order, thus we can remove the address reversals.
If the skb is provided we always use it for the hash calculation,
otherwise we fall back to fl4; that is, if skb is NULL, fl4 has to be set.

Orabug: 27547114

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit bf4e0a3db97eb882368fd82980b3b1fa0b5b9778)

Reviewed-by: Zhu Yanjun <yanjun.zhu@oracle.com>
Signed-off-by: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
Signed-off-by: Brian Maly <brian.maly@oracle.com>
Conflicts:
include/net/ip_fib.h
include/net/netns/ipv4.h
include/net/route.h
net/ipv4/fib_semantics.c
net/ipv4/icmp.c
net/ipv4/route.c
net/ipv4/sysctl_net_ipv4.c

Signed-off-by: Brian Maly <brian.maly@oracle.com>
Documentation/networking/ip-sysctl.txt
include/net/ip_fib.h
include/net/route.h
net/ipv4/fib_semantics.c
net/ipv4/route.c
net/ipv4/sysctl_net_ipv4.c

index 8686ccd8563fd804a53083666a2da2ecd5d8ed02..904edfc1b7b9437710ff8f09acf0580b0d412a77 100644 (file)
@@ -73,6 +73,14 @@ fib_multipath_use_neigh - BOOLEAN
        0 - disabled
        1 - enabled
 
+fib_multipath_hash_policy - INTEGER
+       Controls which hash policy to use for multipath routes. Only valid
+       for kernels built with CONFIG_IP_ROUTE_MULTIPATH enabled.
+       Default: 0 (Layer 3)
+       Possible values:
+       0 - Layer 3
+       1 - Layer 4
+
 route/max_size - INTEGER
        Maximum number of routes allowed in the kernel.  Increase
        this when using large numbers of interfaces and/or routes.
index dc62e6b33ab9048a40c5782f3060591b18a1530b..9234435c6b37f47a49a5eac2b55261407bc1031c 100644 (file)
@@ -310,13 +310,10 @@ int fib_sync_down_dev(struct net_device *dev, int force);
 int fib_sync_down_addr(struct net *net, __be32 local);
 int fib_sync_up(struct net_device *dev);
 
-extern u32 fib_multipath_secret __read_mostly;
-
-static inline int fib_multipath_hash(__be32 saddr, __be32 daddr)
-{
-       return jhash_2words(saddr, daddr, fib_multipath_secret) >> 1;
-}
-
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
+                      const struct sk_buff *skb);
+#endif
 void fib_select_multipath(struct fib_result *res, int hash);
 
 /* Exported by fib_trie.c */
index 90a956bde3eee008c90fd9bb84613a6bd103daf7..be440ef39788beab7a9b8cb0f55361e7662a58af 100644 (file)
@@ -46,6 +46,7 @@
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 extern int sysctl_fib_multipath_use_neigh;
+extern int sysctl_fib_multipath_hash_policy;
 #endif
 
 struct fib_nh;
index b3b10a64b0df0f7cb708a817e3c559a96370a933..3fd376bf8894dd6df159d016e21f153197224553 100644 (file)
@@ -56,7 +56,6 @@ static unsigned int fib_info_cnt;
 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-u32 fib_multipath_secret __read_mostly;
 
 #define for_nexthops(fi) {                                             \
        int nhsel; const struct fib_nh *nh;                             \
@@ -512,9 +511,6 @@ static void fib_rebalance(struct fib_info *fi)
 
                atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
        } endfor_nexthops(fi);
-
-       net_get_random_once(&fib_multipath_secret,
-                           sizeof(fib_multipath_secret));
 }
 
 static inline void fib_add_weight(struct fib_info *fi,
index b0ab3d5e8e6cec48b10e220b83c5afdb2efbb030..caf3467840472381ae6468465c9e2ae57390e8f2 100644 (file)
@@ -192,6 +192,8 @@ EXPORT_SYMBOL(ip_tos2prio);
 int sysctl_fib_multipath_use_neigh = 0;
 EXPORT_SYMBOL(sysctl_fib_multipath_use_neigh);
 
+int sysctl_fib_multipath_hash_policy = 0;
+EXPORT_SYMBOL(sysctl_fib_multipath_hash_policy);
 #endif
 
 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
@@ -1679,6 +1681,96 @@ out:
        return err;
 }
 
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+/* Set hash_keys src/dst from the outer IP header; for ICMP error messages
+ * use the addresses of the embedded inner IP header, to follow its flow.
+ */
+static void ip_multipath_l3_keys(const struct sk_buff *skb,
+                                struct flow_keys *hash_keys)
+{
+       const struct iphdr *outer_iph = ip_hdr(skb);
+       const struct iphdr *inner_iph;
+       const struct icmphdr *icmph;
+       struct iphdr _inner_iph;
+       struct icmphdr _icmph;
+
+       hash_keys->src = outer_iph->saddr;
+       hash_keys->dst = outer_iph->daddr;
+       if (likely(outer_iph->protocol != IPPROTO_ICMP))
+               return; /* not ICMP: keep the outer addresses */
+
+       if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
+               return; /* nonzero fragment offset: keep the outer addresses */
+
+       icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
+                                  &_icmph);
+       if (!icmph)
+               return; /* no ICMP header obtained: keep the outer addresses */
+
+       if (icmph->type != ICMP_DEST_UNREACH &&
+           icmph->type != ICMP_REDIRECT &&
+           icmph->type != ICMP_TIME_EXCEEDED &&
+           icmph->type != ICMP_PARAMETERPROB)
+               return; /* any other ICMP type: keep the outer addresses */
+
+       inner_iph = skb_header_pointer(skb,
+                                      outer_iph->ihl * 4 + sizeof(_icmph),
+                                      sizeof(_inner_iph), &_inner_iph);
+       if (!inner_iph)
+               return; /* no inner header obtained: keep the outer addresses */
+       hash_keys->src = inner_iph->saddr; /* override with inner addresses */
+       hash_keys->dst = inner_iph->daddr;
+}
+
+/* ECMP hash per hash policy; if skb is set it is used and fl4 can be NULL */
+int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4,
+                      const struct sk_buff *skb)
+{
+       struct flow_keys hash_keys;
+       u32 mhash;
+
+       switch (sysctl_fib_multipath_hash_policy) {
+       case 0: /* hash on source and destination address only */
+               memset(&hash_keys, 0, sizeof(hash_keys));
+               if (skb) {
+                       ip_multipath_l3_keys(skb, &hash_keys);
+               } else {
+                       hash_keys.src = fl4->saddr;
+                       hash_keys.dst = fl4->daddr;
+               }
+               break;
+       case 1: /* hash on addresses and ports */
+               /* skb is currently provided only when forwarding */
+               if (skb) {
+                       struct flow_keys keys;
+
+                       /* short-circuit if we already have L4 hash present */
+                       if (skb->l4_hash)
+                               return skb_get_hash_raw(skb) >> 1;
+                       memset(&hash_keys, 0, sizeof(hash_keys));
+                       skb_flow_dissect(skb, &keys);
+
+                       hash_keys.src = keys.src;
+                       hash_keys.dst = keys.dst;
+                       hash_keys.port16[0] = keys.port16[0];
+                       hash_keys.port16[1] = keys.port16[1];
+               } else {
+                       memset(&hash_keys, 0, sizeof(hash_keys));
+
+                       hash_keys.src = fl4->saddr;
+                       hash_keys.dst = fl4->daddr;
+                       hash_keys.port16[0] = fl4->fl4_sport;
+                       hash_keys.port16[1] = fl4->fl4_dport;
+               }
+               break;
+       } /* NOTE(review): no default case; hash_keys stays unset otherwise */
+       mhash = flow_hash_from_keys(&hash_keys);
+
+       return mhash >> 1; /* u32 >> 1: result fits in 31 bits */
+}
+EXPORT_SYMBOL_GPL(fib_multipath_hash);
+#endif /* CONFIG_IP_ROUTE_MULTIPATH */
+
 static int ip_mkroute_input(struct sk_buff *skb,
                            struct fib_result *res,
                            const struct flowi4 *fl4,
@@ -1689,7 +1781,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
        if (res->fi && res->fi->fib_nhs > 1) {
                int h;
 
-               h = fib_multipath_hash(saddr, daddr);
+               h = fib_multipath_hash(res->fi, NULL, skb);
                fib_select_multipath(res, h);
        }
 #endif
@@ -2233,7 +2325,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
        if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
                int h;
 
-               h = fib_multipath_hash(fl4->saddr, fl4->daddr);
+               h = fib_multipath_hash(res.fi, fl4, NULL);
                fib_select_multipath(&res, h);
        }
        else
index 78974c7b98f13532b7b4f8ba463001569289712f..e42c2dca080aee18fe392bb3bf04b6ee31f60cd5 100644 (file)
@@ -770,6 +770,15 @@ static struct ctl_table ipv4_table[] = {
                .extra1         = &zero,
                .extra2         = &one,
        },
+       {
+               .procname       = "fib_multipath_hash_policy",
+               .data           = &sysctl_fib_multipath_hash_policy,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
 #endif
        { }
 };