From: Peter Nørlund Date: Wed, 30 Sep 2015 08:12:21 +0000 (+0200) Subject: ipv4: L3 hash-based multipath X-Git-Tag: v4.1.12-124.31.3~797 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=0d59b283b49d32d05e6ce6456fe6ba37ab19034b;p=users%2Fjedix%2Flinux-maple.git ipv4: L3 hash-based multipath Replaces the per-packet multipath with a hash-based multipath using source and destination address. Orabug: 27547114 Signed-off-by: Peter Nørlund Signed-off-by: David S. Miller (cherry picked from commit 0e884c78ee19e902f300ed147083c28a0c6302f0) Reviewed-by: Zhu Yanjun Signed-off-by: Venkat Venkatsubra Signed-off-by: Brian Maly Conflicts: include/net/ip_fib.h net/ipv4/fib_semantics.c Signed-off-by: Brian Maly --- diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 13f1a97f6b2b8..dc62e6b33ab90 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -78,7 +78,7 @@ struct fib_nh { unsigned char nh_scope; #ifdef CONFIG_IP_ROUTE_MULTIPATH int nh_weight; - int nh_power; + atomic_t nh_upper_bound; #endif #ifdef CONFIG_IP_ROUTE_CLASSID __u32 nh_tclassid; @@ -116,7 +116,7 @@ struct fib_info { #define fib_advmss fib_metrics[RTAX_ADVMSS-1] int fib_nhs; #ifdef CONFIG_IP_ROUTE_MULTIPATH - int fib_power; + int fib_weight; #endif struct rcu_head rcu; struct fib_nh fib_nh[0]; @@ -309,7 +309,15 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, int force); int fib_sync_down_addr(struct net *net, __be32 local); int fib_sync_up(struct net_device *dev); -void fib_select_multipath(struct fib_result *res); + +extern u32 fib_multipath_secret __read_mostly; + +static inline int fib_multipath_hash(__be32 saddr, __be32 daddr) +{ + return jhash_2words(saddr, daddr, fib_multipath_secret) >> 1; +} + +void fib_select_multipath(struct fib_result *res, int hash); /* Exported by fib_trie.c */ void fib_trie_init(void); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8d695b6659c71..a867a4f829a91 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -56,8 +56,7 @@ static unsigned int fib_info_cnt; static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; #ifdef CONFIG_IP_ROUTE_MULTIPATH - -static DEFINE_SPINLOCK(fib_multipath_lock); +u32 fib_multipath_secret __read_mostly; #define for_nexthops(fi) { \ int nhsel; const struct fib_nh *nh; \ @@ -483,7 +482,53 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, return 0; } -#endif +static void fib_rebalance(struct fib_info *fi) +{ + int total; + int w; + + if (fi->fib_nhs < 2) + return; + + total = 0; + for_nexthops(fi) { + if (nh->nh_flags & RTNH_F_DEAD) + continue; + + total += nh->nh_weight; + } endfor_nexthops(fi); + + w = 0; + change_nexthops(fi) { + int upper_bound; + + if (nexthop_nh->nh_flags & RTNH_F_DEAD) { + upper_bound = -1; + } else { + w += nexthop_nh->nh_weight; + upper_bound = DIV_ROUND_CLOSEST(2147483648LL * w, + total) - 1; + } + + atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); + } endfor_nexthops(fi); + + net_get_random_once(&fib_multipath_secret, + sizeof(fib_multipath_secret)); +} + +static inline void fib_add_weight(struct fib_info *fi, + const struct fib_nh *nh) +{ + fi->fib_weight += nh->nh_weight; +} + +#else /* CONFIG_IP_ROUTE_MULTIPATH */ + +#define fib_rebalance(fi) do { } while (0) +#define fib_add_weight(fi, nh) do { } while (0) + +#endif /* CONFIG_IP_ROUTE_MULTIPATH */ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) { @@ -940,8 +985,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg) change_nexthops(fi) { fib_info_update_nh_saddr(net, nexthop_nh); + fib_add_weight(fi, nexthop_nh); } endfor_nexthops(fi) + fib_rebalance(fi); + link_it: ofi = fib_find_info(fi); if (ofi) { @@ -1134,12 +1182,6 @@ int fib_sync_down_dev(struct net_device *dev, int force) else if (nexthop_nh->nh_dev == dev && nexthop_nh->nh_scope != scope) { nexthop_nh->nh_flags |= RTNH_F_DEAD; -#ifdef CONFIG_IP_ROUTE_MULTIPATH - spin_lock_bh(&fib_multipath_lock); - fi->fib_power -= nexthop_nh->nh_power; - nexthop_nh->nh_power = 0; - spin_unlock_bh(&fib_multipath_lock); -#endif dead++; } #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -1153,6 +1195,8 @@ int fib_sync_down_dev(struct net_device *dev, int force) fi->fib_flags |= RTNH_F_DEAD; ret++; } + + fib_rebalance(fi); } return ret; @@ -1258,71 +1302,33 @@ int fib_sync_up(struct net_device *dev) !__in_dev_get_rtnl(dev)) continue; alive++; - spin_lock_bh(&fib_multipath_lock); - nexthop_nh->nh_power = 0; nexthop_nh->nh_flags &= ~RTNH_F_DEAD; - spin_unlock_bh(&fib_multipath_lock); } endfor_nexthops(fi) if (alive > 0) { fi->fib_flags &= ~RTNH_F_DEAD; ret++; } + + fib_rebalance(fi); } return ret; } -/* - * The algorithm is suboptimal, but it provides really - * fair weighted route distribution. - */ -void fib_select_multipath(struct fib_result *res) +void fib_select_multipath(struct fib_result *res, int hash) { struct fib_info *fi = res->fi; - int w; - spin_lock_bh(&fib_multipath_lock); - if (fi->fib_power <= 0) { - int power = 0; - change_nexthops(fi) { - if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) { - power += nexthop_nh->nh_weight; - nexthop_nh->nh_power = nexthop_nh->nh_weight; - } - } endfor_nexthops(fi); - fi->fib_power = power; - if (power <= 0) { - spin_unlock_bh(&fib_multipath_lock); - /* Race condition: route has just become dead. */ - res->nh_sel = 0; - return; - } - } - - - /* w should be random number [0..fi->fib_power-1], - * it is pretty bad approximation. - */ - - w = jiffies % fi->fib_power; + for_nexthops(fi) { + if (hash > atomic_read(&nh->nh_upper_bound)) + continue; - change_nexthops(fi) { - if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) && - nexthop_nh->nh_power) { - w -= nexthop_nh->nh_power; - if (w <= 0) { - nexthop_nh->nh_power--; - fi->fib_power--; - res->nh_sel = nhsel; - spin_unlock_bh(&fib_multipath_lock); - return; - } - } + res->nh_sel = nhsel; + return; } endfor_nexthops(fi); /* Race condition: route has just become dead. */ res->nh_sel = 0; - spin_unlock_bh(&fib_multipath_lock); } #endif diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1d3cdb4d4ebcb..07f82e85fcc64 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1680,8 +1680,12 @@ static int ip_mkroute_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, u32 tos) { #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res->fi && res->fi->fib_nhs > 1) - fib_select_multipath(res); + if (res->fi && res->fi->fib_nhs > 1) { + int h; + + h = fib_multipath_hash(saddr, daddr); + fib_select_multipath(res, h); + } #endif /* create a routing cache entry */ @@ -2220,8 +2224,12 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) - fib_select_multipath(&res); + if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) { + int h; + + h = fib_multipath_hash(fl4->saddr, fl4->daddr); + fib_select_multipath(&res, h); + } else #endif if (!res.prefixlen &&