www.infradead.org Git - users/hch/dma-mapping.git/commitdiff
net/ipv6: Remove expired routes with a separated list of routes.
author Kui-Feng Lee <thinker.li@gmail.com>
Thu, 8 Feb 2024 22:06:51 +0000 (14:06 -0800)
committer David S. Miller <davem@davemloft.net>
Mon, 12 Feb 2024 10:24:12 +0000 (10:24 +0000)
FIB6 GC walks trees of fib6_tables to remove expired routes. Walking a tree
can be expensive if the number of routes in a table is big, even if most of
them are permanent. Checking routes in a separated list of routes having
expiration will avoid this potential issue.

Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/ip6_fib.h
net/ipv6/addrconf.c
net/ipv6/ip6_fib.c
net/ipv6/ndisc.c
net/ipv6/route.c

index 360b12e61850735f1b2e5bbf7302320298b712b1..323c94f1845b9e3eed52a2a19a4871cf8174d9c2 100644 (file)
@@ -173,6 +173,9 @@ struct fib6_info {
 
        refcount_t                      fib6_ref;
        unsigned long                   expires;
+
+       struct hlist_node               gc_link;
+
        struct dst_metrics              *fib6_metrics;
 #define fib6_pmtu              fib6_metrics->metrics[RTAX_MTU-1]
 
@@ -241,12 +244,18 @@ static inline bool fib6_requires_src(const struct fib6_info *rt)
        return rt->fib6_src.plen > 0;
 }
 
+/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever
+ * been added to a table before.
+ */
 static inline void fib6_clean_expires(struct fib6_info *f6i)
 {
        f6i->fib6_flags &= ~RTF_EXPIRES;
        f6i->expires = 0;
 }
 
+/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever
+ * been added to a table before.
+ */
 static inline void fib6_set_expires(struct fib6_info *f6i,
                                    unsigned long expires)
 {
@@ -327,8 +336,10 @@ static inline bool fib6_info_hold_safe(struct fib6_info *f6i)
 
 static inline void fib6_info_release(struct fib6_info *f6i)
 {
-       if (f6i && refcount_dec_and_test(&f6i->fib6_ref))
+       if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) {
+               DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link));
                call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
+       }
 }
 
 enum fib6_walk_state {
@@ -382,6 +393,7 @@ struct fib6_table {
        struct inet_peer_base   tb6_peers;
        unsigned int            flags;
        unsigned int            fib_seq;
+       struct hlist_head       tb6_gc_hlist;   /* GC candidates */
 #define RT6_TABLE_HAS_DFLT_ROUTER      BIT(0)
 };
 
@@ -498,6 +510,38 @@ void fib6_gc_cleanup(void);
 
 int fib6_init(void);
 
+/* Add the route to the gc list if it is not already there
+ *
+ * The callers should hold f6i->fib6_table->tb6_lock.
+ */
+static inline void fib6_add_gc_list(struct fib6_info *f6i)
+{
+       /* If fib6_node is null, the f6i is not in (or removed from) the
+        * table.
+        *
+        * There is a gap between finding the f6i from the table and
+        * calling this function without the protection of the tb6_lock.
+        * This check makes sure the f6i is not added to the gc list when
+        * it is not on the table.
+        */
+       if (!rcu_dereference_protected(f6i->fib6_node,
+                                      lockdep_is_held(&f6i->fib6_table->tb6_lock)))
+               return;
+
+       if (hlist_unhashed(&f6i->gc_link))
+               hlist_add_head(&f6i->gc_link, &f6i->fib6_table->tb6_gc_hlist);
+}
+
+/* Remove the route from the gc list if it is on the list.
+ *
+ * The callers should hold f6i->fib6_table->tb6_lock.
+ */
+static inline void fib6_remove_gc_list(struct fib6_info *f6i)
+{
+       if (!hlist_unhashed(&f6i->gc_link))
+               hlist_del_init(&f6i->gc_link);
+}
+
 struct ipv6_route_iter {
        struct seq_net_private p;
        struct fib6_walker w;
index d63f5d063f073cb53f52e187efdbd09b8f78d622..0ea44563454f0053f8938738c34c9b8cba1ceaf2 100644 (file)
@@ -1255,6 +1255,7 @@ static void
 cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
                     bool del_rt, bool del_peer)
 {
+       struct fib6_table *table;
        struct fib6_info *f6i;
 
        f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr,
@@ -1264,8 +1265,15 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
                if (del_rt)
                        ip6_del_rt(dev_net(ifp->idev->dev), f6i, false);
                else {
-                       if (!(f6i->fib6_flags & RTF_EXPIRES))
+                       if (!(f6i->fib6_flags & RTF_EXPIRES)) {
+                               table = f6i->fib6_table;
+                               spin_lock_bh(&table->tb6_lock);
+
                                fib6_set_expires(f6i, expires);
+                               fib6_add_gc_list(f6i);
+
+                               spin_unlock_bh(&table->tb6_lock);
+                       }
                        fib6_info_release(f6i);
                }
        }
@@ -2706,6 +2714,7 @@ EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr);
 void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 {
        struct prefix_info *pinfo;
+       struct fib6_table *table;
        __u32 valid_lft;
        __u32 prefered_lft;
        int addr_type, err;
@@ -2782,11 +2791,20 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
                        if (valid_lft == 0) {
                                ip6_del_rt(net, rt, false);
                                rt = NULL;
-                       } else if (addrconf_finite_timeout(rt_expires)) {
-                               /* not infinity */
-                               fib6_set_expires(rt, jiffies + rt_expires);
                        } else {
-                               fib6_clean_expires(rt);
+                               table = rt->fib6_table;
+                               spin_lock_bh(&table->tb6_lock);
+
+                               if (addrconf_finite_timeout(rt_expires)) {
+                                       /* not infinity */
+                                       fib6_set_expires(rt, jiffies + rt_expires);
+                                       fib6_add_gc_list(rt);
+                               } else {
+                                       fib6_clean_expires(rt);
+                                       fib6_remove_gc_list(rt);
+                               }
+
+                               spin_unlock_bh(&table->tb6_lock);
                        }
                } else if (valid_lft) {
                        clock_t expires = 0;
@@ -4741,6 +4759,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
                               unsigned long expires, u32 flags,
                               bool modify_peer)
 {
+       struct fib6_table *table;
        struct fib6_info *f6i;
        u32 prio;
 
@@ -4761,10 +4780,18 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
                                      ifp->rt_priority, ifp->idev->dev,
                                      expires, flags, GFP_KERNEL);
        } else {
-               if (!expires)
+               table = f6i->fib6_table;
+               spin_lock_bh(&table->tb6_lock);
+
+               if (!expires) {
                        fib6_clean_expires(f6i);
-               else
+                       fib6_remove_gc_list(f6i);
+               } else {
                        fib6_set_expires(f6i, expires);
+                       fib6_add_gc_list(f6i);
+               }
+
+               spin_unlock_bh(&table->tb6_lock);
 
                fib6_info_release(f6i);
        }
index 38a0348b1d17803772264389865b503b929e8c95..805bbf26b3efd0239c04c0d7a658b5eac26efd34 100644 (file)
@@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
        INIT_LIST_HEAD(&f6i->fib6_siblings);
        refcount_set(&f6i->fib6_ref, 1);
 
+       INIT_HLIST_NODE(&f6i->gc_link);
+
        return f6i;
 }
 
@@ -246,6 +248,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
                                   net->ipv6.fib6_null_entry);
                table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
                inet_peer_base_init(&table->tb6_peers);
+               INIT_HLIST_HEAD(&table->tb6_gc_hlist);
        }
 
        return table;
@@ -1055,6 +1058,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
                                    lockdep_is_held(&table->tb6_lock));
                }
        }
+
+       fib6_clean_expires(rt);
+       fib6_remove_gc_list(rt);
 }
 
 /*
@@ -1115,10 +1121,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
                                        rt->fib6_nsiblings = 0;
                                if (!(iter->fib6_flags & RTF_EXPIRES))
                                        return -EEXIST;
-                               if (!(rt->fib6_flags & RTF_EXPIRES))
+                               if (!(rt->fib6_flags & RTF_EXPIRES)) {
                                        fib6_clean_expires(iter);
-                               else
+                                       fib6_remove_gc_list(iter);
+                               } else {
                                        fib6_set_expires(iter, rt->expires);
+                                       fib6_add_gc_list(iter);
+                               }
 
                                if (rt->fib6_pmtu)
                                        fib6_metric_set(iter, RTAX_MTU,
@@ -1477,6 +1486,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
                if (rt->nh)
                        list_add(&rt->nh_list, &rt->nh->f6i_list);
                __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
+
+               if (rt->fib6_flags & RTF_EXPIRES)
+                       fib6_add_gc_list(rt);
+
                fib6_start_gc(info->nl_net, rt);
        }
 
@@ -2280,9 +2293,8 @@ static void fib6_flush_trees(struct net *net)
  *     Garbage collection
  */
 
-static int fib6_age(struct fib6_info *rt, void *arg)
+static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
 {
-       struct fib6_gc_args *gc_args = arg;
        unsigned long now = jiffies;
 
        /*
@@ -2307,6 +2319,42 @@ static int fib6_age(struct fib6_info *rt, void *arg)
        return 0;
 }
 
+static void fib6_gc_table(struct net *net,
+                         struct fib6_table *tb6,
+                         struct fib6_gc_args *gc_args)
+{
+       struct fib6_info *rt;
+       struct hlist_node *n;
+       struct nl_info info = {
+               .nl_net = net,
+               .skip_notify = false,
+       };
+
+       hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link)
+               if (fib6_age(rt, gc_args) == -1)
+                       fib6_del(rt, &info);
+}
+
+static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args)
+{
+       struct fib6_table *table;
+       struct hlist_head *head;
+       unsigned int h;
+
+       rcu_read_lock();
+       for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+               head = &net->ipv6.fib_table_hash[h];
+               hlist_for_each_entry_rcu(table, head, tb6_hlist) {
+                       spin_lock_bh(&table->tb6_lock);
+
+                       fib6_gc_table(net, table, gc_args);
+
+                       spin_unlock_bh(&table->tb6_lock);
+               }
+       }
+       rcu_read_unlock();
+}
+
 void fib6_run_gc(unsigned long expires, struct net *net, bool force)
 {
        struct fib6_gc_args gc_args;
@@ -2322,7 +2370,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
                          net->ipv6.sysctl.ip6_rt_gc_interval;
        gc_args.more = 0;
 
-       fib6_clean_all(net, fib6_age, &gc_args);
+       fib6_gc_all(net, &gc_args);
        now = jiffies;
        net->ipv6.ip6_rt_last_gc = now;
 
@@ -2382,6 +2430,7 @@ static int __net_init fib6_net_init(struct net *net)
        net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
                RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
        inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
+       INIT_HLIST_HEAD(&net->ipv6.fib6_main_tbl->tb6_gc_hlist);
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
        net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
@@ -2394,6 +2443,7 @@ static int __net_init fib6_net_init(struct net *net)
        net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
                RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
        inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
+       INIT_HLIST_HEAD(&net->ipv6.fib6_local_tbl->tb6_gc_hlist);
 #endif
        fib6_tables_init(net);
 
index a68462668158b4203e871e5057cde4fecf0d49ac..73cb31afe93542285e3f11b7140d2cc1619006e7 100644 (file)
@@ -1237,6 +1237,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
        struct ndisc_options ndopts;
        struct fib6_info *rt = NULL;
        struct inet6_dev *in6_dev;
+       struct fib6_table *table;
        u32 defrtr_usr_metric;
        unsigned int pref = 0;
        __u32 old_if_flags;
@@ -1410,8 +1411,15 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
                inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE);
        }
 
-       if (rt)
+       if (rt) {
+               table = rt->fib6_table;
+               spin_lock_bh(&table->tb6_lock);
+
                fib6_set_expires(rt, jiffies + (HZ * lifetime));
+               fib6_add_gc_list(rt);
+
+               spin_unlock_bh(&table->tb6_lock);
+       }
        if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
            ra_msg->icmph.icmp6_hop_limit) {
                if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
index dd6ff5b209186b4b9c135b2e9db244a8697c925d..707d65bc9c0e5e9b2900063f0ac86c3c5e299088 100644 (file)
@@ -931,6 +931,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
        struct net *net = dev_net(dev);
        struct route_info *rinfo = (struct route_info *) opt;
        struct in6_addr prefix_buf, *prefix;
+       struct fib6_table *table;
        unsigned int pref;
        unsigned long lifetime;
        struct fib6_info *rt;
@@ -989,10 +990,18 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
                                 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 
        if (rt) {
-               if (!addrconf_finite_timeout(lifetime))
+               table = rt->fib6_table;
+               spin_lock_bh(&table->tb6_lock);
+
+               if (!addrconf_finite_timeout(lifetime)) {
                        fib6_clean_expires(rt);
-               else
+                       fib6_remove_gc_list(rt);
+               } else {
                        fib6_set_expires(rt, jiffies + HZ * lifetime);
+                       fib6_add_gc_list(rt);
+               }
+
+               spin_unlock_bh(&table->tb6_lock);
 
                fib6_info_release(rt);
        }