#define FWS_INIT FWS_L
 #endif
 
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
 static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
 static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
 static int fib6_walk(struct net *net, struct fib6_walker *w);
 
        if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
                return -EINVAL;
+       if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
+               return -EINVAL;
 
        if (info->nlh) {
                if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
 #endif
 
        err = fib6_add_rt2node(fn, rt, info, mxc);
-       if (!err) {
+       if (!err)
                fib6_start_gc(info->nl_net, rt);
-               if (!(rt->rt6i_flags & RTF_CACHE))
-                       fib6_prune_clones(info->nl_net, pn);
-       }
 
 out:
        if (err) {
                read_lock(&net->ipv6.fib6_walker_lock);
                FOR_WALKERS(net, w) {
                        if (!child) {
-                               if (w->root == fn) {
-                                       w->root = w->node = NULL;
-                                       RT6_TRACE("W %p adjusted by delroot 1\n", w);
-                               } else if (w->node == fn) {
+                               if (w->node == fn) {
                                        RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
                                        w->node = pn;
                                        w->state = nstate;
                                }
                        } else {
-                               if (w->root == fn) {
-                                       w->root = child;
-                                       RT6_TRACE("W %p adjusted by delroot 2\n", w);
-                               }
                                if (w->node == fn) {
                                        w->node = child;
                                        if (children&2) {
 
        RT6_TRACE("fib6_del_route\n");
 
+       WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
+
        /* Unlink it */
        *rtp = rt->dst.rt6_next;
        rt->rt6i_node = NULL;
        net->ipv6.rt6_stats->fib_rt_entries--;
        net->ipv6.rt6_stats->fib_discarded_routes++;
 
+       /* Flush all cached dst in exception table */
+       rt6_flush_exceptions(rt);
+
        /* Reset round-robin state, if necessary */
        if (fn->rr_ptr == rt)
                fn->rr_ptr = NULL;
 
        WARN_ON(!(fn->fn_flags & RTN_RTINFO));
 
-       if (!(rt->rt6i_flags & RTF_CACHE)) {
-               struct fib6_node *pn = fn;
-#ifdef CONFIG_IPV6_SUBTREES
-               /* clones of this route might be in another subtree */
-               if (rt->rt6i_src.plen) {
-                       while (!(pn->fn_flags & RTN_ROOT))
-                               pn = pn->parent;
-                       pn = pn->parent;
-               }
-#endif
-               fib6_prune_clones(info->nl_net, pn);
-       }
+       /* remove cached dst from exception table */
+       if (rt->rt6i_flags & RTF_CACHE)
+               return rt6_remove_exception_rt(rt);
 
        /*
         *      Walk the leaf entries looking for ourself
 {
        struct fib6_node *fn, *pn;
 
+       /* w->root should always be table->tb6_root */
+       WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));
+
        for (;;) {
                fn = w->node;
                if (!fn)
                        return 0;
 
-               if (w->prune && fn != w->root &&
-                   fn->fn_flags & RTN_RTINFO && w->state < FWS_C) {
-                       w->state = FWS_C;
-                       w->leaf = fn->leaf;
-               }
                switch (w->state) {
 #ifdef CONFIG_IPV6_SUBTREES
                case FWS_S:
  *     func is called on each route.
  *             It may return -1 -> delete this route.
  *                           0  -> continue walking
- *
- *     prune==1 -> only immediate children of node (certainly,
- *     ignoring pure split nodes) will be scanned.
  */
 
 static void fib6_clean_tree(struct net *net, struct fib6_node *root,
                            int (*func)(struct rt6_info *, void *arg),
-                           bool prune, int sernum, void *arg)
+                           int sernum, void *arg)
 {
        struct fib6_cleaner c;
 
        c.w.root = root;
        c.w.func = fib6_clean_node;
-       c.w.prune = prune;
        c.w.count = 0;
        c.w.skip = 0;
        c.func = func;
                hlist_for_each_entry_rcu(table, head, tb6_hlist) {
                        write_lock_bh(&table->tb6_lock);
                        fib6_clean_tree(net, &table->tb6_root,
-                                       func, false, sernum, arg);
+                                       func, sernum, arg);
                        write_unlock_bh(&table->tb6_lock);
                }
        }
        __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
 }
 
-static int fib6_prune_clone(struct rt6_info *rt, void *arg)
-{
-       if (rt->rt6i_flags & RTF_CACHE) {
-               RT6_TRACE("pruning clone %p\n", rt);
-               return -1;
-       }
-
-       return 0;
-}
-
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
-{
-       fib6_clean_tree(net, fn, fib6_prune_clone, true,
-                       FIB6_NO_SERNUM_CHANGE, NULL);
-}
-
 static void fib6_flush_trees(struct net *net)
 {
        int new_sernum = fib6_new_sernum(net);
                        return -1;
                }
                gc_args->more++;
-       /* The following part will soon be removed when the exception
-        * table is hooked up to store all cached routes.
-        */
-       } else if (rt->rt6i_flags & RTF_CACHE) {
-               if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout))
-                       rt->dst.obsolete = DST_OBSOLETE_KILL;
-               if (atomic_read(&rt->dst.__refcnt) == 1 &&
-                   rt->dst.obsolete == DST_OBSOLETE_KILL) {
-                       RT6_TRACE("aging clone %p\n", rt);
-                       return -1;
-               } else if (rt->rt6i_flags & RTF_GATEWAY) {
-                       struct neighbour *neigh;
-                       __u8 neigh_flags = 0;
-
-                       neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
-                       if (neigh) {
-                               neigh_flags = neigh->flags;
-                               neigh_release(neigh);
-                       }
-                       if (!(neigh_flags & NTF_ROUTER)) {
-                               RT6_TRACE("purging route %p via non-router but gateway\n",
-                                         rt);
-                               return -1;
-                       }
-               }
-               gc_args->more++;
        }
 
        /*      Also age clones in the exception table.
 
                                             struct fib6_table *table,
                                             struct flowi6 *fl6, int flags)
 {
+       struct rt6_info *rt, *rt_cache;
        struct fib6_node *fn;
-       struct rt6_info *rt;
 
        read_lock_bh(&table->tb6_lock);
        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
                if (fn)
                        goto restart;
        }
+       /* Search through exception table */
+       rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
+       if (rt_cache)
+               rt = rt_cache;
+
        dst_use(&rt->dst, jiffies);
        read_unlock_bh(&table->tb6_lock);
 
                               int oif, struct flowi6 *fl6, int flags)
 {
        struct fib6_node *fn, *saved_fn;
-       struct rt6_info *rt;
+       struct rt6_info *rt, *rt_cache;
        int strict = 0;
 
        strict |= flags & RT6_LOOKUP_F_IFACE;
                }
        }
 
+       /*Search through exception table */
+       rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
+       if (rt_cache)
+               rt = rt_cache;
 
        if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
                dst_use(&rt->dst, jiffies);
 
        if (!rt6_cache_allowed_for_pmtu(rt6)) {
                rt6_do_update_pmtu(rt6, mtu);
+               /* update rt6_ex->stamp for cache */
+               if (rt6->rt6i_flags & RTF_CACHE)
+                       rt6_update_exception_stamp_rt(rt6);
        } else if (daddr) {
                struct rt6_info *nrt6;
 
                nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
                if (nrt6) {
                        rt6_do_update_pmtu(nrt6, mtu);
-
-                       /* ip6_ins_rt(nrt6) will bump the
-                        * rt6->rt6i_node->fn_sernum
-                        * which will fail the next rt6_check() and
-                        * invalidate the sk->sk_dst_cache.
-                        */
-                       ip6_ins_rt(nrt6);
-                       /* Release the reference taken in
-                        * ip6_rt_cache_alloc()
-                        */
-                       dst_release(&nrt6->dst);
+                       if (rt6_insert_exception(nrt6, rt6))
+                               dst_release_immediate(&nrt6->dst);
                }
        }
 }
                                             int flags)
 {
        struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
-       struct rt6_info *rt;
+       struct rt6_info *rt, *rt_cache;
        struct fib6_node *fn;
 
        /* Get the "current" route for this destination and
                        continue;
                if (fl6->flowi6_oif != rt->dst.dev->ifindex)
                        continue;
-               if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
+               /* rt_cache's gateway might be different from its 'parent'
+                * in the case of an ip redirect.
+                * So we keep searching in the exception table if the gateway
+                * is different.
+                */
+               if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
+                       rt_cache = rt6_find_cached_rt(rt,
+                                                     &fl6->daddr,
+                                                     &fl6->saddr);
+                       if (rt_cache &&
+                           ipv6_addr_equal(&rdfl->gateway,
+                                           &rt_cache->rt6i_gateway)) {
+                               rt = rt_cache;
+                               break;
+                       }
                        continue;
+               }
                break;
        }
 
 static int ip6_route_del(struct fib6_config *cfg,
                         struct netlink_ext_ack *extack)
 {
+       struct rt6_info *rt, *rt_cache;
        struct fib6_table *table;
        struct fib6_node *fn;
-       struct rt6_info *rt;
        int err = -ESRCH;
 
        table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
        fn = fib6_locate(&table->tb6_root,
                         &cfg->fc_dst, cfg->fc_dst_len,
                         &cfg->fc_src, cfg->fc_src_len,
-                        true);
+                        !(cfg->fc_flags & RTF_CACHE));
 
        if (fn) {
                for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
-                       if ((rt->rt6i_flags & RTF_CACHE) &&
-                           !(cfg->fc_flags & RTF_CACHE))
-                               continue;
+                       if (cfg->fc_flags & RTF_CACHE) {
+                               rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
+                                                             &cfg->fc_src);
+                               if (!rt_cache)
+                                       continue;
+                               rt = rt_cache;
+                       }
                        if (cfg->fc_ifindex &&
                            (!rt->dst.dev ||
                             rt->dst.dev->ifindex != cfg->fc_ifindex))
        nrt->rt6i_protocol = RTPROT_REDIRECT;
        nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
 
-       if (ip6_ins_rt(nrt))
-               goto out_release;
+       /* No need to remove rt from the exception table if rt is
+        * a cached route because rt6_insert_exception() will
+        * takes care of it
+        */
+       if (rt6_insert_exception(nrt, rt)) {
+               dst_release_immediate(&nrt->dst);
+               goto out;
+       }
 
        netevent.old = &rt->dst;
        netevent.new = &nrt->dst;
        netevent.neigh = neigh;
        call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
-       if (rt->rt6i_flags & RTF_CACHE) {
-               rt = (struct rt6_info *) dst_clone(&rt->dst);
-               ip6_del_rt(rt);
-       }
-
-out_release:
-       /* Release the reference taken in
-        * ip6_rt_cache_alloc()
-        */
-       dst_release(&nrt->dst);
-
 out:
        neigh_release(neigh);
 }
 {
        struct in6_addr *gateway = (struct in6_addr *)arg;
 
-       /* RTF_CACHE_GATEWAY case will be removed once the exception
-        * table is hooked up to store all cached routes.
-        */
-       if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
-            ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
-            ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
+       if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
+           ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
                return -1;
        }
 
            dst_metric_raw(&rt->dst, RTAX_MTU) &&
            !dst_metric_locked(&rt->dst, RTAX_MTU)) {
                spin_lock_bh(&rt6_exception_lock);
-               /* This case will be removed once the exception table
-                * is hooked up.
-                */
-               if (rt->rt6i_flags & RTF_CACHE) {
-                       /* For RTF_CACHE with rt6i_pmtu == 0
-                        * (i.e. a redirected route),
-                        * the metrics of its rt->dst.from has already
-                        * been updated.
-                        */
-                       if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
-                               rt->rt6i_pmtu = arg->mtu;
-               } else if (dst_mtu(&rt->dst) >= arg->mtu ||
-                          (dst_mtu(&rt->dst) < arg->mtu &&
-                           dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
+               if (dst_mtu(&rt->dst) >= arg->mtu ||
+                   (dst_mtu(&rt->dst) < arg->mtu &&
+                    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
                        dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
                }
                rt6_exceptions_update_pmtu(rt, arg->mtu);