#include <net/netns/generic.h>
 #include <net/tun_proto.h>
 #include <net/vxlan.h>
+#include <net/nexthop.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ip6_tunnel.h>
        u16               state;        /* see ndm_state */
        __be32            vni;
        u16               flags;        /* see ndm_flags and below */
+       struct list_head  nh_list;
+       struct nexthop __rcu *nh;
 };
 
 #define NTF_VXLAN_ADDED_BY_USER 0x100
  */
 static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
 {
+       if (rcu_access_pointer(fdb->nh)) /* fdb has a nexthop attached: no first remote is reported */
+               return NULL; /* so callers have to cope with a NULL result */
        return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list);
 }
 
 static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
 {
+       if (rcu_access_pointer(fdb->nh)) /* fdb has a nexthop attached: no first remote is reported */
+               return NULL; /* so callers have to cope with a NULL result */
        return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
 }
 
 {
        unsigned long now = jiffies;
        struct nda_cacheinfo ci;
+       bool send_ip, send_eth;
        struct nlmsghdr *nlh;
+       struct nexthop *nh;
        struct ndmsg *ndm;
-       bool send_ip, send_eth;
 
        nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
        if (nlh == NULL)
 
        send_eth = send_ip = true;
 
+       nh = rcu_dereference_rtnl(fdb->nh);
        if (type == RTM_GETNEIGH) {
-               send_ip = !vxlan_addr_any(&rdst->remote_ip);
+               if (rdst) {
+                       send_ip = !vxlan_addr_any(&rdst->remote_ip);
+                       ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
+               } else if (nh) {
+                       ndm->ndm_family = nexthop_get_family(nh);
+               }
                send_eth = !is_zero_ether_addr(fdb->eth_addr);
-               ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
        } else
                ndm->ndm_family = AF_BRIDGE;
        ndm->ndm_state = fdb->state;
        ndm->ndm_ifindex = vxlan->dev->ifindex;
        ndm->ndm_flags = fdb->flags;
-       if (rdst->offloaded)
+       if (rdst && rdst->offloaded)
                ndm->ndm_flags |= NTF_OFFLOADED;
        ndm->ndm_type = RTN_UNICAST;
 
 
        if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
                goto nla_put_failure;
+       if (nh) {
+               if (nla_put_u32(skb, NDA_NH_ID, nh->id))
+                       goto nla_put_failure;
+       } else if (rdst) {
+               if (send_ip && vxlan_nla_put_addr(skb, NDA_DST,
+                                                 &rdst->remote_ip))
+                       goto nla_put_failure;
+
+               if (rdst->remote_port &&
+                   rdst->remote_port != vxlan->cfg.dst_port &&
+                   nla_put_be16(skb, NDA_PORT, rdst->remote_port))
+                       goto nla_put_failure;
+               if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
+                   nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
+                       goto nla_put_failure;
+               if (rdst->remote_ifindex &&
+                   nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
+                       goto nla_put_failure;
+       }
 
-       if (send_ip && vxlan_nla_put_addr(skb, NDA_DST, &rdst->remote_ip))
-               goto nla_put_failure;
-
-       if (rdst->remote_port && rdst->remote_port != vxlan->cfg.dst_port &&
-           nla_put_be16(skb, NDA_PORT, rdst->remote_port))
-               goto nla_put_failure;
-       if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
-           nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
-               goto nla_put_failure;
        if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni &&
            nla_put_u32(skb, NDA_SRC_VNI,
                        be32_to_cpu(fdb->vni)))
                goto nla_put_failure;
-       if (rdst->remote_ifindex &&
-           nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
-               goto nla_put_failure;
 
        ci.ndm_used      = jiffies_to_clock_t(now - fdb->used);
        ci.ndm_confirmed = 0;
 {
        int err;
 
-       if (swdev_notify) {
+       if (swdev_notify && rd) {
                switch (type) {
                case RTM_NEWNEIGH:
                        err = vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
        f->flags = ndm_flags;
        f->updated = f->used = jiffies;
        f->vni = src_vni;
+       f->nh = NULL;
+       INIT_LIST_HEAD(&f->nh_list);
        INIT_LIST_HEAD(&f->remotes);
        memcpy(f->eth_addr, mac, ETH_ALEN);
 
                           vxlan_fdb_head(vxlan, mac, src_vni));
 }
 
+/* Point @fdb at the nexthop identified by @nhid.
+ *
+ * The nexthop must exist, be flagged as an fdb nexthop, be a multipath
+ * group, and agree with the address family of the device's default
+ * destination (AF_INET needs a group with has_v4 set, AF_INET6 needs one
+ * without it).  On success a reference is held on the new nexthop, any
+ * nexthop previously attached to @fdb is unlinked and released, and @fdb
+ * is queued on the new nexthop's fdb_list.
+ *
+ * Pointers are read with rtnl_dereference(), so the caller is expected to
+ * hold RTNL.
+ *
+ * Returns 0 if @fdb already uses @nhid, 1 if the nexthop was changed, or a
+ * negative errno (with @extack filled in) on failure.
+ */
+static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
+                              u32 nhid, struct netlink_ext_ack *extack)
+{
+       struct nexthop *old_nh = rtnl_dereference(fdb->nh);
+       struct nh_group *nhg;
+       struct nexthop *nh;
+       int err = -EINVAL;
+
+       if (old_nh && old_nh->id == nhid)
+               return 0;
+
+       nh = nexthop_find_by_id(vxlan->net, nhid);
+       if (!nh) {
+               NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+               return err;
+       }
+
+       /* No reference was obtained here, so there is nothing to put. */
+       if (!nexthop_get(nh)) {
+               NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+               return err;
+       }
+
+       if (!nh->is_fdb_nh) {
+               NL_SET_ERR_MSG(extack, "Nexthop is not a fdb nexthop");
+               goto err_put;
+       }
+
+       /* nh_grp is an RCU-managed pointer: read it once through
+        * rtnl_dereference(), and only after is_group says it is a group.
+        */
+       nhg = nh->is_group ? rtnl_dereference(nh->nh_grp) : NULL;
+       if (!nhg || !nhg->mpath) {
+               NL_SET_ERR_MSG(extack, "Nexthop is not a multipath group");
+               goto err_put;
+       }
+
+       /* check nexthop group family */
+       switch (vxlan->default_dst.remote_ip.sa.sa_family) {
+       case AF_INET:
+               if (!nhg->has_v4) {
+                       err = -EAFNOSUPPORT;
+                       NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
+                       goto err_put;
+               }
+               break;
+       case AF_INET6:
+               if (nhg->has_v4) {
+                       err = -EAFNOSUPPORT;
+                       NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
+                       goto err_put;
+               }
+               break;
+       default:
+               break;
+       }
+
+       /* Detach from the previous nexthop before publishing the new one. */
+       if (old_nh) {
+               list_del_rcu(&fdb->nh_list);
+               nexthop_put(old_nh);
+       }
+       rcu_assign_pointer(fdb->nh, nh);
+       list_add_tail_rcu(&fdb->nh_list, &nh->fdb_list);
+       return 1;
+
+err_put:
+       /* Drop the reference taken by nexthop_get() above. */
+       nexthop_put(nh);
+       return err;
+}
+
 static int vxlan_fdb_create(struct vxlan_dev *vxlan,
                            const u8 *mac, union vxlan_addr *ip,
                            __u16 state, __be16 port, __be32 src_vni,
                            __be32 vni, __u32 ifindex, __u16 ndm_flags,
-                           struct vxlan_fdb **fdb)
+                           u32 nhid, struct vxlan_fdb **fdb,
+                           struct netlink_ext_ack *extack)
 {
        struct vxlan_rdst *rd = NULL;
        struct vxlan_fdb *f;
        if (!f)
                return -ENOMEM;
 
-       rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
-       if (rc < 0) {
-               kfree(f);
-               return rc;
-       }
+       if (nhid)
+               rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
+       else
+               rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+       if (rc < 0)
+               goto errout;
 
        *fdb = f;
 
        return 0;
+
+errout:
+       kfree(f);
+       return rc;
 }
 
 static void __vxlan_fdb_free(struct vxlan_fdb *f)
 {
        struct vxlan_rdst *rd, *nd;
+       struct nexthop *nh;
+
+       nh = rcu_dereference_raw(f->nh);
+       if (nh) {
+               rcu_assign_pointer(f->nh, NULL);
+               list_del_rcu(&f->nh_list);
+               nexthop_put(nh);
+       }
 
        list_for_each_entry_safe(rd, nd, &f->remotes, list) {
                dst_cache_destroy(&rd->dst_cache);
        netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr);
 
        --vxlan->addrcnt;
-       if (do_notify)
-               list_for_each_entry(rd, &f->remotes, list)
-                       vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH,
+       if (do_notify) {
+               if (rcu_access_pointer(f->nh))
+                       vxlan_fdb_notify(vxlan, f, NULL, RTM_DELNEIGH,
                                         swdev_notify, NULL);
+               else
+                       list_for_each_entry(rd, &f->remotes, list)
+                               vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH,
+                                                swdev_notify, NULL);
+       }
 
        hlist_del_rcu(&f->hlist);
        call_rcu(&f->rcu, vxlan_fdb_free);
                                     __u16 state, __u16 flags,
                                     __be16 port, __be32 vni,
                                     __u32 ifindex, __u16 ndm_flags,
-                                    struct vxlan_fdb *f,
+                                    struct vxlan_fdb *f, u32 nhid,
                                     bool swdev_notify,
                                     struct netlink_ext_ack *extack)
 {
        int rc = 0;
        int err;
 
+       if (nhid && !rcu_access_pointer(f->nh)) {
+               NL_SET_ERR_MSG(extack,
+                              "Cannot replace an existing non nexthop fdb with a nexthop");
+               return -EOPNOTSUPP;
+       }
+
+       if (nhid && (flags & NLM_F_APPEND)) {
+               NL_SET_ERR_MSG(extack,
+                              "Cannot append to a nexthop fdb");
+               return -EOPNOTSUPP;
+       }
+
        /* Do not allow an externally learned entry to take over an entry added
         * by the user.
         */
                /* Only change unicasts */
                if (!(is_multicast_ether_addr(f->eth_addr) ||
                      is_zero_ether_addr(f->eth_addr))) {
-                       rc = vxlan_fdb_replace(f, ip, port, vni,
-                                              ifindex, &oldrd);
+                       if (nhid) {
+                               rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
+                               if (rc < 0)
+                                       return rc;
+                       } else {
+                               rc = vxlan_fdb_replace(f, ip, port, vni,
+                                                      ifindex, &oldrd);
+                       }
                        notify |= rc;
                } else {
+                       NL_SET_ERR_MSG(extack, "Cannot replace non-unicast fdb entries");
                        return -EOPNOTSUPP;
                }
        }
        return 0;
 
 err_notify:
+       if (nhid)
+               return err;
        if ((flags & NLM_F_REPLACE) && rc)
                *rd = oldrd;
        else if ((flags & NLM_F_APPEND) && rc) {
                                   const u8 *mac, union vxlan_addr *ip,
                                   __u16 state, __u16 flags,
                                   __be16 port, __be32 src_vni, __be32 vni,
-                                  __u32 ifindex, __u16 ndm_flags,
+                                  __u32 ifindex, __u16 ndm_flags, u32 nhid,
                                   bool swdev_notify,
                                   struct netlink_ext_ack *extack)
 {
 
        netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
        rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
-                             vni, ifindex, fdb_flags, &f);
+                             vni, ifindex, fdb_flags, nhid, &f, extack);
        if (rc < 0)
                return rc;
 
                            const u8 *mac, union vxlan_addr *ip,
                            __u16 state, __u16 flags,
                            __be16 port, __be32 src_vni, __be32 vni,
-                           __u32 ifindex, __u16 ndm_flags,
+                           __u32 ifindex, __u16 ndm_flags, u32 nhid,
                            bool swdev_notify,
                            struct netlink_ext_ack *extack)
 {
 
                return vxlan_fdb_update_existing(vxlan, ip, state, flags, port,
                                                 vni, ifindex, ndm_flags, f,
-                                                swdev_notify, extack);
+                                                nhid, swdev_notify, extack);
        } else {
                if (!(flags & NLM_F_CREATE))
                        return -ENOENT;
 
                return vxlan_fdb_update_create(vxlan, mac, ip, state, flags,
                                               port, src_vni, vni, ifindex,
-                                              ndm_flags, swdev_notify, extack);
+                                              ndm_flags, nhid, swdev_notify,
+                                              extack);
        }
 }
 
 
 static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
                           union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
-                          __be32 *vni, u32 *ifindex)
+                          __be32 *vni, u32 *ifindex, u32 *nhid)
 {
        struct net *net = dev_net(vxlan->dev);
        int err;
                *ifindex = 0;
        }
 
+       if (tb[NDA_NH_ID])
+               *nhid = nla_get_u32(tb[NDA_NH_ID]);
+       else
+               *nhid = 0;
+
        return 0;
 }
 
        union vxlan_addr ip;
        __be16 port;
        __be32 src_vni, vni;
-       u32 ifindex;
+       u32 ifindex, nhid;
        u32 hash_index;
        int err;
 
                return -EINVAL;
        }
 
-       if (tb[NDA_DST] == NULL)
+       if (!tb || (!tb[NDA_DST] && !tb[NDA_NH_ID]))
                return -EINVAL;
 
-       err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex);
+       err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
+                             &nhid);
        if (err)
                return err;
 
        err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
                               port, src_vni, vni, ifindex,
                               ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER,
-                              true, extack);
+                              nhid, true, extack);
        spin_unlock_bh(&vxlan->hash_lock[hash_index]);
 
        return err;
                              __be16 port, __be32 src_vni, __be32 vni,
                              u32 ifindex, bool swdev_notify)
 {
-       struct vxlan_fdb *f;
        struct vxlan_rdst *rd = NULL;
+       struct vxlan_fdb *f;
        int err = -ENOENT;
 
        f = vxlan_find_mac(vxlan, addr, src_vni);
        struct vxlan_dev *vxlan = netdev_priv(dev);
        union vxlan_addr ip;
        __be32 src_vni, vni;
-       __be16 port;
-       u32 ifindex;
+       u32 ifindex, nhid;
        u32 hash_index;
+       __be16 port;
        int err;
 
-       err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex);
+       err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
+                             &nhid);
        if (err)
                return err;
 
                hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
                        struct vxlan_rdst *rd;
 
+                       if (rcu_access_pointer(f->nh)) {
+                               err = vxlan_fdb_info(skb, vxlan, f,
+                                                    NETLINK_CB(cb->skb).portid,
+                                                    cb->nlh->nlmsg_seq,
+                                                    RTM_NEWNEIGH,
+                                                    NLM_F_MULTI, NULL);
+                               if (err < 0)
+                                       goto out;
+                               continue;
+                       }
+
                        list_for_each_entry_rcu(rd, &f->remotes, list) {
                                if (*idx < cb->args[2])
                                        goto skip;
                if (f->state & (NUD_PERMANENT | NUD_NOARP))
                        return true;
 
+               /* Don't override an fdb with nexthop with a learnt entry */
+               if (rcu_access_pointer(f->nh))
+                       return true;
+
                if (net_ratelimit())
                        netdev_info(dev,
                                    "%pM migrated from %pIS to %pIS\n",
                                         vxlan->cfg.dst_port,
                                         vni,
                                         vxlan->default_dst.remote_vni,
-                                        ifindex, NTF_SELF, true, NULL);
+                                        ifindex, NTF_SELF, 0, true, NULL);
                spin_unlock(&vxlan->hash_lock[hash_index]);
        }
 
        kfree_skb(skb);
 }
 
+/* Transmit @skb for an fdb entry that forwards through a nexthop.
+ *
+ * A path is chosen by vxlan_fdb_nh_path_select() from the skb hash and
+ * written into an on-stack vxlan_rdst, which is then handed to
+ * vxlan_xmit_one().  If @f has no nexthop or no path is selected, the
+ * packet is counted in tx_dropped and freed.
+ */
+static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev,
+                         struct vxlan_fdb *f, __be32 vni, bool did_rsc)
+{
+       struct vxlan_rdst nh_rdst;
+       bool do_xmit = false;
+       struct nexthop *nh;
+       u32 hash;
+
+       memset(&nh_rdst, 0, sizeof(struct vxlan_rdst));
+       hash = skb_get_hash(skb);
+
+       /* The nexthop is only looked at inside the RCU read section;
+        * the selected path is copied out into nh_rdst.
+        */
+       rcu_read_lock();
+       nh = rcu_dereference(f->nh);
+       if (nh)
+               do_xmit = vxlan_fdb_nh_path_select(nh, hash, &nh_rdst);
+       rcu_read_unlock();
+
+       if (unlikely(!do_xmit)) {
+               dev->stats.tx_dropped++;
+               dev_kfree_skb(skb);
+               return;
+       }
+
+       vxlan_xmit_one(skb, dev, vni, &nh_rdst, did_rsc);
+}
+
 /* Transmit local packets over Vxlan
  *
  * Outer IP header inherits ECN and DF from inner header.
                }
        }
 
-       list_for_each_entry_rcu(rdst, &f->remotes, list) {
-               struct sk_buff *skb1;
+       if (rcu_access_pointer(f->nh)) {
+               vxlan_xmit_nh(skb, dev, f,
+                             (vni ? : vxlan->default_dst.remote_vni), did_rsc);
+       } else {
+               list_for_each_entry_rcu(rdst, &f->remotes, list) {
+                       struct sk_buff *skb1;
 
-               if (!fdst) {
-                       fdst = rdst;
-                       continue;
+                       if (!fdst) {
+                               fdst = rdst;
+                               continue;
+                       }
+                       skb1 = skb_clone(skb, GFP_ATOMIC);
+                       if (skb1)
+                               vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
                }
-               skb1 = skb_clone(skb, GFP_ATOMIC);
-               if (skb1)
-                       vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
+               if (fdst)
+                       vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
+               else
+                       kfree_skb(skb);
        }
 
-       if (fdst)
-               vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
-       else
-               kfree_skb(skb);
        return NETDEV_TX_OK;
 }
 
                                       dst->remote_vni,
                                       dst->remote_vni,
                                       dst->remote_ifindex,
-                                      NTF_SELF, &f);
+                                      NTF_SELF, 0, &f, extack);
                if (err)
                        return err;
        }
                                               vxlan->cfg.dst_port,
                                               conf.vni, conf.vni,
                                               conf.remote_ifindex,
-                                              NTF_SELF, true, extack);
+                                              NTF_SELF, 0, true, extack);
                        if (err) {
                                spin_unlock_bh(&vxlan->hash_lock[hash_index]);
                                netdev_adjacent_change_abort(dst->remote_dev,
                               fdb_info->remote_vni,
                               fdb_info->remote_ifindex,
                               NTF_USE | NTF_SELF | NTF_EXT_LEARNED,
-                              false, extack);
+                              0, false, extack);
        spin_unlock_bh(&vxlan->hash_lock[hash_index]);
 
        return err;