]> www.infradead.org Git - users/dwmw2/linux.git/commitdiff
netfilter: nf_tables: Add flowtable map for xdp offload
author Florian Westphal <fw@strlen.de>
Sat, 29 Jun 2024 22:26:48 +0000 (00:26 +0200)
committer Daniel Borkmann <daniel@iogearbox.net>
Mon, 1 Jul 2024 15:01:53 +0000 (17:01 +0200)
This adds a small internal mapping table so that a new bpf (xdp) kfunc
can perform lookups in a flowtable.

As-is, an xdp program has access to the device pointer, but has no way to
do a lookup in a flowtable -- there is no way to obtain the needed struct
without questionable stunts.

This allows obtaining an nf_flowtable pointer from a net_device
structure.

In order to keep backward compatibility, the infrastructure allows the
user to add a given device to multiple flowtables, but a lookup will
always return the first added mapping, since it assumes the right
configuration is a 1:1 mapping between flowtables and net_devices.

Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Link: https://lore.kernel.org/bpf/9f20e2c36f494b3bf177328718367f636bb0b2ab.1719698275.git.lorenzo@kernel.org
include/net/netfilter/nf_flow_table.h
net/netfilter/Makefile
net/netfilter/nf_flow_table_offload.c
net/netfilter/nf_flow_table_xdp.c [new file with mode: 0644]

index 9abb7ee40d72fc2e7d2ef0ec86ef18df939ddd9c..d845745207d2d51b03aa97a1ca4a5d837fd6d66a 100644 (file)
@@ -305,6 +305,11 @@ struct flow_ports {
        __be16 source, dest;
 };
 
+struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev);
+int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable,
+                             struct net_device *dev,
+                             enum flow_block_command cmd);
+
 unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
                                     const struct nf_hook_state *state);
 unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
index 614815a3ed73878212d3860e1d89d582f28c5d2b..18046872a38aa016676e7d130b0b5566188ab49f 100644 (file)
@@ -142,7 +142,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV)        += nft_fwd_netdev.o
 # flow table infrastructure
 obj-$(CONFIG_NF_FLOW_TABLE)    += nf_flow_table.o
 nf_flow_table-objs             := nf_flow_table_core.o nf_flow_table_ip.o \
-                                  nf_flow_table_offload.o
+                                  nf_flow_table_offload.o nf_flow_table_xdp.o
 nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o
 
 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
index a010b25076ca0673ca28b7208f9b24f1e250e136..ff1a4e36c2b5dcf9d12d0f65ff98277f56a17beb 100644 (file)
@@ -1192,7 +1192,7 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
        int err;
 
        if (!nf_flowtable_hw_offload(flowtable))
-               return 0;
+               return nf_flow_offload_xdp_setup(flowtable, dev, cmd);
 
        if (dev->netdev_ops->ndo_setup_tc)
                err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
diff --git a/net/netfilter/nf_flow_table_xdp.c b/net/netfilter/nf_flow_table_xdp.c
new file mode 100644 (file)
index 0000000..e1252d0
--- /dev/null
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/netdevice.h>
+#include <net/flow_offload.h>
+#include <net/netfilter/nf_flow_table.h>
+/* One flowtable bound to a device. Elements are chained on the head list of
+ * the struct flow_offload_xdp entry that represents that device.
+ */
+struct flow_offload_xdp_ft {
+       struct list_head head;          /* linkage in the per-device list */
+       struct nf_flowtable *ft;        /* what nf_flowtable_by_dev() returns */
+       struct rcu_head rcuhead;        /* used by kfree_rcu() on removal */
+};
+/* Per-device entry of nf_xdp_hashtable: holds the list of flowtables the
+ * device has been bound to.
+ */
+struct flow_offload_xdp {
+       struct hlist_node hnode;        /* linkage in nf_xdp_hashtable */
+       unsigned long net_device_addr;  /* net_device pointer value, the hash key */
+       struct list_head head;          /* list of struct flow_offload_xdp_ft */
+};
+/* Device -> flowtable map. The insert and remove helpers modify it with
+ * nf_xdp_hashtable_lock held; nf_flowtable_by_dev() walks it with the RCU
+ * iterators and does not take the lock.
+ */
+#define NF_XDP_HT_BITS 4
+static DEFINE_HASHTABLE(nf_xdp_hashtable, NF_XDP_HT_BITS);
+static DEFINE_MUTEX(nf_xdp_hashtable_lock);
+
+/* Return the flowtable mapped to @dev, or NULL if @dev has no entry in
+ * nf_xdp_hashtable (or its entry has an empty flowtable list).
+ *
+ * The net_device pointer value itself is the hash key. The table is walked
+ * with the RCU iterators and nf_xdp_hashtable_lock is not taken here:
+ * caller must hold rcu read lock.
+ */
+struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev)
+{
+       unsigned long key = (unsigned long)dev;
+       struct flow_offload_xdp *iter;
+
+       hash_for_each_possible_rcu(nf_xdp_hashtable, iter, hnode, key) {
+               if (key == iter->net_device_addr) {
+                       struct flow_offload_xdp_ft *ft_elem;
+
+                       /* The user is supposed to insert a given net_device
+                        * just into a single nf_flowtable so we always return
+                        * the first element here. Elements are appended with
+                        * list_add_tail_rcu(), so the first one is the oldest
+                        * binding still on the list.
+                        */
+                       ft_elem = list_first_or_null_rcu(&iter->head,
+                                                        struct flow_offload_xdp_ft,
+                                                        head);
+                       return ft_elem ? ft_elem->ft : NULL;
+               }
+       }
+
+       return NULL;
+}
+/* Record that @dev is bound to flowtable @ft.
+ *
+ * Appends @ft to the list of the hash entry keyed by the @dev pointer,
+ * creating and hashing that entry first if @dev has none yet. No check is
+ * made for @ft already being on the list.
+ *
+ * Returns 0 on success or -ENOMEM if either allocation fails.
+ */
+static int nf_flowtable_by_dev_insert(struct nf_flowtable *ft,
+                                     const struct net_device *dev)
+{
+       struct flow_offload_xdp *iter, *elem = NULL;
+       unsigned long key = (unsigned long)dev;
+       struct flow_offload_xdp_ft *ft_elem;
+       /* Allocated before taking the lock; freed at err_unlock on failure. */
+       ft_elem = kzalloc(sizeof(*ft_elem), GFP_KERNEL_ACCOUNT);
+       if (!ft_elem)
+               return -ENOMEM;
+
+       ft_elem->ft = ft;
+
+       mutex_lock(&nf_xdp_hashtable_lock);
+       /* Find the existing entry for this device, if any. */
+       hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) {
+               if (key == iter->net_device_addr) {
+                       elem = iter;
+                       break;
+               }
+       }
+
+       if (!elem) {
+               elem = kzalloc(sizeof(*elem), GFP_KERNEL_ACCOUNT);
+               if (!elem)
+                       goto err_unlock;
+               /* Set up the entry before hash_add_rcu() makes it reachable
+                * from the hashtable.
+                */
+               elem->net_device_addr = key;
+               INIT_LIST_HEAD(&elem->head);
+               hash_add_rcu(nf_xdp_hashtable, &elem->hnode, key);
+       }
+       list_add_tail_rcu(&ft_elem->head, &elem->head);
+
+       mutex_unlock(&nf_xdp_hashtable_lock);
+
+       return 0;
+
+err_unlock:
+       mutex_unlock(&nf_xdp_hashtable_lock);
+       kfree(ft_elem);
+
+       return -ENOMEM;
+}
+/* Drop the binding between @dev and flowtable @ft.
+ *
+ * Unlinks every list element of @dev's hash entry that points at @ft. If the
+ * list is empty afterwards, the entry itself is unhashed and then freed once
+ * synchronize_rcu() has returned. Does nothing if @dev has no entry.
+ */
+static void nf_flowtable_by_dev_remove(struct nf_flowtable *ft,
+                                      const struct net_device *dev)
+{
+       struct flow_offload_xdp *iter, *elem = NULL;
+       unsigned long key = (unsigned long)dev;
+
+       mutex_lock(&nf_xdp_hashtable_lock);
+
+       hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) {
+               if (key == iter->net_device_addr) {
+                       elem = iter;
+                       break;
+               }
+       }
+
+       if (elem) {
+               struct flow_offload_xdp_ft *ft_elem, *ft_next;
+               /* _safe walk: matching elements are unlinked while iterating. */
+               list_for_each_entry_safe(ft_elem, ft_next, &elem->head, head) {
+                       if (ft_elem->ft == ft) {
+                               list_del_rcu(&ft_elem->head);
+                               kfree_rcu(ft_elem, rcuhead);
+                       }
+               }
+               /* Unhash the entry only when no flowtable is left on it;
+                * otherwise clear elem so it is not freed below.
+                */
+               if (list_empty(&elem->head))
+                       hash_del_rcu(&elem->hnode);
+               else
+                       elem = NULL;
+       }
+
+       mutex_unlock(&nf_xdp_hashtable_lock);
+       /* elem is non-NULL here only if it was unhashed above. */
+       if (elem) {
+               synchronize_rcu();
+               kfree(elem);
+       }
+}
+/* Add or remove the @dev -> @flowtable mapping according to @cmd.
+ *
+ * FLOW_BLOCK_BIND returns the result of nf_flowtable_by_dev_insert();
+ * FLOW_BLOCK_UNBIND removes the mapping and returns 0.
+ */
+int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable,
+                             struct net_device *dev,
+                             enum flow_block_command cmd)
+{
+       switch (cmd) {
+       case FLOW_BLOCK_BIND:
+               return nf_flowtable_by_dev_insert(flowtable, dev);
+       case FLOW_BLOCK_UNBIND:
+               nf_flowtable_by_dev_remove(flowtable, dev);
+               return 0;
+       }
+       /* Any other command is unexpected: warn once, still return 0. */
+       WARN_ON_ONCE(1);
+       return 0;
+}