/* netfilter.c: look after the filters for various protocols.
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 *
 * Rusty Russell (C)2000 -- This code is GPL.
 * Patrick McHardy (c)2006-2012
 */
 
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <net/protocol.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/netfilter_ipv6.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <net/net_namespace.h>
#include <net/sock.h>

#include "nf_internals.h"
 
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_ipv6_ops);

DEFINE_PER_CPU(bool, nf_skb_duplicated);
EXPORT_SYMBOL_GPL(nf_skb_duplicated);

#ifdef HAVE_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
#endif

static DEFINE_MUTEX(nf_hook_mutex);

/* max hooks per family/hooknum */
#define MAX_HOOK_COUNT          1024
 
#define nf_entry_dereference(e) \
        rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))

static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
{
        struct nf_hook_entries *e;
        size_t alloc = sizeof(*e) +
                       sizeof(struct nf_hook_entry) * num +
                       sizeof(struct nf_hook_ops *) * num +
                       sizeof(struct nf_hook_entries_rcu_head);

        if (num == 0)
                return NULL;

        e = kvzalloc(alloc, GFP_KERNEL);
        if (e)
                e->num_hook_entries = num;
        return e;
}
 
static void __nf_hook_entries_free(struct rcu_head *h)
{
        struct nf_hook_entries_rcu_head *head;

        head = container_of(h, struct nf_hook_entries_rcu_head, head);
        kvfree(head->allocation);
}
 
static void nf_hook_entries_free(struct nf_hook_entries *e)
{
        struct nf_hook_entries_rcu_head *head;
        struct nf_hook_ops **ops;
        unsigned int num;

        if (!e)
                return;

        num = e->num_hook_entries;
        ops = nf_hook_entries_get_hook_ops(e);
        head = (void *)&ops[num];
        head->allocation = e;
        call_rcu(&head->head, __nf_hook_entries_free);
}
 
static unsigned int accept_all(void *priv,
                               struct sk_buff *skb,
                               const struct nf_hook_state *state)
{
        return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */
}

static const struct nf_hook_ops dummy_ops = {
        .hook = accept_all,
        .priority = INT_MIN,
};
 
static struct nf_hook_entries *
nf_hook_entries_grow(const struct nf_hook_entries *old,
                     const struct nf_hook_ops *reg)
{
        unsigned int i, alloc_entries, nhooks, old_entries;
        struct nf_hook_ops **orig_ops = NULL;
        struct nf_hook_ops **new_ops;
        struct nf_hook_entries *new;
        bool inserted = false;

        alloc_entries = 1;
        old_entries = old ? old->num_hook_entries : 0;

        if (old) {
                orig_ops = nf_hook_entries_get_hook_ops(old);

                for (i = 0; i < old_entries; i++) {
                        if (orig_ops[i] != &dummy_ops)
                                alloc_entries++;
                }
        }

        if (alloc_entries > MAX_HOOK_COUNT)
                return ERR_PTR(-E2BIG);

        new = allocate_hook_entries_size(alloc_entries);
        if (!new)
                return ERR_PTR(-ENOMEM);

        new_ops = nf_hook_entries_get_hook_ops(new);

        i = 0;
        nhooks = 0;
        while (i < old_entries) {
                if (orig_ops[i] == &dummy_ops) {
                        ++i;
                        continue;
                }

                if (reg->nat_hook && orig_ops[i]->nat_hook) {
                        kvfree(new);
                        return ERR_PTR(-EBUSY);
                }

                if (inserted || reg->priority > orig_ops[i]->priority) {
                        new_ops[nhooks] = (void *)orig_ops[i];
                        new->hooks[nhooks] = old->hooks[i];
                        i++;
                } else {
                        new_ops[nhooks] = (void *)reg;
                        new->hooks[nhooks].hook = reg->hook;
                        new->hooks[nhooks].priv = reg->priv;
                        inserted = true;
                }
                nhooks++;
        }

        if (!inserted) {
                new_ops[nhooks] = (void *)reg;
                new->hooks[nhooks].hook = reg->hook;
                new->hooks[nhooks].priv = reg->priv;
        }

        return new;
}
 
static void hooks_validate(const struct nf_hook_entries *hooks)
{
#ifdef CONFIG_DEBUG_KERNEL
        struct nf_hook_ops **orig_ops;
        int prio = INT_MIN;
        size_t i = 0;

        orig_ops = nf_hook_entries_get_hook_ops(hooks);

        for (i = 0; i < hooks->num_hook_entries; i++) {
                if (orig_ops[i] == &dummy_ops)
                        continue;

                WARN_ON(orig_ops[i]->priority < prio);

                if (orig_ops[i]->priority > prio)
                        prio = orig_ops[i]->priority;
        }
#endif
}
 
/*
 * __nf_hook_entries_try_shrink - try to shrink hook array
 *
 * @pp -- location of hook blob
 *
 * Hook unregistration must always succeed, so to-be-removed hooks
 * are replaced by a dummy one that will just move to next hook.
 *
 * This counts the current dummy hooks, attempts to allocate new blob,
 * copies the live hooks, then replaces and discards old one.
 *
 * Returns address to free, or NULL.
 */
static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp)
{
        struct nf_hook_entries *old, *new = NULL;
        unsigned int i, j, skip = 0, hook_entries;
        struct nf_hook_ops **orig_ops;
        struct nf_hook_ops **new_ops;

        old = nf_entry_dereference(*pp);
        if (WARN_ON_ONCE(!old))
                return NULL;

        orig_ops = nf_hook_entries_get_hook_ops(old);
        for (i = 0; i < old->num_hook_entries; i++) {
                if (orig_ops[i] == &dummy_ops)
                        skip++;
        }

        /* if skip == hook_entries all hooks have been removed */
        hook_entries = old->num_hook_entries;
        if (skip == hook_entries)
                goto out_assign;

        if (skip == 0)
                return NULL;

        hook_entries -= skip;
        new = allocate_hook_entries_size(hook_entries);
        if (!new)
                return NULL;

        new_ops = nf_hook_entries_get_hook_ops(new);
        for (i = 0, j = 0; i < old->num_hook_entries; i++) {
                if (orig_ops[i] == &dummy_ops)
                        continue;
                new->hooks[j] = old->hooks[i];
                new_ops[j] = (void *)orig_ops[i];
                j++;
        }
out_assign:
        rcu_assign_pointer(*pp, new);
        return old;
}
 
static struct nf_hook_entries __rcu **
nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
                   struct net_device *dev)
{
        switch (pf) {
        case NFPROTO_NETDEV:
                break;
#ifdef CONFIG_NETFILTER_FAMILY_ARP
        case NFPROTO_ARP:
                if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= hooknum))
                        return NULL;
                return net->nf.hooks_arp + hooknum;
#endif
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
        case NFPROTO_BRIDGE:
                if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum))
                        return NULL;
                return net->nf.hooks_bridge + hooknum;
#endif
        case NFPROTO_IPV4:
                if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum))
                        return NULL;
                return net->nf.hooks_ipv4 + hooknum;
        case NFPROTO_IPV6:
                if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum))
                        return NULL;
                return net->nf.hooks_ipv6 + hooknum;
#if IS_ENABLED(CONFIG_DECNET)
        case NFPROTO_DECNET:
                if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum))
                        return NULL;
                return net->nf.hooks_decnet + hooknum;
#endif
        default:
                WARN_ON_ONCE(1);
                return NULL;
        }

#ifdef CONFIG_NETFILTER_INGRESS
        if (hooknum == NF_NETDEV_INGRESS) {
                if (dev && dev_net(dev) == net)
                        return &dev->nf_hooks_ingress;
        }
#endif
        WARN_ON_ONCE(1);
        return NULL;
}
 
static int __nf_register_net_hook(struct net *net, int pf,
                                  const struct nf_hook_ops *reg)
{
        struct nf_hook_entries *p, *new_hooks;
        struct nf_hook_entries __rcu **pp;

        if (pf == NFPROTO_NETDEV) {
#ifndef CONFIG_NETFILTER_INGRESS
                if (reg->hooknum == NF_NETDEV_INGRESS)
                        return -EOPNOTSUPP;
#endif
                if (reg->hooknum != NF_NETDEV_INGRESS ||
                    !reg->dev || dev_net(reg->dev) != net)
                        return -EINVAL;
        }

        pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
        if (!pp)
                return -EINVAL;

        mutex_lock(&nf_hook_mutex);

        p = nf_entry_dereference(*pp);
        new_hooks = nf_hook_entries_grow(p, reg);

        if (!IS_ERR(new_hooks))
                rcu_assign_pointer(*pp, new_hooks);

        mutex_unlock(&nf_hook_mutex);
        if (IS_ERR(new_hooks))
                return PTR_ERR(new_hooks);

        hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
        if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
                net_inc_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
        static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
#endif
        BUG_ON(p == new_hooks);
        nf_hook_entries_free(p);
        return 0;
}
 
/*
 * nf_remove_net_hook - remove a hook from blob
 *
 * @oldp: current address of hook blob
 * @unreg: hook to unregister
 *
 * This cannot fail, hook unregistration must always succeed.
 * Therefore replace the to-be-removed hook with a dummy hook.
 */
static void nf_remove_net_hook(struct nf_hook_entries *old,
                               const struct nf_hook_ops *unreg, int pf)
{
        struct nf_hook_ops **orig_ops;
        bool removed = false;
        unsigned int i;

        orig_ops = nf_hook_entries_get_hook_ops(old);
        for (i = 0; i < old->num_hook_entries; i++) {
                if (orig_ops[i] != unreg)
                        continue;
                WRITE_ONCE(old->hooks[i].hook, accept_all);
                WRITE_ONCE(orig_ops[i], &dummy_ops);
                removed = true;
                break;
        }

        if (removed) {
#ifdef CONFIG_NETFILTER_INGRESS
                if (pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
                        net_dec_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
                static_key_slow_dec(&nf_hooks_needed[pf][unreg->hooknum]);
#endif
        } else {
                WARN_ONCE(1, "hook not found, pf %d num %d", pf, unreg->hooknum);
        }
}
 
static void __nf_unregister_net_hook(struct net *net, int pf,
                                     const struct nf_hook_ops *reg)
{
        struct nf_hook_entries __rcu **pp;
        struct nf_hook_entries *p;

        pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
        if (!pp)
                return;

        mutex_lock(&nf_hook_mutex);

        p = nf_entry_dereference(*pp);
        if (WARN_ON_ONCE(!p)) {
                mutex_unlock(&nf_hook_mutex);
                return;
        }

        nf_remove_net_hook(p, reg, pf);

        p = __nf_hook_entries_try_shrink(pp);
        mutex_unlock(&nf_hook_mutex);
        if (!p)
                return;

        nf_queue_nf_hook_drop(net);
        nf_hook_entries_free(p);
}
 
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
        if (reg->pf == NFPROTO_INET) {
                __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
                __nf_unregister_net_hook(net, NFPROTO_IPV6, reg);
        } else {
                __nf_unregister_net_hook(net, reg->pf, reg);
        }
}
EXPORT_SYMBOL(nf_unregister_net_hook);
 
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
        int err;

        if (reg->pf == NFPROTO_INET) {
                err = __nf_register_net_hook(net, NFPROTO_IPV4, reg);
                if (err < 0)
                        return err;

                err = __nf_register_net_hook(net, NFPROTO_IPV6, reg);
                if (err < 0) {
                        __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
                        return err;
                }
        } else {
                err = __nf_register_net_hook(net, reg->pf, reg);
                if (err < 0)
                        return err;
        }

        return 0;
}
EXPORT_SYMBOL(nf_register_net_hook);
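
/* Illustrative usage sketch (not part of the original file): the typical
 * pattern a module would use with the registration API above.  The names
 * sample_hook_fn/sample_ops/sample_init/sample_exit are hypothetical, and
 * NF_IP_PRI_FIRST is assumed to come from <linux/netfilter_ipv4.h>.
 */
#if 0
static unsigned int sample_hook_fn(void *priv, struct sk_buff *skb,
                                   const struct nf_hook_state *state)
{
        return NF_ACCEPT;       /* pass the packet on to the next hook */
}

static const struct nf_hook_ops sample_ops = {
        .hook           = sample_hook_fn,
        .pf             = NFPROTO_IPV4,
        .hooknum        = NF_INET_PRE_ROUTING,
        .priority       = NF_IP_PRI_FIRST,
};

static int __init sample_init(void)
{
        /* ops registered with .pf = NFPROTO_INET are installed for both
         * NFPROTO_IPV4 and NFPROTO_IPV6, as implemented above.
         */
        return nf_register_net_hook(&init_net, &sample_ops);
}

static void __exit sample_exit(void)
{
        nf_unregister_net_hook(&init_net, &sample_ops);
}
#endif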
 
int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
                          unsigned int n)
{
        unsigned int i;
        int err = 0;

        for (i = 0; i < n; i++) {
                err = nf_register_net_hook(net, &reg[i]);
                if (err)
                        goto err;
        }
        return err;

err:
        if (i > 0)
                nf_unregister_net_hooks(net, reg, i);
        return err;
}
EXPORT_SYMBOL(nf_register_net_hooks);
 
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
                             unsigned int hookcount)
{
        unsigned int i;

        for (i = 0; i < hookcount; i++)
                nf_unregister_net_hook(net, &reg[i]);
}
EXPORT_SYMBOL(nf_unregister_net_hooks);
 
/* Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
                 const struct nf_hook_entries *e, unsigned int s)
{
        unsigned int verdict;
        int ret;

        for (; s < e->num_hook_entries; s++) {
                verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
                switch (verdict & NF_VERDICT_MASK) {
                case NF_ACCEPT:
                        break;
                case NF_DROP:
                        kfree_skb(skb);
                        ret = NF_DROP_GETERR(verdict);
                        if (ret == 0)
                                ret = -EPERM;
                        return ret;
                case NF_QUEUE:
                        ret = nf_queue(skb, state, e, s, verdict);
                        if (ret == 1)
                                continue;
                        return ret;
                default:
                        /* Implicit handling for NF_STOLEN, as well as any other
                         * non conventional verdicts.
                         */
                        return 0;
                }
        }

        return 1;
}
EXPORT_SYMBOL(nf_hook_slow);
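
/* Illustrative sketch (not part of the original file) of how a caller
 * consumes the return value documented above; it mirrors, roughly, what the
 * nf_hook()/NF_HOOK() wrappers in <linux/netfilter.h> do.  hook_head, state,
 * okfn, net and sk are assumed to be set up by the caller.
 */
#if 0
        int ret;

        rcu_read_lock();
        ret = nf_hook_slow(skb, &state, hook_head, 0);
        rcu_read_unlock();

        if (ret == 1)                   /* all hooks accepted the packet */
                ret = okfn(net, sk, skb);
        /* ret == 0: skb was queued or stolen, nothing left to do here.
         * ret < 0 (e.g. -EPERM for NF_DROP): skb has already been freed.
         */
#endif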
 
int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
{
        if (writable_len > skb->len)
                return 0;

        /* Not exclusive use of packet?  Must copy. */
        if (!skb_cloned(skb)) {
                if (writable_len <= skb_headlen(skb))
                        return 1;
        } else if (skb_clone_writable(skb, writable_len))
                return 1;

        if (writable_len <= skb_headlen(skb))
                writable_len = 0;
        else
                writable_len -= skb_headlen(skb);

        return !!__pskb_pull_tail(skb, writable_len);
}
EXPORT_SYMBOL(skb_make_writable);
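
/* Illustrative sketch (not part of the original file): a hook or target that
 * wants to modify the IPv4 header would typically call skb_make_writable()
 * first, so the header bytes are linear and not shared with a clone.  The
 * function name sample_set_ttl is hypothetical, and <linux/ip.h> is assumed
 * for ip_hdr()/struct iphdr.
 */
#if 0
static bool sample_set_ttl(struct sk_buff *skb, u8 ttl)
{
        struct iphdr *iph;

        if (!skb_make_writable(skb, sizeof(struct iphdr)))
                return false;   /* no private, writable copy available */

        iph = ip_hdr(skb);
        iph->ttl = ttl;
        /* a real caller would also update iph->check here */
        return true;
}
#endif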
 
/* This needs to be compiled in any case to avoid dependencies between the
 * nfnetlink_queue code and nf_conntrack.
 */
struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nfnl_ct_hook);
 
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* This does not belong here, but locally generated errors need it if connection
   tracking in use: without this, connection may not be in hash table, and hence
   manufactured ICMP or RST packets will not be associated with it. */
void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
                __rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach);

void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
{
        void (*attach)(struct sk_buff *, const struct sk_buff *);

        if (skb->_nfct) {
                rcu_read_lock();
                attach = rcu_dereference(ip_ct_attach);
                if (attach)
                        attach(new, skb);
                rcu_read_unlock();
        }
}
EXPORT_SYMBOL(nf_ct_attach);
 
void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
EXPORT_SYMBOL(nf_ct_destroy);

void nf_conntrack_destroy(struct nf_conntrack *nfct)
{
        void (*destroy)(struct nf_conntrack *);

        rcu_read_lock();
        destroy = rcu_dereference(nf_ct_destroy);
        BUG_ON(destroy == NULL);
        destroy(nfct);
        rcu_read_unlock();
}
EXPORT_SYMBOL(nf_conntrack_destroy);
 
/* Built-in default zone used e.g. by modules. */
const struct nf_conntrack_zone nf_ct_zone_dflt = {
        .id     = NF_CT_DEFAULT_ZONE_ID,
        .dir    = NF_CT_DEFAULT_ZONE_DIR,
};
EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
#endif /* CONFIG_NF_CONNTRACK */
 
#ifdef CONFIG_NF_NAT_NEEDED
void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
EXPORT_SYMBOL(nf_nat_decode_session_hook);
#endif
 
static void __net_init
__netfilter_net_init(struct nf_hook_entries __rcu **e, int max)
{
        int h;

        for (h = 0; h < max; h++)
                RCU_INIT_POINTER(e[h], NULL);
}
 
static int __net_init netfilter_net_init(struct net *net)
{
        __netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4));
        __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6));
#ifdef CONFIG_NETFILTER_FAMILY_ARP
        __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp));
#endif
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
        __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
#endif
#if IS_ENABLED(CONFIG_DECNET)
        __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
#endif

#ifdef CONFIG_PROC_FS
        net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
                                                net->proc_net);
        if (!net->nf.proc_netfilter) {
                if (!net_eq(net, &init_net))
                        pr_err("cannot create netfilter proc entry");

                return -ENOMEM;
        }
#endif

        return 0;
}
 
static void __net_exit netfilter_net_exit(struct net *net)
{
        remove_proc_entry("netfilter", net->proc_net);
}

static struct pernet_operations netfilter_net_ops = {
        .init = netfilter_net_init,
        .exit = netfilter_net_exit,
};
 
int __init netfilter_init(void)
{
        int ret;

        ret = register_pernet_subsys(&netfilter_net_ops);
        if (ret < 0)
                goto err;

        ret = netfilter_log_init();
        if (ret < 0)
                goto err_pernet;

        return 0;
err_pernet:
        unregister_pernet_subsys(&netfilter_net_ops);
err:
        return ret;
}