/* netfilter.c: look after the filters for various protocols.
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 */
 
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <net/protocol.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/netfilter_ipv6.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <net/net_namespace.h>
#include <net/sock.h>

#include "nf_internals.h"
 
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_ipv6_ops);

DEFINE_PER_CPU(bool, nf_skb_duplicated);
EXPORT_SYMBOL_GPL(nf_skb_duplicated);

#ifdef HAVE_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
#endif

static DEFINE_MUTEX(nf_hook_mutex);

/* max hooks per family/hooknum */
#define MAX_HOOK_COUNT          1024

#define nf_entry_dereference(e) \
        rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
 
static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
{
        struct nf_hook_entries *e;
        size_t alloc = sizeof(*e) +
                       sizeof(struct nf_hook_entry) * num +
                       sizeof(struct nf_hook_ops *) * num +
                       sizeof(struct nf_hook_entries_rcu_head);

        if (num == 0)
                return NULL;

        e = kvzalloc(alloc, GFP_KERNEL);
        if (e)
                e->num_hook_entries = num;
        return e;
}
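
/* The allocation above lays out one contiguous blob; a sketch derived
 * from the size computation in allocate_hook_entries_size():
 *
 *      struct nf_hook_entries            header (num_hook_entries)
 *      struct nf_hook_entry[num]         hook fn + priv, walked per packet
 *      struct nf_hook_ops *[num]         backpointers for (un)registration
 *      struct nf_hook_entries_rcu_head   RCU bookkeeping for kvfree()
 *
 * nf_hook_entries_get_hook_ops() returns the start of the ops pointer
 * array; nf_hook_entries_free() finds the rcu head right behind it.
 */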
 
static void __nf_hook_entries_free(struct rcu_head *h)
{
        struct nf_hook_entries_rcu_head *head;

        head = container_of(h, struct nf_hook_entries_rcu_head, head);
        kvfree(head->allocation);
}

static void nf_hook_entries_free(struct nf_hook_entries *e)
{
        struct nf_hook_entries_rcu_head *head;
        struct nf_hook_ops **ops;
        unsigned int num;

        if (!e)
                return;

        num = e->num_hook_entries;
        ops = nf_hook_entries_get_hook_ops(e);
        head = (void *)&ops[num];
        head->allocation = e;
        call_rcu(&head->head, __nf_hook_entries_free);
}
 
static unsigned int accept_all(void *priv,
                               struct sk_buff *skb,
                               const struct nf_hook_state *state)
{
        return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */
}

static const struct nf_hook_ops dummy_ops = {
        .hook = accept_all,
        .priority = INT_MIN,
};
 
static struct nf_hook_entries *
nf_hook_entries_grow(const struct nf_hook_entries *old,
                     const struct nf_hook_ops *reg)
{
        unsigned int i, alloc_entries, nhooks, old_entries;
        struct nf_hook_ops **orig_ops = NULL;
        struct nf_hook_ops **new_ops;
        struct nf_hook_entries *new;
        bool inserted = false;

        alloc_entries = 1;
        old_entries = old ? old->num_hook_entries : 0;

        if (old) {
                orig_ops = nf_hook_entries_get_hook_ops(old);

                /* count only live entries; dummies are dropped on copy */
                for (i = 0; i < old_entries; i++) {
                        if (orig_ops[i] != &dummy_ops)
                                alloc_entries++;
                }
        }

        if (alloc_entries > MAX_HOOK_COUNT)
                return ERR_PTR(-E2BIG);

        new = allocate_hook_entries_size(alloc_entries);
        if (!new)
                return ERR_PTR(-ENOMEM);

        new_ops = nf_hook_entries_get_hook_ops(new);

        i = 0;
        nhooks = 0;
        while (i < old_entries) {
                if (orig_ops[i] == &dummy_ops) {
                        ++i;
                        continue;
                }

                if (inserted || reg->priority > orig_ops[i]->priority) {
                        new_ops[nhooks] = (void *)orig_ops[i];
                        new->hooks[nhooks] = old->hooks[i];
                        i++;
                } else {
                        new_ops[nhooks] = (void *)reg;
                        new->hooks[nhooks].hook = reg->hook;
                        new->hooks[nhooks].priv = reg->priv;
                        inserted = true;
                }
                nhooks++;
        }

        if (!inserted) {
                new_ops[nhooks] = (void *)reg;
                new->hooks[nhooks].hook = reg->hook;
                new->hooks[nhooks].priv = reg->priv;
        }

        return new;
}
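
/* Illustrative example (not from the original source): with live hook
 * priorities { -200, 0, 300 }, growing the blob with a new hook of
 * priority 100 yields { -200, 0, 100, 300 }.  A new hook whose priority
 * equals an existing one is placed in front of it, since the copy loop
 * above only keeps old entries while reg->priority is strictly greater.
 */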
 
static void hooks_validate(const struct nf_hook_entries *hooks)
{
#ifdef CONFIG_DEBUG_KERNEL
        struct nf_hook_ops **orig_ops;
        int prio = INT_MIN;
        size_t i;

        orig_ops = nf_hook_entries_get_hook_ops(hooks);

        /* live entries must be sorted by ascending priority */
        for (i = 0; i < hooks->num_hook_entries; i++) {
                if (orig_ops[i] == &dummy_ops)
                        continue;

                WARN_ON(orig_ops[i]->priority < prio);

                if (orig_ops[i]->priority > prio)
                        prio = orig_ops[i]->priority;
        }
#endif
}
 
int nf_hook_entries_insert_raw(struct nf_hook_entries __rcu **pp,
                               const struct nf_hook_ops *reg)
{
        struct nf_hook_entries *new_hooks;
        struct nf_hook_entries *p;

        p = rcu_dereference_raw(*pp);
        new_hooks = nf_hook_entries_grow(p, reg);
        if (IS_ERR(new_hooks))
                return PTR_ERR(new_hooks);

        hooks_validate(new_hooks);

        rcu_assign_pointer(*pp, new_hooks);

        BUG_ON(p == new_hooks);
        nf_hook_entries_free(p);
        return 0;
}
EXPORT_SYMBOL_GPL(nf_hook_entries_insert_raw);
 
/*
 * __nf_hook_entries_try_shrink - try to shrink hook array
 *
 * @old -- current hook blob at @pp
 * @pp -- location of hook blob
 *
 * Hook unregistration must always succeed, so to-be-removed hooks
 * are replaced by a dummy one that will just move to the next hook.
 *
 * This counts the current dummy hooks, attempts to allocate a new blob,
 * copies the live hooks, then replaces and discards the old one.
 *
 * Returns address to free, or NULL.
 */
static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,
                                          struct nf_hook_entries __rcu **pp)
{
        unsigned int i, j, skip = 0, hook_entries;
        struct nf_hook_entries *new = NULL;
        struct nf_hook_ops **orig_ops;
        struct nf_hook_ops **new_ops;

        if (WARN_ON_ONCE(!old))
                return NULL;

        orig_ops = nf_hook_entries_get_hook_ops(old);
        for (i = 0; i < old->num_hook_entries; i++) {
                if (orig_ops[i] == &dummy_ops)
                        skip++;
        }

        /* if skip == hook_entries all hooks have been removed */
        hook_entries = old->num_hook_entries;
        if (skip == hook_entries)
                goto out_assign;

        if (skip == 0)
                return NULL;

        hook_entries -= skip;
        new = allocate_hook_entries_size(hook_entries);
        if (!new)
                return NULL;

        new_ops = nf_hook_entries_get_hook_ops(new);
        for (i = 0, j = 0; i < old->num_hook_entries; i++) {
                if (orig_ops[i] == &dummy_ops)
                        continue;
                new->hooks[j] = old->hooks[i];
                new_ops[j] = (void *)orig_ops[i];
                j++;
        }
        hooks_validate(new);
out_assign:
        rcu_assign_pointer(*pp, new);
        return old;
}
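
/* Shrink example (illustrative): a blob whose ops are
 * { h1, &dummy_ops, h3 } is reallocated as { h1, h3 }.  If every entry
 * is a dummy, *pp is set to NULL and the whole old blob is handed back
 * to the caller for freeing.
 */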
 
static struct nf_hook_entries __rcu **
nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
                   struct net_device *dev)
{
        switch (pf) {
        case NFPROTO_NETDEV:
                break;
#ifdef CONFIG_NETFILTER_FAMILY_ARP
        case NFPROTO_ARP:
                if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= hooknum))
                        return NULL;
                return net->nf.hooks_arp + hooknum;
#endif
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
        case NFPROTO_BRIDGE:
                if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum))
                        return NULL;
                return net->nf.hooks_bridge + hooknum;
#endif
        case NFPROTO_IPV4:
                if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum))
                        return NULL;
                return net->nf.hooks_ipv4 + hooknum;
        case NFPROTO_IPV6:
                if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum))
                        return NULL;
                return net->nf.hooks_ipv6 + hooknum;
#if IS_ENABLED(CONFIG_DECNET)
        case NFPROTO_DECNET:
                if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum))
                        return NULL;
                return net->nf.hooks_decnet + hooknum;
#endif
        default:
                WARN_ON_ONCE(1);
                return NULL;
        }

#ifdef CONFIG_NETFILTER_INGRESS
        if (hooknum == NF_NETDEV_INGRESS) {
                if (dev && dev_net(dev) == net)
                        return &dev->nf_hooks_ingress;
        }
#endif
        WARN_ON_ONCE(1);
        return NULL;
}
 
static int __nf_register_net_hook(struct net *net, int pf,
                                  const struct nf_hook_ops *reg)
{
        struct nf_hook_entries *p, *new_hooks;
        struct nf_hook_entries __rcu **pp;

        if (pf == NFPROTO_NETDEV) {
#ifndef CONFIG_NETFILTER_INGRESS
                if (reg->hooknum == NF_NETDEV_INGRESS)
                        return -EOPNOTSUPP;
#endif
                if (reg->hooknum != NF_NETDEV_INGRESS ||
                    !reg->dev || dev_net(reg->dev) != net)
                        return -EINVAL;
        }

        pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
        if (!pp)
                return -EINVAL;

        mutex_lock(&nf_hook_mutex);

        p = nf_entry_dereference(*pp);
        new_hooks = nf_hook_entries_grow(p, reg);

        if (!IS_ERR(new_hooks))
                rcu_assign_pointer(*pp, new_hooks);

        mutex_unlock(&nf_hook_mutex);
        if (IS_ERR(new_hooks))
                return PTR_ERR(new_hooks);

        hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
        if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
                net_inc_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
        static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
#endif
        BUG_ON(p == new_hooks);
        nf_hook_entries_free(p);
        return 0;
}
 
/*
 * nf_remove_net_hook - remove a hook from blob
 *
 * @oldp: current address of hook blob
 * @unreg: hook to unregister
 *
 * This cannot fail, hook unregistration must always succeed.
 * Therefore replace the to-be-removed hook with a dummy hook.
 */
static bool nf_remove_net_hook(struct nf_hook_entries *old,
                               const struct nf_hook_ops *unreg)
{
        struct nf_hook_ops **orig_ops;
        unsigned int i;

        orig_ops = nf_hook_entries_get_hook_ops(old);
        for (i = 0; i < old->num_hook_entries; i++) {
                if (orig_ops[i] != unreg)
                        continue;
                WRITE_ONCE(old->hooks[i].hook, accept_all);
                WRITE_ONCE(orig_ops[i], &dummy_ops);
                return true;
        }

        return false;
}
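
/* Readers walking the blob under rcu_read_lock() are not stalled by
 * removal: after the WRITE_ONCE() swap above they simply observe
 * accept_all() in place of the removed hook.  Reader-visible states
 * (an illustrative sketch, not additional locking):
 *
 *      before: old->hooks[i].hook == unreg->hook
 *      after:  old->hooks[i].hook == accept_all
 *
 * The blob itself is only reallocated later, in
 * __nf_hook_entries_try_shrink().
 */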
 
static void __nf_unregister_net_hook(struct net *net, int pf,
                                     const struct nf_hook_ops *reg)
{
        struct nf_hook_entries __rcu **pp;
        struct nf_hook_entries *p;

        pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
        if (!pp)
                return;

        mutex_lock(&nf_hook_mutex);

        p = nf_entry_dereference(*pp);
        if (WARN_ON_ONCE(!p)) {
                mutex_unlock(&nf_hook_mutex);
                return;
        }

        if (nf_remove_net_hook(p, reg)) {
#ifdef CONFIG_NETFILTER_INGRESS
                if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
                        net_dec_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL
                static_key_slow_dec(&nf_hooks_needed[pf][reg->hooknum]);
#endif
        } else {
                WARN_ONCE(1, "hook not found, pf %d num %d", pf, reg->hooknum);
        }

        p = __nf_hook_entries_try_shrink(p, pp);
        mutex_unlock(&nf_hook_mutex);
        if (!p)
                return;

        nf_queue_nf_hook_drop(net);
        nf_hook_entries_free(p);
}
 
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
        if (reg->pf == NFPROTO_INET) {
                __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
                __nf_unregister_net_hook(net, NFPROTO_IPV6, reg);
        } else {
                __nf_unregister_net_hook(net, reg->pf, reg);
        }
}
EXPORT_SYMBOL(nf_unregister_net_hook);
 
void nf_hook_entries_delete_raw(struct nf_hook_entries __rcu **pp,
                                const struct nf_hook_ops *reg)
{
        struct nf_hook_entries *p;

        p = rcu_dereference_raw(*pp);
        if (nf_remove_net_hook(p, reg)) {
                p = __nf_hook_entries_try_shrink(p, pp);
                nf_hook_entries_free(p);
        }
}
EXPORT_SYMBOL_GPL(nf_hook_entries_delete_raw);
 
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
        int err;

        if (reg->pf == NFPROTO_INET) {
                err = __nf_register_net_hook(net, NFPROTO_IPV4, reg);
                if (err < 0)
                        return err;

                err = __nf_register_net_hook(net, NFPROTO_IPV6, reg);
                if (err < 0) {
                        __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
                        return err;
                }
        } else {
                err = __nf_register_net_hook(net, reg->pf, reg);
                if (err < 0)
                        return err;
        }

        return 0;
}
EXPORT_SYMBOL(nf_register_net_hook);
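
/* A minimal usage sketch (illustrative, not part of this file): a module
 * fills in a struct nf_hook_ops and calls nf_register_net_hook().  The
 * name my_hook, the NF_INET_LOCAL_OUT placement and the filter priority
 * are arbitrary choices for the example; NF_IP_PRI_FILTER comes from
 * <linux/netfilter_ipv4.h>.
 *
 *      static unsigned int my_hook(void *priv, struct sk_buff *skb,
 *                                  const struct nf_hook_state *state)
 *      {
 *              return NF_ACCEPT;       // let the packet continue
 *      }
 *
 *      static struct nf_hook_ops my_ops = {
 *              .hook     = my_hook,
 *              .pf       = NFPROTO_IPV4,
 *              .hooknum  = NF_INET_LOCAL_OUT,
 *              .priority = NF_IP_PRI_FILTER,
 *      };
 *
 *      err = nf_register_net_hook(&init_net, &my_ops);
 *      ...
 *      nf_unregister_net_hook(&init_net, &my_ops);
 */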
 
int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
                          unsigned int n)
{
        unsigned int i;
        int err = 0;

        for (i = 0; i < n; i++) {
                err = nf_register_net_hook(net, &reg[i]);
                if (err)
                        goto err;
        }
        return err;

err:
        if (i > 0)
                nf_unregister_net_hooks(net, reg, i);
        return err;
}
EXPORT_SYMBOL(nf_register_net_hooks);
 
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
                             unsigned int hookcount)
{
        unsigned int i;

        for (i = 0; i < hookcount; i++)
                nf_unregister_net_hook(net, &reg[i]);
}
EXPORT_SYMBOL(nf_unregister_net_hooks);
 
/* Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock.
 */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
                 const struct nf_hook_entries *e, unsigned int s)
{
        unsigned int verdict;
        int ret;

        for (; s < e->num_hook_entries; s++) {
                verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
                switch (verdict & NF_VERDICT_MASK) {
                case NF_ACCEPT:
                        break;
                case NF_DROP:
                        kfree_skb(skb);
                        ret = NF_DROP_GETERR(verdict);
                        if (ret == 0)
                                ret = -EPERM;
                        return ret;
                case NF_QUEUE:
                        ret = nf_queue(skb, state, e, s, verdict);
                        if (ret == 1)
                                continue;
                        return ret;
                default:
                        /* Implicit handling for NF_STOLEN, as well as any
                         * other non-conventional verdicts.
                         */
                        return 0;
                }
        }

        return 1;
}
EXPORT_SYMBOL(nf_hook_slow);
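
/* Caller-side contract, sketched from the comment above (illustrative;
 * hook_head, state and okfn are stand-ins for whatever the caller has,
 * and the real fast path lives in nf_hook() in <linux/netfilter.h>):
 *
 *      rcu_read_lock();
 *      ret = nf_hook_slow(skb, &state, hook_head, 0);
 *      rcu_read_unlock();
 *      if (ret == 1)
 *              okfn(net, sk, skb);     // all hooks accepted: deliver
 *      // ret == 0: skb was queued or stolen, do nothing further
 *      // ret < 0:  skb already freed, propagate the error
 */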
 
int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
{
        if (writable_len > skb->len)
                return 0;

        /* Not exclusive use of packet?  Must copy. */
        if (!skb_cloned(skb)) {
                if (writable_len <= skb_headlen(skb))
                        return 1;
        } else if (skb_clone_writable(skb, writable_len))
                return 1;

        if (writable_len <= skb_headlen(skb))
                writable_len = 0;
        else
                writable_len -= skb_headlen(skb);

        return !!__pskb_pull_tail(skb, writable_len);
}
EXPORT_SYMBOL(skb_make_writable);
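
/* Typical use inside a hook function (an illustrative sketch): make the
 * IPv4 header writable before mangling it.  Header pointers must be
 * reloaded afterwards, since the skb data may have been copied; new_tos
 * is a hypothetical value for the example.
 *
 *      if (!skb_make_writable(skb, sizeof(struct iphdr)))
 *              return NF_DROP;
 *      iph = ip_hdr(skb);      // re-fetch after possible reallocation
 *      iph->tos = new_tos;
 *      ip_send_check(iph);     // recompute the header checksum
 */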
 
/* This needs to be compiled in any case to avoid dependencies between the
 * nfnetlink_queue code and nf_conntrack.
 */
struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nfnl_ct_hook);

struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_hook);
 
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* This does not belong here, but locally generated errors need it if
 * connection tracking is in use: without this, a connection may not be
 * in the hash table, and hence manufactured ICMP or RST packets will
 * not be associated with it.
 */
void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
                __rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach);

struct nf_nat_hook __rcu *nf_nat_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_hook);

void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
{
        void (*attach)(struct sk_buff *, const struct sk_buff *);

        if (skb->_nfct) {
                rcu_read_lock();
                attach = rcu_dereference(ip_ct_attach);
                if (attach)
                        attach(new, skb);
                rcu_read_unlock();
        }
}
EXPORT_SYMBOL(nf_ct_attach);
 
void nf_conntrack_destroy(struct nf_conntrack *nfct)
{
        struct nf_ct_hook *ct_hook;

        rcu_read_lock();
        ct_hook = rcu_dereference(nf_ct_hook);
        BUG_ON(ct_hook == NULL);
        ct_hook->destroy(nfct);
        rcu_read_unlock();
}
EXPORT_SYMBOL(nf_conntrack_destroy);

/* Built-in default zone used e.g. by modules. */
const struct nf_conntrack_zone nf_ct_zone_dflt = {
        .id     = NF_CT_DEFAULT_ZONE_ID,
        .dir    = NF_CT_DEFAULT_ZONE_DIR,
};
EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
#endif /* CONFIG_NF_CONNTRACK */
 
static void __net_init
__netfilter_net_init(struct nf_hook_entries __rcu **e, int max)
{
        int h;

        for (h = 0; h < max; h++)
                RCU_INIT_POINTER(e[h], NULL);
}

static int __net_init netfilter_net_init(struct net *net)
{
        __netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4));
        __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6));
#ifdef CONFIG_NETFILTER_FAMILY_ARP
        __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp));
#endif
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
        __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
#endif
#if IS_ENABLED(CONFIG_DECNET)
        __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
#endif

#ifdef CONFIG_PROC_FS
        net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
                                                net->proc_net);
        if (!net->nf.proc_netfilter) {
                if (!net_eq(net, &init_net))
                        pr_err("cannot create netfilter proc entry");

                return -ENOMEM;
        }
#endif

        return 0;
}

static void __net_exit netfilter_net_exit(struct net *net)
{
        remove_proc_entry("netfilter", net->proc_net);
}

static struct pernet_operations netfilter_net_ops = {
        .init = netfilter_net_init,
        .exit = netfilter_net_exit,
};

int __init netfilter_init(void)
{
        int ret;

        ret = register_pernet_subsys(&netfilter_net_ops);
        if (ret < 0)
                goto err;

        ret = netfilter_log_init();
        if (ret < 0)
                goto err_pernet;

        return 0;
err_pernet:
        unregister_pernet_subsys(&netfilter_net_ops);
err:
        return ret;
}