node->bitlen = bits;
        memcpy(node->bits, src, bits / 8U);
 }
-#define CHOOSE_NODE(parent, key) \
-       parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
+
+static inline u8 choose(struct allowedips_node *node, const u8 *key)
+{
+       return (key[node->bit_at_a] >> node->bit_at_b) & 1;
+}
 
 static void push_rcu(struct allowedips_node **stack,
                     struct allowedips_node __rcu *p, unsigned int *len)
                        found = node;
                if (node->cidr == bits)
                        break;
-               node = rcu_dereference_bh(CHOOSE_NODE(node, key));
+               node = rcu_dereference_bh(node->bit[choose(node, key)]);
        }
        return found;
 }
                           u8 cidr, u8 bits, struct allowedips_node **rnode,
                           struct mutex *lock)
 {
-       struct allowedips_node *node = rcu_dereference_protected(trie,
-                                               lockdep_is_held(lock));
+       struct allowedips_node *node = rcu_dereference_protected(trie, lockdep_is_held(lock));
        struct allowedips_node *parent = NULL;
        bool exact = false;
 
                        exact = true;
                        break;
                }
-               node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
-                                                lockdep_is_held(lock));
+               node = rcu_dereference_protected(parent->bit[choose(parent, key)], lockdep_is_held(lock));
        }
        *rnode = parent;
        return exact;
 }
 
+static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node)
+{
+       node->parent_bit_packed = (unsigned long)parent | bit;
+       rcu_assign_pointer(*parent, node);
+}
+
+static inline void choose_and_connect_node(struct allowedips_node *parent, struct allowedips_node *node)
+{
+       u8 bit = choose(parent, node->bits);
+       connect_node(&parent->bit[bit], bit, node);
+}
+
 static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
               u8 cidr, struct wg_peer *peer, struct mutex *lock)
 {
                RCU_INIT_POINTER(node->peer, peer);
                list_add_tail(&node->peer_list, &peer->allowedips_list);
                copy_and_assign_cidr(node, key, cidr, bits);
-               rcu_assign_pointer(node->parent_bit, trie);
-               rcu_assign_pointer(*trie, node);
+               connect_node(trie, 2, node);
                return 0;
        }
        if (node_placement(*trie, key, cidr, bits, &node, lock)) {
        if (!node) {
                down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
        } else {
-               down = rcu_dereference_protected(CHOOSE_NODE(node, key), lockdep_is_held(lock));
+               const u8 bit = choose(node, key);
+               down = rcu_dereference_protected(node->bit[bit], lockdep_is_held(lock));
                if (!down) {
-                       rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, key));
-                       rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
+                       connect_node(&node->bit[bit], bit, newnode);
                        return 0;
                }
        }
        parent = node;
 
        if (newnode->cidr == cidr) {
-               rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(newnode, down->bits));
-               rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
-               if (!parent) {
-                       rcu_assign_pointer(newnode->parent_bit, trie);
-                       rcu_assign_pointer(*trie, newnode);
-               } else {
-                       rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(parent, newnode->bits));
-                       rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), newnode);
-               }
+               choose_and_connect_node(newnode, down);
+               if (!parent)
+                       connect_node(trie, 2, newnode);
+               else
+                       choose_and_connect_node(parent, newnode);
                return 0;
        }
 
        INIT_LIST_HEAD(&node->peer_list);
        copy_and_assign_cidr(node, newnode->bits, cidr, bits);
 
-       rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(node, down->bits));
-       rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
-       rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, newnode->bits));
-       rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
-       if (!parent) {
-               rcu_assign_pointer(node->parent_bit, trie);
-               rcu_assign_pointer(*trie, node);
-       } else {
-               rcu_assign_pointer(node->parent_bit, &CHOOSE_NODE(parent, node->bits));
-               rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), node);
-       }
+       choose_and_connect_node(node, down);
+       choose_and_connect_node(node, newnode);
+       if (!parent)
+               connect_node(trie, 2, node);
+       else
+               choose_and_connect_node(parent, node);
        return 0;
 }
 
 void wg_allowedips_remove_by_peer(struct allowedips *table,
                                  struct wg_peer *peer, struct mutex *lock)
 {
-       struct allowedips_node *node, *child, *tmp;
+       struct allowedips_node *node, *child, **parent_bit, *parent, *tmp;
+       bool free_parent;
 
        if (list_empty(&peer->allowedips_list))
                return;
                RCU_INIT_POINTER(node->peer, NULL);
                if (node->bit[0] && node->bit[1])
                        continue;
-               child = rcu_dereference_protected(
-                               node->bit[!rcu_access_pointer(node->bit[0])],
-                               lockdep_is_held(lock));
+               child = rcu_dereference_protected(node->bit[!rcu_access_pointer(node->bit[0])],
+                                                 lockdep_is_held(lock));
                if (child)
-                       child->parent_bit = node->parent_bit;
-               *rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
+                       child->parent_bit_packed = node->parent_bit_packed;
+               parent_bit = (struct allowedips_node **)(node->parent_bit_packed & ~3UL);
+               *parent_bit = child;
+               parent = (void *)parent_bit -
+                        offsetof(struct allowedips_node, bit[node->parent_bit_packed & 1]);
+               free_parent = !rcu_access_pointer(node->bit[0]) &&
+                             !rcu_access_pointer(node->bit[1]) &&
+                             (node->parent_bit_packed & 3) <= 1 &&
+                             !rcu_access_pointer(parent->peer);
+               if (free_parent)
+                       child = rcu_dereference_protected(
+                                       parent->bit[!(node->parent_bit_packed & 1)],
+                                       lockdep_is_held(lock));
                call_rcu(&node->rcu, node_free_rcu);
-
-               /* TODO: Note that we currently don't walk up and down in order to
-                * free any potential filler nodes. This means that this function
-                * doesn't free up as much as it could, which could be revisited
-                * at some point.
-                */
+               if (!free_parent)
+                       continue;
+               if (child)
+                       child->parent_bit_packed = parent->parent_bit_packed;
+               *(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child;
+               call_rcu(&parent->rcu, node_free_rcu);
        }
 }
 
 
 
 #include <linux/siphash.h>
 
-static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits,
-                                             u8 cidr)
-{
-       swap_endian(dst, src, bits);
-       memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8);
-       if (cidr)
-               dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8);
-}
-
 static __init void print_node(struct allowedips_node *node, u8 bits)
 {
        char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n";
-       char *fmt_declaration = KERN_DEBUG
-               "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
+       char *fmt_declaration = KERN_DEBUG "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
+       u8 ip1[16], ip2[16], cidr1, cidr2;
        char *style = "dotted";
-       u8 ip1[16], ip2[16];
        u32 color = 0;
 
+       if (node == NULL)
+               return;
        if (bits == 32) {
                fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n";
-               fmt_declaration = KERN_DEBUG
-                       "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
+               fmt_declaration = KERN_DEBUG "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
        } else if (bits == 128) {
                fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n";
-               fmt_declaration = KERN_DEBUG
-                       "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
+               fmt_declaration = KERN_DEBUG "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
        }
        if (node->peer) {
                hsiphash_key_t key = { { 0 } };
                        hsiphash_1u32(0xabad1dea, &key) % 200;
                style = "bold";
        }
-       swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr);
-       printk(fmt_declaration, ip1, node->cidr, style, color);
+       wg_allowedips_read_node(node, ip1, &cidr1);
+       printk(fmt_declaration, ip1, cidr1, style, color);
        if (node->bit[0]) {
-               swap_endian_and_apply_cidr(ip2,
-                               rcu_dereference_raw(node->bit[0])->bits, bits,
-                               node->cidr);
-               printk(fmt_connection, ip1, node->cidr, ip2,
-                      rcu_dereference_raw(node->bit[0])->cidr);
-               print_node(rcu_dereference_raw(node->bit[0]), bits);
+               wg_allowedips_read_node(rcu_dereference_raw(node->bit[0]), ip2, &cidr2);
+               printk(fmt_connection, ip1, cidr1, ip2, cidr2);
        }
        if (node->bit[1]) {
-               swap_endian_and_apply_cidr(ip2,
-                               rcu_dereference_raw(node->bit[1])->bits,
-                               bits, node->cidr);
-               printk(fmt_connection, ip1, node->cidr, ip2,
-                      rcu_dereference_raw(node->bit[1])->cidr);
-               print_node(rcu_dereference_raw(node->bit[1]), bits);
+               wg_allowedips_read_node(rcu_dereference_raw(node->bit[1]), ip2, &cidr2);
+               printk(fmt_connection, ip1, cidr1, ip2, cidr2);
        }
+       if (node->bit[0])
+               print_node(rcu_dereference_raw(node->bit[0]), bits);
+       if (node->bit[1])
+               print_node(rcu_dereference_raw(node->bit[1]), bits);
 }
 
 static __init void print_tree(struct allowedips_node __rcu *top, u8 bits)
 {
        union nf_inet_addr mask;
 
-       memset(&mask, 0x00, 128 / 8);
-       memset(&mask, 0xff, cidr / 8);
+       memset(&mask, 0, sizeof(mask));
+       memset(&mask.all, 0xff, cidr / 8);
        if (cidr % 32)
                mask.all[cidr / 32] = (__force u32)htonl(
                        (0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL);
 }
 
 static __init inline bool
-horrible_match_v4(const struct horrible_allowedips_node *node,
-                 struct in_addr *ip)
+horrible_match_v4(const struct horrible_allowedips_node *node, struct in_addr *ip)
 {
        return (ip->s_addr & node->mask.ip) == node->ip.ip;
 }
 
 static __init inline bool
-horrible_match_v6(const struct horrible_allowedips_node *node,
-                 struct in6_addr *ip)
+horrible_match_v6(const struct horrible_allowedips_node *node, struct in6_addr *ip)
 {
-       return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) ==
-                      node->ip.ip6[0] &&
-              (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) ==
-                      node->ip.ip6[1] &&
-              (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) ==
-                      node->ip.ip6[2] &&
+       return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == node->ip.ip6[0] &&
+              (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == node->ip.ip6[1] &&
+              (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == node->ip.ip6[2] &&
               (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3];
 }
 
 static __init void
-horrible_insert_ordered(struct horrible_allowedips *table,
-                       struct horrible_allowedips_node *node)
+horrible_insert_ordered(struct horrible_allowedips *table, struct horrible_allowedips_node *node)
 {
        struct horrible_allowedips_node *other = NULL, *where = NULL;
        u8 my_cidr = horrible_mask_to_cidr(node->mask);
 
        hlist_for_each_entry(other, &table->head, table) {
-               if (!memcmp(&other->mask, &node->mask,
-                           sizeof(union nf_inet_addr)) &&
-                   !memcmp(&other->ip, &node->ip,
-                           sizeof(union nf_inet_addr)) &&
-                   other->ip_version == node->ip_version) {
+               if (other->ip_version == node->ip_version &&
+                   !memcmp(&other->mask, &node->mask, sizeof(union nf_inet_addr)) &&
+                   !memcmp(&other->ip, &node->ip, sizeof(union nf_inet_addr))) {
                        other->value = node->value;
                        kfree(node);
                        return;
                }
+       }
+       hlist_for_each_entry(other, &table->head, table) {
                where = other;
                if (horrible_mask_to_cidr(other->mask) <= my_cidr)
                        break;
 horrible_allowedips_insert_v4(struct horrible_allowedips *table,
                              struct in_addr *ip, u8 cidr, void *value)
 {
-       struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
-                                                       GFP_KERNEL);
+       struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
 
        if (unlikely(!node))
                return -ENOMEM;
 horrible_allowedips_insert_v6(struct horrible_allowedips *table,
                              struct in6_addr *ip, u8 cidr, void *value)
 {
-       struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
-                                                       GFP_KERNEL);
+       struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
 
        if (unlikely(!node))
                return -ENOMEM;
 }
 
 static __init void *
-horrible_allowedips_lookup_v4(struct horrible_allowedips *table,
-                             struct in_addr *ip)
+horrible_allowedips_lookup_v4(struct horrible_allowedips *table, struct in_addr *ip)
 {
        struct horrible_allowedips_node *node;
-       void *ret = NULL;
 
        hlist_for_each_entry(node, &table->head, table) {
-               if (node->ip_version != 4)
-                       continue;
-               if (horrible_match_v4(node, ip)) {
-                       ret = node->value;
-                       break;
-               }
+               if (node->ip_version == 4 && horrible_match_v4(node, ip))
+                       return node->value;
        }
-       return ret;
+       return NULL;
 }
 
 static __init void *
-horrible_allowedips_lookup_v6(struct horrible_allowedips *table,
-                             struct in6_addr *ip)
+horrible_allowedips_lookup_v6(struct horrible_allowedips *table, struct in6_addr *ip)
 {
        struct horrible_allowedips_node *node;
-       void *ret = NULL;
 
        hlist_for_each_entry(node, &table->head, table) {
-               if (node->ip_version != 6)
+               if (node->ip_version == 6 && horrible_match_v6(node, ip))
+                       return node->value;
+       }
+       return NULL;
+}
+
+
+static __init void
+horrible_allowedips_remove_by_value(struct horrible_allowedips *table, void *value)
+{
+       struct horrible_allowedips_node *node;
+       struct hlist_node *h;
+
+       hlist_for_each_entry_safe(node, h, &table->head, table) {
+               if (node->value != value)
                        continue;
-               if (horrible_match_v6(node, ip)) {
-                       ret = node->value;
-                       break;
-               }
+               hlist_del(&node->table);
+               kfree(node);
        }
-       return ret;
+
 }
 
 static __init bool randomized_test(void)
                print_tree(t.root6, 128);
        }
 
-       for (i = 0; i < NUM_QUERIES; ++i) {
-               prandom_bytes(ip, 4);
-               if (lookup(t.root4, 32, ip) !=
-                   horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
-                       pr_err("allowedips random self-test: FAIL\n");
-                       goto free;
+       for (j = 0;; ++j) {
+               for (i = 0; i < NUM_QUERIES; ++i) {
+                       prandom_bytes(ip, 4);
+                       if (lookup(t.root4, 32, ip) != horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
+                               horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip);
+                               pr_err("allowedips random v4 self-test: FAIL\n");
+                               goto free;
+                       }
+                       prandom_bytes(ip, 16);
+                       if (lookup(t.root6, 128, ip) != horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
+                               pr_err("allowedips random v6 self-test: FAIL\n");
+                               goto free;
+                       }
                }
+               if (j >= NUM_PEERS)
+                       break;
+               mutex_lock(&mutex);
+               wg_allowedips_remove_by_peer(&t, peers[j], &mutex);
+               mutex_unlock(&mutex);
+               horrible_allowedips_remove_by_value(&h, peers[j]);
        }
 
-       for (i = 0; i < NUM_QUERIES; ++i) {
-               prandom_bytes(ip, 16);
-               if (lookup(t.root6, 128, ip) !=
-                   horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
-                       pr_err("allowedips random self-test: FAIL\n");
-                       goto free;
-               }
+       if (t.root4 || t.root6) {
+               pr_err("allowedips random self-test removal: FAIL\n");
+               goto free;
        }
+
        ret = true;
 
 free: