        struct nf_conntrack_zone        zone;
        int                             cpu;
        u32                             jiffies32;
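+       /* freed via call_rcu() to protect lockless list walkers */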
+       struct rcu_head                 rcu_head;
 };
 
 struct nf_conncount_rb {
        struct rb_node node;
        struct nf_conncount_list list;
        u32 key[MAX_KEYLEN];
+       struct rcu_head rcu_head;
 };
 
 static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;
 struct nf_conncount_data {
        unsigned int keylen;
        struct rb_root root[CONNCOUNT_SLOTS];
+       struct net *net;
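+       /* deferred gc: pending_trees marks tree slots that still need a
+        * pass, gc_tree is the slot the worker scans on its next run
+        */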
+       struct work_struct gc_work;
+       unsigned long pending_trees[BITS_TO_LONGS(CONNCOUNT_SLOTS)];
+       unsigned int gc_tree;
 };
 
 static u_int32_t conncount_rnd __read_mostly;
        return memcmp(a, b, klen * sizeof(u32));
 }
 
-bool nf_conncount_add(struct nf_conncount_list *list,
-                     const struct nf_conntrack_tuple *tuple,
-                     const struct nf_conntrack_zone *zone)
+enum nf_conncount_list_add
+nf_conncount_add(struct nf_conncount_list *list,
+                const struct nf_conntrack_tuple *tuple,
+                const struct nf_conntrack_zone *zone)
 {
        struct nf_conncount_tuple *conn;
 
        if (WARN_ON_ONCE(list->count > INT_MAX))
-               return false;
+               return NF_CONNCOUNT_ERR;
 
        conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
        if (conn == NULL)
-               return false;
+               return NF_CONNCOUNT_ERR;
+
        conn->tuple = *tuple;
        conn->zone = *zone;
        conn->cpu = raw_smp_processor_id();
        conn->jiffies32 = (u32)jiffies;
+       spin_lock(&list->list_lock);
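+       /* list->dead is set by tree_nodes_free() once the tree node has
+        * been unlinked; tell the caller to insert a fresh node instead
+        */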
+       if (list->dead) {
+               kmem_cache_free(conncount_conn_cachep, conn);
+               spin_unlock(&list->list_lock);
+               return NF_CONNCOUNT_SKIP;
+       }
        list_add_tail(&conn->node, &list->head);
        list->count++;
-       return true;
+       spin_unlock(&list->list_lock);
+       return NF_CONNCOUNT_ADDED;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_add);
 
-static void conn_free(struct nf_conncount_list *list,
+static void __conn_free(struct rcu_head *h)
+{
+       struct nf_conncount_tuple *conn;
+
+       conn = container_of(h, struct nf_conncount_tuple, rcu_head);
+       kmem_cache_free(conncount_conn_cachep, conn);
+}
+
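+/* Returns true when the eviction left the list empty, so the caller may
+ * consider releasing the owning tree node.
+ */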
+static bool conn_free(struct nf_conncount_list *list,
                      struct nf_conncount_tuple *conn)
 {
-       if (WARN_ON_ONCE(list->count == 0))
-               return;
+       bool free_entry = false;
+
+       spin_lock(&list->list_lock);
+
+       if (list->count == 0) {
+               spin_unlock(&list->list_lock);
+               return free_entry;
+       }
 
        list->count--;
-       list_del(&conn->node);
-       kmem_cache_free(conncount_conn_cachep, conn);
+       list_del_rcu(&conn->node);
+       if (list->count == 0)
+               free_entry = true;
+
+       spin_unlock(&list->list_lock);
+       call_rcu(&conn->rcu_head, __conn_free);
+       return free_entry;
 }
 
 static const struct nf_conntrack_tuple_hash *
 find_or_evict(struct net *net, struct nf_conncount_list *list,
-             struct nf_conncount_tuple *conn)
+             struct nf_conncount_tuple *conn, bool *free_entry)
 {
        const struct nf_conntrack_tuple_hash *found;
        unsigned long a, b;
         */
        age = a - b;
        if (conn->cpu == cpu || age >= 2) {
-               conn_free(list, conn);
+               *free_entry = conn_free(list, conn);
                return ERR_PTR(-ENOENT);
        }
 
        struct nf_conncount_tuple *conn, *conn_n;
        struct nf_conn *found_ct;
        unsigned int collect = 0;
+       bool free_entry = false;
 
        /* best effort only */
        *addit = tuple ? true : false;
                if (collect > CONNCOUNT_GC_MAX_NODES)
                        break;
 
-               found = find_or_evict(net, list, conn);
+               found = find_or_evict(net, list, conn, &free_entry);
                if (IS_ERR(found)) {
                        /* Not found, but might be about to be confirmed */
                        if (PTR_ERR(found) == -EAGAIN) {
 
 void nf_conncount_list_init(struct nf_conncount_list *list)
 {
+       spin_lock_init(&list->list_lock);
        INIT_LIST_HEAD(&list->head);
        list->count = 1;
+       list->dead = false;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_list_init);
 
-void nf_conncount_gc_list(struct net *net,
+/* Return true if the list was emptied (caller may then free the tree node) */
+bool nf_conncount_gc_list(struct net *net,
                          struct nf_conncount_list *list)
 {
        const struct nf_conntrack_tuple_hash *found;
        struct nf_conncount_tuple *conn, *conn_n;
        struct nf_conn *found_ct;
        unsigned int collected = 0;
+       bool free_entry = false;
 
        list_for_each_entry_safe(conn, conn_n, &list->head, node) {
-               found = find_or_evict(net, list, conn);
+               found = find_or_evict(net, list, conn, &free_entry);
                if (IS_ERR(found)) {
-                       if (PTR_ERR(found) == -ENOENT)
+                       if (PTR_ERR(found) == -ENOENT) {
+                               if (free_entry)
+                                       return true;
                                collected++;
+                       }
                        continue;
                }
 
                         * closed already -> ditch it
                         */
                        nf_ct_put(found_ct);
-                       conn_free(list, conn);
+                       if (conn_free(list, conn))
+                               return true;
                        collected++;
                        continue;
                }
 
                nf_ct_put(found_ct);
                if (collected > CONNCOUNT_GC_MAX_NODES)
-                       return;
+                       return false;
        }
+       return false;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
 
+static void __tree_nodes_free(struct rcu_head *h)
+{
+       struct nf_conncount_rb *rbconn;
+
+       rbconn = container_of(h, struct nf_conncount_rb, rcu_head);
+       kmem_cache_free(conncount_rb_cachep, rbconn);
+}
+
 static void tree_nodes_free(struct rb_root *root,
                            struct nf_conncount_rb *gc_nodes[],
                            unsigned int gc_count)
 
        while (gc_count) {
                rbconn = gc_nodes[--gc_count];
-               rb_erase(&rbconn->node, root);
-               kmem_cache_free(conncount_rb_cachep, rbconn);
+               spin_lock(&rbconn->list.list_lock);
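+               /* re-check under list_lock: a concurrent add may have
+                * repopulated the list since gc saw it empty; mark the
+                * node dead so late adders back off, then free it after
+                * an RCU grace period
+                */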
+               if (rbconn->list.count == 0 && !rbconn->list.dead) {
+                       rbconn->list.dead = true;
+                       rb_erase(&rbconn->node, root);
+                       call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+               }
+               spin_unlock(&rbconn->list.list_lock);
        }
 }
 
+static void schedule_gc_worker(struct nf_conncount_data *data, int tree)
+{
+       set_bit(tree, data->pending_trees);
+       schedule_work(&data->gc_work);
+}
+
 static unsigned int
-insert_tree(struct rb_root *root,
+insert_tree(struct net *net,
+           struct nf_conncount_data *data,
+           struct rb_root *root,
            unsigned int hash,
            const u32 *key,
            u8 keylen,
            const struct nf_conntrack_tuple *tuple,
            const struct nf_conntrack_zone *zone)
 {
+       enum nf_conncount_list_add ret;
+       struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
        struct rb_node **rbnode, *parent;
        struct nf_conncount_rb *rbconn;
        struct nf_conncount_tuple *conn;
-       unsigned int count = 0;
+       unsigned int count = 0, gc_count = 0;
+       bool node_found = false;
 
        spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
 
                        rbnode = &((*rbnode)->rb_right);
                } else {
                        /* unlikely: other cpu added node already */
-                       if (!nf_conncount_add(&rbconn->list, tuple, zone)) {
+                       node_found = true;
+                       ret = nf_conncount_add(&rbconn->list, tuple, zone);
+                       if (ret == NF_CONNCOUNT_ERR) {
                                count = 0; /* hotdrop */
-                               goto out_unlock;
+                       } else if (ret == NF_CONNCOUNT_ADDED) {
+                               count = rbconn->list.count;
+                       } else {
+                               /* NF_CONNCOUNT_SKIP: rbconn has already been
+                                * reclaimed by gc; fall through and insert a
+                                * new tree node below.
+                                */
+                               node_found = false;
                        }
-
-                       count = rbconn->list.count;
-                       goto out_unlock;
+                       break;
                }
+
+               if (gc_count >= ARRAY_SIZE(gc_nodes))
+                       continue;
+
+               if (nf_conncount_gc_list(net, &rbconn->list))
+                       gc_nodes[gc_count++] = rbconn;
+       }
+
+       if (gc_count) {
+               tree_nodes_free(root, gc_nodes, gc_count);
+               /* tree_nodes_free() before the new allocation lets the
+                * allocator re-use the just-freed objects.
+                *
+                * This is a rare event; in most cases we either find an
+                * existing node to re-use, or gc_count is 0.
+                */
+
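+               /* gc_nodes filled up: this slot likely holds more stale
+                * entries, let the gc worker finish the sweep
+                */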
+               if (gc_count >= ARRAY_SIZE(gc_nodes))
+                       schedule_gc_worker(data, hash);
        }
 
+       if (node_found)
+               goto out_unlock;
+
        /* expected case: match, insert new node */
        rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
        if (rbconn == NULL)
           const struct nf_conntrack_tuple *tuple,
           const struct nf_conntrack_zone *zone)
 {
-       struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
+       enum nf_conncount_list_add ret;
        struct rb_root *root;
-       struct rb_node **rbnode, *parent;
+       struct rb_node *parent;
        struct nf_conncount_rb *rbconn;
-       unsigned int gc_count, hash;
-       bool no_gc = false;
-       unsigned int count = 0;
+       unsigned int hash;
        u8 keylen = data->keylen;
 
        hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
        root = &data->root[hash];
 
-       spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
- restart:
-       gc_count = 0;
-       parent = NULL;
-       rbnode = &(root->rb_node);
-       while (*rbnode) {
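+       /* lockless descent: RCU keeps erased nodes valid until readers
+        * are done; insertions take the slot lock in insert_tree()
+        */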
+       parent = rcu_dereference_raw(root->rb_node);
+       while (parent) {
                int diff;
                bool addit;
 
-               rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);
+               rbconn = rb_entry(parent, struct nf_conncount_rb, node);
 
-               parent = *rbnode;
                diff = key_diff(key, rbconn->key, keylen);
                if (diff < 0) {
-                       rbnode = &((*rbnode)->rb_left);
+                       parent = rcu_dereference_raw(parent->rb_left);
                } else if (diff > 0) {
-                       rbnode = &((*rbnode)->rb_right);
+                       parent = rcu_dereference_raw(parent->rb_right);
                } else {
                        /* same source network -> be counted! */
                        nf_conncount_lookup(net, &rbconn->list, tuple, zone,
                                            &addit);
-                       count = rbconn->list.count;
 
-                       tree_nodes_free(root, gc_nodes, gc_count);
                        if (!addit)
-                               goto out_unlock;
+                               return rbconn->list.count;
+
+                       ret = nf_conncount_add(&rbconn->list, tuple, zone);
+                       if (ret == NF_CONNCOUNT_ERR) {
+                               return 0; /* hotdrop */
+                       } else if (ret == NF_CONNCOUNT_ADDED) {
+                               return rbconn->list.count;
+                       } else {
+                               /* NF_CONNCOUNT_SKIP: rbconn has already been
+                                * reclaimed by gc; break out and let
+                                * insert_tree() add a new tree node.
+                                */
+                               break;
+                       }
+               }
+       }
 
-                       if (!nf_conncount_add(&rbconn->list, tuple, zone))
-                               count = 0; /* hotdrop */
-                               goto out_unlock;
+       if (!tuple)
+               return 0;
 
-                       count++;
-                       goto out_unlock;
-               }
+       return insert_tree(net, data, root, hash, key, keylen, tuple, zone);
+}
 
-               if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
-                       continue;
+static void tree_gc_worker(struct work_struct *work)
+{
+       struct nf_conncount_data *data = container_of(work, struct nf_conncount_data, gc_work);
+       struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES], *rbconn;
+       struct rb_root *root;
+       struct rb_node *node;
+       unsigned int tree, next_tree, gc_count = 0;
+
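+       /* scan one tree slot per invocation; reschedule below while
+        * other slots remain pending
+        */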
+       tree = data->gc_tree % CONNCOUNT_SLOTS;
+       root = &data->root[tree];
 
-               nf_conncount_gc_list(net, &rbconn->list);
-               if (list_empty(&rbconn->list.head))
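+       /* first pass under RCU only collects empty lists; rb_erase is
+        * done in tree_nodes_free() below, under the slot lock
+        */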
+       rcu_read_lock();
+       for (node = rb_first(root); node != NULL; node = rb_next(node)) {
+               rbconn = rb_entry(node, struct nf_conncount_rb, node);
+               if (nf_conncount_gc_list(data->net, &rbconn->list))
                        gc_nodes[gc_count++] = rbconn;
        }
+       rcu_read_unlock();
+
+       spin_lock_bh(&nf_conncount_locks[tree % CONNCOUNT_LOCK_SLOTS]);
 
        if (gc_count) {
-               no_gc = true;
                tree_nodes_free(root, gc_nodes, gc_count);
-               /* tree_node_free before new allocation permits
-                * allocator to re-use newly free'd object.
-                *
-                * This is a rare event; in most cases we will find
-                * existing node to re-use. (or gc_count is 0).
-                */
-               goto restart;
        }
 
-       count = 0;
-       if (!tuple)
-               goto out_unlock;
+       clear_bit(tree, data->pending_trees);
 
-       spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
-       return insert_tree(root, hash, key, keylen, tuple, zone);
+       next_tree = (tree + 1) % CONNCOUNT_SLOTS;
+       next_tree = find_next_bit(data->pending_trees, CONNCOUNT_SLOTS, next_tree);
 
-out_unlock:
-       spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
-       return count;
+       if (next_tree < CONNCOUNT_SLOTS) {
+               data->gc_tree = next_tree;
+               schedule_work(work);
+       }
+
+       spin_unlock_bh(&nf_conncount_locks[tree % CONNCOUNT_LOCK_SLOTS]);
 }
 
 /* Count and return number of conntrack entries in 'net' with particular 'key'.
  * If 'tuple' is not null, insert it into the accounting data structure.
+ * Call with RCU read lock.
  */
 unsigned int nf_conncount_count(struct net *net,
                                struct nf_conncount_data *data,
                data->root[i] = RB_ROOT;
 
        data->keylen = keylen / sizeof(u32);
+       data->net = net;
+       INIT_WORK(&data->gc_work, tree_gc_worker);
 
        return data;
 }
 {
        unsigned int i;
 
+       cancel_work_sync(&data->gc_work);
        nf_ct_netns_put(net, family);
 
        for (i = 0; i < ARRAY_SIZE(data->root); ++i)