/*
  * Convert the values in the security context
- * structure `c' from the values specified
+ * structure `oldc' from the values specified
  * in the policy `p->oldp' to the values specified
- * in the policy `p->newp'.  Verify that the
- * context is valid under the new policy.
+ * in the policy `p->newp', storing the new context
+ * in `newc'.  Verify that the context is valid
+ * under the new policy.
  */
-static int convert_context(u32 key,
-                          struct context *c,
-                          void *p)
+static int convert_context(struct context *oldc, struct context *newc, void *p)
 {
        struct convert_context_args *args;
-       struct context oldc;
        struct ocontext *oc;
-       struct mls_range *range;
        struct role_datum *role;
        struct type_datum *typdatum;
        struct user_datum *usrdatum;
 
        args = p;
 
-       if (c->str) {
-               struct context ctx;
-
-               rc = -ENOMEM;
-               s = kstrdup(c->str, GFP_KERNEL);
+       if (oldc->str) {
+               s = kstrdup(oldc->str, GFP_KERNEL);
                if (!s)
-                       goto out;
+                       return -ENOMEM;
 
                rc = string_to_context_struct(args->newp, NULL, s,
-                                             &ctx, SECSID_NULL);
-               kfree(s);
-               if (!rc) {
-                       pr_info("SELinux:  Context %s became valid (mapped).\n",
-                              c->str);
-                       /* Replace string with mapped representation. */
-                       kfree(c->str);
-                       memcpy(c, &ctx, sizeof(*c));
-                       goto out;
-               } else if (rc == -EINVAL) {
+                                             newc, SECSID_NULL);
+               if (rc == -EINVAL) {
                        /* Retain string representation for later mapping. */
-                       rc = 0;
-                       goto out;
-               } else {
+                       context_init(newc);
+                       newc->str = s;
+                       newc->len = oldc->len;
+                       return 0;
+               }
+               kfree(s);
+               if (rc) {
                        /* Other error condition, e.g. ENOMEM. */
                        pr_err("SELinux:   Unable to map context %s, rc = %d.\n",
-                              c->str, -rc);
-                       goto out;
+                              oldc->str, -rc);
+                       return rc;
                }
+               pr_info("SELinux:  Context %s became valid (mapped).\n",
+                       oldc->str);
+               return 0;
        }
 
-       rc = context_cpy(&oldc, c);
-       if (rc)
-               goto out;
+       context_init(newc);
 
        /* Convert the user. */
        rc = -EINVAL;
        usrdatum = hashtab_search(args->newp->p_users.table,
-                                 sym_name(args->oldp, SYM_USERS, c->user - 1));
+                                 sym_name(args->oldp,
+                                          SYM_USERS, oldc->user - 1));
        if (!usrdatum)
                goto bad;
-       c->user = usrdatum->value;
+       newc->user = usrdatum->value;
 
        /* Convert the role. */
        rc = -EINVAL;
        role = hashtab_search(args->newp->p_roles.table,
-                             sym_name(args->oldp, SYM_ROLES, c->role - 1));
+                             sym_name(args->oldp, SYM_ROLES, oldc->role - 1));
        if (!role)
                goto bad;
-       c->role = role->value;
+       newc->role = role->value;
 
        /* Convert the type. */
        rc = -EINVAL;
        typdatum = hashtab_search(args->newp->p_types.table,
-                                 sym_name(args->oldp, SYM_TYPES, c->type - 1));
+                                 sym_name(args->oldp,
+                                          SYM_TYPES, oldc->type - 1));
        if (!typdatum)
                goto bad;
-       c->type = typdatum->value;
+       newc->type = typdatum->value;
 
        /* Convert the MLS fields if dealing with MLS policies */
        if (args->oldp->mls_enabled && args->newp->mls_enabled) {
-               rc = mls_convert_context(args->oldp, args->newp, c);
+               rc = mls_convert_context(args->oldp, args->newp, oldc, newc);
                if (rc)
                        goto bad;
-       } else if (args->oldp->mls_enabled && !args->newp->mls_enabled) {
-               /*
-                * Switching between MLS and non-MLS policy:
-                * free any storage used by the MLS fields in the
-                * context for all existing entries in the sidtab.
-                */
-               mls_context_destroy(c);
        } else if (!args->oldp->mls_enabled && args->newp->mls_enabled) {
                /*
                 * Switching between non-MLS and MLS policy:
                                " the initial SIDs list\n");
                        goto bad;
                }
-               range = &oc->context[0].range;
-               rc = mls_range_set(c, range);
+               rc = mls_range_set(newc, &oc->context[0].range);
                if (rc)
                        goto bad;
        }
 
        /* Check the validity of the new context. */
-       if (!policydb_context_isvalid(args->newp, c)) {
-               rc = convert_context_handle_invalid_context(args->state,
-                                                           &oldc);
+       if (!policydb_context_isvalid(args->newp, newc)) {
+               rc = convert_context_handle_invalid_context(args->state, oldc);
                if (rc)
                        goto bad;
        }
 
-       context_destroy(&oldc);
-
-       rc = 0;
-out:
-       return rc;
+       return 0;
 bad:
        /* Map old representation to string and save it. */
-       rc = context_struct_to_string(args->oldp, &oldc, &s, &len);
+       rc = context_struct_to_string(args->oldp, oldc, &s, &len);
        if (rc)
                return rc;
-       context_destroy(&oldc);
-       context_destroy(c);
-       c->str = s;
-       c->len = len;
+       context_destroy(newc);
+       newc->str = s;
+       newc->len = len;
        pr_info("SELinux:  Context %s became invalid (unmapped).\n",
-              c->str);
-       rc = 0;
-       goto out;
+               newc->str);
+       return 0;
 }
 
 static void security_load_policycaps(struct selinux_state *state)
        struct policydb *oldpolicydb, *newpolicydb;
        struct selinux_mapping *oldmapping;
        struct selinux_map newmap;
+       struct sidtab_convert_params convert_params;
        struct convert_context_args args;
        u32 seqno;
        int rc = 0;
                goto out;
        }
 
-       oldsidtab = state->ss->sidtab;
-
-#if 0
-       sidtab_hash_eval(oldsidtab, "sids");
-#endif
-
        rc = policydb_read(newpolicydb, fp);
        if (rc) {
                kfree(newsidtab);
                goto err;
        }
 
+       oldsidtab = state->ss->sidtab;
+
        /*
         * Convert the internal representations of contexts
         * in the new SID table.
        args.state = state;
        args.oldp = policydb;
        args.newp = newpolicydb;
-       rc = sidtab_convert(oldsidtab, newsidtab, convert_context, &args);
+
+       convert_params.func = convert_context;
+       convert_params.args = &args;
+       convert_params.target = newsidtab;
+
+       rc = sidtab_convert(oldsidtab, &convert_params);
        if (rc) {
                pr_err("SELinux:  unable to convert the internal"
                        " representation of contexts in the new SID"
 
 /*
  * Implementation of the SID table type.
  *
- * Author : Stephen Smalley, <sds@tycho.nsa.gov>
+ * Original author: Stephen Smalley, <sds@tycho.nsa.gov>
+ * Author: Ondrej Mosnacek, <omosnacek@gmail.com>
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
  */
+#include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
 #include <linux/spinlock.h>
-#include <linux/errno.h>
+#include <linux/atomic.h>
 #include "flask.h"
 #include "security.h"
 #include "sidtab.h"
 
-#define SIDTAB_HASH(sid) \
-(sid & SIDTAB_HASH_MASK)
-
 int sidtab_init(struct sidtab *s)
 {
-       int i;
+       u32 i;
 
-       s->htable = kmalloc_array(SIDTAB_SIZE, sizeof(*s->htable), GFP_ATOMIC);
-       if (!s->htable)
-               return -ENOMEM;
+       memset(s->roots, 0, sizeof(s->roots));
+
+       for (i = 0; i < SIDTAB_RCACHE_SIZE; i++)
+               atomic_set(&s->rcache[i], -1);
 
        for (i = 0; i < SECINITSID_NUM; i++)
                s->isids[i].set = 0;
 
-       for (i = 0; i < SIDTAB_SIZE; i++)
-               s->htable[i] = NULL;
+       atomic_set(&s->count, 0);
 
-       for (i = 0; i < SIDTAB_CACHE_LEN; i++)
-               s->cache[i] = NULL;
+       s->convert = NULL;
 
-       s->nel = 0;
-       s->next_sid = 0;
-       s->shutdown = 0;
        spin_lock_init(&s->lock);
        return 0;
 }
 
-static int sidtab_insert(struct sidtab *s, u32 sid, struct context *context)
-{
-       int hvalue;
-       struct sidtab_node *prev, *cur, *newnode;
-
-       if (!s)
-               return -ENOMEM;
-
-       hvalue = SIDTAB_HASH(sid);
-       prev = NULL;
-       cur = s->htable[hvalue];
-       while (cur && sid > cur->sid) {
-               prev = cur;
-               cur = cur->next;
-       }
-
-       if (cur && sid == cur->sid)
-               return -EEXIST;
-
-       newnode = kmalloc(sizeof(*newnode), GFP_ATOMIC);
-       if (!newnode)
-               return -ENOMEM;
-
-       newnode->sid = sid;
-       if (context_cpy(&newnode->context, context)) {
-               kfree(newnode);
-               return -ENOMEM;
-       }
-
-       if (prev) {
-               newnode->next = prev->next;
-               wmb();
-               prev->next = newnode;
-       } else {
-               newnode->next = s->htable[hvalue];
-               wmb();
-               s->htable[hvalue] = newnode;
-       }
-
-       s->nel++;
-       if (sid >= s->next_sid)
-               s->next_sid = sid + 1;
-       return 0;
-}
-
 int sidtab_set_initial(struct sidtab *s, u32 sid, struct context *context)
 {
        struct sidtab_isid_entry *entry;
        return 0;
 }
 
-static struct context *sidtab_lookup(struct sidtab *s, u32 sid)
+static u32 sidtab_level_from_count(u32 count)
 {
-       int hvalue;
-       struct sidtab_node *cur;
+       u32 capacity = SIDTAB_LEAF_ENTRIES;
+       u32 level = 0;
+
+       while (count > capacity) {
+               capacity <<= SIDTAB_INNER_SHIFT;
+               ++level;
+       }
+       return level;
+}
+
+static int sidtab_alloc_roots(struct sidtab *s, u32 level)
+{
+       u32 l;
+
+       if (!s->roots[0].ptr_leaf) {
+               s->roots[0].ptr_leaf = kzalloc(SIDTAB_NODE_ALLOC_SIZE,
+                                              GFP_ATOMIC);
+               if (!s->roots[0].ptr_leaf)
+                       return -ENOMEM;
+       }
+       for (l = 1; l <= level; ++l)
+               if (!s->roots[l].ptr_inner) {
+                       s->roots[l].ptr_inner = kzalloc(SIDTAB_NODE_ALLOC_SIZE,
+                                                       GFP_ATOMIC);
+                       if (!s->roots[l].ptr_inner)
+                               return -ENOMEM;
+                       s->roots[l].ptr_inner->entries[0] = s->roots[l - 1];
+               }
+       return 0;
+}
+
+static struct context *sidtab_do_lookup(struct sidtab *s, u32 index, int alloc)
+{
+       union sidtab_entry_inner *entry;
+       u32 level, capacity_shift, leaf_index = index / SIDTAB_LEAF_ENTRIES;
+
+       /* find the level of the subtree we need */
+       level = sidtab_level_from_count(index + 1);
+       capacity_shift = level * SIDTAB_INNER_SHIFT;
+
+       /* allocate roots if needed */
+       if (alloc && sidtab_alloc_roots(s, level) != 0)
+               return NULL;
+
+       /* lookup inside the subtree */
+       entry = &s->roots[level];
+       while (level != 0) {
+               capacity_shift -= SIDTAB_INNER_SHIFT;
+               --level;
+
+               entry = &entry->ptr_inner->entries[leaf_index >> capacity_shift];
+               leaf_index &= ((u32)1 << capacity_shift) - 1;
+
+               if (!entry->ptr_inner) {
+                       if (alloc)
+                               entry->ptr_inner = kzalloc(SIDTAB_NODE_ALLOC_SIZE,
+                                                          GFP_ATOMIC);
+                       if (!entry->ptr_inner)
+                               return NULL;
+               }
+       }
+       if (!entry->ptr_leaf) {
+               if (alloc)
+                       entry->ptr_leaf = kzalloc(SIDTAB_NODE_ALLOC_SIZE,
+                                                 GFP_ATOMIC);
+               if (!entry->ptr_leaf)
+                       return NULL;
+       }
+       return &entry->ptr_leaf->entries[index % SIDTAB_LEAF_ENTRIES].context;
+}
 
-       hvalue = SIDTAB_HASH(sid);
-       cur = s->htable[hvalue];
-       while (cur && sid > cur->sid)
-               cur = cur->next;
+static struct context *sidtab_lookup(struct sidtab *s, u32 index)
+{
+       u32 count = (u32)atomic_read(&s->count);
 
-       if (!cur || sid != cur->sid)
+       if (index >= count)
                return NULL;
 
-       return &cur->context;
+       /* read entries after reading count */
+       smp_rmb();
+
+       return sidtab_do_lookup(s, index, 0);
 }
 
 static struct context *sidtab_lookup_initial(struct sidtab *s, u32 sid)
 {
        struct context *context;
 
-       if (!s)
-               return NULL;
-
        if (sid != 0) {
                if (sid > SECINITSID_NUM)
                        context = sidtab_lookup(s, sid - (SECINITSID_NUM + 1));
        return sidtab_search_core(s, sid, 1);
 }
 
-static int sidtab_map(struct sidtab *s,
-                     int (*apply)(u32 sid,
-                                  struct context *context,
-                                  void *args),
-                     void *args)
+static int sidtab_find_context(union sidtab_entry_inner entry,
+                              u32 *pos, u32 count, u32 level,
+                              struct context *context, u32 *index)
 {
-       int i, rc = 0;
-       struct sidtab_node *cur;
+       int rc;
+       u32 i;
 
-       if (!s)
-               goto out;
+       if (level != 0) {
+               struct sidtab_node_inner *node = entry.ptr_inner;
 
-       for (i = 0; i < SIDTAB_SIZE; i++) {
-               cur = s->htable[i];
-               while (cur) {
-                       rc = apply(cur->sid, &cur->context, args);
-                       if (rc)
-                               goto out;
-                       cur = cur->next;
+               i = 0;
+               while (i < SIDTAB_INNER_ENTRIES && *pos < count) {
+                       rc = sidtab_find_context(node->entries[i],
+                                                pos, count, level - 1,
+                                                context, index);
+                       if (rc == 0)
+                               return 0;
+                       i++;
+               }
+       } else {
+               struct sidtab_node_leaf *node = entry.ptr_leaf;
+
+               i = 0;
+               while (i < SIDTAB_LEAF_ENTRIES && *pos < count) {
+                       if (context_cmp(&node->entries[i].context, context)) {
+                               *index = *pos;
+                               return 0;
+                       }
+                       (*pos)++;
+                       i++;
                }
        }
-out:
-       return rc;
+       return -ENOENT;
 }
 
-/* Clone the SID into the new SID table. */
-static int clone_sid(u32 sid, struct context *context, void *arg)
+static void sidtab_rcache_update(struct sidtab *s, u32 index, u32 pos)
 {
-       struct sidtab *s = arg;
-       return sidtab_insert(s, sid, context);
+       while (pos > 0) {
+               atomic_set(&s->rcache[pos], atomic_read(&s->rcache[pos - 1]));
+               --pos;
+       }
+       atomic_set(&s->rcache[0], (int)index);
 }
 
-int sidtab_convert(struct sidtab *s, struct sidtab *news,
-                  int (*convert)(u32 sid,
-                                 struct context *context,
-                                 void *args),
-                  void *args)
+static void sidtab_rcache_push(struct sidtab *s, u32 index)
 {
-       unsigned long flags;
-       int rc;
-
-       spin_lock_irqsave(&s->lock, flags);
-       s->shutdown = 1;
-       spin_unlock_irqrestore(&s->lock, flags);
-
-       rc = sidtab_map(s, clone_sid, news);
-       if (rc)
-               return rc;
-
-       return sidtab_map(news, convert, args);
+       sidtab_rcache_update(s, index, SIDTAB_RCACHE_SIZE - 1);
 }
 
-static void sidtab_update_cache(struct sidtab *s, struct sidtab_node *n, int loc)
+static int sidtab_rcache_search(struct sidtab *s, struct context *context,
+                               u32 *index)
 {
-       BUG_ON(loc >= SIDTAB_CACHE_LEN);
+       u32 i;
 
-       while (loc > 0) {
-               s->cache[loc] = s->cache[loc - 1];
-               loc--;
-       }
-       s->cache[0] = n;
-}
+       for (i = 0; i < SIDTAB_RCACHE_SIZE; i++) {
+               int v = atomic_read(&s->rcache[i]);
 
-static inline int sidtab_search_context(struct sidtab *s,
-                                       struct context *context, u32 *sid)
-{
-       int i;
-       struct sidtab_node *cur;
-
-       for (i = 0; i < SIDTAB_SIZE; i++) {
-               cur = s->htable[i];
-               while (cur) {
-                       if (context_cmp(&cur->context, context)) {
-                               sidtab_update_cache(s, cur, SIDTAB_CACHE_LEN - 1);
-                               *sid = cur->sid;
-                               return 0;
-                       }
-                       cur = cur->next;
-               }
-       }
-       return -ENOENT;
-}
+               if (v < 0)
+                       continue;
 
-static inline int sidtab_search_cache(struct sidtab *s, struct context *context,
-                                     u32 *sid)
-{
-       int i;
-       struct sidtab_node *node;
-
-       for (i = 0; i < SIDTAB_CACHE_LEN; i++) {
-               node = s->cache[i];
-               if (unlikely(!node))
-                       return -ENOENT;
-               if (context_cmp(&node->context, context)) {
-                       sidtab_update_cache(s, node, i);
-                       *sid = node->sid;
+               if (context_cmp(sidtab_do_lookup(s, (u32)v, 0), context)) {
+                       sidtab_rcache_update(s, (u32)v, i);
+                       *index = (u32)v;
                        return 0;
                }
        }
 }
 
 static int sidtab_reverse_lookup(struct sidtab *s, struct context *context,
-                                u32 *sid)
+                                u32 *index)
 {
-       int ret;
        unsigned long flags;
+       u32 count = (u32)atomic_read(&s->count);
+       u32 count_locked, level, pos;
+       struct sidtab_convert_params *convert;
+       struct context *dst, *dst_convert;
+       int rc;
 
-       ret = sidtab_search_cache(s, context, sid);
-       if (ret)
-               ret = sidtab_search_context(s, context, sid);
-       if (ret) {
-               spin_lock_irqsave(&s->lock, flags);
-               /* Rescan now that we hold the lock. */
-               ret = sidtab_search_context(s, context, sid);
-               if (!ret)
-                       goto unlock_out;
-               /* No SID exists for the context.  Allocate a new one. */
-               if (s->next_sid == (UINT_MAX - SECINITSID_NUM - 1) ||
-                   s->shutdown) {
-                       ret = -ENOMEM;
-                       goto unlock_out;
+       rc = sidtab_rcache_search(s, context, index);
+       if (rc == 0)
+               return 0;
+
+       level = sidtab_level_from_count(count);
+
+       /* read entries after reading count */
+       smp_rmb();
+
+       pos = 0;
+       rc = sidtab_find_context(s->roots[level], &pos, count, level,
+                                context, index);
+       if (rc == 0) {
+               sidtab_rcache_push(s, *index);
+               return 0;
+       }
+
+       /* lock-free search failed: lock, re-search, and insert if not found */
+       spin_lock_irqsave(&s->lock, flags);
+
+       convert = s->convert;
+       count_locked = (u32)atomic_read(&s->count);
+       level = sidtab_level_from_count(count_locked);
+
+       /* if count has changed before we acquired the lock, then catch up */
+       while (count < count_locked) {
+               if (context_cmp(sidtab_do_lookup(s, count, 0), context)) {
+                       sidtab_rcache_push(s, count);
+                       *index = count;
+                       rc = 0;
+                       goto out_unlock;
                }
-               *sid = s->next_sid++;
-               if (context->len)
-                       pr_info("SELinux:  Context %s is not valid (left unmapped).\n",
-                              context->str);
-               ret = sidtab_insert(s, *sid, context);
-               if (ret)
-                       s->next_sid--;
-unlock_out:
-               spin_unlock_irqrestore(&s->lock, flags);
+               ++count;
+       }
+
+       /* insert context into new entry */
+       rc = -ENOMEM;
+       dst = sidtab_do_lookup(s, count, 1);
+       if (!dst)
+               goto out_unlock;
+
+       rc = context_cpy(dst, context);
+       if (rc)
+               goto out_unlock;
+
+       /*
+        * if we are building a new sidtab, we need to convert the context
+        * and insert it there as well
+        */
+       if (convert) {
+               rc = -ENOMEM;
+               dst_convert = sidtab_do_lookup(convert->target, count, 1);
+               if (!dst_convert) {
+                       context_destroy(dst);
+                       goto out_unlock;
+               }
+
+               rc = convert->func(context, dst_convert, convert->args);
+               if (rc) {
+                       context_destroy(dst);
+                       goto out_unlock;
+               }
+
+               /* at this point we know the insert won't fail */
+               atomic_set(&convert->target->count, count + 1);
        }
 
-       return ret;
+       if (context->len)
+               pr_info("SELinux:  Context %s is not valid (left unmapped).\n",
+                       context->str);
+
+       sidtab_rcache_push(s, count);
+       *index = count;
+
+       /* write entries before writing new count */
+       smp_wmb();
+
+       atomic_set(&s->count, count + 1);
+
+       rc = 0;
+out_unlock:
+       spin_unlock_irqrestore(&s->lock, flags);
+       return rc;
 }
 
 int sidtab_context_to_sid(struct sidtab *s, struct context *context, u32 *sid)
        return 0;
 }
 
-void sidtab_hash_eval(struct sidtab *h, char *tag)
+static int sidtab_convert_tree(union sidtab_entry_inner *edst,
+                              union sidtab_entry_inner *esrc,
+                              u32 *pos, u32 count, u32 level,
+                              struct sidtab_convert_params *convert)
 {
-       int i, chain_len, slots_used, max_chain_len;
-       struct sidtab_node *cur;
-
-       slots_used = 0;
-       max_chain_len = 0;
-       for (i = 0; i < SIDTAB_SIZE; i++) {
-               cur = h->htable[i];
-               if (cur) {
-                       slots_used++;
-                       chain_len = 0;
-                       while (cur) {
-                               chain_len++;
-                               cur = cur->next;
-                       }
+       int rc;
+       u32 i;
 
-                       if (chain_len > max_chain_len)
-                               max_chain_len = chain_len;
+       if (level != 0) {
+               if (!edst->ptr_inner) {
+                       edst->ptr_inner = kzalloc(SIDTAB_NODE_ALLOC_SIZE,
+                                                 GFP_KERNEL);
+                       if (!edst->ptr_inner)
+                               return -ENOMEM;
                }
+               i = 0;
+               while (i < SIDTAB_INNER_ENTRIES && *pos < count) {
+                       rc = sidtab_convert_tree(&edst->ptr_inner->entries[i],
+                                                &esrc->ptr_inner->entries[i],
+                                                pos, count, level - 1,
+                                                convert);
+                       if (rc)
+                               return rc;
+                       i++;
+               }
+       } else {
+               if (!edst->ptr_leaf) {
+                       edst->ptr_leaf = kzalloc(SIDTAB_NODE_ALLOC_SIZE,
+                                                GFP_KERNEL);
+                       if (!edst->ptr_leaf)
+                               return -ENOMEM;
+               }
+               i = 0;
+               while (i < SIDTAB_LEAF_ENTRIES && *pos < count) {
+                       rc = convert->func(&esrc->ptr_leaf->entries[i].context,
+                                          &edst->ptr_leaf->entries[i].context,
+                                          convert->args);
+                       if (rc)
+                               return rc;
+                       (*pos)++;
+                       i++;
+               }
+               cond_resched();
+       }
+       return 0;
+}
+
+int sidtab_convert(struct sidtab *s, struct sidtab_convert_params *params)
+{
+       unsigned long flags;
+       u32 count, level, pos;
+       int rc;
+
+       spin_lock_irqsave(&s->lock, flags);
+
+       /* concurrent policy loads are not allowed */
+       if (s->convert) {
+               spin_unlock_irqrestore(&s->lock, flags);
+               return -EBUSY;
        }
 
-       pr_debug("%s:  %d entries and %d/%d buckets used, longest "
-              "chain length %d\n", tag, h->nel, slots_used, SIDTAB_SIZE,
-              max_chain_len);
+       count = (u32)atomic_read(&s->count);
+       level = sidtab_level_from_count(count);
+
+       /* allocate last leaf in the new sidtab (to avoid race with
+        * live convert)
+        */
+       rc = sidtab_do_lookup(params->target, count - 1, 1) ? 0 : -ENOMEM;
+       if (rc) {
+               spin_unlock_irqrestore(&s->lock, flags);
+               return rc;
+       }
+
+       /* set count in case no new entries are added during conversion */
+       atomic_set(¶ms->target->count, count);
+
+       /* enable live convert of new entries */
+       s->convert = params;
+
+       /* we can safely do the rest of the conversion outside the lock */
+       spin_unlock_irqrestore(&s->lock, flags);
+
+       pr_info("SELinux:  Converting %u SID table entries...\n", count);
+
+       /* convert all entries not covered by live convert */
+       pos = 0;
+       rc = sidtab_convert_tree(¶ms->target->roots[level],
+                                &s->roots[level], &pos, count, level, params);
+       if (rc) {
+               /* we need to keep the old table - disable live convert */
+               spin_lock_irqsave(&s->lock, flags);
+               s->convert = NULL;
+               spin_unlock_irqrestore(&s->lock, flags);
+       }
+       return rc;
 }
 
-void sidtab_destroy(struct sidtab *s)
+static void sidtab_destroy_tree(union sidtab_entry_inner entry, u32 level)
 {
-       int i;
-       struct sidtab_node *cur, *temp;
+       u32 i;
+
+       if (level != 0) {
+               struct sidtab_node_inner *node = entry.ptr_inner;
+
+               if (!node)
+                       return;
+
+               for (i = 0; i < SIDTAB_INNER_ENTRIES; i++)
+                       sidtab_destroy_tree(node->entries[i], level - 1);
+               kfree(node);
+       } else {
+               struct sidtab_node_leaf *node = entry.ptr_leaf;
 
-       if (!s)
-               return;
+               if (!node)
+                       return;
+
+               for (i = 0; i < SIDTAB_LEAF_ENTRIES; i++)
+                       context_destroy(&node->entries[i].context);
+               kfree(node);
+       }
+}
+
+void sidtab_destroy(struct sidtab *s)
+{
+       u32 i, level;
 
        for (i = 0; i < SECINITSID_NUM; i++)
                if (s->isids[i].set)
                        context_destroy(&s->isids[i].context);
 
-       for (i = 0; i < SIDTAB_SIZE; i++) {
-               cur = s->htable[i];
-               while (cur) {
-                       temp = cur;
-                       cur = cur->next;
-                       context_destroy(&temp->context);
-                       kfree(temp);
-               }
-               s->htable[i] = NULL;
-       }
-       kfree(s->htable);
-       s->htable = NULL;
-       s->nel = 0;
-       s->next_sid = 1;
+       level = SIDTAB_MAX_LEVEL;
+       while (level && !s->roots[level].ptr_inner)
+               --level;
+
+       sidtab_destroy_tree(s->roots[level], level);
 }
 
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * A security identifier table (sidtab) is a hash table
+ * A security identifier table (sidtab) is a lookup table
  * of security context structures indexed by SID value.
  *
- * Author : Stephen Smalley, <sds@tycho.nsa.gov>
+ * Original author: Stephen Smalley, <sds@tycho.nsa.gov>
+ * Author: Ondrej Mosnacek, <omosnacek@gmail.com>
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
  */
 #ifndef _SS_SIDTAB_H_
 #define _SS_SIDTAB_H_
 
+#include <linux/spinlock_types.h>
+#include <linux/log2.h>
+
 #include "context.h"
 
-struct sidtab_node {
-       u32 sid;                /* security identifier */
-       struct context context; /* security context structure */
-       struct sidtab_node *next;
+struct sidtab_entry_leaf {
+       struct context context;
+};
+
+struct sidtab_node_inner;
+struct sidtab_node_leaf;
+
+union sidtab_entry_inner {
+       struct sidtab_node_inner *ptr_inner;
+       struct sidtab_node_leaf  *ptr_leaf;
 };
 
-#define SIDTAB_HASH_BITS 7
-#define SIDTAB_HASH_BUCKETS (1 << SIDTAB_HASH_BITS)
-#define SIDTAB_HASH_MASK (SIDTAB_HASH_BUCKETS-1)
+/* align node size to page boundary */
+#define SIDTAB_NODE_ALLOC_SHIFT PAGE_SHIFT
+#define SIDTAB_NODE_ALLOC_SIZE  PAGE_SIZE
+
+#define size_to_shift(size) ((size) == 1 ? 1 : (const_ilog2((size) - 1) + 1))
+
+#define SIDTAB_INNER_SHIFT \
+       (SIDTAB_NODE_ALLOC_SHIFT - size_to_shift(sizeof(union sidtab_entry_inner)))
+#define SIDTAB_INNER_ENTRIES ((size_t)1 << SIDTAB_INNER_SHIFT)
+#define SIDTAB_LEAF_ENTRIES \
+       (SIDTAB_NODE_ALLOC_SIZE / sizeof(struct sidtab_entry_leaf))
 
-#define SIDTAB_SIZE SIDTAB_HASH_BUCKETS
+#define SIDTAB_MAX_BITS 31 /* limited to INT_MAX due to atomic_t range */
+#define SIDTAB_MAX (((u32)1 << SIDTAB_MAX_BITS) - 1)
+/* ensure enough tree levels for SIDTAB_MAX entries */
+#define SIDTAB_MAX_LEVEL \
+       DIV_ROUND_UP(SIDTAB_MAX_BITS - size_to_shift(SIDTAB_LEAF_ENTRIES), \
+                    SIDTAB_INNER_SHIFT)
+
+struct sidtab_node_leaf {
+       struct sidtab_entry_leaf entries[SIDTAB_LEAF_ENTRIES];
+};
+
+struct sidtab_node_inner {
+       union sidtab_entry_inner entries[SIDTAB_INNER_ENTRIES];
+};
 
 struct sidtab_isid_entry {
        int set;
        struct context context;
 };
 
+struct sidtab_convert_params {
+       int (*func)(struct context *oldc, struct context *newc, void *args);
+       void *args;
+       struct sidtab *target;
+};
+
+#define SIDTAB_RCACHE_SIZE 3
+
 struct sidtab {
-       struct sidtab_node **htable;
-       unsigned int nel;       /* number of elements */
-       unsigned int next_sid;  /* next SID to allocate */
-       unsigned char shutdown;
-#define SIDTAB_CACHE_LEN       3
-       struct sidtab_node *cache[SIDTAB_CACHE_LEN];
+       union sidtab_entry_inner roots[SIDTAB_MAX_LEVEL + 1];
+       atomic_t count;
+       struct sidtab_convert_params *convert;
        spinlock_t lock;
 
+       /* reverse lookup cache */
+       atomic_t rcache[SIDTAB_RCACHE_SIZE];
+
        /* index == SID - 1 (no entry for SECSID_NULL) */
        struct sidtab_isid_entry isids[SECINITSID_NUM];
 };
 struct context *sidtab_search(struct sidtab *s, u32 sid);
 struct context *sidtab_search_force(struct sidtab *s, u32 sid);
 
-int sidtab_convert(struct sidtab *s, struct sidtab *news,
-                  int (*apply)(u32 sid,
-                               struct context *context,
-                               void *args),
-                  void *args);
+int sidtab_convert(struct sidtab *s, struct sidtab_convert_params *params);
 
 int sidtab_context_to_sid(struct sidtab *s, struct context *context, u32 *sid);
 
-void sidtab_hash_eval(struct sidtab *h, char *tag);
 void sidtab_destroy(struct sidtab *s);
 
 #endif /* _SS_SIDTAB_H_ */