#include <linux/proc_ns.h>
 #include <linux/proc_fs.h>
 #include <linux/sched/task.h>
+#include <linux/idr.h>
 
 #define pid_hashfn(nr, ns)     \
        hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
 int pid_max_min = RESERVED_PIDS + 1;
 int pid_max_max = PID_MAX_LIMIT;
 
-static inline int mk_pid(struct pid_namespace *pid_ns,
-               struct pidmap *map, int off)
-{
-       return (map - pid_ns->pidmap)*BITS_PER_PAGE + off;
-}
-
-#define find_next_offset(map, off)                                     \
-               find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
 
 /*
  * PID-map pages start out as NULL, they get allocated upon
  */
 struct pid_namespace init_pid_ns = {
        .kref = KREF_INIT(2),
-       .pidmap = {
-               [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
-       },
-       .last_pid = 0,
+       .idr = IDR_INIT,
        .nr_hashed = PIDNS_HASH_ADDING,
        .level = 0,
        .child_reaper = &init_task,
 
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 
-static void free_pidmap(struct upid *upid)
-{
-       int nr = upid->nr;
-       struct pidmap *map = upid->ns->pidmap + nr / BITS_PER_PAGE;
-       int offset = nr & BITS_PER_PAGE_MASK;
-
-       clear_bit(offset, map->page);
-       atomic_inc(&map->nr_free);
-}
-
-/*
- * If we started walking pids at 'base', is 'a' seen before 'b'?
- */
-static int pid_before(int base, int a, int b)
-{
-       /*
-        * This is the same as saying
-        *
-        * (a - base + MAXUINT) % MAXUINT < (b - base + MAXUINT) % MAXUINT
-        * and that mapping orders 'a' and 'b' with respect to 'base'.
-        */
-       return (unsigned)(a - base) < (unsigned)(b - base);
-}
-
-/*
- * We might be racing with someone else trying to set pid_ns->last_pid
- * at the pid allocation time (there's also a sysctl for this, but racing
- * with this one is OK, see comment in kernel/pid_namespace.c about it).
- * We want the winner to have the "later" value, because if the
- * "earlier" value prevails, then a pid may get reused immediately.
- *
- * Since pids rollover, it is not sufficient to just pick the bigger
- * value.  We have to consider where we started counting from.
- *
- * 'base' is the value of pid_ns->last_pid that we observed when
- * we started looking for a pid.
- *
- * 'pid' is the pid that we eventually found.
- */
-static void set_last_pid(struct pid_namespace *pid_ns, int base, int pid)
-{
-       int prev;
-       int last_write = base;
-       do {
-               prev = last_write;
-               last_write = cmpxchg(&pid_ns->last_pid, prev, pid);
-       } while ((prev != last_write) && (pid_before(base, last_write, pid)));
-}
-
-static int alloc_pidmap(struct pid_namespace *pid_ns)
-{
-       int i, offset, max_scan, pid, last = pid_ns->last_pid;
-       struct pidmap *map;
-
-       pid = last + 1;
-       if (pid >= pid_max)
-               pid = RESERVED_PIDS;
-       offset = pid & BITS_PER_PAGE_MASK;
-       map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
-       /*
-        * If last_pid points into the middle of the map->page we
-        * want to scan this bitmap block twice, the second time
-        * we start with offset == 0 (or RESERVED_PIDS).
-        */
-       max_scan = DIV_ROUND_UP(pid_max, BITS_PER_PAGE) - !offset;
-       for (i = 0; i <= max_scan; ++i) {
-               if (unlikely(!map->page)) {
-                       void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
-                       /*
-                        * Free the page if someone raced with us
-                        * installing it:
-                        */
-                       spin_lock_irq(&pidmap_lock);
-                       if (!map->page) {
-                               map->page = page;
-                               page = NULL;
-                       }
-                       spin_unlock_irq(&pidmap_lock);
-                       kfree(page);
-                       if (unlikely(!map->page))
-                               return -ENOMEM;
-               }
-               if (likely(atomic_read(&map->nr_free))) {
-                       for ( ; ; ) {
-                               if (!test_and_set_bit(offset, map->page)) {
-                                       atomic_dec(&map->nr_free);
-                                       set_last_pid(pid_ns, last, pid);
-                                       return pid;
-                               }
-                               offset = find_next_offset(map, offset);
-                               if (offset >= BITS_PER_PAGE)
-                                       break;
-                               pid = mk_pid(pid_ns, map, offset);
-                               if (pid >= pid_max)
-                                       break;
-                       }
-               }
-               if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
-                       ++map;
-                       offset = 0;
-               } else {
-                       map = &pid_ns->pidmap[0];
-                       offset = RESERVED_PIDS;
-                       if (unlikely(last == offset))
-                               break;
-               }
-               pid = mk_pid(pid_ns, map, offset);
-       }
-       return -EAGAIN;
-}
-
-int next_pidmap(struct pid_namespace *pid_ns, unsigned int last)
-{
-       int offset;
-       struct pidmap *map, *end;
-
-       if (last >= PID_MAX_LIMIT)
-               return -1;
-
-       offset = (last + 1) & BITS_PER_PAGE_MASK;
-       map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE];
-       end = &pid_ns->pidmap[PIDMAP_ENTRIES];
-       for (; map < end; map++, offset = 0) {
-               if (unlikely(!map->page))
-                       continue;
-               offset = find_next_bit((map)->page, BITS_PER_PAGE, offset);
-               if (offset < BITS_PER_PAGE)
-                       return mk_pid(pid_ns, map, offset);
-       }
-       return -1;
-}
-
 void put_pid(struct pid *pid)
 {
        struct pid_namespace *ns;
                struct upid *upid = pid->numbers + i;
                struct pid_namespace *ns = upid->ns;
                hlist_del_rcu(&upid->pid_chain);
-               switch(--ns->nr_hashed) {
+               switch (--ns->nr_hashed) {
                case 2:
                case 1:
                        /* When all that is left in the pid namespace
                        schedule_work(&ns->proc_work);
                        break;
                }
+
+               idr_remove(&ns->idr, upid->nr);
        }
        spin_unlock_irqrestore(&pidmap_lock, flags);
 
-       for (i = 0; i <= pid->level; i++)
-               free_pidmap(pid->numbers + i);
-
        call_rcu(&pid->rcu, delayed_put_pid);
 }
 
 
        tmp = ns;
        pid->level = ns->level;
+
        for (i = ns->level; i >= 0; i--) {
-               nr = alloc_pidmap(tmp);
+               int pid_min = 1;
+
+               idr_preload(GFP_KERNEL);
+               spin_lock_irq(&pidmap_lock);
+
+               /*
+                * init really needs pid 1, but after reaching the maximum
+                * wrap back to RESERVED_PIDS
+                */
+               if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS)
+                       pid_min = RESERVED_PIDS;
+
+               /*
+                * Store a null pointer so find_pid_ns does not find
+                * a partially initialized PID (see below).
+                */
+               nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
+                                     pid_max, GFP_ATOMIC);
+               spin_unlock_irq(&pidmap_lock);
+               idr_preload_end();
+
                if (nr < 0) {
                        retval = nr;
                        goto out_free;
        for ( ; upid >= pid->numbers; --upid) {
                hlist_add_head_rcu(&upid->pid_chain,
                                &pid_hash[pid_hashfn(upid->nr, upid->ns)]);
+               /* Make the PID visible to find_pid_ns. */
+               idr_replace(&upid->ns->idr, pid, upid->nr);
                upid->ns->nr_hashed++;
        }
        spin_unlock_irq(&pidmap_lock);
        put_pid_ns(ns);
 
 out_free:
+       spin_lock_irq(&pidmap_lock);
        while (++i <= ns->level)
-               free_pidmap(pid->numbers + i);
+               idr_remove(&ns->idr, (pid->numbers + i)->nr);
+
+       spin_unlock_irq(&pidmap_lock);
 
        kmem_cache_free(ns->pid_cachep, pid);
        return ERR_PTR(retval);
  */
 struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
 {
-	struct pid *pid;
-
-	do {
-		pid = find_pid_ns(nr, ns);
-		if (pid)
-			break;
-		nr = next_pidmap(ns, nr);
-	} while (nr > 0);
-
-	return pid;
+	/*
+	 * The per-namespace IDR stores a NULL placeholder until the pid
+	 * is fully hashed (idr_replace in alloc_pid makes it visible),
+	 * and idr_get_next skips empty slots, so the first entry at or
+	 * above nr is always a fully initialized pid.
+	 */
+	return idr_get_next(&ns->idr, &nr);
 }
 
 /*
                                           0, 4096);
 }
 
-void __init pidmap_init(void)
+void __init pid_idr_init(void)
 {
        /* Verify no one has done anything silly: */
        BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
                                PIDS_PER_CPU_MIN * num_possible_cpus());
        pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
 
-       init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
-       /* Reserve PID 0. We never call free_pidmap(0) */
-       set_bit(0, init_pid_ns.pidmap[0].page);
-       atomic_dec(&init_pid_ns.pidmap[0].nr_free);
+       idr_init(&init_pid_ns.idr);
 
        init_pid_ns.pid_cachep = KMEM_CACHE(pid,
                        SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
 
 #include <linux/export.h>
 #include <linux/sched/task.h>
 #include <linux/sched/signal.h>
+#include <linux/idr.h>
 
 struct pid_cache {
        int nr_ids;
        struct pid_namespace *ns;
        unsigned int level = parent_pid_ns->level + 1;
        struct ucounts *ucounts;
-       int i;
        int err;
 
        err = -EINVAL;
        if (ns == NULL)
                goto out_dec;
 
-       ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
-       if (!ns->pidmap[0].page)
-               goto out_free;
+       idr_init(&ns->idr);
 
        ns->pid_cachep = create_pid_cachep(level + 1);
        if (ns->pid_cachep == NULL)
-               goto out_free_map;
+               goto out_free_idr;
 
        err = ns_alloc_inum(&ns->ns);
        if (err)
-               goto out_free_map;
+               goto out_free_idr;
        ns->ns.ops = &pidns_operations;
 
        kref_init(&ns->kref);
        ns->nr_hashed = PIDNS_HASH_ADDING;
        INIT_WORK(&ns->proc_work, proc_cleanup_work);
 
-       set_bit(0, ns->pidmap[0].page);
-       atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
-
-       for (i = 1; i < PIDMAP_ENTRIES; i++)
-               atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
-
        return ns;
 
-out_free_map:
-       kfree(ns->pidmap[0].page);
-out_free:
+out_free_idr:
+       idr_destroy(&ns->idr);
        kmem_cache_free(pid_ns_cachep, ns);
 out_dec:
        dec_pid_namespaces(ucounts);
 
 static void destroy_pid_namespace(struct pid_namespace *ns)
 {
-	int i;
-
 	ns_free_inum(&ns->ns);
-	for (i = 0; i < PIDMAP_ENTRIES; i++)
-		kfree(ns->pidmap[i].page);
+
+	/*
+	 * idr_destroy() only frees the IDR's internal bookkeeping; it does
+	 * not free entries. By this point the namespace should hold no live
+	 * pids (they are removed on the pid free path) — NOTE(review):
+	 * relies on that invariant, confirm against the teardown callers.
+	 */
+	idr_destroy(&ns->idr);
 	call_rcu(&ns->rcu, delayed_free_pidns);
 }
 
        int rc;
        struct task_struct *task, *me = current;
        int init_pids = thread_group_leader(me) ? 1 : 2;
+       struct pid *pid;
 
        /* Don't allow any more processes into the pid namespace */
        disable_pid_allocation(pid_ns);
         *        maintain a tasklist for each pid namespace.
         *
         */
+       rcu_read_lock();
        read_lock(&tasklist_lock);
-       nr = next_pidmap(pid_ns, 1);
-       while (nr > 0) {
-               rcu_read_lock();
-
-               task = pid_task(find_vpid(nr), PIDTYPE_PID);
+       nr = 2;
+       idr_for_each_entry_continue(&pid_ns->idr, pid, nr) {
+               task = pid_task(pid, PIDTYPE_PID);
                if (task && !__fatal_signal_pending(task))
                        send_sig_info(SIGKILL, SEND_SIG_FORCED, task);
-
-               rcu_read_unlock();
-
-               nr = next_pidmap(pid_ns, nr);
        }
        read_unlock(&tasklist_lock);
+       rcu_read_unlock();
 
        /*
         * Reap the EXIT_ZOMBIE children we had before we ignored SIGCHLD.
 {
        struct pid_namespace *pid_ns = task_active_pid_ns(current);
        struct ctl_table tmp = *table;
+       int ret, next;
 
        if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN))
                return -EPERM;
         * it should synchronize its usage with external means.
         */
 
-       tmp.data = &pid_ns->last_pid;
-       return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+       next = idr_get_cursor(&pid_ns->idr) - 1;
+
+       tmp.data = &next;
+       ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+       if (!ret && write)
+               idr_set_cursor(&pid_ns->idr, next + 1);
+
+       return ret;
 }
 
 extern int pid_max;