ipc: Convert ipcs_idr to XArray
author		Matthew Wilcox (Oracle) <willy@infradead.org>
		Sun, 4 Aug 2019 11:20:30 +0000 (07:20 -0400)
committer	Matthew Wilcox (Oracle) <willy@infradead.org>
		Fri, 9 Aug 2019 01:38:13 +0000 (21:38 -0400)
The XArray has better iteration primitives than the IDR, removing the
need to open-code loops over the entries.  We also no longer need to
call idr_destroy().  Allocating the ID is a little tricky because we
need to get 'seq' correct before the object becomes visible.  Open-code
a variant of __xa_alloc() which lets us set the ID and the seq before
depositing the pointer in the array.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
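
As a sketch of the loop conversion applied throughout this patch (not
part of the patch itself): the IDR walks had to count present entries,
because idr_find() returns NULL for holes and in_use is the only bound
on the search, whereas the XArray iterator skips holes by itself.

	#include <linux/idr.h>
	#include <linux/xarray.h>

	/* Old shape: probe every index until in_use entries are seen. */
	static void walk_idr(struct idr *idr, int in_use)
	{
		int id, total;

		for (total = 0, id = 0; total < in_use; id++) {
			void *p = idr_find(idr, id);

			if (!p)
				continue;
			/* ... use p ... */
			total++;
		}
	}

	/* New shape: holes are never visited, no bookkeeping needed. */
	static void walk_xa(struct xarray *xa)
	{
		unsigned long index;
		void *p;

		xa_for_each(xa, index, p) {
			/* ... use p ... */
		}
	}
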
include/linux/ipc_namespace.h
include/linux/xarray.h
ipc/ipc_sysctl.c
ipc/msg.c
ipc/namespace.c
ipc/sem.c
ipc/shm.c
ipc/util.c
ipc/util.h
lib/xarray.c

index c309f43bde45e086df82c7740aed8ddc3671bced..bdc39cc4d1dc83dec3fd1cc2731fb849e4a8169c 100644 (file)
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -3,13 +3,13 @@
 #define __IPC_NAMESPACE_H__
 
 #include <linux/err.h>
-#include <linux/idr.h>
-#include <linux/rwsem.h>
 #include <linux/notifier.h>
 #include <linux/nsproxy.h>
 #include <linux/ns_common.h>
 #include <linux/refcount.h>
 #include <linux/rhashtable-types.h>
+#include <linux/rwsem.h>
+#include <linux/xarray.h>
 
 struct user_namespace;
 
@@ -17,11 +17,11 @@ struct ipc_ids {
        int in_use;
        unsigned short seq;
        struct rw_semaphore rwsem;
-       struct idr ipcs_idr;
+       struct xarray ipcs;
        int max_idx;
-       int last_idx;   /* For wrap around detection */
+       int next_idx;
 #ifdef CONFIG_CHECKPOINT_RESTORE
-       int next_id;
+       int restore_id;
 #endif
        struct rhashtable key_ht;
 };
index 1c7ccdd072baeffd297dcb25fdc82430e141d427..0b53ce49e38b87cc0287fbb79f213ac8d7e67170 100644 (file)
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -1465,6 +1465,7 @@ void *xas_find_marked(struct xa_state *, unsigned long max, xa_mark_t);
 void xas_init_marks(const struct xa_state *);
 
 bool xas_nomem(struct xa_state *, gfp_t);
+bool __xas_nomem(struct xa_state *, gfp_t);
 void xas_pause(struct xa_state *);
 
 void xas_create_range(struct xa_state *);
index affd66537e87550a3fb88c144957506f9140dcd1..a2df5d0e27f61c0507226c7f752048b7ba334b86 100644 (file)
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -115,7 +115,7 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
 
 int ipc_mni = IPCMNI;
 int ipc_mni_shift = IPCMNI_SHIFT;
-int ipc_min_cycle = RADIX_TREE_MAP_SIZE;
+int ipc_min_cycle = XA_CHUNK_SIZE;
 
 static struct ctl_table ipc_kern_table[] = {
        {
@@ -196,8 +196,8 @@ static struct ctl_table ipc_kern_table[] = {
 #ifdef CONFIG_CHECKPOINT_RESTORE
        {
                .procname       = "sem_next_id",
-               .data           = &init_ipc_ns.ids[IPC_SEM_IDS].next_id,
-               .maxlen         = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id),
+               .data           = &init_ipc_ns.ids[IPC_SEM_IDS].restore_id,
+               .maxlen         = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].restore_id),
                .mode           = 0644,
                .proc_handler   = proc_ipc_dointvec_minmax,
                .extra1         = SYSCTL_ZERO,
@@ -205,8 +205,8 @@ static struct ctl_table ipc_kern_table[] = {
        },
        {
                .procname       = "msg_next_id",
-               .data           = &init_ipc_ns.ids[IPC_MSG_IDS].next_id,
-               .maxlen         = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id),
+               .data           = &init_ipc_ns.ids[IPC_MSG_IDS].restore_id,
+               .maxlen         = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].restore_id),
                .mode           = 0644,
                .proc_handler   = proc_ipc_dointvec_minmax,
                .extra1         = SYSCTL_ZERO,
@@ -214,8 +214,8 @@ static struct ctl_table ipc_kern_table[] = {
        },
        {
                .procname       = "shm_next_id",
-               .data           = &init_ipc_ns.ids[IPC_SHM_IDS].next_id,
-               .maxlen         = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id),
+               .data           = &init_ipc_ns.ids[IPC_SHM_IDS].restore_id,
+               .maxlen         = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].restore_id),
                .mode           = 0644,
                .proc_handler   = proc_ipc_dointvec_minmax,
                .extra1         = SYSCTL_ZERO,
index 8dec945fa030b099b5becf08b4350b23852203d2..d9800af933edab6e861484e598e7f49377d9ad3f 100644 (file)
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -1278,7 +1278,6 @@ void msg_init_ns(struct ipc_namespace *ns)
 void msg_exit_ns(struct ipc_namespace *ns)
 {
        free_ipcs(ns, &msg_ids(ns), freeque);
-       idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
        rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
 }
 #endif
index b3ca1476ca5118793c096a0244b062fbff7255b0..e8567c3d32e94d2d4cc5cb5b0546621826ead226 100644 (file)
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -96,22 +96,17 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
               void (*free)(struct ipc_namespace *, struct kern_ipc_perm *))
 {
        struct kern_ipc_perm *perm;
-       int next_id;
-       int total, in_use;
+       unsigned long index;
 
        down_write(&ids->rwsem);
 
-       in_use = ids->in_use;
-
-       for (total = 0, next_id = 0; total < in_use; next_id++) {
-               perm = idr_find(&ids->ipcs_idr, next_id);
-               if (perm == NULL)
-                       continue;
+       xa_for_each(&ids->ipcs, index, perm) {
                rcu_read_lock();
                ipc_lock_object(perm);
                free(ns, perm);
-               total++;
        }
+       BUG_ON(!xa_empty(&ids->ipcs));
+
        up_write(&ids->rwsem);
 }
 
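A note on the BUG_ON() above and the idr_destroy() calls deleted from
msg.c, sem.c and shm.c: every free() callback ends in ipc_rmid(), which
now does xa_erase(), and an XArray frees its internal nodes as entries
are erased.  Once the loop finishes, xa_empty() is true and no memory
is left to tear down, so nothing replaces idr_destroy().  A minimal
standalone illustration (hypothetical 'objects' array):

	#include <linux/slab.h>
	#include <linux/xarray.h>

	static DEFINE_XARRAY(objects);

	static void drop_all_objects(void)
	{
		unsigned long index;
		void *obj;

		/* Erasing during iteration is fine; the iterator
		 * resumes from index + 1. */
		xa_for_each(&objects, index, obj) {
			xa_erase(&objects, index);
			kfree(obj);
		}
		/* Erasing the last entry already freed the XArray's
		 * internal nodes. */
		WARN_ON(!xa_empty(&objects));
	}
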
index 7da4504bcc7cfd8c57b897e66608fa3890427fc3..861895966dbd71f848334cac8638ae67356fc95c 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -235,7 +235,6 @@ void sem_init_ns(struct ipc_namespace *ns)
 void sem_exit_ns(struct ipc_namespace *ns)
 {
        free_ipcs(ns, &sem_ids(ns), freeary);
-       idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
        rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht);
 }
 #endif
index ce1ca9f7c6e97ca5035963d2d64e3c57fefe7f6c..49a8e088e0dee83544c2fef7c2b4cd306361e918 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -129,7 +129,6 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 void shm_exit_ns(struct ipc_namespace *ns)
 {
        free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
-       idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
        rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht);
 }
 #endif
@@ -348,34 +347,30 @@ done:
        up_write(&shm_ids(ns).rwsem);
 }
 
-/* Called with ns->shm_ids(ns).rwsem locked */
-static int shm_try_destroy_orphaned(int id, void *p, void *data)
+void shm_destroy_orphaned(struct ipc_namespace *ns)
 {
-       struct ipc_namespace *ns = data;
-       struct kern_ipc_perm *ipcp = p;
-       struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+       struct kern_ipc_perm *ipcp;
+       unsigned long index;
 
-       /*
-        * We want to destroy segments without users and with already
-        * exit'ed originating process.
-        *
-        * As shp->* are changed under rwsem, it's safe to skip shp locking.
-        */
-       if (shp->shm_creator != NULL)
-               return 0;
+       down_write(&shm_ids(ns).rwsem);
+       xa_for_each(&shm_ids(ns).ipcs, index, ipcp) {
+               struct shmid_kernel *shp;
 
-       if (shm_may_destroy(ns, shp)) {
-               shm_lock_by_ptr(shp);
-               shm_destroy(ns, shp);
-       }
-       return 0;
-}
+               shp = container_of(ipcp, struct shmid_kernel, shm_perm);
 
-void shm_destroy_orphaned(struct ipc_namespace *ns)
-{
-       down_write(&shm_ids(ns).rwsem);
-       if (shm_ids(ns).in_use)
-               idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
+               /*
+                * We want to destroy segments without users and with already
+                * exit'ed originating process.  As shp->* are changed under
+                * rwsem, it's safe to skip shp locking.
+                */
+               if (shp->shm_creator != NULL)
+                       continue;
+
+               if (shm_may_destroy(ns, shp)) {
+                       shm_lock_by_ptr(shp);
+                       shm_destroy(ns, shp);
+               }
+       }
        up_write(&shm_ids(ns).rwsem);
 }
 
@@ -860,26 +855,17 @@ static void shm_add_rss_swap(struct shmid_kernel *shp,
 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
                unsigned long *swp)
 {
-       int next_id;
-       int total, in_use;
+       struct kern_ipc_perm *ipc;
+       unsigned long index;
 
        *rss = 0;
        *swp = 0;
 
-       in_use = shm_ids(ns).in_use;
-
-       for (total = 0, next_id = 0; total < in_use; next_id++) {
-               struct kern_ipc_perm *ipc;
+       xa_for_each(&shm_ids(ns).ipcs, index, ipc) {
                struct shmid_kernel *shp;
 
-               ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
-               if (ipc == NULL)
-                       continue;
                shp = container_of(ipc, struct shmid_kernel, shm_perm);
-
                shm_add_rss_swap(shp, rss, swp);
-
-               total++;
        }
 }
 
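The shm_try_destroy_orphaned() removal above retires a second iteration
idiom: idr_for_each() requires a callback plus a void *data cookie,
while xa_for_each() is an ordinary loop, so the body folds inline into
shm_destroy_orphaned().  Schematically (hypothetical visit logic):

	#include <linux/idr.h>
	#include <linux/ipc_namespace.h>
	#include <linux/xarray.h>

	/* Old shape: logic in a callback, context smuggled via 'data'. */
	static int visit_one(int id, void *p, void *data)
	{
		struct ipc_namespace *ns = data;

		/* ... decide whether to destroy 'p' using 'ns' ... */
		return 0;	/* non-zero aborts the walk */
	}

	static void scan_idr(struct idr *idr, struct ipc_namespace *ns)
	{
		idr_for_each(idr, visit_one, ns);
	}

	/* New shape: a plain loop; locals, continue and break all work. */
	static void scan_xa(struct xarray *xa, struct ipc_namespace *ns)
	{
		unsigned long index;
		void *p;

		xa_for_each(xa, index, p) {
			/* ... decide whether to destroy 'p' using 'ns' ... */
		}
	}
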
index d126d156efc64e7d2d710197cf50377c12ad620d..840640368a094e75227f68d5d5109352f7987b52 100644 (file)
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -109,7 +109,7 @@ static const struct rhashtable_params ipc_kht_params = {
  * @ids: ipc identifier set
  *
  * Set up the sequence range to use for the ipc identifier range (limited
- * below ipc_mni) then initialise the keys hashtable and ids idr.
+ * below ipc_mni) then initialise the keys hashtable and ids xarray.
  */
 void ipc_init_ids(struct ipc_ids *ids)
 {
@@ -117,11 +117,11 @@ void ipc_init_ids(struct ipc_ids *ids)
        ids->seq = 0;
        init_rwsem(&ids->rwsem);
        rhashtable_init(&ids->key_ht, &ipc_kht_params);
-       idr_init(&ids->ipcs_idr);
+       xa_init_flags(&ids->ipcs, XA_FLAGS_ALLOC);
        ids->max_idx = -1;
-       ids->last_idx = -1;
+       ids->next_idx = 0;
 #ifdef CONFIG_CHECKPOINT_RESTORE
-       ids->next_id = -1;
+       ids->restore_id = -1;
 #endif
 }
 
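XA_FLAGS_ALLOC makes the XArray track vacant indices with XA_FREE_MARK;
that mark is what xa_alloc() searches, and it is also what lets
ipc_id_alloc() below find a slot with xas_find_marked().  ipc cannot
use the plain helper because new->seq and new->id must be correct
before the store publishes the object, but for reference, allocation
against such an array ordinarily looks like this (hypothetical
'registry'):

	#include <linux/xarray.h>

	/* Equivalent to xa_init_flags(&registry, XA_FLAGS_ALLOC). */
	static DEFINE_XARRAY_ALLOC(registry);

	static int register_obj(void *obj)
	{
		u32 id;
		int err;

		/* Find a free index in [0, 63] and store obj there. */
		err = xa_alloc(&registry, &id, obj, XA_LIMIT(0, 63),
				GFP_KERNEL);
		if (err < 0)
			return err;
		return id;
	}
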
@@ -183,12 +183,12 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
 }
 
 /*
- * Insert new IPC object into idr tree, and set sequence number and id
+ * Insert new IPC object into xarray, and set sequence number and id
  * in the correct order.
  * Especially:
- * - the sequence number must be set before inserting the object into the idr,
- *   because the sequence number is accessed without a lock.
- * - the id can/must be set after inserting the object into the idr.
+ * - the sequence number must be set before inserting the object into the
+ *   xarray, because the sequence number is accessed without a lock.
+ * - the id can/must be set after inserting the object into the xarray.
  *   All accesses must be done after getting kern_ipc_perm.lock.
  *
 * The caller must own kern_ipc_perm.lock of the new object.
@@ -198,64 +198,60 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
  * the sequence number is incremented only when the returned ID is less than
  * the last one.
  */
-static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
+static inline int ipc_id_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
 {
-       int idx, next_id = -1;
+       u32 idx;
+       int err;
 
-#ifdef CONFIG_CHECKPOINT_RESTORE
-       next_id = ids->next_id;
-       ids->next_id = -1;
-#endif
+       if (ids->restore_id < 0) {
+               XA_STATE(xas, &ids->ipcs, 0);
+               int min_idx, max_idx;
 
-       /*
-        * As soon as a new object is inserted into the idr,
-        * ipc_obtain_object_idr() or ipc_obtain_object_check() can find it,
-        * and the lockless preparations for ipc operations can start.
-        * This means especially: permission checks, audit calls, allocation
-        * of undo structures, ...
-        *
-        * Thus the object must be fully initialized, and if something fails,
-        * then the full tear-down sequence must be followed.
-        * (i.e.: set new->deleted, reduce refcount, call_rcu())
-        */
+               max_idx = max(ids->in_use*3/2, ipc_min_cycle);
+               max_idx = min(max_idx, ipc_mni) - 1;
 
-       if (next_id < 0) { /* !CHECKPOINT_RESTORE or next_id is unset */
-               int max_idx;
+               xas_lock(&xas);
 
-               max_idx = max(ids->in_use*3/2, ipc_min_cycle);
-               max_idx = min(max_idx, ipc_mni);
-
-               /* allocate the idx, with a NULL struct kern_ipc_perm */
-               idx = idr_alloc_cyclic(&ids->ipcs_idr, NULL, 0, max_idx,
-                                       GFP_NOWAIT);
-
-               if (idx >= 0) {
-                       /*
-                        * idx got allocated successfully.
-                        * Now calculate the sequence number and set the
-                        * pointer for real.
-                        */
-                       if (idx <= ids->last_idx) {
+               min_idx = ids->next_idx;
+               new->seq = ids->seq;
+
+               /* Modified version of __xa_alloc */
+               do {
+                       xas.xa_index = min_idx;
+                       xas_find_marked(&xas, max_idx, XA_FREE_MARK);
+                       if (xas.xa_node == XAS_RESTART && min_idx > 0) {
                                ids->seq++;
                                if (ids->seq >= ipcid_seq_max())
                                        ids->seq = 0;
+                               new->seq = ids->seq;
+                               xas.xa_index = 0;
+                               min_idx = 0;
+                               xas_find_marked(&xas, max_idx, XA_FREE_MARK);
                        }
-                       ids->last_idx = idx;
-
-                       new->seq = ids->seq;
-                       /* no need for smp_wmb(), this is done
-                        * inside idr_replace, as part of
-                        * rcu_assign_pointer
-                        */
-                       idr_replace(&ids->ipcs_idr, new, idx);
-               }
+                       if (xas.xa_node == XAS_RESTART)
+                               xas_set_err(&xas, -ENOSPC);
+                       else
+                               new->id = (new->seq << ipcmni_seq_shift()) +
+                                       xas.xa_index;
+                       xas_store(&xas, new);
+                       xas_clear_mark(&xas, XA_FREE_MARK);
+               } while (__xas_nomem(&xas, GFP_KERNEL));
+
+               xas_unlock(&xas);
+               err = xas_error(&xas);
+               idx = xas.xa_index;
        } else {
-               new->seq = ipcid_to_seqx(next_id);
-               idx = idr_alloc(&ids->ipcs_idr, new, ipcid_to_idx(next_id),
-                               0, GFP_NOWAIT);
+               new->id = ids->restore_id;
+               new->seq = ipcid_to_seqx(new->id);
+               idx = ipcid_to_idx(new->id);
+               err = xa_insert(&ids->ipcs, idx, new, GFP_KERNEL);
+               if (err == -EBUSY)
+                       err = -ENOSPC;
+               ids->restore_id = -1;
        }
-       if (idx >= 0)
-               new->id = (new->seq << ipcmni_seq_shift()) + idx;
+
+       if (err < 0)
+               return err;
        return idx;
 }
 
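The loop above is the "variant of __xa_alloc()" from the commit
message.  A condensed restatement with the ordering called out (not the
patch text; the locking and error flow are unchanged):

	xas_lock(&xas);
	min_idx = ids->next_idx;
	new->seq = ids->seq;
	do {
		/* Search for a free slot, cyclically from next_idx. */
		xas.xa_index = min_idx;
		xas_find_marked(&xas, max_idx, XA_FREE_MARK);
		if (xas.xa_node == XAS_RESTART && min_idx > 0) {
			/* Nothing free at or above min_idx: wrap to 0
			 * and bump 'seq' so a recycled index still
			 * yields a fresh ipc id. */
			if (++ids->seq >= ipcid_seq_max())
				ids->seq = 0;
			new->seq = ids->seq;
			xas.xa_index = 0;
			min_idx = 0;
			xas_find_marked(&xas, max_idx, XA_FREE_MARK);
		}
		if (xas.xa_node == XAS_RESTART)
			xas_set_err(&xas, -ENOSPC);	/* truly full */
		else
			/* seq and id are both set *before* xas_store()
			 * publishes the pointer, so lockless readers
			 * such as ipc_obtain_object_idr() can use the
			 * object the moment they can see it. */
			new->id = (new->seq << ipcmni_seq_shift()) +
					xas.xa_index;
		/* Both calls are no-ops if xas is in an error state. */
		xas_store(&xas, new);
		xas_clear_mark(&xas, XA_FREE_MARK);
	} while (__xas_nomem(&xas, GFP_KERNEL));
	xas_unlock(&xas);
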
@@ -278,7 +274,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
 {
        kuid_t euid;
        kgid_t egid;
-       int idx, err;
+       int idx;
 
        /* 1) Initialize the refcount so that ipc_rcu_putref works */
        refcount_set(&new->refcount, 1);
@@ -289,29 +285,42 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
        if (ids->in_use >= limit)
                return -ENOSPC;
 
-       idr_preload(GFP_KERNEL);
-
+       /*
+        * 2) Hold the spinlock so that nobody else can access the object
+        * once they can find it
+        */
        spin_lock_init(&new->lock);
-       rcu_read_lock();
        spin_lock(&new->lock);
-
        current_euid_egid(&euid, &egid);
        new->cuid = new->uid = euid;
        new->gid = new->cgid = egid;
-
        new->deleted = false;
 
-       idx = ipc_idr_alloc(ids, new);
-       idr_preload_end();
+       idx = ipc_id_alloc(ids, new);
+
+       rcu_read_lock();
+
+       /*
+        * As soon as a new object is inserted into the XArray,
+        * ipc_obtain_object_idr() or ipc_obtain_object_check() can find it,
+        * and the lockless preparations for ipc operations can start.
+        * This means especially: permission checks, audit calls, allocation
+        * of undo structures, ...
+        *
+        * Thus the object must be fully initialized, and if something fails,
+        * then the full tear-down sequence must be followed.
+        * (i.e.: set new->deleted, reduce refcount, call_rcu())
+        */
 
        if (idx >= 0 && new->key != IPC_PRIVATE) {
-               err = rhashtable_insert_fast(&ids->key_ht, &new->khtnode,
+               int err = rhashtable_insert_fast(&ids->key_ht, &new->khtnode,
                                             ipc_kht_params);
                if (err < 0) {
-                       idr_remove(&ids->ipcs_idr, idx);
+                       xa_erase(&ids->ipcs, idx);
                        idx = err;
                }
        }
+
        if (idx < 0) {
                new->deleted = true;
                spin_unlock(&new->lock);
@@ -462,7 +471,7 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
 {
        int idx = ipcid_to_idx(ipcp->id);
 
-       idr_remove(&ids->ipcs_idr, idx);
+       xa_erase(&ids->ipcs, idx);
        ipc_kht_remove(ids, ipcp);
        ids->in_use--;
        ipcp->deleted = true;
@@ -472,7 +481,7 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
                        idx--;
                        if (idx == -1)
                                break;
-               } while (!idr_find(&ids->ipcs_idr, idx));
+               } while (!xa_load(&ids->ipcs, idx));
                ids->max_idx = idx;
        }
 }
@@ -595,7 +604,7 @@ struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id)
        struct kern_ipc_perm *out;
        int idx = ipcid_to_idx(id);
 
-       out = idr_find(&ids->ipcs_idr, idx);
+       out = xa_load(&ids->ipcs, idx);
        if (!out)
                return ERR_PTR(-EINVAL);
 
@@ -754,31 +763,19 @@ struct pid_namespace *ipc_seq_pid_ns(struct seq_file *s)
 static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
                                              loff_t *new_pos)
 {
+       unsigned long index = pos;
        struct kern_ipc_perm *ipc;
-       int total, id;
 
-       total = 0;
-       for (id = 0; id < pos && total < ids->in_use; id++) {
-               ipc = idr_find(&ids->ipcs_idr, id);
-               if (ipc != NULL)
-                       total++;
-       }
-
-       if (total >= ids->in_use)
+       rcu_read_lock();
+       ipc = xa_find(&ids->ipcs, &index, ULONG_MAX, XA_PRESENT);
+       if (!ipc) {
+               rcu_read_unlock();
                return NULL;
-
-       for (; pos < ipc_mni; pos++) {
-               ipc = idr_find(&ids->ipcs_idr, pos);
-               if (ipc != NULL) {
-                       *new_pos = pos + 1;
-                       rcu_read_lock();
-                       ipc_lock_object(ipc);
-                       return ipc;
-               }
        }
 
-       /* Out of range - return NULL to terminate iteration */
-       return NULL;
+       *new_pos = index + 1;
+       ipc_lock_object(ipc);
+       return ipc;
 }
 
 static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
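
One more iteration note: the old sysvipc_find_ipc() needed two passes
(count present entries up to pos, then probe every index up to ipc_mni)
because an IDR position reveals nothing about holes.  xa_find() returns
the first entry at or after the given index in a single call, so
resuming at an arbitrary seq_file position reduces to this pattern
(sketch, hypothetical helper):

	#include <linux/xarray.h>

	static void *find_from(struct xarray *xa, loff_t pos,
				loff_t *new_pos)
	{
		unsigned long index = pos;
		void *entry;

		/* 'pos' may point at a hole; xa_find() advances
		 * 'index' to the next present entry, if any. */
		entry = xa_find(xa, &index, ULONG_MAX, XA_PRESENT);
		if (entry)
			*new_pos = index + 1;
		return entry;
	}
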
index 0fcf8e719b7647fe7e59141c153b2485c95c4399..5320e0e2dbf6171f70e39b46dd74d804a3f7178b 100644 (file)
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -27,7 +27,7 @@
  */
 #define IPCMNI_SHIFT           15
 #define IPCMNI_EXTEND_SHIFT    24
-#define IPCMNI_EXTEND_MIN_CYCLE        (RADIX_TREE_MAP_SIZE * RADIX_TREE_MAP_SIZE)
+#define IPCMNI_EXTEND_MIN_CYCLE        (XA_CHUNK_SIZE * XA_CHUNK_SIZE)
 #define IPCMNI                 (1 << IPCMNI_SHIFT)
 #define IPCMNI_EXTEND          (1 << IPCMNI_EXTEND_SHIFT)
 
@@ -42,7 +42,7 @@ extern int ipc_min_cycle;
 #else /* CONFIG_SYSVIPC_SYSCTL */
 
 #define ipc_mni                        IPCMNI
-#define ipc_min_cycle          ((int)RADIX_TREE_MAP_SIZE)
+#define ipc_min_cycle          ((int)XA_CHUNK_SIZE)
 #define ipcmni_seq_shift()     IPCMNI_SHIFT
 #define IPCMNI_IDX_MASK                ((1 << IPCMNI_SHIFT) - 1)
 #endif /* CONFIG_SYSVIPC_SYSCTL */
index 446b956c918888d06dfca776d00aed8f3c2e54de..742d3663b579528da808dc427277474b890cd81f 100644 (file)
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -318,7 +318,7 @@ EXPORT_SYMBOL_GPL(xas_nomem);
  *
  * Return: true if memory was needed, and was successfully allocated.
  */
-static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
+bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
        __must_hold(xas->xa->xa_lock)
 {
        unsigned int lock_type = xa_lock_type(xas->xa);
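
The un-static'ing above is what ipc_id_alloc() relies on.  xas_nomem()
expects to be called after the lock has been dropped; __xas_nomem() is
for loops that run entirely under xa_lock: on allocation failure it
drops the lock itself, allocates the needed node with the caller's gfp
flags, retakes the lock and returns true so the operation can retry.
The calling convention, as a sketch (hypothetical helper):

	#include <linux/xarray.h>

	static int store_locked(struct xarray *xa, unsigned long index,
				void *entry)
	{
		XA_STATE(xas, xa, index);

		xas_lock(&xas);
		do {
			xas_store(&xas, entry);
		} while (__xas_nomem(&xas, GFP_KERNEL));
		xas_unlock(&xas);

		return xas_error(&xas);
	}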