aio: Convert ioctx_table to XArray

author Matthew Wilcox <willy@infradead.org>

Wed, 14 Nov 2018 17:12:50 +0000 (12:12 -0500)

committer Matthew Wilcox (Oracle) <willy@infradead.org>

Thu, 8 Aug 2019 18:01:04 +0000 (14:01 -0400)
author Matthew Wilcox <willy@infradead.org>
Wed, 14 Nov 2018 17:12:50 +0000 (12:12 -0500)
committer Matthew Wilcox (Oracle) <willy@infradead.org>
Thu, 8 Aug 2019 18:01:04 +0000 (14:01 -0400)
diff --git a/fs/aio.c b/fs/aio.c

index 01e0fb9ae45aed3b1e10b3bfe25b878f393e0ab6..6dcef0f5f3527eb3ae587ee39087b1b1b12e575e 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -46,7 +46,6 @@
  
  #include <asm/kmap_types.h>
  #include <linux/uaccess.h>
-#include <linux/nospec.h>
  
  #include "internal.h"
  
@@ -79,12 +78,6 @@ struct aio_ring {
  
  #define AIO_RING_PAGES 8
  
-struct kioctx_table {
-       struct rcu_head         rcu;
-       unsigned                nr;
-       struct kioctx __rcu     *table[];
-};
-
  struct kioctx_cpu {
         unsigned                reqs_available;
  };
@@ -328,27 +321,22 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
  {
         struct file *file = vma->vm_file;
         struct mm_struct *mm = vma->vm_mm;
-       struct kioctx_table *table;
-       int i, res = -EINVAL;
+       struct kioctx *ctx;
+       unsigned long index;
+       int res = -EINVAL;
  
-       spin_lock(&mm->ioctx_lock);
-       rcu_read_lock();
-       table = rcu_dereference(mm->ioctx_table);
-       for (i = 0; i < table->nr; i++) {
-               struct kioctx *ctx;
-
-               ctx = rcu_dereference(table->table[i]);
-               if (ctx && ctx->aio_ring_file == file) {
-                       if (!atomic_read(&ctx->dead)) {
-                               ctx->user_id = ctx->mmap_base = vma->vm_start;
-                               res = 0;
-                       }
-                       break;
+       xa_lock(&mm->ioctx);
+       xa_for_each(&mm->ioctx, index, ctx) {
+               if (ctx->aio_ring_file != file)
+                       continue;
+               if (!atomic_read(&ctx->dead)) {
+                       ctx->user_id = ctx->mmap_base = vma->vm_start;
+                       res = 0;
                 }
+               break;
         }
+       xa_unlock(&mm->ioctx);
  
-       rcu_read_unlock();
-       spin_unlock(&mm->ioctx_lock);
         return res;
  }
  
@@ -632,57 +620,21 @@ static void free_ioctx_users(struct percpu_ref *ref)
  
  static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
  {
-       unsigned i, new_nr;
-       struct kioctx_table *table, *old;
         struct aio_ring *ring;
+       int err;
  
-       spin_lock(&mm->ioctx_lock);
-       table = rcu_dereference_raw(mm->ioctx_table);
-
-       while (1) {
-               if (table)
-                       for (i = 0; i < table->nr; i++)
-                               if (!rcu_access_pointer(table->table[i])) {
-                                       ctx->id = i;
-                                       rcu_assign_pointer(table->table[i], ctx);
-                                       spin_unlock(&mm->ioctx_lock);
-
-                                       /* While kioctx setup is in progress,
-                                        * we are protected from page migration
-                                        * changes ring_pages by ->ring_lock.
-                                        */
-                                       ring = kmap_atomic(ctx->ring_pages[0]);
-                                       ring->id = ctx->id;
-                                       kunmap_atomic(ring);
-                                       return 0;
-                               }
-
-               new_nr = (table ? table->nr : 1) * 4;
-               spin_unlock(&mm->ioctx_lock);
-
-               table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
-                               new_nr, GFP_KERNEL);
-               if (!table)
-                       return -ENOMEM;
-
-               table->nr = new_nr;
-
-               spin_lock(&mm->ioctx_lock);
-               old = rcu_dereference_raw(mm->ioctx_table);
-
-               if (!old) {
-                       rcu_assign_pointer(mm->ioctx_table, table);
-               } else if (table->nr > old->nr) {
-                       memcpy(table->table, old->table,
-                              old->nr * sizeof(struct kioctx *));
+       err = xa_alloc(&mm->ioctx, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL);
+       if (err)
+               return err;
  
-                       rcu_assign_pointer(mm->ioctx_table, table);
-                       kfree_rcu(old, rcu);
-               } else {
-                       kfree(table);
-                       table = old;
-               }
-       }
+       /*
+        * While kioctx setup is in progress, we are protected from
+        * page migration changing ring_pages by ->ring_lock.
+        */
+       ring = kmap_atomic(ctx->ring_pages[0]);
+       ring->id = ctx->id;
+       kunmap_atomic(ring);
+       return 0;
  }
  
  static void aio_nr_sub(unsigned nr)
@@ -808,27 +760,8 @@ err:
         return ERR_PTR(err);
  }
  
-/* kill_ioctx
- *     Cancels all outstanding aio requests on an aio context.  Used
- *     when the processes owning a context have all exited to encourage
- *     the rapid destruction of the kioctx.
- */
-static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
-                     struct ctx_rq_wait *wait)
+static void __kill_ioctx(struct kioctx *ctx, struct ctx_rq_wait *wait)
  {
-       struct kioctx_table *table;
-
-       spin_lock(&mm->ioctx_lock);
-       if (atomic_xchg(&ctx->dead, 1)) {
-               spin_unlock(&mm->ioctx_lock);
-               return -EINVAL;
-       }
-
-       table = rcu_dereference_raw(mm->ioctx_table);
-       WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
-       RCU_INIT_POINTER(table->table[ctx->id], NULL);
-       spin_unlock(&mm->ioctx_lock);
-
         /* free_ioctx_reqs() will do the necessary RCU synchronization */
         wake_up_all(&ctx->wait);
  
@@ -846,6 +779,30 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
  
         ctx->rq_wait = wait;
         percpu_ref_kill(&ctx->users);
+}
+
+/* kill_ioctx
+ *     Cancels all outstanding aio requests on an aio context.  Used
+ *     when the processes owning a context have all exited to encourage
+ *     the rapid destruction of the kioctx.
+ */
+static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
+                     struct ctx_rq_wait *wait)
+{
+       struct kioctx *old;
+
+       /* I don't understand what this lock is protecting against */
+       xa_lock(&mm->ioctx);
+       if (atomic_xchg(&ctx->dead, 1)) {
+               xa_unlock(&mm->ioctx);
+               return -EINVAL;
+       }
+
+       old = __xa_erase(&mm->ioctx, ctx->id);
+       WARN_ON(old != ctx);
+       xa_unlock(&mm->ioctx);
+
+       __kill_ioctx(ctx, wait);
         return 0;
  }
  
@@ -859,26 +816,21 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
   */
  void exit_aio(struct mm_struct *mm)
  {
-       struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
+       struct kioctx *ctx;
         struct ctx_rq_wait wait;
-       int i, skipped;
+       unsigned long index;
  
-       if (!table)
+       if (xa_empty(&mm->ioctx))
                 return;
  
-       atomic_set(&wait.count, table->nr);
+       /*
+        * Prevent count from getting to 0 until we're ready for it
+        */
+       atomic_set(&wait.count, 1);
         init_completion(&wait.comp);
  
-       skipped = 0;
-       for (i = 0; i < table->nr; ++i) {
-               struct kioctx *ctx =
-                       rcu_dereference_protected(table->table[i], true);
-
-               if (!ctx) {
-                       skipped++;
-                       continue;
-               }
-
+       xa_lock(&mm->ioctx);
+       xa_for_each(&mm->ioctx, index, ctx) {
                 /*
                  * We don't need to bother with munmap() here - exit_mmap(mm)
                  * is coming and it'll unmap everything. And we simply can't,
@@ -887,16 +839,16 @@ void exit_aio(struct mm_struct *mm)
                  * that it needs to unmap the area, just set it to 0.
                  */
                 ctx->mmap_size = 0;
-               kill_ioctx(mm, ctx, &wait);
+               atomic_inc(&wait.count);
+               __xa_erase(&mm->ioctx, ctx->id);
+               __kill_ioctx(ctx, &wait);
         }
+       xa_unlock(&mm->ioctx);
  
-       if (!atomic_sub_and_test(skipped, &wait.count)) {
+       if (!atomic_sub_and_test(1, &wait.count)) {
                 /* Wait until all IO for the context are done. */
                 wait_for_completion(&wait.comp);
         }
-
-       RCU_INIT_POINTER(mm->ioctx_table, NULL);
-       kfree(table);
  }
  
  static void put_reqs_available(struct kioctx *ctx, unsigned nr)
@@ -1049,25 +1001,17 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
         struct aio_ring __user *ring  = (void __user *)ctx_id;
         struct mm_struct *mm = current->mm;
         struct kioctx *ctx, *ret = NULL;
-       struct kioctx_table *table;
         unsigned id;
  
         if (get_user(id, &ring->id))
                 return NULL;
  
         rcu_read_lock();
-       table = rcu_dereference(mm->ioctx_table);
-
-       if (!table || id >= table->nr)
-               goto out;
-
-       id = array_index_nospec(id, table->nr);
-       ctx = rcu_dereference(table->table[id]);
+       ctx = xa_load(&mm->ioctx, id);
         if (ctx && ctx->user_id == ctx_id) {
                 if (percpu_ref_tryget_live(&ctx->users))
                         ret = ctx;
         }
-out:
         rcu_read_unlock();
         return ret;
  }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h

index 3a37a89eb7a7c363c4ac795b8ed782e657e3c6fe..cca357fc845f112dd49a566157b4f585e3ac0f3b 100644 (file)
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -14,6 +14,7 @@
  #include <linux/uprobes.h>
  #include <linux/page-flags-layout.h>
  #include <linux/workqueue.h>
+#include <linux/xarray.h>
  
  #include <asm/mmu.h>
  
@@ -352,7 +353,6 @@ struct core_state {
         struct completion startup;
  };
  
-struct kioctx_table;
  struct mm_struct {
         struct {
                 struct vm_area_struct *mmap;            /* list of VMAs */
@@ -447,8 +447,7 @@ struct mm_struct {
                 atomic_t membarrier_state;
  #endif
  #ifdef CONFIG_AIO
-               spinlock_t                      ioctx_lock;
-               struct kioctx_table __rcu       *ioctx_table;
+               struct xarray ioctx;
  #endif
  #ifdef CONFIG_MEMCG
                 /*
diff --git a/kernel/fork.c b/kernel/fork.c

index 2852d0e76ea3b905693a454f36e21524c14a8b54..1e6d2642d830657ec8d64f0749f7045acc46f24e 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -956,8 +956,7 @@ __setup("coredump_filter=", coredump_filter_setup);
  static void mm_init_aio(struct mm_struct *mm)
  {
  #ifdef CONFIG_AIO
-       spin_lock_init(&mm->ioctx_lock);
-       mm->ioctx_table = NULL;
+       xa_init_flags(&mm->ioctx, XA_FLAGS_ALLOC);
  #endif
  }
  
diff --git a/mm/debug.c b/mm/debug.c

index 8345bb6e47699d52fe1c19976b46312fecd25c1f..4e1a53de5732ff556ef555e662b26b91f17d6e8b 100644 (file)
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -142,9 +142,6 @@ void dump_mm(const struct mm_struct *mm)
                 "start_brk %lx brk %lx start_stack %lx\n"
                 "arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
                 "binfmt %px flags %lx core_state %px\n"
-#ifdef CONFIG_AIO
-               "ioctx_table %px\n"
-#endif
  #ifdef CONFIG_MEMCG
                 "owner %px "
  #endif
@@ -174,9 +171,6 @@ void dump_mm(const struct mm_struct *mm)
                 mm->start_brk, mm->brk, mm->start_stack,
                 mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,
                 mm->binfmt, mm->flags, mm->core_state,
-#ifdef CONFIG_AIO
-               mm->ioctx_table,
-#endif
  #ifdef CONFIG_MEMCG
                 mm->owner,
  #endif
author	Matthew Wilcox <willy@infradead.org>
	Wed, 14 Nov 2018 17:12:50 +0000 (12:12 -0500)
committer	Matthew Wilcox (Oracle) <willy@infradead.org>
	Thu, 8 Aug 2019 18:01:04 +0000 (14:01 -0400)
fs/aio.c		patch \| blob \| history
include/linux/mm_types.h		patch \| blob \| history
kernel/fork.c		patch \| blob \| history
mm/debug.c		patch \| blob \| history