Android reported a performance regression in the userfaultfd unmap path.
Closer inspection of the userfaultfd_unmap_prep() change showed that the
reworked code requires a second tree walk.
Fix the regression by passing each VMA that will be unmapped through to
the userfaultfd_unmap_prep() function as it is added to the unmap list,
instead of re-walking the tree to find the VMAs again.
Link: https://lkml.kernel.org/r/20230601015402.2819343-1-Liam.Howlett@oracle.com
Fixes: 69dbe6daf104 ("userfaultfd: use maple tree iterator to iterate VMAs")
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reported-by: Suren Baghdasaryan <surenb@google.com>
Suggested-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
        return false;
 }
 
-int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start,
+int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start,
                           unsigned long end, struct list_head *unmaps)
 {
-       VMA_ITERATOR(vmi, mm, start);
-       struct vm_area_struct *vma;
-
-       for_each_vma_range(vmi, vma, end) {
-               struct userfaultfd_unmap_ctx *unmap_ctx;
-               struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
+       struct userfaultfd_unmap_ctx *unmap_ctx;
+       struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
 
-               if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
-                   has_unmap_ctx(ctx, unmaps, start, end))
-                       continue;
+       if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
+           has_unmap_ctx(ctx, unmaps, start, end))
+               return 0;
 
-               unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
-               if (!unmap_ctx)
-                       return -ENOMEM;
+       unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
+       if (!unmap_ctx)
+               return -ENOMEM;
 
-               userfaultfd_ctx_get(ctx);
-               atomic_inc(&ctx->mmap_changing);
-               unmap_ctx->ctx = ctx;
-               unmap_ctx->start = start;
-               unmap_ctx->end = end;
-               list_add_tail(&unmap_ctx->list, unmaps);
-       }
+       userfaultfd_ctx_get(ctx);
+       atomic_inc(&ctx->mmap_changing);
+       unmap_ctx->ctx = ctx;
+       unmap_ctx->start = start;
+       unmap_ctx->end = end;
+       list_add_tail(&unmap_ctx->list, unmaps);
 
        return 0;
 }
 
                               unsigned long start,
                               unsigned long end);
 
-extern int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start,
-                                 unsigned long end, struct list_head *uf);
+extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
+               unsigned long start, unsigned long end, struct list_head *uf);
 extern void userfaultfd_unmap_complete(struct mm_struct *mm,
                                       struct list_head *uf);
 extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma);
        return true;
 }
 
-static inline int userfaultfd_unmap_prep(struct mm_struct *mm,
+static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
                                         unsigned long start, unsigned long end,
                                         struct list_head *uf)
 {
 
                        goto munmap_sidetree_failed;
 
                count++;
+               if (unlikely(uf)) {
+                       /*
+                        * If userfaultfd_unmap_prep returns an error the vmas
+                        * will remain split, but userland will get a
+                        * highly unexpected error anyway. This is no
+                        * different than the case where the first of the two
+                        * __split_vma fails, but we don't undo the first
+                        * split, despite we could. This is unlikely enough
+                        * failure that it's not worth optimizing it for.
+                        */
+                       error = userfaultfd_unmap_prep(next, start, end, uf);
+
+                       if (error)
+                               goto userfaultfd_error;
+               }
 #ifdef CONFIG_DEBUG_VM_MAPLE_TREE
                BUG_ON(next->vm_start < start);
                BUG_ON(next->vm_start > end);
        if (!next)
                next = vma_next(vmi);
 
-       if (unlikely(uf)) {
-               /*
-                * If userfaultfd_unmap_prep returns an error the vmas
-                * will remain split, but userland will get a
-                * highly unexpected error anyway. This is no
-                * different than the case where the first of the two
-                * __split_vma fails, but we don't undo the first
-                * split, despite we could. This is unlikely enough
-                * failure that it's not worth optimizing it for.
-                */
-               error = userfaultfd_unmap_prep(mm, start, end, uf);
-
-               if (error)
-                       goto userfaultfd_error;
-       }
-
 #if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
        /* Make sure no VMAs are about to be lost. */
        {