mm/mmap: Alter brk to be more maple tree efficient.
author Liam R. Howlett <Liam.Howlett@Oracle.com>
Mon, 28 Sep 2020 19:52:08 +0000 (15:52 -0400)
committer Liam R. Howlett <Liam.Howlett@Oracle.com>
Fri, 30 Oct 2020 19:12:32 +0000 (15:12 -0400)
Alter brk() and the do_brk_flags() calls, and add do_brk_munmap(), to be
more efficient when using the maple tree.

kvm benchmark on an 8-core laptop:
./mmtests/work/sources/wis-0-installed/brk1_threads

rb_tree v5.9-rc1:
min:3633758 max:3633758 total:3633758
min:3604892 max:3604892 total:3604892
min:3589182 max:3589182 total:3589182
min:3581230 max:3581230 total:3581230

maple tree with this patch:
min:3673408 max:3673408 total:3673408
min:3659268 max:3659268 total:3659268
min:3624968 max:3624968 total:3624968
min:3613576 max:3613576 total:3613576
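
The brk1 test in will-it-scale (wis) essentially moves the program break
up and down in a tight loop.  A rough, self-contained sketch of that
pattern (an illustration only, not the actual benchmark source; the
iteration count and one-page step are arbitrary):

    #define _DEFAULT_SOURCE         /* for brk()/sbrk() in glibc */
    #include <unistd.h>

    int main(void)
    {
            void *base = sbrk(0);   /* current program break */
            long i;

            for (i = 0; i < 1000000; i++) {
                    brk((char *)base + 4096);  /* grow the heap by one page */
                    brk(base);                 /* shrink it back */
            }
            return 0;
    }

Each grow goes through do_brk_flags(), and each shrink now takes the new
do_brk_munmap() path instead of __do_munmap().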

Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
include/linux/mm.h
mm/mmap.c

index d461baa0a228dbc1c1a0e0714c12a8f058a6f7a7..f4363b723610de0dc9df16edb4c88d22d58eb705 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2587,8 +2587,12 @@ static inline void mm_populate(unsigned long addr, unsigned long len)
        /* Ignore errors */
        (void) __mm_populate(addr, len, 1);
 }
+extern void mm_populate_vma(struct vm_area_struct *vma, unsigned long start,
+               unsigned long end);
 #else
 static inline void mm_populate(unsigned long addr, unsigned long len) {}
+static inline void mm_populate_vma(struct vm_area_struct *vma,
+               unsigned long start, unsigned long end) {}
 #endif
 
 /* These take the mm semaphore themselves */
index ae0570ae573dd269459cf5daa2c938b14fe75ac9..d9382b6de0402f81e3f9a81f87064f4af281ea13 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -184,19 +184,18 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
        return next;
 }
 
+static int do_brk_munmap(struct vm_area_struct *vma, unsigned long start,
+                        unsigned long end);
 static int do_brk_flags(unsigned long addr, unsigned long request,
-                       struct vm_area_struct *vma, unsigned long flags,
-                       struct list_head *uf);
+                       struct vm_area_struct *vma, unsigned long flags);
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
-       unsigned long retval;
        unsigned long newbrk, oldbrk, origbrk;
        struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma_brk, *next;
+       struct vm_area_struct *brkvma, *next = NULL;
        unsigned long min_brk;
        bool populate;
        bool downgraded = false;
-       LIST_HEAD(uf);
 
        if (mmap_write_lock_killable(mm))
                return -EINTR;
@@ -238,34 +237,41 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 
        /*
         * Always allow shrinking brk.
-        * __do_munmap() may downgrade mmap_lock to read.
+        * do_brk_munmap() may downgrade mmap_lock to read.
         */
-       if (brk <= mm->brk) {
-               int ret;
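+       /* Find the vma backing the existing brk area, if one exists. */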
+       brkvma = find_vma_intersection(mm, mm->start_brk, mm->brk);
+       if (brkvma) {
+               if (brk <= mm->brk) {
+                       int ret;
 
-               /*
-                * mm->brk must to be protected by write mmap_lock so update it
-                * before downgrading mmap_lock. When __do_munmap() fails,
-                * mm->brk will be restored from origbrk.
-                */
-               mm->brk = brk;
-               ret = __do_munmap(mm, newbrk, oldbrk-newbrk, &uf, true);
-               if (ret < 0) {
-                       mm->brk = origbrk;
-                       goto out;
-               } else if (ret == 1) {
-                       downgraded = true;
+                       /*
+                        * mm->brk must be protected by the write mmap_lock.
+                        * do_brk_munmap() may downgrade the lock, so update
+                        * mm->brk before calling it.  If do_brk_munmap() fails,
+                        * mm->brk will be restored from origbrk.
+                        */
+                       mm->brk = brk;
+                       ret = do_brk_munmap(brkvma, newbrk, brkvma->vm_end);
+                       if (ret < 0) {
+                               mm->brk = origbrk;
+                               goto out;
+                       } else if (ret == 1) {
+                               downgraded = true;
+                       }
+                       goto success;
                }
-               goto success;
+
+               next = brkvma->vm_next;
+       } else {
+               next = find_vma(mm, mm->brk);
        }
 
        /* Check against existing mmap mappings. */
-       next = find_vma_prev(mm, oldbrk, &vma_brk);
        if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
                goto out;
 
        /* Ok, looks good - let it rip. */
-       if (do_brk_flags(oldbrk, newbrk-oldbrk, vma_brk, 0, &uf) < 0)
+       if (do_brk_flags(oldbrk, newbrk-oldbrk, brkvma, 0) < 0)
                goto out;
        mm->brk = brk;
 
@@ -275,15 +281,13 @@ success:
                mmap_read_unlock(mm);
        else
                mmap_write_unlock(mm);
-       userfaultfd_unmap_complete(mm, &uf);
        if (populate)
-               mm_populate_vma(vma_brk, oldbrk, newbrk);
+               mm_populate_vma(brkvma, oldbrk, newbrk);
        return brk;
 
 out:
-       retval = origbrk;
        mmap_write_unlock(mm);
-       return retval;
+       return origbrk;
 }
 
 #if defined(CONFIG_DEBUG_MAPLE_TREE)
@@ -608,20 +612,36 @@ static inline void vma_mt_store(struct mm_struct *mm, struct vm_area_struct *vma
                GFP_KERNEL);
 }
 
+
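+/* Non-static wrapper allowing code outside this file to use vma_mt_store(). */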
+void vma_store(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+       vma_mt_store(mm, vma);
+}
+
 void vma_mt_modify(struct vm_area_struct *vma, unsigned long new_start,
                unsigned long new_end)
 {
+       if (vma->anon_vma) {
+               anon_vma_lock_write(vma->anon_vma);
+               anon_vma_interval_tree_pre_update_vma(vma);
+       }
        // Shrinking front.
-       if (vma->vm_start < new_start)
+       if (vma->vm_start < new_start) {
                vma_mt_szero(vma->vm_mm, vma->vm_start, new_start);
+       }
 
        // Shrinking back.
        if (vma->vm_end > new_end)
                vma_mt_szero(vma->vm_mm, new_end, vma->vm_end);
 
+       vma->vm_pgoff += (new_start - vma->vm_start) >> PAGE_SHIFT;
        vma->vm_start = new_start;
        vma->vm_end = new_end;
        vma_mt_store(vma->vm_mm, vma);
+       if (vma->anon_vma) {
+               anon_vma_interval_tree_post_update_vma(vma);
+               anon_vma_unlock_write(vma->anon_vma);
+       }
 }
 
 static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -2850,6 +2870,65 @@ out:
        return ret;
 }
 
+/*
+ * do_brk_munmap() - Unmap a partial vma.
+ * @vma: The vma to be modified
+ * @start: The start address of the range to unmap
+ * @end: The end address of the range to unmap
+ *
+ * Unmaps a partial vma mapping.  Does not handle alignment; downgrades the
+ * mmap_lock to a read lock when possible.
+ *
+ * Returns: 1 if the lock was downgraded, 0 if not, or a negative error on
+ * failure.
+ */
+static int do_brk_munmap(struct vm_area_struct *vma, unsigned long start,
+                        unsigned long end)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       struct vm_area_struct unmap;
+       unsigned long unmap_pages = 0;
+       int ret = 0;
+
+       arch_unmap(mm, start, end);
+
+       if (vma->vm_ops && vma->vm_ops->split) {
+               ret = vma->vm_ops->split(vma, start);
+               if (ret)
+                       return ret;
+       }
+
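+       /*
+        * Describe the range being removed with a temporary, on-stack vma.
+        * It is only passed to unmap_region() and is never linked into the mm.
+        */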
+       memset(&unmap, 0, sizeof(struct vm_area_struct));
+       INIT_LIST_HEAD(&unmap.anon_vma_chain);
+       ret = vma_dup_policy(vma, &unmap);
+       if (ret)
+               return ret;
+
+       if (mm->locked_vm)
+               unlock_range(vma, end);
+
+       vma_mt_modify(vma, vma->vm_start, start);
+       unmap.vm_mm = vma->vm_mm;
+       unmap.vm_start = start;
+       unmap.vm_end = end;
+       unmap.vm_flags = vma->vm_flags;
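+
+       /*
+        * A vma with VM_GROWSDOWN/VM_GROWSUP can expand during a page fault
+        * with only the read lock held, so keep the write lock (ret = 0) in
+        * that case; otherwise the lock can be downgraded (ret = 1).
+        */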
+       ret = 1;
+       if (vma->vm_flags & VM_GROWSDOWN)
+               ret = 0;
+
+       if (vma->vm_prev && (vma->vm_prev->vm_flags & VM_GROWSUP))
+               ret = 0;
+
+       if (ret)
+               mmap_write_downgrade(mm);
+
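+       /* Free the pages and page tables covering the removed range. */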
+       unmap_region(mm, &unmap, vma, start, end);
+       unmap_pages = vma_pages(&unmap);
+       vm_stat_account(mm, vma->vm_flags, -unmap_pages);
+       if (vma->vm_flags & VM_ACCOUNT)
+               vm_unacct_memory(unmap_pages);
+       validate_mm_mt(mm);
+       return ret;
+}
+
 /*
  * do_brk_flags() - Increase the brk vma if the flags match.
  * @addr: The start address
@@ -2862,8 +2941,7 @@ out:
  * do some brk-specific accounting here.
  */
 static int do_brk_flags(unsigned long addr, unsigned long len,
-                       struct vm_area_struct *vma, unsigned long flags,
-                       struct list_head *uf)
+                       struct vm_area_struct *vma, unsigned long flags)
 {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *prev = NULL;
@@ -2936,7 +3014,6 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
        unsigned long len;
        int ret;
        bool populate;
-       LIST_HEAD(uf);
 
        len = PAGE_ALIGN(request);
        if (len < request)
@@ -2947,10 +3024,9 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
        if (mmap_write_lock_killable(mm))
                return -EINTR;
 
-       ret = do_brk_flags(addr, len, NULL, flags, &uf);
+       ret = do_brk_flags(addr, len, NULL, flags);
        populate = ((mm->def_flags & VM_LOCKED) != 0);
        mmap_write_unlock(mm);
-       userfaultfd_unmap_complete(mm, &uf);
        if (populate && !ret)
                mm_populate(addr, len);
        return ret;