From: Liam R. Howlett
Date: Mon, 28 Sep 2020 19:52:08 +0000 (-0400)
Subject: mm/mmap: Alter brk to be more maple tree efficient.
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=93f7f3ffc8fbf6eccaccdd433e80b3387a61c6b0;p=users%2Fjedix%2Flinux-maple.git

mm/mmap: Alter brk to be more maple tree efficient.

Alter the brk() and do_brk_flags() calls, and add do_brk_munmap(), to be
more efficient when using the maple tree.

kvm benchmark on an 8 core laptop:
./mmtests/work/sources/wis-0-installed/brk1_threads

rb_tree v5.9-rc1:
min:3633758 max:3633758 total:3633758
min:3604892 max:3604892 total:3604892
min:3589182 max:3589182 total:3589182
min:3581230 max:3581230 total:3581230

maple tree with this patch:
min:3673408 max:3673408 total:3673408
min:3659268 max:3659268 total:3659268
min:3624968 max:3624968 total:3624968
min:3613576 max:3613576 total:3613576

Signed-off-by: Liam R. Howlett
---

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d461baa0a228..f4363b723610 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2587,8 +2587,12 @@ static inline void mm_populate(unsigned long addr, unsigned long len)
 	/* Ignore errors */
 	(void) __mm_populate(addr, len, 1);
 }
+extern void mm_populate_vma(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end);
 #else
 static inline void mm_populate(unsigned long addr, unsigned long len) {}
+static inline void mm_populate_vma(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end) {}
 #endif
 
 /* These take the mm semaphore themselves */
diff --git a/mm/mmap.c b/mm/mmap.c
index ae0570ae573d..d9382b6de040 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -184,19 +184,18 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	return next;
 }
 
+static int do_brk_munmap(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end);
 static int do_brk_flags(unsigned long addr, unsigned long request,
-		struct vm_area_struct *vma, unsigned long flags,
-		struct list_head *uf);
+		struct vm_area_struct *vma, unsigned long flags);
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
-	unsigned long retval;
 	unsigned long newbrk, oldbrk, origbrk;
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma_brk, *next;
+	struct vm_area_struct *brkvma, *next = NULL;
 	unsigned long min_brk;
 	bool populate;
 	bool downgraded = false;
-	LIST_HEAD(uf);
 
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
@@ -238,34 +237,41 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 
 	/*
 	 * Always allow shrinking brk.
-	 * __do_munmap() may downgrade mmap_lock to read.
+	 * do_brk_munmap() may downgrade mmap_lock to read.
 	 */
-	if (brk <= mm->brk) {
-		int ret;
+	brkvma = find_vma_intersection(mm, mm->start_brk, mm->brk);
+	if (brkvma) {
+		if (brk <= mm->brk) {
+			int ret;
 
-		/*
-		 * mm->brk must to be protected by write mmap_lock so update it
-		 * before downgrading mmap_lock. When __do_munmap() fails,
-		 * mm->brk will be restored from origbrk.
-		 */
-		mm->brk = brk;
-		ret = __do_munmap(mm, newbrk, oldbrk-newbrk, &uf, true);
-		if (ret < 0) {
-			mm->brk = origbrk;
-			goto out;
-		} else if (ret == 1) {
-			downgraded = true;
+			/*
+			 * mm->brk must be protected by the write mmap_lock.
+			 * do_brk_munmap() may downgrade the lock, so update it
+			 * before calling do_brk_munmap().  If do_brk_munmap()
+			 * fails, mm->brk will be restored from origbrk.
+			 */
+			mm->brk = brk;
+			ret = do_brk_munmap(brkvma, newbrk, brkvma->vm_end);
+			if (ret < 0) {
+				mm->brk = origbrk;
+				goto out;
+			} else if (ret == 1) {
+				downgraded = true;
+			}
+			goto success;
 		}
-		goto success;
+
+		next = brkvma->vm_next;
+	} else {
+		next = find_vma(mm, mm->brk);
 	}
 
 	/* Check against existing mmap mappings. */
-	next = find_vma_prev(mm, oldbrk, &vma_brk);
 	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
 		goto out;
 
 	/* Ok, looks good - let it rip. */
-	if (do_brk_flags(oldbrk, newbrk-oldbrk, vma_brk, 0, &uf) < 0)
+	if (do_brk_flags(oldbrk, newbrk-oldbrk, brkvma, 0) < 0)
 		goto out;
 	mm->brk = brk;
 
@@ -275,15 +281,13 @@ success:
 		mmap_read_unlock(mm);
 	else
 		mmap_write_unlock(mm);
-	userfaultfd_unmap_complete(mm, &uf);
 	if (populate)
-		mm_populate_vma(vma_brk, oldbrk, newbrk);
+		mm_populate_vma(brkvma, oldbrk, newbrk);
 	return brk;
 
 out:
-	retval = origbrk;
 	mmap_write_unlock(mm);
-	return retval;
+	return origbrk;
 }
 
 #if defined(CONFIG_DEBUG_MAPLE_TREE)
@@ -608,20 +612,36 @@ static inline void vma_mt_store(struct mm_struct *mm, struct vm_area_struct *vma
 			GFP_KERNEL);
 }
 
+
+void vma_store(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	vma_mt_store(mm, vma);
+}
+
 void vma_mt_modify(struct vm_area_struct *vma, unsigned long new_start,
 		unsigned long new_end)
 {
+	if (vma->anon_vma) {
+		anon_vma_lock_write(vma->anon_vma);
+		anon_vma_interval_tree_pre_update_vma(vma);
+	}
 	// Shrinking front.
-	if (vma->vm_start < new_start)
+	if (vma->vm_start < new_start) {
 		vma_mt_szero(vma->vm_mm, vma->vm_start, new_start);
+	}
 
 	// Shrinking back.
 	if (vma->vm_end > new_end)
 		vma_mt_szero(vma->vm_mm, new_end, vma->vm_end);
 
+	vma->vm_pgoff += (new_start - vma->vm_start) >> PAGE_SHIFT;
 	vma->vm_start = new_start;
 	vma->vm_end = new_end;
 	vma_mt_store(vma->vm_mm, vma);
+	if (vma->anon_vma) {
+		anon_vma_interval_tree_post_update_vma(vma);
+		anon_vma_unlock_write(vma->anon_vma);
+	}
 }
 
 static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -2850,6 +2870,65 @@ out:
 	return ret;
 }
 
+/*
+ * do_brk_munmap() - Unmap part of a vma.
+ * @vma: The vma to be modified
+ * @start: The start of the address range to unmap
+ * @end: The end of the address range to unmap
+ *
+ * Unmaps a partial VMA mapping.  Does not handle alignment; downgrades the
+ * mmap_lock to read if possible.
+ * Returns: 1 if the lock was downgraded, 0 if not, negative on error.
+ */
+int do_brk_munmap(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct unmap;
+	unsigned long unmap_pages = 0;
+	int ret = 0;
+
+	arch_unmap(mm, start, end);
+
+	if (vma->vm_ops && vma->vm_ops->split) {
+		ret = vma->vm_ops->split(vma, start);
+		if (ret)
+			return ret;
+	}
+
+	memset(&unmap, 0, sizeof(struct vm_area_struct));
+	INIT_LIST_HEAD(&unmap.anon_vma_chain);
+	ret = vma_dup_policy(vma, &unmap);
+	if (ret)
+		return ret;
+
+	if (mm->locked_vm)
+		unlock_range(vma, end);
+
+	vma_mt_modify(vma, vma->vm_start, start);
+	unmap.vm_mm = vma->vm_mm;
+	unmap.vm_start = start;
+	unmap.vm_end = end;
+	unmap.vm_flags = vma->vm_flags;
+	ret = 1;
+	if (vma->vm_flags & VM_GROWSDOWN)
+		ret = 0;
+
+	if (vma->vm_prev && (vma->vm_prev->vm_flags & VM_GROWSUP))
+		ret = 0;
+
+	if (ret)
+		mmap_write_downgrade(mm);
+
+	unmap_region(mm, &unmap, vma, start, end);
+	unmap_pages = vma_pages(&unmap);
+	vm_stat_account(mm, vma->vm_flags, -unmap_pages);
+	if (vma->vm_flags & VM_ACCOUNT)
+		vm_unacct_memory(unmap_pages);
+	validate_mm_mt(mm);
+	return ret;
+}
+
 /*
  * do_brk_flags() - Increase the brk vma if the flags match.
  * @addr: The start address
@@ -2862,8 +2941,7 @@ out:
  * do some brk-specific accounting here.
  */
 static int do_brk_flags(unsigned long addr, unsigned long len,
-		struct vm_area_struct *vma, unsigned long flags,
-		struct list_head *uf)
+		struct vm_area_struct *vma, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *prev = NULL;
@@ -2936,7 +3014,6 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
 	unsigned long len;
 	int ret;
 	bool populate;
-	LIST_HEAD(uf);
 
 	len = PAGE_ALIGN(request);
 	if (len < request)
@@ -2947,10 +3024,9 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
-	ret = do_brk_flags(addr, len, NULL, flags, &uf);
+	ret = do_brk_flags(addr, len, NULL, flags);
 	populate = ((mm->def_flags & VM_LOCKED) != 0);
 	mmap_write_unlock(mm);
-	userfaultfd_unmap_complete(mm, &uf);
 	if (populate && !ret)
 		mm_populate(addr, len);
 	return ret;
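
Not part of the patch, and only as an illustration: besides the mmtests
brk1_threads run quoted above, the two new kernel paths can be exercised from
userspace with a trivial grow/shrink loop, since expanding the program break
goes through do_brk_flags() and shrinking it goes through do_brk_munmap().
A minimal sketch (the page-sized step and iteration count are arbitrary):

/* Illustrative test only: grow and shrink the program break so the kernel
 * exercises do_brk_flags() (expand) and do_brk_munmap() (shrink).
 */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	void *start = sbrk(0);		/* current program break */
	int i;

	if (start == (void *)-1) {
		perror("sbrk");
		return 1;
	}

	for (i = 0; i < 100000; i++) {
		if (sbrk(page) == (void *)-1) {	/* expand: do_brk_flags() */
			perror("sbrk grow");
			return 1;
		}
		if (brk(start) != 0) {		/* shrink: do_brk_munmap() */
			perror("brk shrink");
			return 1;
		}
	}
	printf("completed %d grow/shrink cycles\n", i);
	return 0;
}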