From: Liam R. Howlett
Date: Fri, 9 Oct 2020 19:35:01 +0000 (-0400)
Subject: mm/mmap: Change do_brk_munmap/do_brk_flags().
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=5b8f603d3b9decb5086d6879e35263981a99ec34;p=users%2Fjedix%2Flinux-maple.git

mm/mmap: Change do_brk_munmap/do_brk_flags().

do_brk_munmap() needs to call vma_adjust_trans_huge() when altering the
vma size.

do_brk_munmap() needs to support userfaults on unmap.

do_brk_flags() needs to take a pointer to the brk vma as it may create a
new vma, which the caller then needs for populating the range.

Signed-off-by: Liam R. Howlett
---

diff --git a/mm/mmap.c b/mm/mmap.c
index ba93cf9e3816..8cd6da36ad3f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -184,10 +184,10 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	return next;
 }
 
-static bool do_brk_munmap(struct vm_area_struct *vma, unsigned long newbrk,
-			  unsigned long oldbrk);
-static int do_brk_flags(unsigned long addr, unsigned long request,
-			struct vm_area_struct *vma, unsigned long flags);
+static int do_brk_munmap(struct vm_area_struct *vma, unsigned long newbrk,
+			 unsigned long oldbrk, struct list_head *uf);
+static int do_brk_flags(struct vm_area_struct **brkvma, unsigned long addr,
+			unsigned long request, unsigned long flags);
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
 	unsigned long newbrk, oldbrk, origbrk;
@@ -196,6 +196,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	unsigned long min_brk;
 	bool populate;
 	bool downgraded = false;
+	LIST_HEAD(uf);
 
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
@@ -235,21 +236,29 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 		goto success;
 	}
 
-	brkvma = find_vma_intersection(mm, mm->start_brk, mm->brk);
+	brkvma = find_vma_intersection(mm, mm->start_brk, oldbrk);
 	if (brkvma) {
 		/*
 		 * Always allow shrinking brk.
 		 * do_brk_munmap() may downgrade mmap_lock to read.
 		 */
 		if (brk <= mm->brk) {
+			int ret;
+
 			/*
 			 * mm->brk must to be protected by write mmap_lock.
 			 * do_brk_munmap() may downgrade the lock, so update it
 			 * before calling do_brk_munmap().
 			 */
 			mm->brk = brk;
-			downgraded = do_brk_munmap(brkvma, newbrk, oldbrk);
-			goto success;
+			ret = do_brk_munmap(brkvma, newbrk, oldbrk, &uf);
+			if (ret == 1) {
+				downgraded = true;
+				goto success;
+			} else if (!ret)
+				goto success;
+
+			mm->brk = origbrk;
+			goto out;
 		}
 		next = brkvma->vm_next;
 	} else {
@@ -261,7 +270,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 		goto out;
 
 	/* Ok, looks good - let it rip. */
-	if (do_brk_flags(oldbrk, newbrk-oldbrk, brkvma, 0) < 0)
+	if (do_brk_flags(&brkvma, oldbrk, newbrk-oldbrk, 0) < 0)
 		goto out;
 	mm->brk = brk;
 
@@ -271,6 +280,7 @@ success:
 		mmap_read_unlock(mm);
 	else
 		mmap_write_unlock(mm);
+	userfaultfd_unmap_complete(mm, &uf);
 	if (populate)
 		mm_populate_vma(brkvma, oldbrk, newbrk);
 	return brk;
@@ -2879,24 +2889,28 @@ out:
  * unmaps a partial VMA mapping.  Does not handle alignment, downgrades lock if
  * possible.
  */
-static bool do_brk_munmap(struct vm_area_struct *vma, unsigned long newbrk,
-			  unsigned long oldbrk)
+static int do_brk_munmap(struct vm_area_struct *vma, unsigned long newbrk,
+			 unsigned long oldbrk, struct list_head *uf)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct unmap;
 	unsigned long unmap_pages;
-	int downgrade = true;
+	int ret = 1;
 
 	arch_unmap(mm, newbrk, oldbrk);
 
-	if (vma->vm_start == newbrk) { // remove entire mapping.
+	if (vma->vm_start >= newbrk) { // remove entire mapping.
 		struct vm_area_struct *prev = vma->vm_prev;
+		ret = userfaultfd_unmap_prep(vma, newbrk, oldbrk, uf);
+		if (ret)
+			return ret;
+
 		if (mm->locked_vm)
 			unlock_range(vma, oldbrk);
 
 		if (!detach_vmas_to_be_unmapped(mm, vma, prev, oldbrk))
-			downgrade = false;
+			ret = 0;
 		else
 			mmap_write_downgrade(mm);
@@ -2909,28 +2923,31 @@ static bool do_brk_munmap(struct vm_area_struct *vma, unsigned long newbrk,
 	vma_init(&unmap, mm);
 	unmap.vm_start = newbrk;
 	unmap.vm_end = oldbrk;
-	unmap.vm_flags = vma->vm_flags;
-
-	unmap_pages = vma_pages(&unmap);
-
-	if (vma->vm_flags & VM_LOCKED) {
-		mm->locked_vm -= unmap_pages;
-		munlock_vma_pages_range(vma, newbrk, oldbrk);
-	}
+	ret = userfaultfd_unmap_prep(&unmap, newbrk, oldbrk, uf);
+	if (ret)
+		return ret;
 
 	// Change the oldbrk of vma to the newbrk of the munmap area
+	vma_adjust_trans_huge(vma, vma->vm_start, newbrk, 0);
 	vma_mt_brk(vma, newbrk);
+	// Handle userfaults here.
+	//
 
-	if (vma->vm_next && (vma->vm_next->vm_flags & VM_GROWSDOWN))
-		downgrade = false;
+	unmap_pages = vma_pages(&unmap);
+	if (unmap.vm_flags & VM_LOCKED) {
+		mm->locked_vm -= unmap_pages;
+		munlock_vma_pages_range(&unmap, newbrk, oldbrk);
+	}
 
-	if (vma->vm_prev && (vma->vm_prev->vm_flags & VM_GROWSUP))
-		downgrade = false;
+	// prev of unmap_pages is just vma.
+	if (vma->vm_flags & VM_GROWSUP)
+		ret = 0;
+	else if (vma->vm_next && (vma->vm_next->vm_flags & VM_GROWSDOWN))
+		ret = 0;
 
-	if (downgrade)
+	if (ret)
 		mmap_write_downgrade(mm);
-
 	unmap_region(mm, &unmap, vma, newbrk, oldbrk);
 	/* Statistics */
 	vm_stat_account(mm, unmap.vm_flags, -unmap_pages);
@@ -2939,7 +2956,7 @@ static bool do_brk_munmap(struct vm_area_struct *vma, unsigned long newbrk,
 
 munmap_full_vma:
 	validate_mm_mt(mm);
-	return downgrade;
+	return ret;
 }
 
 /*
@@ -2953,11 +2970,11 @@ munmap_full_vma:
  * do not match then create a new anonymous VMA.  Eventually we may be able to
  * do some brk-specific accounting here.
  */
-static int do_brk_flags(unsigned long addr, unsigned long len,
-			struct vm_area_struct *vma, unsigned long flags)
+static int do_brk_flags(struct vm_area_struct **brkvma, unsigned long addr,
+			unsigned long len, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *prev = NULL;
+	struct vm_area_struct *prev = NULL, *vma = NULL;
 	int error;
 	unsigned long mapped_addr;
 	validate_mm_mt(mm);
@@ -2985,10 +3002,13 @@ static int do_brk_flags(unsigned long addr, unsigned long len,
 	if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
 		return -ENOMEM;
 
-	/* Fast path, expand the existing vma if possible */
-	if (vma && ((vma->vm_flags & ~VM_SOFTDIRTY) == flags)) {
-		vma_mt_brk(vma, addr + len);
-		goto out;
+	if (brkvma) {
+		vma = *brkvma;
+		/* Fast path, expand the existing vma if possible */
+		if (vma && ((vma->vm_flags & ~VM_SOFTDIRTY) == flags)) {
+			vma_mt_brk(vma, addr + len);
+			goto out;
+		}
 	}
 
 	/* create a vma struct for an anonymous mapping */
@@ -3008,6 +3028,8 @@ static int do_brk_flags(unsigned long addr, unsigned long len,
 	if (!prev)
 		find_vma_prev(mm, addr, &prev);
 	vma_link(mm, vma, prev);
+	if (brkvma)
+		*brkvma = vma;
 out:
 	perf_event_mmap(vma);
 	mm->total_vm += len >> PAGE_SHIFT;
@@ -3035,7 +3057,7 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
 
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
-	ret = do_brk_flags(addr, len, NULL, flags);
+	ret = do_brk_flags(NULL, addr, len, flags);
 	populate = ((mm->def_flags & VM_LOCKED) != 0);
 	mmap_write_unlock(mm);
 	if (populate && !ret)
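
A note on the userfaultfd side of this change, since it is the least
visible part of the diff: shrinking the break over a registered range is
expected to queue a UFFD_EVENT_UNMAP for the monitor, and
userfaultfd_unmap_complete() is deliberately called only after the mmap
lock is dropped.  The user-space sketch below is illustrative only and is
not part of the patch; it assumes a kernel with the completed series
applied, the page counts and thread layout are arbitrary choices, and
error handling is abbreviated.

#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static void *monitor(void *arg)
{
	int uffd = *(int *)arg;
	struct uffd_msg msg;

	/* Blocks until the kernel queues an event for the registered range. */
	if (read(uffd, &msg, sizeof(msg)) == sizeof(msg) &&
	    msg.event == UFFD_EVENT_UNMAP)
		printf("unmap event: 0x%llx-0x%llx\n",
		       (unsigned long long)msg.arg.remove.start,
		       (unsigned long long)msg.arg.remove.end);
	return NULL;
}

int main(void)
{
	struct uffdio_api api = { .api = UFFD_API,
				  .features = UFFD_FEATURE_EVENT_UNMAP };
	struct uffdio_register reg = { 0 };
	long page = sysconf(_SC_PAGESIZE);
	uintptr_t brk0;
	pthread_t thr;
	char *base;
	int uffd;

	uffd = syscall(__NR_userfaultfd, O_CLOEXEC);
	if (uffd < 0 || ioctl(uffd, UFFDIO_API, &api))
		exit(1);

	/* Page-align the break, then grow the heap by four pages. */
	brk0 = (uintptr_t)sbrk(0);
	if (brk0 & (page - 1))
		sbrk(page - (brk0 & (page - 1)));
	base = sbrk(4 * page);

	/* Register the new heap pages so unmap events are reported. */
	reg.range.start = (uintptr_t)base;
	reg.range.len = 4 * page;
	reg.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(uffd, UFFDIO_REGISTER, &reg))
		exit(1);

	/* The monitor must run in its own thread; see the note below. */
	pthread_create(&thr, NULL, monitor, &uffd);

	/* Shrink the break: the patched do_brk_munmap() reports the range. */
	sbrk(-2 * page);

	pthread_join(thr, NULL);
	close(uffd);
	return 0;
}

The monitor runs in a separate thread because the task doing the unmap
blocks in userfaultfd_unmap_complete() until the message is consumed;
this is also why the brk syscall above reports the event only after
mmap_read_unlock()/mmap_write_unlock().  On kernels where
vm.unprivileged_userfaultfd is 0, the program needs the appropriate
privilege to open the uffd.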