From: Liam R. Howlett
Date: Thu, 11 Mar 2021 20:25:33 +0000 (-0500)
Subject: mm: Move mas locking outside of munmap() path.
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=refs%2Fheads%2Fmaple_next;p=users%2Fjedix%2Flinux-maple.git

mm: Move mas locking outside of munmap() path.

Now that there is a split variant that allows splitting to use a maple
state, move the locks to a more logical position.

Signed-off-by: Liam R. Howlett
---
diff --git a/mm/internal.h b/mm/internal.h
index f26ef82f3a7b..e7a526438814 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -369,9 +369,7 @@ static inline int vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas
 
 	mas->index = vma->vm_start;
 	mas->last = vma->vm_end - 1;
-	mas_lock(mas);
 	ret = mas_store_gfp(mas, vma, GFP_KERNEL);
-	mas_unlock(mas);
 	return ret;
 }
 
@@ -390,9 +388,7 @@ static inline int vma_mas_remove(struct vm_area_struct *vma, struct ma_state *ma
 
 	mas->index = vma->vm_start;
 	mas->last = vma->vm_end - 1;
-	mas_lock(mas);
 	ret = mas_store_gfp(mas, NULL, GFP_KERNEL);
-	mas_unlock(mas);
 	return ret;
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
index db89c4b8a54d..1ae46cca1fd5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -203,6 +203,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
+	mas_lock(&mas);
 
 	origbrk = mm->brk;
 
@@ -289,19 +290,21 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 
 		goto out;
 	mm->brk = brk;
-
 success:
 	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
 	if (downgraded)
 		mmap_read_unlock(mm);
-	else
+	else {
+		mas_unlock(&mas);
 		mmap_write_unlock(mm);
+	}
 	userfaultfd_unmap_complete(mm, &uf);
 	if (populate)
 		mm_populate(oldbrk, newbrk - oldbrk);
 	return brk;
 
 out:
+	mas_unlock(&mas);
 	mmap_write_unlock(mm);
 	return origbrk;
 }
@@ -503,7 +506,9 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
 {
 	MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end - 1);
 
+	mas_lock(&mas);
 	vma_mas_link(mm, vma, &mas);
+	mas_unlock(&mas);
 }
 
 /*
@@ -2443,8 +2448,6 @@ static inline void detach_range(struct mm_struct *mm, struct ma_state *mas,
 	do {
 		count++;
 		*vma = mas_prev(mas, start);
-		BUG_ON((*vma)->vm_start < start);
-		BUG_ON((*vma)->vm_end > end + 1);
 		vma_mas_store(*vma, dst);
 		if ((*vma)->vm_flags & VM_LOCKED) {
 			mm->locked_vm -= vma_pages(*vma);
@@ -2549,14 +2552,12 @@ static int do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
 	}
 
 	/* Point of no return */
-	mas_lock(mas);
 	if (next)
 		max = next->vm_start;
 
 	mtree_init(&mt_detach, MAPLE_ALLOC_RANGE);
 	dst.tree = &mt_detach;
 	detach_range(mm, mas, &dst, &vma);
-	mas_unlock(mas);
 
 	/*
 	 * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
@@ -2568,8 +2569,10 @@ static int do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
 			downgrade = false;
 		else if (prev && (prev->vm_flags & VM_GROWSUP))
 			downgrade = false;
-		else
+		else {
+			mas_unlock(mas);
 			mmap_write_downgrade(mm);
+		}
 	}
 
 	/* Unmap the region */
@@ -2598,6 +2601,10 @@ static int do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
 * This function takes a @mas that is in the correct state to remove the
 * mapping(s). The @len will be aligned and any arch_unmap work will be
 * preformed.
+ * Note: May unlock mas if lock is degraded.
+ *
+ * Returns: -EINVAL on failure, 1 on success and unlock, 0 otherwise.
+ *
 */
 int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm,
 		  unsigned long start, size_t len, struct list_head *uf,
@@ -2635,7 +2642,9 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
 	int ret;
 	MA_STATE(mas, &mm->mm_mt, start, start);
 
+	mas_lock(&mas);
 	ret = do_mas_munmap(&mas, mm, start, len, uf, false);
+	mas_unlock(&mas);
 	return ret;
 }
 
@@ -2652,11 +2661,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long merge_start = addr, merge_end = end;
 	unsigned long max = USER_PGTABLES_CEILING;
 	pgoff_t vm_pgoff;
-	int error;
+	int error = ENOMEM;
 	struct ma_state ma_prev, tmp;
 
 	MA_STATE(mas, &mm->mm_mt, addr, end - 1);
 
+	mas_lock(&mas);
 	/* Check against address space limit. */
 	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
 		unsigned long nr_pages;
@@ -2669,23 +2679,21 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 
 		if (!may_expand_vm(mm, vm_flags,
 					(len >> PAGE_SHIFT) - nr_pages))
-			return -ENOMEM;
+			goto no_mem;
 	}
 
 	validate_mm(mm);
 	/* Unmap any existing mapping in the area */
-	if (do_mas_munmap(&mas, mm, addr, len, uf, false)) {
-		return -ENOMEM;
-	}
+	if (do_mas_munmap(&mas, mm, addr, len, uf, false))
+		goto no_mem;
 
 	/*
 	 * Private writable mapping: check memory availability
 	 */
 	if (accountable_mapping(file, vm_flags)) {
 		charged = len >> PAGE_SHIFT;
-		if (security_vm_enough_memory_mm(mm, charged)) {
-			return -ENOMEM;
-		}
+		if (security_vm_enough_memory_mm(mm, charged))
+			goto no_mem;
 		vm_flags |= VM_ACCOUNT;
 	}
 
@@ -2736,10 +2744,8 @@ cannot_expand:
 	 * not unmapped, but the maps are removed from the list.
 	 */
 	vma = vm_area_alloc(mm);
-	if (!vma) {
-		error = -ENOMEM;
+	if (!vma)
 		goto unacct_error;
-	}
 
 	vma->vm_start = addr;
 	vma->vm_end = end;
@@ -2864,6 +2870,7 @@ expanded:
 		vma->vm_flags |= VM_SOFTDIRTY;
 	vma_set_page_prot(vma);
 	validate_mm(mm);
+	mas_unlock(&mas);
 	return addr;
 
 unmap_and_free_vma:
@@ -2884,6 +2891,8 @@ free_vma:
 unacct_error:
 	if (charged)
 		vm_unacct_memory(charged);
+no_mem:
+	mas_unlock(&mas);
 	return error;
 }
 
@@ -2896,6 +2905,7 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
 
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
+	mas_lock(&mas);
 	ret = do_mas_munmap(&mas, mm, start, len, &uf, downgrade);
 	/*
 	 * Returning 1 indicates mmap_lock is downgraded.
@@ -2905,8 +2915,10 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
 	if (ret == 1) {
 		mmap_read_unlock(mm);
 		ret = 0;
-	} else
+	} else {
+		mas_unlock(&mas);
 		mmap_write_unlock(mm);
+	}
 
 	userfaultfd_unmap_complete(mm, &uf);
 	return ret;
@@ -2958,6 +2970,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
+	rcu_read_lock();
 	mas_set(&mas, start);
 	vma = mas_walk(&mas);
 
@@ -3006,6 +3019,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 			prot, flags, pgoff, &populate, NULL);
 	fput(file);
 out:
+	rcu_read_unlock();
 	mmap_write_unlock(mm);
 	if (populate)
 		mm_populate(ret, populate);
@@ -3022,7 +3036,8 @@ out:
 * @oldbrk: The end of the address to unmap
 * @uf: The userfaultfd list_head
 *
- * Returns: 0 on success.
+ * Returns: 0 on success, 1 on success and downgraded write lock, negative
+ * otherwise.
 * unmaps a partial VMA mapping. Does not handle alignment, downgrades lock if
 * possible.
 */
@@ -3084,6 +3099,7 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
 		munlock_vma_pages_range(&unmap, newbrk, oldbrk);
 	}
 
+	mas_unlock(mas);
 	mmap_write_downgrade(mm);
 	unmap_region(mm, &unmap, mas, newbrk, oldbrk, vma,
 		     next ? next->vm_start : 0);
@@ -3166,13 +3182,10 @@ static int do_brk_flags(struct ma_state *mas, struct ma_state *ma_prev,
 			anon_vma_lock_write(vma->anon_vma);
 			anon_vma_interval_tree_pre_update_vma(vma);
 		}
-		mas_lock(ma_prev);
 		vma->vm_end = addr + len;
 		vma->vm_flags |= VM_SOFTDIRTY;
-		if (mas_store_gfp(ma_prev, vma, GFP_KERNEL)) {
-			mas_unlock(ma_prev);
+		if (mas_store_gfp(ma_prev, vma, GFP_KERNEL))
 			goto mas_mod_fail;
-		}
 
 		if (vma->anon_vma) {
 			anon_vma_interval_tree_post_update_vma(vma);
@@ -3243,10 +3256,12 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
+	mas_lock(&mas);
 	// This vma left intentionally blank.
 	mas_walk(&mas);
 	ret = do_brk_flags(&mas, &mas, &vma, addr, len, flags);
 	populate = ((mm->def_flags & VM_LOCKED) != 0);
+	mas_unlock(&mas);
 	mmap_write_unlock(mm);
 	if (populate && !ret)
 		mm_populate(addr, len);
@@ -3308,9 +3323,10 @@ void exit_mmap(struct mm_struct *mm)
 
 	arch_exit_mmap(mm);
 
+	mas_lock(&mas);
 	vma = mas_find(&mas, ULONG_MAX);
 	if (!vma) {
 		/* Can happen if dup_mmap() received an OOM */
-		rcu_read_unlock();
+		mas_unlock(&mas);
 		return;
 	}
@@ -3323,6 +3339,7 @@ void exit_mmap(struct mm_struct *mm)
 	unmap_vmas(&tlb, vma, &mas, 0, -1);
 	free_pgtables(&tlb, &mas2, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
 	tlb_finish_mmu(&tlb);
+	mas_unlock(&mas);
 
 	/*
 	 * Walk the list again, actually closing and freeing it,
@@ -3347,12 +3364,16 @@ void exit_mmap(struct mm_struct *mm)
 */
 int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-	if (find_vma_intersection(mm, vma->vm_start, vma->vm_end))
-		return -ENOMEM;
+	MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end - 1);
+
+	mas_lock(&mas);
+	if (mas_find(&mas, vma->vm_end - 1))
+		goto no_mem;
 
 	if ((vma->vm_flags & VM_ACCOUNT) &&
 	     security_vm_enough_memory_mm(mm, vma_pages(vma)))
-		return -ENOMEM;
+		goto no_mem;
+
 
 	/*
 	 * The vm_pgoff of a purely anonymous vma should be irrelevant
@@ -3371,8 +3392,14 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
 	}
 
-	vma_link(mm, vma);
+	mas_reset(&mas);
+	vma_mas_link(mm, vma, &mas);
+	mas_unlock(&mas);
 	return 0;
+
+no_mem:
+	mas_unlock(&mas);
+	return -ENOMEM;
 }
 
 /*
diff --git a/mm/nommu.c b/mm/nommu.c
index fa1e387c2a33..46dbdd4050a0 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -571,6 +571,7 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
 
 	BUG_ON(!vma->vm_region);
 
+	mas_lock(&mas);
 	mm->map_count++;
 	printk("mm at %u\n", mm->map_count);
 	vma->vm_mm = mm;
@@ -592,6 +593,7 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
 	mas_reset(&mas);
 	/* add the VMA to the tree */
 	vma_mas_store(vma, &mas);
+	mas_unlock(&mas);
 }
 
 /*
@@ -601,6 +603,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
 {
 	MA_STATE(mas, &vma->vm_mm->mm_mt, 0, 0);
 
+	mas_lock(&mas);
 	vma->vm_mm->map_count--;
 	/* remove the VMA from the mapping */
 	if (vma->vm_file) {
@@ -616,6 +619,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
 
 	/* remove from the MM's tree and list */
 	vma_mas_remove(vma, &mas);
+	mas_unlock(&mas);
 }
 
 /*
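
As context for the change above: after this patch, vma_mas_store() and vma_mas_remove() no longer take the maple state lock themselves, so each caller is expected to hold it (in addition to the usual mmap_lock) around the tree update. Below is a minimal caller-side sketch of that pattern. It is not part of the patch; the function name example_insert_vma() is made up for illustration, while MA_STATE(), mas_lock()/mas_unlock() and vma_mas_store() are the helpers the patch touches.

/* Illustrative only -- not from the patch. */
static int example_insert_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
	int ret;
	MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end - 1);

	mmap_assert_write_locked(mm);	/* mmap_lock rules are unchanged */

	mas_lock(&mas);			/* caller now owns the maple state lock ... */
	ret = vma_mas_store(vma, &mas);	/* ... the helper only performs the store */
	mas_unlock(&mas);

	return ret;			/* 0 on success, or an error from mas_store_gfp() */
}

Holding the lock in the caller is what lets paths such as brk() and do_mas_align_munmap() cover several tree operations with a single lock/unlock pair and release it before mmap_write_downgrade().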