From c22e7f5038bc42f9f7cd9b47a4d4cfcfed76ffaa Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Thu, 16 Jan 2020 11:06:42 -0500 Subject: [PATCH] mm: Add maple tree to init-mm,mmap, mprotect, mm_types Signed-off-by: Liam R. Howlett --- include/linux/mm_types.h | 2 + include/linux/mmdebug.h | 2 +- init/main.c | 2 + mm/init-mm.c | 2 + mm/mmap.c | 398 +++++++++++++++++++++++++++++++++++---- mm/mprotect.c | 1 + 6 files changed, 367 insertions(+), 40 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5a9238f6caad..f2c725e1052f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -388,6 +389,7 @@ struct kioctx_table; struct mm_struct { struct { struct vm_area_struct *mmap; /* list of VMAs */ + struct maple_tree mm_mt; struct rb_root mm_rb; u64 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 2ad72d2c8cc5..48ed391c41d5 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -13,7 +13,7 @@ extern void dump_page(struct page *page, const char *reason); extern void __dump_page(struct page *page, const char *reason); void dump_vma(const struct vm_area_struct *vma); void dump_mm(const struct mm_struct *mm); - +#define CONFIG_DEBUG_VM #ifdef CONFIG_DEBUG_VM #define VM_BUG_ON(cond) BUG_ON(cond) #define VM_BUG_ON_PAGE(cond, page) \ diff --git a/init/main.c b/init/main.c index 32b2a8affafd..506bcdbeb02d 100644 --- a/init/main.c +++ b/init/main.c @@ -114,6 +114,7 @@ static int kernel_init(void *); extern void init_IRQ(void); extern void radix_tree_init(void); +extern void maple_tree_init(void); /* * Debug helper: via this flag we know that we are in 'early bootup code' @@ -923,6 +924,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); radix_tree_init(); + maple_tree_init(); /* * Set up housekeeping before setting up workqueues to allow the unbound diff --git a/mm/init-mm.c b/mm/init-mm.c index 3a613c85f9ed..27229044a070 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include @@ -28,6 +29,7 @@ */ struct mm_struct init_mm = { .mm_rb = RB_ROOT, + .mm_mt = MTREE_INIT(mm_mt, MAPLE_ALLOC_RANGE), .pgd = swapper_pg_dir, .mm_users = ATOMIC_INIT(2), .mm_count = ATOMIC_INIT(1), diff --git a/mm/mmap.c b/mm/mmap.c index 5c8b4485860d..2d04d0a948b7 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -122,6 +122,7 @@ static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags) } /* Update vma->vm_page_prot to reflect vma->vm_flags. */ +// LRH: Needed void vma_set_page_prot(struct vm_area_struct *vma) { unsigned long vm_flags = vma->vm_flags; @@ -139,6 +140,7 @@ void vma_set_page_prot(struct vm_area_struct *vma) /* * Requires inode->i_mapping->i_mmap_rwsem */ +// LRH: Needed static void __remove_shared_vm_struct(struct vm_area_struct *vma, struct file *file, struct address_space *mapping) { @@ -156,6 +158,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma, * Unlink a file-based vm structure from its interval tree, to hide * vma from rmap and vmtruncate before freeing its page tables. 
*/ +// LRH: Needed void unlink_file_vma(struct vm_area_struct *vma) { struct file *file = vma->vm_file; @@ -171,6 +174,7 @@ void unlink_file_vma(struct vm_area_struct *vma) /* * Close a vm structure and free it, returning the next. */ +// LRH: Needed static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) { struct vm_area_struct *next = vma->vm_next; @@ -185,6 +189,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) return next; } +// LRH: Needed static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf); SYSCALL_DEFINE1(brk, unsigned long, brk) @@ -286,6 +291,7 @@ out: return retval; } +// LRH: not needed. static inline unsigned long vma_compute_gap(struct vm_area_struct *vma) { unsigned long gap, prev_end; @@ -306,8 +312,9 @@ static inline unsigned long vma_compute_gap(struct vm_area_struct *vma) } return gap; } - -#ifdef CONFIG_DEBUG_VM_RB +#define CONFIG_DEBUG_VM_RB +#define CONFIG_DEBUG_MAPLE_TREE +#if defined(CONFIG_DEBUG_VM_RB) || defined(CONFIG_DEBUG_MAPLE_TREE) static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma) { unsigned long max = vma_compute_gap(vma), subtree_gap; @@ -373,7 +380,40 @@ static int browse_rb(struct mm_struct *mm) } return bug ? -1 : i; } +extern void mt_dump(const struct maple_tree *mt); + +/* Validate the maple tree + * + */ +static void validate_mm_mt(struct mm_struct *mm, + struct vm_area_struct *ignore) +{ + struct maple_tree *mt = &mm->mm_mt; + struct vm_area_struct *vma_mt, *vma = mm->mmap; + MA_STATE(mas, mt, 0, 0); + rcu_read_lock(); + mas_for_each(&mas, vma_mt, ULONG_MAX) { + if (mas_retry(&mas, vma_mt)) + continue; + + if (!vma) + break; + if (vma != vma_mt) { + pr_emerg("mt: %px %lu - %lu\n", vma_mt, + vma_mt->vm_start, vma_mt->vm_end); + pr_emerg("rb: %px %lu - %lu\n", vma, + vma->vm_start, vma->vm_end); + } + VM_BUG_ON(vma != vma_mt); + if (vma) + vma = vma->vm_next; + } + VM_BUG_ON(vma); + + rcu_read_unlock(); + //printk("%s: done\n", __func__); +} static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore) { struct rb_node *nd; @@ -386,7 +426,6 @@ static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore) vma); } } - static void validate_mm(struct mm_struct *mm) { int bug = 0; @@ -394,21 +433,54 @@ static void validate_mm(struct mm_struct *mm) unsigned long highest_address = 0; struct vm_area_struct *vma = mm->mmap; + struct maple_tree *mt = &mm->mm_mt; + MA_STATE(mas, mt, 0, 0); + struct vm_area_struct *entry = NULL; + unsigned long mt_highest_address = 0; + int mt_i = 0; + + + //printk("%s: vma linked list\n", __func__); while (vma) { struct anon_vma *anon_vma = vma->anon_vma; struct anon_vma_chain *avc; + //pr_cont("vma: %lu-%lu", vma->vm_start, vma->vm_end); if (anon_vma) { + // pr_cont(" anon"); anon_vma_lock_read(anon_vma); - list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) - anon_vma_interval_tree_verify(avc); +// list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) +// anon_vma_interval_tree_verify(avc); anon_vma_unlock_read(anon_vma); } + //pr_cont("\n"); highest_address = vm_end_gap(vma); vma = vma->vm_next; i++; } + //printk("%s: mas for each\n", __func__); + rcu_read_lock(); + mas_for_each(&mas, entry, ULONG_MAX) { + if (mas_retry(&mas, entry)) + continue; + + // printk("vma: %lu-%lu\t%lu-%lu\n", entry->vm_start, entry->vm_end, + // mas.index, mas.last); + VM_BUG_ON_MM(entry->vm_end != mas.last + 1, mm); + VM_BUG_ON_MM(entry->vm_start != mas.index, mm); + mt_highest_address = 
vm_end_gap(entry); + mt_i++; + } + rcu_read_unlock(); + //printk("%s: mas for each done\n", __func__); + if (i != mt_i) { + pr_emerg("%s: %d != %d\n", __func__, i, mt_i); + mt_dump(mas.tree); + } + VM_BUG_ON_MM(i != mt_i, mm); + VM_BUG_ON_MM(mt_highest_address != highest_address, mm); + if (i != mm->map_count) { pr_emerg("map_count %d vm_next %d\n", mm->map_count, i); bug = 1; @@ -440,6 +512,7 @@ RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks, * vma->vm_prev->vm_end values changed, without modifying the vma's position * in the rbtree. */ +// LRH: Not needed static void vma_gap_update(struct vm_area_struct *vma) { /* @@ -449,15 +522,18 @@ static void vma_gap_update(struct vm_area_struct *vma) vma_gap_callbacks_propagate(&vma->vm_rb, NULL); } +// LRH: Not needed static inline void vma_rb_insert(struct vm_area_struct *vma, struct rb_root *root) { /* All rb_subtree_gap values must be consistent prior to insertion */ validate_mm_rb(root, NULL); + //printk("insert augmented %lu-%lu\n", vma->vm_start, vma->vm_end); rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks); } +// LRH: Make sure everywhere uses an mt_erase too.. static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root) { /* @@ -468,6 +544,7 @@ static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root) rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks); } +// LRH: Not needed static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma, struct rb_root *root, struct vm_area_struct *ignore) @@ -539,6 +616,8 @@ static int find_vma_links(struct mm_struct *mm, unsigned long addr, __rb_parent = *__rb_link; vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb); + //printk("%s: checking %lu-%lu\n", __func__, + // vma_tmp->vm_start, vma_tmp->vm_end); if (vma_tmp->vm_end > addr) { /* Fail if an existing vma overlaps the area */ if (vma_tmp->vm_start < end) @@ -602,10 +681,39 @@ munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len, return 0; } + +/* Private + * clean_overlaps() - Call do_munmap if there exists any mapping within @start + * to @end. Sets @pprev to the previous entry or NULL if none exists. 
+ * + */ +static int clean_overlaps(struct mm_struct *mm, unsigned long start, + unsigned long len, struct vm_area_struct **pprev, + struct list_head *uf) +{ + struct vm_area_struct *vma; + MA_STATE(mas, &mm->mm_mt, start, start); + + *pprev = NULL; + + rcu_read_lock(); + vma = mas_find(&mas, start + len); + if (vma) + *pprev = mas_prev(&mas, 0); + rcu_read_unlock(); + + if (vma) { + if (do_munmap(mm, start, len, uf)) + return -ENOMEM; + } + return 0; +} + static unsigned long count_vma_pages_range(struct mm_struct *mm, unsigned long addr, unsigned long end) { unsigned long nr_pages = 0; + unsigned long nr_mt_pages = 0; struct vm_area_struct *vma; /* Find first overlaping mapping */ @@ -627,9 +735,17 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm, nr_pages += overlap_len >> PAGE_SHIFT; } + mt_for_each(&mm->mm_mt, vma, addr, end) { + nr_mt_pages += + (min(end, vma->vm_end) - vma->vm_start) >> PAGE_SHIFT; + } + + VM_BUG_ON_MM(nr_pages != nr_mt_pages, mm); + return nr_pages; } +// LRH: Not needed void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, struct rb_node **rb_link, struct rb_node *rb_parent) { @@ -654,6 +770,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, vma_rb_insert(vma, &mm->mm_rb); } +// LRH: Needed static void __vma_link_file(struct vm_area_struct *vma) { struct file *file; @@ -672,14 +789,44 @@ static void __vma_link_file(struct vm_area_struct *vma) flush_dcache_mmap_unlock(mapping); } } +static void __vma_mt_dump(struct mm_struct *mm) +{ + MA_STATE(mas, &mm->mm_mt, 0, 0); + struct vm_area_struct *entry = NULL; + rcu_read_lock(); + mas_for_each(&mas, entry, ULONG_MAX) { + if (mas_retry(&mas, entry)) + continue; + + printk("vma: %lu-%lu\t%lu-%lu\n", entry->vm_start, entry->vm_end, + mas.index, mas.last); + } + rcu_read_unlock(); +} +static void __vma_mt_erase(struct mm_struct *mm, struct vm_area_struct *vma) +{ + printk("mt_mod %px ERASE, %lu, %lu,\n", + mm, vma->vm_start, vma->vm_start); + mtree_erase(&mm->mm_mt, vma->vm_start); +} +static void __vma_mt_store(struct mm_struct *mm, struct vm_area_struct *vma) +{ + printk("mt_mod %px STORE, %lu, %lu,\n", + mm, vma->vm_start, vma->vm_end - 1); + mtree_store_range(&mm->mm_mt, vma->vm_start, vma->vm_end - 1, vma, + GFP_KERNEL); +} +// LRH: Needed - update linked list, should fine. static void __vma_link(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, struct rb_node **rb_link, struct rb_node *rb_parent) { + __vma_mt_store(mm, vma); __vma_link_list(mm, vma, prev); __vma_link_rb(mm, vma, rb_link, rb_parent); + validate_mm_mt(mm, NULL); } static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, @@ -702,29 +849,50 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, mm->map_count++; validate_mm(mm); } - /* * Helper for vma_adjust() in the split_vma insert case: insert a vma into the * mm's list and rbtree. It has already been inserted into the interval tree. 
*/ +extern void mt_dump(const struct maple_tree *mt); static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) { struct vm_area_struct *prev; struct rb_node **rb_link, *rb_parent; + unsigned long vm_start = vma->vm_start; + struct vm_area_struct *overlap = NULL; if (find_vma_links(mm, vma->vm_start, vma->vm_end, &prev, &rb_link, &rb_parent)) BUG(); + + //printk("going to insert %lx: vma %lu-%lu\n", (unsigned long) current, vma->vm_start, vma->vm_end); + if ((overlap = mt_find(&mm->mm_mt, &vm_start, vma->vm_end - 1, true)) != NULL) { + /* + printk("Found vma ending at %lu\n", vm_start - 1); + printk("vma : %lu => %lu-%lu\n", (unsigned long)overlap, + overlap->vm_start, overlap->vm_end); + printk("rbtree:\n"); + */ + browse_rb(mm); +#define CONFIG_DEBUG_MAPLE_TREE + //mt_dump(&mm->mm_mt); + } __vma_link(mm, vma, prev, rb_link, rb_parent); mm->map_count++; } +// LRH: Fixed. static __always_inline void __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *ignore) { vma_rb_erase_ignore(vma, &mm->mm_rb, ignore); __vma_unlink_list(mm, vma); + __vma_mt_erase(mm, vma); /* Kill the cache */ vmacache_invalidate(mm); } @@ -750,6 +918,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, long adjust_next = 0; int remove_next = 0; + validate_mm(mm); if (next && !insert) { struct vm_area_struct *exporter = NULL, *importer = NULL; @@ -859,12 +1028,14 @@ again: if (!anon_vma && adjust_next) anon_vma = next->anon_vma; if (anon_vma) { + browse_rb(mm); VM_WARN_ON(adjust_next && next->anon_vma && anon_vma != next->anon_vma); anon_vma_lock_write(anon_vma); anon_vma_interval_tree_pre_update_vma(vma); if (adjust_next) anon_vma_interval_tree_pre_update_vma(next); + browse_rb(mm); } if (file) { @@ -876,16 +1047,31 @@ again: if (start != vma->vm_start) { vma->vm_start = start; + if (vma->vm_start < start) + __vma_mt_erase(mm, vma); start_changed = true; } if (end != vma->vm_end) { + if (vma->vm_end > end) + __vma_mt_erase(mm, vma); vma->vm_end = end; end_changed = true; } + + if (end_changed || start_changed) { + __vma_mt_store(mm, vma); + } + vma->vm_pgoff = pgoff; if (adjust_next) { next->vm_start += adjust_next; next->vm_pgoff += adjust_next >> PAGE_SHIFT; + __vma_mt_store(mm, next); } if (file) { @@ -925,6 +1111,7 @@ again: } else { if (start_changed) vma_gap_update(vma); + if (end_changed) { if (!next) mm->highest_vm_end = vm_end_gap(vma); @@ -943,7 +1130,6 @@ again: if (file) { i_mmap_unlock_write(mapping); uprobe_mmap(vma); - if (adjust_next) uprobe_mmap(next); } @@ -1154,6 +1340,18 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags, * parameter) may establish ptes with the wrong permissions of NNNN * instead of the right permissions of XXXX. */ + +//LRH: +// p = prev, n = next, a = add, nn = next next +// 0. Adding page over partial p, cannot merge +// 1. Adding page between p and n, all become p +// 2. Adding page between p and n, a merges with p +// 3. Adding page between p and n, a merges with n +// 4. Adding page over p, a merges with n +// 5. Adding page over n, a merges with p +// 6. Adding page over all of n, p-a-nn all become p +// 7. 
Adding page over all of n, p-a all become p +// 8. Adding page over all of n, a-nn all become nn. struct vm_area_struct *vma_merge(struct mm_struct *mm, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, @@ -1177,6 +1375,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, if (area && area->vm_end == end) /* cases 6, 7, 8 */ next = next->vm_next; + //printk("%s: %lx: vma %lu-%lu\n", __func__, (unsigned long) current, addr, end); /* verify some invariant that must be enforced by the caller */ VM_WARN_ON(prev && addr <= prev->vm_start); VM_WARN_ON(area && end > area->vm_end); @@ -1222,10 +1421,10 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, can_vma_merge_before(next, vm_flags, anon_vma, file, pgoff+pglen, vm_userfaultfd_ctx)) { - if (prev && addr < prev->vm_end) /* case 4 */ + if (prev && addr < prev->vm_end){ /* case 4 */ err = __vma_adjust(prev, prev->vm_start, addr, prev->vm_pgoff, NULL, next); - else { /* cases 3, 8 */ + }else { /* cases 3, 8 */ err = __vma_adjust(area, addr, next->vm_end, next->vm_pgoff - pglen, NULL, next); /* @@ -1929,6 +2128,7 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info) struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long length, low_limit, high_limit, gap_start, gap_end; + MA_STATE(mas, &mm->mm_mt, 0, 0); /* Adjust search length to account for worst case alignment overhead */ length = info->length + info->align_mask; @@ -1944,6 +2144,12 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info) return -ENOMEM; low_limit = info->low_limit + length; + // Maple tree is self contained. + rcu_read_lock(); + if (mas_get_unmapped_area(&mas, low_limit, high_limit, length)) + return -ENOMEM; + rcu_read_unlock(); + /* Check if rbtree root looks promising */ if (RB_EMPTY_ROOT(&mm->mm_rb)) goto check_highest; @@ -2016,6 +2222,8 @@ found: VM_BUG_ON(gap_start + info->length > info->high_limit); VM_BUG_ON(gap_start + info->length > gap_end); + + //VM_BUG_ON(mas.index != gap_start); return gap_start; } @@ -2024,6 +2232,7 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long length, low_limit, high_limit, gap_start, gap_end; + MA_STATE(mas, &mm->mm_mt, 0, 0); /* Adjust search length to account for worst case alignment overhead */ length = info->length + info->align_mask; @@ -2043,6 +2252,12 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) return -ENOMEM; low_limit = info->low_limit + length; + + rcu_read_lock(); + if (mas_get_unmapped_area_rev(&mas, low_limit, high_limit, length)) + return -ENOMEM; + rcu_read_unlock(); + /* Check highest gap, which does not precede any rbtree node */ gap_start = mm->highest_vm_end; if (gap_start <= high_limit) @@ -2115,6 +2330,8 @@ found_highest: VM_BUG_ON(gap_end < info->low_limit); VM_BUG_ON(gap_end < gap_start); + + VM_BUG_ON(mas.last + 1 != gap_end); return gap_end; } @@ -2296,8 +2513,35 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, EXPORT_SYMBOL(get_unmapped_area); +/** + * mt_find_vma() - Find the VMA for a given address, or the next vma. May return + * NULL in the case of no vma at addr or above + * @mm The mm_struct to check + * @addr: The address + * + * Returns: The VMA assoicated with addr, or the next vma. + * May return NULL in the case of no vma at addr or above. 
+ */ +struct vm_area_struct *mt_find_vma(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma; + + //printk("%s: looking up %lu\n", __func__, addr); + /* Check the cache first. */ + vma = vmacache_find(mm, addr); + if (likely(vma)) + return vma; + + vma = mt_find(&mm->mm_mt, &addr, ULONG_MAX, 0); + //printk("Found %lu\n", (unsigned long)vma); + + if (vma) + vmacache_update(addr, vma); + return vma; +} + /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ -struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) +struct vm_area_struct *rb_find_vma(struct mm_struct *mm, unsigned long addr) { struct rb_node *rb_node; struct vm_area_struct *vma; @@ -2328,13 +2572,49 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) return vma; } +struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *ret = rb_find_vma(mm, addr); + struct vm_area_struct *ret2 = mt_find_vma(mm, addr); + if (ret != ret2) + printk("%px %lu: ret %px ret2 %px\n", mm, addr, ret, ret2); + VM_BUG_ON_VMA((unsigned long)ret != (unsigned long)ret2 , ret); + return ret; +} + EXPORT_SYMBOL(find_vma); + +/** + * mt_find_vma_prev() - Find the VMA for a given address, or the next vma and + * sets %pprev to the previous VMA, if any. + * @mm The mm_struct to check + * @addr: The address + * @pprev: The pointer to set to the previous VMA + * + * Returns: The VMA associated with @addr, or the next vma. + * May return NULL in the case of no vma at addr or above. + */ +struct vm_area_struct * +mt_find_vma_prev(struct mm_struct *mm, unsigned long addr, + struct vm_area_struct **pprev) +{ + struct vm_area_struct *vma; + MA_STATE(mas, &mm->mm_mt, addr, addr); + + rcu_read_lock(); + vma = mas_find(&mas, ULONG_MAX); + *pprev = mas_prev(&mas, 0); + rcu_read_unlock(); + return vma; +} + /* * Same as find_vma, but also return a pointer to the previous VMA in *pprev. */ + struct vm_area_struct * -find_vma_prev(struct mm_struct *mm, unsigned long addr, +rb_find_vma_prev(struct mm_struct *mm, unsigned long addr, struct vm_area_struct **pprev) { struct vm_area_struct *vma; @@ -2345,11 +2625,25 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr, } else { struct rb_node *rb_node = rb_last(&mm->mm_rb); - *pprev = rb_node ? rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL; + *pprev = rb_node ? + rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL; } return vma; } + +struct vm_area_struct * +find_vma_prev(struct mm_struct *mm, unsigned long addr, + struct vm_area_struct **pprev) +{ + struct vm_area_struct *mt_prev; + struct vm_area_struct *ret = rb_find_vma_prev(mm, addr, pprev); + VM_BUG_ON_VMA((unsigned long)ret != + (unsigned long)mt_find_vma_prev(mm, addr, &mt_prev), + ret); + VM_BUG_ON_VMA(mt_prev != *pprev, *pprev); + return ret; +} /* * Verify that the stack growth is acceptable and * update accounting. This is shared with both the @@ -2411,6 +2705,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) if (!(vma->vm_flags & VM_GROWSUP)) return -EFAULT; + printk("%s: %lu-%lu expand to %lu-%lu\n", __func__, vma->vm_start, vma->vm_end, + vma->vm_start, address); /* Guard against exceeding limits of the address space. 
*/ address &= PAGE_MASK; if (address >= (TASK_SIZE & PAGE_MASK)) @@ -2498,10 +2794,12 @@ int expand_downwards(struct vm_area_struct *vma, struct vm_area_struct *prev; int error = 0; + validate_mm(mm); address &= PAGE_MASK; if (address < mmap_min_addr) return -EPERM; + /* Enforce stack_guard_gap */ prev = vma->vm_prev; /* Check that both stack segments have the same anon_vma? */ @@ -2530,35 +2828,40 @@ int expand_downwards(struct vm_area_struct *vma, grow = (vma->vm_start - address) >> PAGE_SHIFT; error = -ENOMEM; - if (grow <= vma->vm_pgoff) { - error = acct_stack_growth(vma, size, grow); - if (!error) { - /* - * vma_gap_update() doesn't support concurrent - * updates, but we only hold a shared mmap_lock - * lock here, so we need to protect against - * concurrent vma expansions. - * anon_vma_lock_write() doesn't help here, as - * we don't guarantee that all growable vmas - * in a mm share the same root anon vma. - * So, we reuse mm->page_table_lock to guard - * against concurrent vma expansions. - */ - spin_lock(&mm->page_table_lock); - if (vma->vm_flags & VM_LOCKED) - mm->locked_vm += grow; - vm_stat_account(mm, vma->vm_flags, grow); - anon_vma_interval_tree_pre_update_vma(vma); - vma->vm_start = address; - vma->vm_pgoff -= grow; - anon_vma_interval_tree_post_update_vma(vma); - vma_gap_update(vma); - spin_unlock(&mm->page_table_lock); + if (grow > vma->vm_pgoff) + goto no_update; - perf_event_mmap(vma); - } - } + error = acct_stack_growth(vma, size, grow); + if (error) + goto no_update; + /* + * vma_gap_update() doesn't support concurrent + * updates, but we only hold a shared mmap_sem + * lock here, so we need to protect against + * concurrent vma expansions. + * anon_vma_lock_write() doesn't help here, as + * we don't guarantee that all growable vmas + * in a mm share the same root anon vma. + * So, we reuse mm->page_table_lock to guard + * against concurrent vma expansions. + */ + spin_lock(&mm->page_table_lock); + if (vma->vm_flags & VM_LOCKED) + mm->locked_vm += grow; + vm_stat_account(mm, vma->vm_flags, grow); + anon_vma_interval_tree_pre_update_vma(vma); + vma->vm_start = address; + vma->vm_pgoff -= grow; + // Overwrite old entry in mtree. + mtree_store_range(&mm->mm_mt, vma->vm_start, vma->vm_end - 1, + vma, GFP_KERNEL); + anon_vma_interval_tree_post_update_vma(vma); + vma_gap_update(vma); + spin_unlock(&mm->page_table_lock); + + perf_event_mmap(vma); } +no_update: anon_vma_unlock_write(vma->anon_vma); khugepaged_enter_vma_merge(vma, vma->vm_flags); validate_mm(mm); @@ -2694,6 +2997,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, vma->vm_prev = NULL; do { vma_rb_erase(vma, &mm->mm_rb); + __vma_mt_erase(mm, vma); mm->map_count--; tail_vma = vma; vma = vma->vm_next; @@ -3088,6 +3392,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf)) return -ENOMEM; + /* Check against address space limits *after* clearing old maps... 
*/ if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT)) return -ENOMEM; @@ -3242,10 +3547,22 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) { struct vm_area_struct *prev; struct rb_node **rb_link, *rb_parent; + unsigned long start = vma->vm_start; + struct vm_area_struct *overlap = NULL; if (find_vma_links(mm, vma->vm_start, vma->vm_end, &prev, &rb_link, &rb_parent)) return -ENOMEM; + + //printk("%s: insert %lx: vma %lu-%lu\n", __func__, (unsigned long) current, vma->vm_start, vma->vm_end); + if ((overlap = mt_find(&mm->mm_mt, &start, vma->vm_end - 1, true)) != NULL) { + printk("Found vma ending at %lu\n", start - 1); + printk("vma : %lu => %lu-%lu\n", (unsigned long)overlap, + overlap->vm_start, overlap->vm_end - 1); + mt_dump(&mm->mm_mt); + BUG(); + } + if ((vma->vm_flags & VM_ACCOUNT) && security_vm_enough_memory_mm(mm, vma_pages(vma))) return -ENOMEM; @@ -3285,6 +3602,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, struct vm_area_struct *new_vma, *prev; struct rb_node **rb_link, *rb_parent; bool faulted_in_anon_vma = true; + unsigned long index = addr; /* * If anonymous vma has not yet been faulted, update new pgoff @@ -3297,6 +3615,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) return NULL; /* should never get here */ + if (mt_find(&mm->mm_mt, &index, addr+len - 1, true)) + BUG(); new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), vma->vm_userfaultfd_ctx); diff --git a/mm/mprotect.c b/mm/mprotect.c index 56c02beb6041..1505664dbbd4 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -407,6 +407,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, int error; int dirty_accountable = 0; + printk("vma area %lu-%lu\n", vma->vm_start, vma->vm_end); if (newflags == oldflags) { *pprev = vma; return 0; -- 2.50.1
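For readers new to the structure, here is a minimal sketch of how the three maple tree operations this patch leans on fit together: storing a VMA-sized object under the range [start, end - 1] with mtree_store_range(), erasing it by any index inside that range with mtree_erase(), and walking all entries under RCU with an MA_STATE/mas_for_each() loop, mirroring __vma_mt_store(), __vma_mt_erase() and the validate_mm() walk above. It assumes the maple tree API as introduced by this series (MTREE_INIT, MAPLE_ALLOC_RANGE, mas_retry); the demo_* names and the demo_range struct are illustrative only and not part of the patch.

#include <linux/maple_tree.h>
#include <linux/rcupdate.h>
#include <linux/gfp.h>
#include <linux/printk.h>

/* Illustrative stand-in for a vm_area_struct-like range object. */
struct demo_range {
	unsigned long start;	/* first byte, inclusive */
	unsigned long end;	/* one past the last byte, like vm_end */
};

static struct maple_tree demo_mt = MTREE_INIT(demo_mt, MAPLE_ALLOC_RANGE);

/* Mirrors __vma_mt_store(): index the object by [start, end - 1]. */
static void demo_store(struct demo_range *r)
{
	mtree_store_range(&demo_mt, r->start, r->end - 1, r, GFP_KERNEL);
}

/* Mirrors __vma_mt_erase(): any index inside the range removes the entry. */
static void demo_erase(struct demo_range *r)
{
	mtree_erase(&demo_mt, r->start);
}

/* Mirrors the validate_mm() walk: visit every stored range under RCU. */
static void demo_walk(void)
{
	MA_STATE(mas, &demo_mt, 0, 0);
	struct demo_range *r;

	rcu_read_lock();
	mas_for_each(&mas, r, ULONG_MAX) {
		if (mas_retry(&mas, r))
			continue;
		pr_info("range %lu-%lu stored at %lu-%lu\n",
			r->start, r->end, mas.index, mas.last);
	}
	rcu_read_unlock();
}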