From: Liam Howlett Date: Thu, 27 Feb 2020 20:14:15 +0000 (-0500) Subject: mm: Add maple tree VMA tree X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=4098e28721d60ac0b5c2f1624f0d34b464eeab64;p=users%2Fjedix%2Flinux-maple.git mm: Add maple tree VMA tree Add a maple tree to parallel the rbtree. All operations are carried out in duplicate and verified to be sure they're returning the same answer. Signed-off-by: Liam Howlett Signed-off-by: Matthew Wilcox (Oracle) --- diff --git a/include/linux/mm.h b/include/linux/mm.h index dc7b87310c10..bbfb5cf28a72 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2472,6 +2472,8 @@ extern bool arch_has_descending_max_zone_pfns(void); /* nommu.c */ extern atomic_long_t mmap_pages_allocated; extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); +/* maple_tree */ +void vma_store(struct mm_struct *mm, struct vm_area_struct *vma); /* interval_tree.c */ void vma_interval_tree_insert(struct vm_area_struct *node, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 64ede5f150dc..90021d0c0530 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -384,6 +385,7 @@ struct kioctx_table; struct mm_struct { struct { struct vm_area_struct *mmap; /* list of VMAs */ + struct maple_tree mm_mt; struct rb_root mm_rb; u64 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU diff --git a/kernel/fork.c b/kernel/fork.c index efc5493203ae..8b38eebf7e10 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -604,6 +604,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, rb_link = &tmp->vm_rb.rb_right; rb_parent = &tmp->vm_rb; + /* Link the vma into the MT */ + vma_store(mm, tmp); + mm->map_count++; if (!(tmp->vm_flags & VM_WIPEONFORK)) retval = copy_page_range(mm, oldmm, mpnt); @@ -1019,6 +1022,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, { mm->mmap = NULL; mm->mm_rb = RB_ROOT; + mt_init_flags(&mm->mm_mt, MAPLE_ALLOC_RANGE); mm->vmacache_seqnum = 0; atomic_set(&mm->mm_users, 1); atomic_set(&mm->mm_count, 1); diff --git a/mm/init-mm.c b/mm/init-mm.c index 3a613c85f9ed..27229044a070 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include @@ -28,6 +29,7 @@ */ struct mm_struct init_mm = { .mm_rb = RB_ROOT, + .mm_mt = MTREE_INIT(mm_mt, MAPLE_ALLOC_RANGE), .pgd = swapper_pg_dir, .mm_users = ATOMIC_INIT(2), .mm_count = ATOMIC_INIT(1), diff --git a/mm/mmap.c b/mm/mmap.c index 59a4682ebf3f..67e182e995a6 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -57,6 +57,12 @@ #include #include "internal.h" +#define CREATE_TRACE_POINTS +#include +#define CONFIG_DEBUG_MAPLE_TREE +#undef CONFIG_DEBUG_MAPLE_TREE_VERBOSE +#define CONFIG_DEBUG_VM_RB 1 +extern void mt_validate(struct maple_tree *mt); #ifndef arch_mmap_check #define arch_mmap_check(addr, len, flags) (0) @@ -122,6 +128,7 @@ static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags) } /* Update vma->vm_page_prot to reflect vma->vm_flags. 
*/ +// LRH: Needed void vma_set_page_prot(struct vm_area_struct *vma) { unsigned long vm_flags = vma->vm_flags; @@ -139,6 +146,7 @@ void vma_set_page_prot(struct vm_area_struct *vma) /* * Requires inode->i_mapping->i_mmap_rwsem */ +// LRH: Needed static void __remove_shared_vm_struct(struct vm_area_struct *vma, struct file *file, struct address_space *mapping) { @@ -156,6 +164,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma, * Unlink a file-based vm structure from its interval tree, to hide * vma from rmap and vmtruncate before freeing its page tables. */ +// LRH: Needed void unlink_file_vma(struct vm_area_struct *vma) { struct file *file = vma->vm_file; @@ -171,6 +180,7 @@ void unlink_file_vma(struct vm_area_struct *vma) /* * Close a vm structure and free it, returning the next. */ +// LRH: Needed static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) { struct vm_area_struct *next = vma->vm_next; @@ -185,6 +195,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) return next; } +// LRH: Needed static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf); SYSCALL_DEFINE1(brk, unsigned long, brk) @@ -286,6 +297,7 @@ out: return retval; } +// LRH: not needed. static inline unsigned long vma_compute_gap(struct vm_area_struct *vma) { unsigned long gap, prev_end; @@ -373,7 +385,98 @@ static int browse_rb(struct mm_struct *mm) } return bug ? -1 : i; } +#if defined(CONFIG_DEBUG_MAPLE_TREE) +extern void mt_dump(const struct maple_tree *mt); + +#if 0 +static void __vma_mt_dump(struct mm_struct *mm) +{ + struct vm_area_struct *entry = NULL; + + MA_STATE(mas, &mm->mm_mt, 0, 0); + + rcu_read_lock(); + mas_for_each(&mas, entry, ULONG_MAX) { + if (xa_is_zero(entry)) + continue; + + pr_debug("vma: %lu-%lu\t%lu-%lu\n", entry->vm_start, + entry->vm_end, mas.index, mas.last); + } + rcu_read_unlock(); +} +#endif +/* + * Validate the maple tree + */ +static void validate_mm_mt(struct mm_struct *mm, + struct vm_area_struct *ignore) +{ + struct maple_tree *mt = &mm->mm_mt; + struct vm_area_struct *vma_mt, *vma = mm->mmap; + + MA_STATE(mas, mt, 0, 0); + rcu_read_lock(); + mas_for_each(&mas, vma_mt, ULONG_MAX) { + if (xa_is_zero(vma_mt)) + continue; + + if (vma && vma == ignore) + vma = vma->vm_next; + + if (!vma) + break; + + if ((vma != vma_mt) || + (vma->vm_start != vma_mt->vm_start) || + (vma->vm_end != vma_mt->vm_end) || + (vma->vm_start != mas.index) || + (vma->vm_end -1 != mas.last)){ + pr_emerg("issue in %s\n", current->comm); + dump_stack(); +#ifdef CONFIG_DEBUG_VM + dump_vma(vma_mt); + pr_emerg("and next in rb\n"); + dump_vma(vma->vm_next); +#endif + pr_emerg("mt piv: %px %lu - %lu\n", vma_mt, + mas.index, mas.last); + pr_emerg("mt vma: %px %lu - %lu\n", vma_mt, + vma_mt->vm_start, vma_mt->vm_end); + pr_emerg("rb vma: %px %lu - %lu\n", vma, + vma->vm_start, vma->vm_end); + if (ignore) + pr_emerg("rb_skip %px %lu - %lu\n", ignore, + ignore->vm_start, ignore->vm_end); + pr_emerg("rb->next = %px %lu - %lu\n", vma->vm_next, + vma->vm_next->vm_start, vma->vm_next->vm_end); + + mt_dump(mas.tree); + if (vma_mt->vm_end != mas.last + 1) { + pr_err("vma: %px vma_mt %lu-%lu\tmt %lu-%lu\n", + mm, vma_mt->vm_start, vma_mt->vm_end, + mas.index, mas.last); + mt_dump(mas.tree); + } + VM_BUG_ON_MM(vma_mt->vm_end != mas.last + 1, mm); + if (vma_mt->vm_start != mas.index) { + pr_err("vma: %px vma_mt %px %lu - %lu doesn't match\n", + mm, vma_mt, vma_mt->vm_start, vma_mt->vm_end); + mt_dump(mas.tree); + } + 
VM_BUG_ON_MM(vma_mt->vm_start != mas.index, mm); + } + VM_BUG_ON(vma != vma_mt); + vma = vma->vm_next; + + } + VM_BUG_ON(vma); + + rcu_read_unlock(); + mt_validate(&mm->mm_mt); +} +#endif static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore) { struct rb_node *nd; @@ -396,14 +499,17 @@ static void validate_mm(struct mm_struct *mm) while (vma) { struct anon_vma *anon_vma = vma->anon_vma; - struct anon_vma_chain *avc; +// struct anon_vma_chain *avc; +// pr_cont("vma: %lu-%lu", vma->vm_start, vma->vm_end); if (anon_vma) { +// pr_cont(" anon"); anon_vma_lock_read(anon_vma); - list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) - anon_vma_interval_tree_verify(avc); +// list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) +// anon_vma_interval_tree_verify(avc); anon_vma_unlock_read(anon_vma); } +// pr_cont("\n"); highest_address = vm_end_gap(vma); vma = vma->vm_next; @@ -428,6 +534,7 @@ static void validate_mm(struct mm_struct *mm) } #else #define validate_mm_rb(root, ignore) do { } while (0) +#define validate_mm_mt(root, ignore) do { } while (0) #define validate_mm(mm) do { } while (0) #endif @@ -440,6 +547,7 @@ RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks, * vma->vm_prev->vm_end values changed, without modifying the vma's position * in the rbtree. */ +// LRH: Not needed static void vma_gap_update(struct vm_area_struct *vma) { /* @@ -449,6 +557,7 @@ static void vma_gap_update(struct vm_area_struct *vma) vma_gap_callbacks_propagate(&vma->vm_rb, NULL); } +// LRH: Not needed static inline void vma_rb_insert(struct vm_area_struct *vma, struct rb_root *root) { @@ -458,6 +567,7 @@ static inline void vma_rb_insert(struct vm_area_struct *vma, rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks); } +// LRH: Make sure everywhere uses an mt_erase too.. static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root) { /* @@ -468,6 +578,7 @@ static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root) rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks); } +// LRH: Not needed static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma, struct rb_root *root, struct vm_area_struct *ignore) @@ -560,10 +671,40 @@ static int find_vma_links(struct mm_struct *mm, unsigned long addr, return 0; } +#if 0 +/* Private + * clean_overlaps() - Call do_munmap if there exists any mapping within @start + * to @end. Sets @pprev to the previous entry or NULL if none exists. 
+ * + */ +static int clean_overlaps(struct mm_struct *mm, unsigned long start, + unsigned long len, struct vm_area_struct **pprev, + struct list_head *uf) +{ + struct vm_area_struct *vma; + MA_STATE(mas, &mm->mm_mt, start, start); + + *pprev = NULL; + + rcu_read_lock(); + vma = mas_find(&mas, start + len); + if (vma) + *pprev = mas_prev(&mas, 0); + rcu_read_unlock(); + + if (vma) { + if (do_munmap(mm, start, len, uf)) + return -ENOMEM; + } + return 0; +} +#endif + static unsigned long count_vma_pages_range(struct mm_struct *mm, unsigned long addr, unsigned long end) { unsigned long nr_pages = 0; + unsigned long nr_mt_pages = 0; struct vm_area_struct *vma; /* Find first overlaping mapping */ @@ -585,9 +726,17 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm, nr_pages += overlap_len >> PAGE_SHIFT; } + mt_for_each(&mm->mm_mt, vma, addr, end) { + nr_mt_pages += + (min(end, vma->vm_end) - vma->vm_start) >> PAGE_SHIFT; + } + + VM_BUG_ON_MM(nr_pages != nr_mt_pages, mm); + return nr_pages; } +// LRH: Not needed void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, struct rb_node **rb_link, struct rb_node *rb_parent) { @@ -612,6 +761,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, vma_rb_insert(vma, &mm->mm_rb); } +// LRH: Needed static void __vma_link_file(struct vm_area_struct *vma) { struct file *file; @@ -630,12 +780,48 @@ static void __vma_link_file(struct vm_area_struct *vma) flush_dcache_mmap_unlock(mapping); } } - +static void __vma_mt_erase(struct mm_struct *mm, struct vm_area_struct *vma) +{ + trace___vma_mt_erase(mm, vma); +#if defined(CONFIG_DEBUG_MAPLE_TREE_VERBOSE) + printk("mt_mod %px, (%px), ERASE, %lu, %lu,", mm, vma, vma->vm_start, + vma->vm_end - 1); +#endif + mtree_erase(&mm->mm_mt, vma->vm_start); + mt_validate(&mm->mm_mt); +} +static void __vma_mt_szero(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + trace___vma_mt_szero(mm, start, end); +#if defined(CONFIG_DEBUG_MAPLE_TREE_VERBOSE) + printk("mt_mod %px, (%px), SNULL, %lu, %lu,", mm, NULL, start, + end - 1); +#endif + mtree_store_range(&mm->mm_mt, start, end - 1, NULL, GFP_KERNEL); +} +static void __vma_mt_store(struct mm_struct *mm, struct vm_area_struct *vma) +{ + trace___vma_mt_store(mm, vma); +#if defined(CONFIG_DEBUG_MAPLE_TREE_VERBOSE) + printk("mt_mod %px, (%px), STORE, %lu, %lu,", mm, vma, vma->vm_start, + vma->vm_end - 1); +#endif + mtree_store_range(&mm->mm_mt, vma->vm_start, vma->vm_end - 1, vma, + GFP_KERNEL); + mt_validate(&mm->mm_mt); +} +void vma_store(struct mm_struct *mm, struct vm_area_struct *vma) +{ + __vma_mt_store(mm, vma); +} +// LRH: Needed - update linked list, should fine. static void __vma_link(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, struct rb_node **rb_link, struct rb_node *rb_parent) { + __vma_mt_store(mm, vma); __vma_link_list(mm, vma, prev); __vma_link_rb(mm, vma, rb_link, rb_parent); } @@ -665,6 +851,7 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, * Helper for vma_adjust() in the split_vma insert case: insert a vma into the * mm's list and rbtree. It has already been inserted into the interval tree. */ +extern void mt_dump(const struct maple_tree *mt); static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) { struct vm_area_struct *prev; @@ -677,6 +864,7 @@ static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) mm->map_count++; } +// LRH: Fixed. 
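
For reference, the maple tree half of the dual bookkeeping above boils down to two primitives: store the VMA pointer over its range (with the half-open [vm_start, vm_end) converted to the tree's inclusive last index), and erase by any index inside that range, typically vm_start. A minimal sketch of the pattern, using only the maple tree calls this patch already relies on (mtree_store_range(), mtree_erase(), mt_find()); the demo_* helpers are illustrative and not part of the patch:

#include <linux/kernel.h>
#include <linux/maple_tree.h>
#include <linux/mm_types.h>
#include <linux/gfp.h>

/* Store @vma over its address range.  The maple tree takes an inclusive
 * last index, so [vm_start, vm_end) becomes [vm_start, vm_end - 1]. */
static void demo_vma_store(struct mm_struct *mm, struct vm_area_struct *vma)
{
	mtree_store_range(&mm->mm_mt, vma->vm_start, vma->vm_end - 1,
			  vma, GFP_KERNEL);
}

/* Erasing by any index inside the stored range (vm_start is convenient)
 * removes the whole entry. */
static void demo_vma_erase(struct mm_struct *mm, struct vm_area_struct *vma)
{
	mtree_erase(&mm->mm_mt, vma->vm_start);
}

/* Find the VMA containing @addr, or the next one above it, much as
 * mt_find_vma() later in this patch does (minus the vmacache). */
static struct vm_area_struct *demo_vma_find(struct mm_struct *mm,
					    unsigned long addr)
{
	return mt_find(&mm->mm_mt, &addr, ULONG_MAX);
}

The in-tree __vma_mt_store()/__vma_mt_erase() wrappers additionally emit trace events and call mt_validate() after each modification while the structure is being proven against the rbtree.
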
static __always_inline void __vma_unlink_common(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *ignore) @@ -708,6 +896,9 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, long adjust_next = 0; int remove_next = 0; + validate_mm(mm); + validate_mm_mt(mm, NULL); + if (next && !insert) { struct vm_area_struct *exporter = NULL, *importer = NULL; @@ -833,17 +1024,33 @@ again: } if (start != vma->vm_start) { + unsigned long old_start = vma->vm_start; vma->vm_start = start; + if (old_start < start) + __vma_mt_szero(mm, old_start, start); start_changed = true; } if (end != vma->vm_end) { + unsigned long old_end = vma->vm_end; vma->vm_end = end; + if (old_end > end) + __vma_mt_szero(mm, end - 1, old_end); end_changed = true; } + + if (end_changed || start_changed) { + __vma_mt_store(mm, vma); + } + vma->vm_pgoff = pgoff; if (adjust_next) { + // maple tree erase is unnecessary as the adjusting of the vma + // would have overwritten the area. next->vm_start += adjust_next << PAGE_SHIFT; next->vm_pgoff += adjust_next; + // the vma_store is necessary as the adjust_next may be + // negative and expand backwards. + __vma_mt_store(mm, next); } if (root) { @@ -858,6 +1065,8 @@ again: * vma_merge has merged next into vma, and needs * us to remove next before dropping the locks. */ + /* Since we have expanded over this vma, the maple tree will + * have overwritten by storing the value */ if (remove_next != 3) __vma_unlink_common(mm, next, next); else @@ -879,6 +1088,8 @@ again: * us to insert it before dropping the locks * (it may either follow vma or precede it). */ + /* maple tree store is done in the __vma_link call in this + * call graph */ __insert_vm_struct(mm, insert); } else { if (start_changed) @@ -977,6 +1188,7 @@ again: uprobe_mmap(insert); validate_mm(mm); + validate_mm_mt(mm, NULL); return 0; } @@ -1113,6 +1325,18 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags, * parameter) may establish ptes with the wrong permissions of NNNN * instead of the right permissions of XXXX. */ + +//LRH: +// p = prev, n = next, a = add, nn = next next +// 0. Adding page over partial p, cannot merge +// 1. Adding page between p and n, all become p +// 2. Adding page between p and n, a merges with p +// 3. Adding page between p and n, a merges with n +// 4. Adding page over p, a merges with n +// 5. Adding page over n, a merges with p +// 6. Adding page over all of n, p-a-nn all become p +// 7. Adding page over all of n, p-a all become p +// 8. Adding page over all of n, a-nn all become nn. struct vm_area_struct *vma_merge(struct mm_struct *mm, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, @@ -1124,6 +1348,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, struct vm_area_struct *area, *next; int err; + validate_mm_mt(mm, NULL); /* * We later require that vma->vm_flags == vm_flags, * so this tests vma->vm_flags & VM_SPECIAL, too. 
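
The __vma_adjust() hunks above follow one invariant for the maple tree: any span the VMA gives up is overwritten with NULL, and the VMA is then (re)stored over the range it now owns; growth needs no explicit erase because the wider store overwrites the old entry in place. A condensed sketch of that invariant, assuming the same maple tree API as the earlier sketch and not attempting to be a line-for-line copy of the code above; the demo_* name is illustrative:

#include <linux/maple_tree.h>
#include <linux/mm_types.h>
#include <linux/gfp.h>

/* Mirror a boundary change of @vma into mm->mm_mt.  @old_start/@old_end
 * are the boundaries before the adjustment.  Sketch only. */
static void demo_vma_adjust_ranges(struct mm_struct *mm,
				   struct vm_area_struct *vma,
				   unsigned long old_start,
				   unsigned long old_end)
{
	/* Front of the VMA was given up: clear [old_start, vm_start). */
	if (old_start < vma->vm_start)
		mtree_store_range(&mm->mm_mt, old_start, vma->vm_start - 1,
				  NULL, GFP_KERNEL);

	/* Tail of the VMA was given up: clear [vm_end, old_end). */
	if (old_end > vma->vm_end)
		mtree_store_range(&mm->mm_mt, vma->vm_end, old_end - 1,
				  NULL, GFP_KERNEL);

	/* Store the VMA over its current range; this also handles growth
	 * in either direction by overwriting whatever was there before. */
	mtree_store_range(&mm->mm_mt, vma->vm_start, vma->vm_end - 1,
			  vma, GFP_KERNEL);
}
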
@@ -1184,10 +1409,10 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, can_vma_merge_before(next, vm_flags, anon_vma, file, pgoff+pglen, vm_userfaultfd_ctx)) { - if (prev && addr < prev->vm_end) /* case 4 */ + if (prev && addr < prev->vm_end){ /* case 4 */ err = __vma_adjust(prev, prev->vm_start, addr, prev->vm_pgoff, NULL, next); - else { /* cases 3, 8 */ + }else { /* cases 3, 8 */ err = __vma_adjust(area, addr, next->vm_end, next->vm_pgoff - pglen, NULL, next); /* @@ -1202,6 +1427,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, khugepaged_enter_vma_merge(area, vm_flags); return area; } + validate_mm_mt(mm, NULL); return NULL; } @@ -1694,6 +1920,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, struct rb_node **rb_link, *rb_parent; unsigned long charged = 0; + validate_mm_mt(mm, NULL); /* Check against address space limit. */ if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) { unsigned long nr_pages; @@ -1710,6 +1937,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr, } /* Clear old maps */ + // if (clean_overlaps(mm, addr, len, &prev)) + // return -ENOMEM; while (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) { if (do_munmap(mm, addr, len, uf)) @@ -1792,6 +2021,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, vma_set_anonymous(vma); } + // Drop this for maple uses later. vma_link(mm, vma, prev, rb_link, rb_parent); /* Once vma denies write, undo our temporary denial count */ if (file) { @@ -1828,6 +2058,7 @@ out: vma_set_page_prot(vma); + validate_mm_mt(mm, NULL); return addr; unmap_and_free_vma: @@ -1847,6 +2078,7 @@ free_vma: unacct_error: if (charged) vm_unacct_memory(charged); + validate_mm_mt(mm, NULL); return error; } @@ -1863,12 +2095,24 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info) struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long length, low_limit, high_limit, gap_start, gap_end; + unsigned long gap; + MA_STATE(mas, &mm->mm_mt, 0, 0); /* Adjust search length to account for worst case alignment overhead */ length = info->length + info->align_mask; if (length < info->length) return -ENOMEM; + + // Maple tree is self contained. + rcu_read_lock(); + if (mas_get_unmapped_area(&mas, info->low_limit, + info->high_limit - 1, length)) + return -ENOMEM; + rcu_read_unlock(); + gap = mas.index; + gap += (info->align_offset - gap) & info->align_mask; + /* Adjust search limits by the desired length */ if (info->high_limit < length) return -ENOMEM; @@ -1950,20 +2194,33 @@ found: VM_BUG_ON(gap_start + info->length > info->high_limit); VM_BUG_ON(gap_start + info->length > gap_end); + + VM_BUG_ON(gap != gap_start); return gap_start; } static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; + struct vm_area_struct *vma = NULL; unsigned long length, low_limit, high_limit, gap_start, gap_end; + unsigned long gap; + MA_STATE(mas, &mm->mm_mt, 0, 0); + validate_mm_mt(mm, NULL); /* Adjust search length to account for worst case alignment overhead */ length = info->length + info->align_mask; if (length < info->length) return -ENOMEM; + rcu_read_lock(); + if (mas_get_unmapped_area_rev(&mas, info->low_limit, info->high_limit, + length)) + return -ENOMEM; + + rcu_read_unlock(); + gap = (mas.index + info->align_mask) & ~info->align_mask; + gap -= info->align_offset & info->align_mask; /* * Adjust search limits by the desired length. 
* See implementation comment at top of unmapped_area(). @@ -2049,6 +2306,30 @@ found_highest: VM_BUG_ON(gap_end < info->low_limit); VM_BUG_ON(gap_end < gap_start); + + if (gap != gap_end) { + struct vm_area_struct *rb_find_vma(struct mm_struct *mm, unsigned long addr); + pr_err("%s: %px Gap was found: mt %lu gap_end %lu\n", __func__, + mm, gap, gap_end); + pr_err("window was %lu - %lu size %lu\n", info->high_limit, + info->low_limit, length); + pr_err("mas.min %lu max %lu mas.last %lu\n", mas.min, mas.max, + mas.last); + pr_err("mas.index %lu align mask %lu offset %lu\n", mas.index, + info->align_mask, info->align_offset); + pr_err("rb_find_vma find on %lu => %px (%px)\n", mas.index, + rb_find_vma(mm, mas.index), vma); + mt_dump(&mm->mm_mt); + { + struct vm_area_struct *dv = mm->mmap; + while (dv) { + printk("vma %px %lu-%lu\n", dv, dv->vm_start, dv->vm_end); + dv = dv->vm_next; + } + } + VM_BUG_ON(gap != gap_end); + } + return gap_end; } @@ -2230,8 +2511,33 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, EXPORT_SYMBOL(get_unmapped_area); +/** + * mt_find_vma() - Find the VMA for a given address, or the next vma. May return + * NULL in the case of no vma at addr or above + * @mm The mm_struct to check + * @addr: The address + * + * Returns: The VMA assoicated with addr, or the next vma. + * May return NULL in the case of no vma at addr or above. + */ +struct vm_area_struct *mt_find_vma(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma; + + /* Check the cache first. */ + vma = vmacache_find(mm, addr); + if (likely(vma)) + return vma; + + vma = mt_find(&mm->mm_mt, &addr, ULONG_MAX); + + if (vma) + vmacache_update(addr, vma); + return vma; +} + /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ -struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) +struct vm_area_struct *rb_find_vma(struct mm_struct *mm, unsigned long addr) { struct rb_node *rb_node; struct vm_area_struct *vma; @@ -2262,13 +2568,49 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) return vma; } +struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *ret = rb_find_vma(mm, addr); + struct vm_area_struct *mt_ret = mt_find_vma(mm, addr); + if (ret != mt_ret) { + pr_err("Looking for %lu\n", addr); + mt_dump(&mm->mm_mt); + pr_err("%px %lu: ret %px mt_ret %px\n", mm, addr, ret, mt_ret); + } + VM_BUG_ON_VMA((unsigned long)ret != (unsigned long)mt_ret , ret); + return ret; +} EXPORT_SYMBOL(find_vma); +/** + * mt_find_vma_prev() - Find the VMA for a given address, or the next vma and + * sets %pprev to the previous VMA, if any. + * @mm The mm_struct to check + * @addr: The address + * @pprev: The pointer to set to the previous VMA + * + * Returns: The VMA associated with @addr, or the next vma. + * May return NULL in the case of no vma at addr or above. + */ +struct vm_area_struct * +mt_find_vma_prev(struct mm_struct *mm, unsigned long addr, + struct vm_area_struct **pprev) +{ + struct vm_area_struct *vma; + MA_STATE(mas, &mm->mm_mt, addr, addr); + + rcu_read_lock(); + vma = mas_find(&mas, ULONG_MAX); + *pprev = mas_prev(&mas, 0); + rcu_read_unlock(); + return vma; +} + /* * Same as find_vma, but also return a pointer to the previous VMA in *pprev. 
*/ struct vm_area_struct * -find_vma_prev(struct mm_struct *mm, unsigned long addr, +rb_find_vma_prev(struct mm_struct *mm, unsigned long addr, struct vm_area_struct **pprev) { struct vm_area_struct *vma; @@ -2284,6 +2626,19 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr, return vma; } +struct vm_area_struct * +find_vma_prev(struct mm_struct *mm, unsigned long addr, + struct vm_area_struct **pprev) +{ + struct vm_area_struct *mt_prev; + struct vm_area_struct *ret = rb_find_vma_prev(mm, addr, pprev); + VM_BUG_ON_VMA((unsigned long)ret != + (unsigned long)mt_find_vma_prev(mm, addr, &mt_prev), + ret); + VM_BUG_ON_VMA(mt_prev != *pprev, *pprev); + return ret; +} + /* * Verify that the stack growth is acceptable and * update accounting. This is shared with both the @@ -2342,6 +2697,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) unsigned long gap_addr; int error = 0; + validate_mm_mt(mm, NULL); if (!(vma->vm_flags & VM_GROWSUP)) return -EFAULT; @@ -2418,6 +2774,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) anon_vma_unlock_write(vma->anon_vma); khugepaged_enter_vma_merge(vma, vma->vm_flags); validate_mm(mm); + validate_mm_mt(mm, NULL); return error; } #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */ @@ -2432,6 +2789,7 @@ int expand_downwards(struct vm_area_struct *vma, struct vm_area_struct *prev; int error = 0; + validate_mm(mm); address &= PAGE_MASK; if (address < mmap_min_addr) return -EPERM; @@ -2485,6 +2843,8 @@ int expand_downwards(struct vm_area_struct *vma, anon_vma_interval_tree_pre_update_vma(vma); vma->vm_start = address; vma->vm_pgoff -= grow; + // Overwrite old entry in mtree. + __vma_mt_store(mm, vma); anon_vma_interval_tree_post_update_vma(vma); vma_gap_update(vma); spin_unlock(&mm->page_table_lock); @@ -2592,7 +2952,7 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) vma = remove_vma(vma); } while (vma); vm_unacct_memory(nr_accounted); - validate_mm(mm); + //validate_mm(mm); } /* @@ -2631,6 +2991,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, vma->vm_prev = NULL; do { vma_rb_erase(vma, &mm->mm_rb); + __vma_mt_erase(mm, vma); mm->map_count--; tail_vma = vma; vma = vma->vm_next; @@ -2656,6 +3017,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, { struct vm_area_struct *new; int err; + validate_mm_mt(mm, NULL); if (vma->vm_ops && vma->vm_ops->split) { err = vma->vm_ops->split(vma, addr); @@ -2708,6 +3070,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, mpol_put(vma_policy(new)); out_free_vma: vm_area_free(new); + validate_mm_mt(mm, NULL); return err; } @@ -2995,6 +3358,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla pgoff_t pgoff = addr >> PAGE_SHIFT; int error; unsigned long mapped_addr; + validate_mm_mt(mm, NULL); /* Until we need other flags, refuse anything except VM_EXEC. */ if ((flags & (~VM_EXEC)) != 0) @@ -3012,6 +3376,8 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla /* * Clear old maps. 
this also does some error checking for us */ + //if (clean_overlaps(mm, addr, len, &prev)) + // return -ENOMEM; while (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) { if (do_munmap(mm, addr, len, uf)) @@ -3057,6 +3423,7 @@ out: if (flags & VM_LOCKED) mm->locked_vm += (len >> PAGE_SHIFT); vma->vm_flags |= VM_SOFTDIRTY; + validate_mm_mt(mm, NULL); return 0; } @@ -3160,6 +3527,13 @@ void exit_mmap(struct mm_struct *mm) nr_accounted += vma_pages(vma); vma = remove_vma(vma); } + + mtree_destroy(&mm->mm_mt); + trace_exit_mmap(mm); +#if defined(CONFIG_DEBUG_MAPLE_TREE_VERBOSE) + printk("mt_mod %px, (%px), DESTROY", mm, &mm->mm_mt); +#endif + vm_unacct_memory(nr_accounted); } @@ -3171,10 +3545,21 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) { struct vm_area_struct *prev; struct rb_node **rb_link, *rb_parent; + unsigned long start = vma->vm_start; + struct vm_area_struct *overlap = NULL; if (find_vma_links(mm, vma->vm_start, vma->vm_end, &prev, &rb_link, &rb_parent)) return -ENOMEM; + + if ((overlap = mt_find(&mm->mm_mt, &start, vma->vm_end - 1)) != NULL) { + pr_err("Found vma ending at %lu\n", start - 1); + pr_err("vma : %lu => %lu-%lu\n", (unsigned long)overlap, + overlap->vm_start, overlap->vm_end - 1); + mt_dump(&mm->mm_mt); + BUG(); + } + if ((vma->vm_flags & VM_ACCOUNT) && security_vm_enough_memory_mm(mm, vma_pages(vma))) return -ENOMEM; @@ -3214,7 +3599,9 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, struct vm_area_struct *new_vma, *prev; struct rb_node **rb_link, *rb_parent; bool faulted_in_anon_vma = true; + unsigned long index = addr; + validate_mm_mt(mm, NULL); /* * If anonymous vma has not yet been faulted, update new pgoff * to match new location, to increase its chance of merging. @@ -3226,6 +3613,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) return NULL; /* should never get here */ + if (mt_find(&mm->mm_mt, &index, addr+len - 1)) + BUG(); new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), vma->vm_userfaultfd_ctx); @@ -3269,6 +3658,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, vma_link(mm, new_vma, prev, rb_link, rb_parent); *need_rmap_locks = false; } + validate_mm_mt(mm, NULL); return new_vma; out_free_mempol: @@ -3276,6 +3666,7 @@ out_free_mempol: out_free_vma: vm_area_free(new_vma); out: + validate_mm_mt(mm, NULL); return NULL; } @@ -3400,6 +3791,7 @@ static struct vm_area_struct *__install_special_mapping( int ret; struct vm_area_struct *vma; + validate_mm_mt(mm, NULL); vma = vm_area_alloc(mm); if (unlikely(vma == NULL)) return ERR_PTR(-ENOMEM); @@ -3421,10 +3813,12 @@ static struct vm_area_struct *__install_special_mapping( perf_event_mmap(vma); + validate_mm_mt(mm, NULL); return vma; out: vm_area_free(vma); + validate_mm_mt(mm, NULL); return ERR_PTR(ret); } diff --git a/mm/mprotect.c b/mm/mprotect.c index ce8b8a5eacbb..b0c46b084478 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -407,6 +407,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, int error; int dirty_accountable = 0; + //printk("vma area %lu-%lu\n", vma->vm_start, vma->vm_end); if (newflags == oldflags) { *pprev = vma; return 0;
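
Taken together, every VMA now lives in three structures (the vm_next list, the rbtree and the maple tree), and most paths cross-check the maple tree against the other two, which is what the commit message means by carrying out all operations in duplicate. A stripped-down version of that cross-check in the spirit of validate_mm_mt() above, assuming the MA_STATE()/mas_for_each() iteration API used throughout the patch, with the error reporting collapsed into VM_BUG_ON_MM(); the demo_* name is illustrative:

#include <linux/kernel.h>
#include <linux/maple_tree.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/rcupdate.h>
#include <linux/xarray.h>

/* Walk the maple tree and the vm_next list in lockstep and trap on the
 * first disagreement.  Sketch of the validation idea only. */
static void demo_validate_mm_mt(struct mm_struct *mm)
{
	struct vm_area_struct *vma = mm->mmap;
	struct vm_area_struct *vma_mt;

	MA_STATE(mas, &mm->mm_mt, 0, 0);

	rcu_read_lock();
	mas_for_each(&mas, vma_mt, ULONG_MAX) {
		if (xa_is_zero(vma_mt))
			continue;

		/* Same object, same boundaries, and the stored range must
		 * equal [vm_start, vm_end) expressed with an inclusive
		 * last index. */
		VM_BUG_ON_MM(vma != vma_mt ||
			     vma->vm_start != mas.index ||
			     vma->vm_end - 1 != mas.last, mm);
		vma = vma->vm_next;
	}
	rcu_read_unlock();

	/* The list must not hold entries the tree does not. */
	VM_BUG_ON_MM(vma, mm);
}
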