www.infradead.org Git - users/jedix/linux-maple.git/commitdiff

mm: Add maple tree to init-mm,mmap, mprotect, mm_types

Author:    Liam R. Howlett <Liam.Howlett@Oracle.com>
           Thu, 16 Jan 2020 16:06:42 +0000 (11:06 -0500)
Committer: Liam R. Howlett <Liam.Howlett@Oracle.com>
           Tue, 5 Jan 2021 17:28:19 +0000 (12:28 -0500)

Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
include/linux/mm_types.h
include/linux/mmdebug.h
init/main.c
mm/init-mm.c
mm/mmap.c
mm/mprotect.c
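
Throughout this patch the maple tree shadows the existing rbtree and VMA
linked list rather than replacing them. VMAs are stored under an inclusive
last index, vm_end - 1, because vm_end is exclusive. A minimal sketch of
the pairing, using the development-era mtree_store_range() and
mtree_erase() calls exactly as they appear in the mm/mmap.c hunks below:

	/* Store the VMA over its whole range; the last index is inclusive. */
	mtree_store_range(&mm->mm_mt, vma->vm_start, vma->vm_end - 1,
			  vma, GFP_KERNEL);

	/* Erase whatever entry contains vm_start, i.e. this VMA's range. */
	mtree_erase(&mm->mm_mt, vma->vm_start);

Because a store only replaces the range it covers, shrinking a VMA has to
erase the stale entry first, otherwise the trimmed head or tail would
still point at the VMA; this is why __vma_adjust() below erases before
narrowing vm_start or vm_end and stores the new range afterwards.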

index 5a9238f6caad97b40be7acb40f896b99d2f6f4ea..f2c725e1052f8817140843b6804ff8a70bba544d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -8,6 +8,7 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/rbtree.h>
+#include <linux/maple_tree.h>
 #include <linux/rwsem.h>
 #include <linux/completion.h>
 #include <linux/cpumask.h>
@@ -388,6 +389,7 @@ struct kioctx_table;
 struct mm_struct {
        struct {
                struct vm_area_struct *mmap;            /* list of VMAs */
+               struct maple_tree mm_mt;
                struct rb_root mm_rb;
                u64 vmacache_seqnum;                   /* per-thread vmacache */
 #ifdef CONFIG_MMU
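
The mm/mmap.c hunks further down lean on a single read-side iteration
idiom over the new mm_mt field; a condensed sketch, assuming the
development-era MA_STATE()/mas_for_each()/mas_retry() API used by this
patch:

	struct vm_area_struct *vma;
	MA_STATE(mas, &mm->mm_mt, 0, 0);	/* start the walk at index 0 */

	rcu_read_lock();
	mas_for_each(&mas, vma, ULONG_MAX) {
		if (mas_retry(&mas, vma))	/* skip retry entries */
			continue;
		/* mas.index and mas.last hold the range this entry spans */
	}
	rcu_read_unlock();
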
index 2ad72d2c8cc528a2103556eeadd657389f5ece35..48ed391c41d52582f3c3678319c693c711a93574 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -13,7 +13,7 @@ extern void dump_page(struct page *page, const char *reason);
 extern void __dump_page(struct page *page, const char *reason);
 void dump_vma(const struct vm_area_struct *vma);
 void dump_mm(const struct mm_struct *mm);
-
+#define CONFIG_DEBUG_VM
 #ifdef CONFIG_DEBUG_VM
 #define VM_BUG_ON(cond) BUG_ON(cond)
 #define VM_BUG_ON_PAGE(cond, page)                                     \
index 32b2a8affafd1b434ec319a1bfb842e7ac7a298f..506bcdbeb02d81b1265c482d9959977a7e5bdde1 100644
--- a/init/main.c
+++ b/init/main.c
@@ -114,6 +114,7 @@ static int kernel_init(void *);
 
 extern void init_IRQ(void);
 extern void radix_tree_init(void);
+extern void maple_tree_init(void);
 
 /*
  * Debug helper: via this flag we know that we are in 'early bootup code'
@@ -923,6 +924,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
                 "Interrupts were enabled *very* early, fixing it\n"))
                local_irq_disable();
        radix_tree_init();
+       maple_tree_init();
 
        /*
         * Set up housekeeping before setting up workqueues to allow the unbound
index 3a613c85f9ede21e197ec7cf00d249b4085d4df4..27229044a070211f08e930c1070d8a6cc6d26572 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/mm_types.h>
 #include <linux/rbtree.h>
+#include <linux/maple_tree.h>
 #include <linux/rwsem.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
@@ -28,6 +29,7 @@
  */
 struct mm_struct init_mm = {
        .mm_rb          = RB_ROOT,
+       .mm_mt          = MTREE_INIT(mm_mt, MAPLE_ALLOC_RANGE),
        .pgd            = swapper_pg_dir,
        .mm_users       = ATOMIC_INIT(2),
        .mm_count       = ATOMIC_INIT(1),
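
MTREE_INIT() initializes a statically allocated tree at compile time, and
MAPLE_ALLOC_RANGE (the flag naming of this development era) marks the tree
as one that tracks gaps for allocation, which the unmapped-area searches
in mm/mmap.c depend on. A hedged sketch of the same pattern for a
hypothetical tree outside init_mm:

	/* Hypothetical static tree using the same initializer. */
	static struct maple_tree example_mt =
		MTREE_INIT(example_mt, MAPLE_ALLOC_RANGE);

	/* Nothing has been stored yet, so a lookup returns NULL
	 * (assuming mtree_load() is available in this API era).
	 */
	void *entry = mtree_load(&example_mt, 4096);
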
index 5c8b4485860de42ad69f056c7f67ad38e3ad58e6..2d04d0a948b7627a390f117a84427ec3981e9eb1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -122,6 +122,7 @@ static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
 }
 
 /* Update vma->vm_page_prot to reflect vma->vm_flags. */
+// LRH: Needed
 void vma_set_page_prot(struct vm_area_struct *vma)
 {
        unsigned long vm_flags = vma->vm_flags;
@@ -139,6 +140,7 @@ void vma_set_page_prot(struct vm_area_struct *vma)
 /*
  * Requires inode->i_mapping->i_mmap_rwsem
  */
+// LRH: Needed
 static void __remove_shared_vm_struct(struct vm_area_struct *vma,
                struct file *file, struct address_space *mapping)
 {
@@ -156,6 +158,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
  * Unlink a file-based vm structure from its interval tree, to hide
  * vma from rmap and vmtruncate before freeing its page tables.
  */
+// LRH: Needed
 void unlink_file_vma(struct vm_area_struct *vma)
 {
        struct file *file = vma->vm_file;
@@ -171,6 +174,7 @@ void unlink_file_vma(struct vm_area_struct *vma)
 /*
  * Close a vm structure and free it, returning the next.
  */
+// LRH: Needed
 static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 {
        struct vm_area_struct *next = vma->vm_next;
@@ -185,6 +189,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
        return next;
 }
 
+// LRH: Needed
 static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
                struct list_head *uf);
 SYSCALL_DEFINE1(brk, unsigned long, brk)
@@ -286,6 +291,7 @@ out:
        return retval;
 }
 
+// LRH: not needed.
 static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
 {
        unsigned long gap, prev_end;
@@ -306,8 +312,9 @@ static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
        }
        return gap;
 }
-
-#ifdef CONFIG_DEBUG_VM_RB
+#define CONFIG_DEBUG_VM_RB
+#define CONFIG_DEBUG_MAPLE_TREE
+#if defined(CONFIG_DEBUG_VM_RB) || defined(CONFIG_DEBUG_MAPLE_TREE)
 static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
 {
        unsigned long max = vma_compute_gap(vma), subtree_gap;
@@ -373,7 +380,40 @@ static int browse_rb(struct mm_struct *mm)
        }
        return bug ? -1 : i;
 }
+extern void mt_dump(const struct maple_tree *mt);
+
+/*
+ * Validate the maple tree against the VMA linked list.
+ */
+static void validate_mm_mt(struct mm_struct *mm,
+               struct vm_area_struct *ignore)
+{
+       struct maple_tree *mt = &mm->mm_mt;
+       struct vm_area_struct *vma_mt, *vma = mm->mmap;
 
+       MA_STATE(mas, mt, 0, 0);
+       rcu_read_lock();
+       mas_for_each(&mas, vma_mt, ULONG_MAX) {
+               if (mas_retry(&mas, vma_mt))
+                       continue;
+
+               if (!vma)
+                       break;
+               if (vma != vma_mt) {
+                       pr_emerg("mt: %px %lu - %lu\n", vma_mt,
+                                vma_mt->vm_start, vma_mt->vm_end);
+                       pr_emerg("rb: %px %lu - %lu\n", vma,
+                                vma->vm_start, vma->vm_end);
+               }
+               VM_BUG_ON(vma != vma_mt);
+               if (vma)
+                       vma = vma->vm_next;
+       }
+       VM_BUG_ON(vma);
+
+       rcu_read_unlock();
+       //printk("%s: done\n", __func__);
+}
+
 static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
 {
        struct rb_node *nd;
@@ -386,7 +426,6 @@ static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
                        vma);
        }
 }
 static void validate_mm(struct mm_struct *mm)
 {
        int bug = 0;
@@ -394,21 +433,54 @@ static void validate_mm(struct mm_struct *mm)
        unsigned long highest_address = 0;
        struct vm_area_struct *vma = mm->mmap;
 
+       struct maple_tree *mt = &mm->mm_mt;
+       MA_STATE(mas, mt, 0, 0);
+       struct vm_area_struct *entry = NULL;
+       unsigned long mt_highest_address = 0;
+       int mt_i = 0;
+
+       //printk("%s: vma linked list\n", __func__);
        while (vma) {
                struct anon_vma *anon_vma = vma->anon_vma;
                struct anon_vma_chain *avc;
 
+               //pr_cont("vma: %lu-%lu", vma->vm_start, vma->vm_end);
                if (anon_vma) {
+               //      pr_cont(" anon");
                        anon_vma_lock_read(anon_vma);
-                       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-                               anon_vma_interval_tree_verify(avc);
+//                     list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+//                             anon_vma_interval_tree_verify(avc);
                        anon_vma_unlock_read(anon_vma);
                }
+               //pr_cont("\n");
 
                highest_address = vm_end_gap(vma);
                vma = vma->vm_next;
                i++;
        }
+       //printk("%s: mas for each\n", __func__);
+       rcu_read_lock();
+       mas_for_each(&mas, entry, ULONG_MAX) {
+               if (mas_retry(&mas, entry))
+                       continue;
+
+       //      printk("vma: %lu-%lu\t%lu-%lu\n", entry->vm_start, entry->vm_end,
+       //                      mas.index, mas.last);
+               VM_BUG_ON_MM(entry->vm_end != mas.last + 1, mm);
+               VM_BUG_ON_MM(entry->vm_start != mas.index, mm);
+               mt_highest_address = vm_end_gap(entry);
+               mt_i++;
+       }
+       rcu_read_unlock();
+       //printk("%s: mas for each done\n", __func__);
+       if (i != mt_i) {
+               pr_emerg("%s: %d != %d\n", __func__, i, mt_i);
+               mt_dump(mas.tree);
+       }
+       VM_BUG_ON_MM(i != mt_i, mm);
+       VM_BUG_ON_MM(mt_highest_address != highest_address, mm);
+
        if (i != mm->map_count) {
                pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
                bug = 1;
@@ -440,6 +512,7 @@ RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks,
  * vma->vm_prev->vm_end values changed, without modifying the vma's position
  * in the rbtree.
  */
+// LRH: Not needed
 static void vma_gap_update(struct vm_area_struct *vma)
 {
        /*
@@ -449,15 +522,18 @@ static void vma_gap_update(struct vm_area_struct *vma)
        vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
 }
 
+// LRH: Not needed
 static inline void vma_rb_insert(struct vm_area_struct *vma,
                                 struct rb_root *root)
 {
        /* All rb_subtree_gap values must be consistent prior to insertion */
        validate_mm_rb(root, NULL);
 
+       //printk("insert augmented %lu-%lu\n", vma->vm_start, vma->vm_end);
        rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
 }
 
+// LRH:  Make sure everywhere uses an mt_erase too..
 static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
 {
        /*
@@ -468,6 +544,7 @@ static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
        rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
 }
 
+// LRH: Not needed
 static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
                                                struct rb_root *root,
                                                struct vm_area_struct *ignore)
@@ -539,6 +616,8 @@ static int find_vma_links(struct mm_struct *mm, unsigned long addr,
                __rb_parent = *__rb_link;
                vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
 
+               //printk("%s: checking %lu-%lu\n", __func__,
+               //              vma_tmp->vm_start, vma_tmp->vm_end);
                if (vma_tmp->vm_end > addr) {
                        /* Fail if an existing vma overlaps the area */
                        if (vma_tmp->vm_start < end)
@@ -602,10 +681,39 @@ munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
 
        return 0;
 }
+
+/*
+ * clean_overlaps() - Call do_munmap() if any mapping exists within @start
+ * to @start + @len.  Sets @pprev to the previous entry or NULL if none
+ * exists.
+ */
+static int clean_overlaps(struct mm_struct *mm, unsigned long start,
+               unsigned long len, struct vm_area_struct **pprev,
+               struct list_head *uf)
+{
+       struct vm_area_struct *vma;
+       MA_STATE(mas, &mm->mm_mt, start, start);
+
+       *pprev = NULL;
+
+       rcu_read_lock();
+       vma = mas_find(&mas, start + len - 1);
+       if (vma)
+               *pprev = mas_prev(&mas, 0);
+       rcu_read_unlock();
+
+       if (vma) {
+               if (do_munmap(mm, start, len, uf))
+                       return -ENOMEM;
+       }
+       return 0;
+}
+
 static unsigned long count_vma_pages_range(struct mm_struct *mm,
                unsigned long addr, unsigned long end)
 {
        unsigned long nr_pages = 0;
+       unsigned long nr_mt_pages = 0;
        struct vm_area_struct *vma;
 
        /* Find first overlaping mapping */
@@ -627,9 +735,17 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm,
                nr_pages += overlap_len >> PAGE_SHIFT;
        }
 
+       mt_for_each(&mm->mm_mt, vma, addr, end) {
+               nr_mt_pages +=
+                       (min(end, vma->vm_end) - vma->vm_start) >> PAGE_SHIFT;
+       }
+
+       VM_BUG_ON_MM(nr_pages != nr_mt_pages, mm);
+
        return nr_pages;
 }
 
+// LRH: Not needed
 void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
                struct rb_node **rb_link, struct rb_node *rb_parent)
 {
@@ -654,6 +770,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
        vma_rb_insert(vma, &mm->mm_rb);
 }
 
+// LRH: Needed
 static void __vma_link_file(struct vm_area_struct *vma)
 {
        struct file *file;
@@ -672,14 +789,44 @@ static void __vma_link_file(struct vm_area_struct *vma)
                flush_dcache_mmap_unlock(mapping);
        }
 }
+
+static void __vma_mt_dump(struct mm_struct *mm)
+{
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
+       struct vm_area_struct *entry = NULL;
+
+       rcu_read_lock();
+       mas_for_each(&mas, entry, ULONG_MAX) {
+               if (mas_retry(&mas, entry))
+                       continue;
+
+               printk("vma: %lu-%lu\t%lu-%lu\n", entry->vm_start, entry->vm_end,
+                               mas.index, mas.last);
+       }
+       rcu_read_unlock();
 
+}
+
+static void __vma_mt_erase(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+       printk("mt_mod %px ERASE, %lu, %lu,\n",
+                       mm, vma->vm_start, vma->vm_end - 1);
+       mtree_erase(&mm->mm_mt, vma->vm_start);
+}
+
+static void __vma_mt_store(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+       printk("mt_mod %px STORE, %lu, %lu,\n",
+                       mm, vma->vm_start, vma->vm_end - 1);
+       mtree_store_range(&mm->mm_mt, vma->vm_start, vma->vm_end - 1, vma,
+               GFP_KERNEL);
+}
+
+// LRH: Needed - update linked list, should be fine.
 static void
 __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
        struct vm_area_struct *prev, struct rb_node **rb_link,
        struct rb_node *rb_parent)
 {
+       __vma_mt_store(mm, vma);
        __vma_link_list(mm, vma, prev);
        __vma_link_rb(mm, vma, rb_link, rb_parent);
+       validate_mm_mt(mm, NULL);
 }
 
 static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -702,29 +849,50 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
        mm->map_count++;
        validate_mm(mm);
 }
 /*
  * Helper for vma_adjust() in the split_vma insert case: insert a vma into the
  * mm's list and rbtree.  It has already been inserted into the interval tree.
  */
+extern void mt_dump(const struct maple_tree *mt);
 static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
        struct vm_area_struct *prev;
        struct rb_node **rb_link, *rb_parent;
+       unsigned long vm_start = vma->vm_start;
+       struct vm_area_struct *overlap = NULL;
 
        if (find_vma_links(mm, vma->vm_start, vma->vm_end,
                           &prev, &rb_link, &rb_parent))
                BUG();
+
+       //printk("going to insert %lx: vma %lu-%lu\n", (unsigned long) current, vma->vm_start, vma->vm_end);
+       if ((overlap = mt_find(&mm->mm_mt, &vm_start, vma->vm_end - 1, true)) != NULL) {
+               /*
+               printk("Found vma ending at %lu\n", vm_start - 1);
+               printk("vma : %lu => %lu-%lu\n", (unsigned long)overlap,
+                               overlap->vm_start, overlap->vm_end);
+               printk("rbtree:\n");
+               */
+               browse_rb(mm);
+               //mt_dump(&mm->mm_mt);
+       }
        __vma_link(mm, vma, prev, rb_link, rb_parent);
        mm->map_count++;
 }
 
+// LRH: Fixed.
 static __always_inline void __vma_unlink(struct mm_struct *mm,
                                                struct vm_area_struct *vma,
                                                struct vm_area_struct *ignore)
 {
        vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
        __vma_unlink_list(mm, vma);
+       __vma_mt_erase(mm, vma);
        /* Kill the cache */
        vmacache_invalidate(mm);
 }
@@ -750,6 +918,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
        long adjust_next = 0;
        int remove_next = 0;
 
+       validate_mm(mm);
        if (next && !insert) {
                struct vm_area_struct *exporter = NULL, *importer = NULL;
 
@@ -859,12 +1028,14 @@ again:
        if (!anon_vma && adjust_next)
                anon_vma = next->anon_vma;
        if (anon_vma) {
+               browse_rb(mm);
                VM_WARN_ON(adjust_next && next->anon_vma &&
                           anon_vma != next->anon_vma);
                anon_vma_lock_write(anon_vma);
                anon_vma_interval_tree_pre_update_vma(vma);
                if (adjust_next)
                        anon_vma_interval_tree_pre_update_vma(next);
+               browse_rb(mm);
        }
 
        if (file) {
@@ -876,16 +1047,31 @@ again:
 
        if (start != vma->vm_start) {
+               if (vma->vm_start < start)
+                       __vma_mt_erase(mm, vma);
                vma->vm_start = start;
                start_changed = true;
        }
        if (end != vma->vm_end) {
+               if (vma->vm_end > end)
+                       __vma_mt_erase(mm, vma);
                vma->vm_end = end;
                end_changed = true;
        }
+
+       if (end_changed || start_changed)
+               __vma_mt_store(mm, vma);
+
        vma->vm_pgoff = pgoff;
        if (adjust_next) {
                next->vm_start += adjust_next;
                next->vm_pgoff += adjust_next >> PAGE_SHIFT;
+               __vma_mt_store(mm, next);
        }
 
        if (file) {
@@ -925,6 +1111,7 @@ again:
        } else {
                if (start_changed)
                        vma_gap_update(vma);
                if (end_changed) {
                        if (!next)
                                mm->highest_vm_end = vm_end_gap(vma);
@@ -943,7 +1130,6 @@ again:
        if (file) {
                i_mmap_unlock_write(mapping);
                uprobe_mmap(vma);
                if (adjust_next)
                        uprobe_mmap(next);
        }
@@ -1154,6 +1340,18 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
  * parameter) may establish ptes with the wrong permissions of NNNN
  * instead of the right permissions of XXXX.
  */
+
+//LRH:
+//  p = prev, n = next, a = add, nn = next next
+// 0. Adding page over partial p, cannot merge
+// 1. Adding page between p and n, all become p
+// 2. Adding page between p and n, a merges with p
+// 3. Adding page between p and n, a merges with n
+// 4. Adding page over p, a merges with n
+// 5. Adding page over n, a merges with p
+// 6. Adding page over all of n, p-a-nn all become p
+// 7. Adding page over all of n, p-a all become p
+// 8. Adding page over all of n, a-nn all become nn.
 struct vm_area_struct *vma_merge(struct mm_struct *mm,
                        struct vm_area_struct *prev, unsigned long addr,
                        unsigned long end, unsigned long vm_flags,
@@ -1177,6 +1375,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
        if (area && area->vm_end == end)                /* cases 6, 7, 8 */
                next = next->vm_next;
 
+       //printk("%s: %lx: vma %lu-%lu\n", __func__, (unsigned long) current, addr, end);
        /* verify some invariant that must be enforced by the caller */
        VM_WARN_ON(prev && addr <= prev->vm_start);
        VM_WARN_ON(area && end > area->vm_end);
@@ -1222,10 +1421,10 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                        can_vma_merge_before(next, vm_flags,
                                             anon_vma, file, pgoff+pglen,
                                             vm_userfaultfd_ctx)) {
-               if (prev && addr < prev->vm_end)        /* case 4 */
+               if (prev && addr < prev->vm_end) {      /* case 4 */
                        err = __vma_adjust(prev, prev->vm_start,
                                         addr, prev->vm_pgoff, NULL, next);
-               else {                                  /* cases 3, 8 */
+               } else {                                /* cases 3, 8 */
                        err = __vma_adjust(area, addr, next->vm_end,
                                         next->vm_pgoff - pglen, NULL, next);
                        /*
@@ -1929,6 +2128,7 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long length, low_limit, high_limit, gap_start, gap_end;
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
 
        /* Adjust search length to account for worst case alignment overhead */
        length = info->length + info->align_mask;
@@ -1944,6 +2144,12 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
                return -ENOMEM;
        low_limit = info->low_limit + length;
 
+       // The maple tree search is self-contained; run it next to the
+       // rbtree walk below so the two results can be compared.
+       rcu_read_lock();
+       if (mas_get_unmapped_area(&mas, low_limit, high_limit, length)) {
+               rcu_read_unlock();
+               return -ENOMEM;
+       }
+       rcu_read_unlock();
+
        /* Check if rbtree root looks promising */
        if (RB_EMPTY_ROOT(&mm->mm_rb))
                goto check_highest;
@@ -2016,6 +2222,8 @@ found:
 
        VM_BUG_ON(gap_start + info->length > info->high_limit);
        VM_BUG_ON(gap_start + info->length > gap_end);
+
+       //VM_BUG_ON(mas.index != gap_start);
        return gap_start;
 }
 
@@ -2024,6 +2232,7 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long length, low_limit, high_limit, gap_start, gap_end;
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
 
        /* Adjust search length to account for worst case alignment overhead */
        length = info->length + info->align_mask;
@@ -2043,6 +2252,12 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
                return -ENOMEM;
        low_limit = info->low_limit + length;
 
+       rcu_read_lock();
+       if (mas_get_unmapped_area_rev(&mas, low_limit, high_limit, length)) {
+               rcu_read_unlock();
+               return -ENOMEM;
+       }
+       rcu_read_unlock();
+
        /* Check highest gap, which does not precede any rbtree node */
        gap_start = mm->highest_vm_end;
        if (gap_start <= high_limit)
@@ -2115,6 +2330,8 @@ found_highest:
 
        VM_BUG_ON(gap_end < info->low_limit);
        VM_BUG_ON(gap_end < gap_start);
+
+       VM_BUG_ON(mas.last + 1 != gap_end);
        return gap_end;
 }
 
@@ -2296,8 +2513,35 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 
 EXPORT_SYMBOL(get_unmapped_area);
 
+/**
+ * mt_find_vma() - Find the VMA for a given address, or the next VMA.
+ * @mm: The mm_struct to check
+ * @addr: The address
+ *
+ * Returns: The VMA associated with @addr, or the next VMA.
+ * May return NULL in the case of no VMA at @addr or above.
+ */
+struct vm_area_struct *mt_find_vma(struct mm_struct *mm, unsigned long addr)
+{
+       struct vm_area_struct *vma;
+
+       //printk("%s: looking up %lu\n", __func__, addr);
+       /* Check the cache first. */
+       vma = vmacache_find(mm, addr);
+       if (likely(vma))
+               return vma;
+
+       vma = mt_find(&mm->mm_mt, &addr, ULONG_MAX, 0);
+       //printk("Found %lu\n", (unsigned long)vma);
+
+       if (vma)
+               vmacache_update(addr, vma);
+       return vma;
+}
+
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
-struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
+struct vm_area_struct *rb_find_vma(struct mm_struct *mm, unsigned long addr)
 {
        struct rb_node *rb_node;
        struct vm_area_struct *vma;
@@ -2328,13 +2572,49 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
        return vma;
 }
 
+struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
+{
+       struct vm_area_struct *ret = rb_find_vma(mm, addr);
+       struct vm_area_struct *ret2 = mt_find_vma(mm, addr);
+       if (ret != ret2)
+               printk("%px %lu: ret %px ret2 %px\n", mm, addr, ret, ret2);
+       VM_BUG_ON_VMA((unsigned long)ret != (unsigned long)ret2, ret);
+       return ret;
+}
+
 EXPORT_SYMBOL(find_vma);
 
+/**
+ * mt_find_vma_prev() - Find the VMA for a given address, or the next VMA,
+ * and set @pprev to the previous VMA, if any.
+ * @mm: The mm_struct to check
+ * @addr: The address
+ * @pprev: The pointer to set to the previous VMA
+ *
+ * Returns: The VMA associated with @addr, or the next VMA.
+ * May return NULL in the case of no VMA at @addr or above.
+ */
+struct vm_area_struct *
+mt_find_vma_prev(struct mm_struct *mm, unsigned long addr,
+                       struct vm_area_struct **pprev)
+{
+       struct vm_area_struct *vma;
+       MA_STATE(mas, &mm->mm_mt, addr, addr);
+
+       rcu_read_lock();
+       vma = mas_find(&mas, ULONG_MAX);
+       *pprev = mas_prev(&mas, 0);
+       rcu_read_unlock();
+       return vma;
+}
+
 /*
  * Same as find_vma, but also return a pointer to the previous VMA in *pprev.
  */
 struct vm_area_struct *
-find_vma_prev(struct mm_struct *mm, unsigned long addr,
+rb_find_vma_prev(struct mm_struct *mm, unsigned long addr,
                        struct vm_area_struct **pprev)
 {
        struct vm_area_struct *vma;
@@ -2345,11 +2625,25 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr,
        } else {
                struct rb_node *rb_node = rb_last(&mm->mm_rb);
 
-               *pprev = rb_node ? rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL;
+               *pprev = rb_node ?
+                       rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL;
        }
        return vma;
 }
 
+struct vm_area_struct *
+find_vma_prev(struct mm_struct *mm, unsigned long addr,
+                       struct vm_area_struct **pprev)
+{
+       struct vm_area_struct *mt_prev;
+       struct vm_area_struct *ret = rb_find_vma_prev(mm, addr, pprev);
+       VM_BUG_ON_VMA((unsigned long)ret !=
+                       (unsigned long)mt_find_vma_prev(mm, addr, &mt_prev),
+                       ret);
+       VM_BUG_ON_VMA(mt_prev != *pprev, *pprev);
+       return ret;
+}
+
 /*
  * Verify that the stack growth is acceptable and
  * update accounting. This is shared with both the
@@ -2411,6 +2705,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
        if (!(vma->vm_flags & VM_GROWSUP))
                return -EFAULT;
 
+       printk("%s: %lu-%lu expand to %lu-%lu\n", __func__, vma->vm_start, vma->vm_end,
+                       vma->vm_start, address);
        /* Guard against exceeding limits of the address space. */
        address &= PAGE_MASK;
        if (address >= (TASK_SIZE & PAGE_MASK))
@@ -2498,10 +2794,12 @@ int expand_downwards(struct vm_area_struct *vma,
        struct vm_area_struct *prev;
        int error = 0;
 
+       validate_mm(mm);
        address &= PAGE_MASK;
        if (address < mmap_min_addr)
                return -EPERM;
 
        /* Enforce stack_guard_gap */
        prev = vma->vm_prev;
        /* Check that both stack segments have the same anon_vma? */
@@ -2530,35 +2828,40 @@ int expand_downwards(struct vm_area_struct *vma,
                grow = (vma->vm_start - address) >> PAGE_SHIFT;
 
                error = -ENOMEM;
-               if (grow <= vma->vm_pgoff) {
-                       error = acct_stack_growth(vma, size, grow);
-                       if (!error) {
-                               /*
-                                * vma_gap_update() doesn't support concurrent
-                                * updates, but we only hold a shared mmap_lock
-                                * lock here, so we need to protect against
-                                * concurrent vma expansions.
-                                * anon_vma_lock_write() doesn't help here, as
-                                * we don't guarantee that all growable vmas
-                                * in a mm share the same root anon vma.
-                                * So, we reuse mm->page_table_lock to guard
-                                * against concurrent vma expansions.
-                                */
-                               spin_lock(&mm->page_table_lock);
-                               if (vma->vm_flags & VM_LOCKED)
-                                       mm->locked_vm += grow;
-                               vm_stat_account(mm, vma->vm_flags, grow);
-                               anon_vma_interval_tree_pre_update_vma(vma);
-                               vma->vm_start = address;
-                               vma->vm_pgoff -= grow;
-                               anon_vma_interval_tree_post_update_vma(vma);
-                               vma_gap_update(vma);
-                               spin_unlock(&mm->page_table_lock);
+               if (grow > vma->vm_pgoff)
+                       goto no_update;
 
-                               perf_event_mmap(vma);
-                       }
-               }
+               error = acct_stack_growth(vma, size, grow);
+               if (error)
+                       goto no_update;
+               /*
+                * vma_gap_update() doesn't support concurrent
+                * updates, but we only hold a shared mmap_sem
+                * lock here, so we need to protect against
+                * concurrent vma expansions.
+                * anon_vma_lock_write() doesn't help here, as
+                * we don't guarantee that all growable vmas
+                * in a mm share the same root anon vma.
+                * So, we reuse mm->page_table_lock to guard
+                * against concurrent vma expansions.
+                */
+               spin_lock(&mm->page_table_lock);
+               if (vma->vm_flags & VM_LOCKED)
+                       mm->locked_vm += grow;
+               vm_stat_account(mm, vma->vm_flags, grow);
+               anon_vma_interval_tree_pre_update_vma(vma);
+               vma->vm_start = address;
+               vma->vm_pgoff -= grow;
+               // Overwrite old entry in mtree.
+               mtree_store_range(&mm->mm_mt, vma->vm_start, vma->vm_end - 1,
+                                 vma, GFP_KERNEL);
+               anon_vma_interval_tree_post_update_vma(vma);
+               vma_gap_update(vma);
+               spin_unlock(&mm->page_table_lock);
+
+               perf_event_mmap(vma);
        }
+no_update:
        anon_vma_unlock_write(vma->anon_vma);
        khugepaged_enter_vma_merge(vma, vma->vm_flags);
        validate_mm(mm);
@@ -2694,6 +2997,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
        vma->vm_prev = NULL;
        do {
                vma_rb_erase(vma, &mm->mm_rb);
+               __vma_mt_erase(mm, vma);
                mm->map_count--;
                tail_vma = vma;
                vma = vma->vm_next;
@@ -3088,6 +3392,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
        if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
                return -ENOMEM;
 
        /* Check against address space limits *after* clearing old maps... */
        if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
                return -ENOMEM;
@@ -3242,10 +3547,22 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
        struct vm_area_struct *prev;
        struct rb_node **rb_link, *rb_parent;
+       unsigned long start = vma->vm_start;
+       struct vm_area_struct *overlap = NULL;
 
        if (find_vma_links(mm, vma->vm_start, vma->vm_end,
                           &prev, &rb_link, &rb_parent))
                return -ENOMEM;
+
+       //printk("%s: insert %lx: vma %lu-%lu\n", __func__, (unsigned long) current, vma->vm_start, vma->vm_end);
+       if ((overlap = mt_find(&mm->mm_mt, &start, vma->vm_end - 1, true)) != NULL) {
+               printk("Found vma ending at %lu\n", start - 1);
+               printk("vma : %lu => %lu-%lu\n", (unsigned long)overlap,
+                               overlap->vm_start, overlap->vm_end - 1);
+               mt_dump(&mm->mm_mt);
+               BUG();
+       }
+
        if ((vma->vm_flags & VM_ACCOUNT) &&
             security_vm_enough_memory_mm(mm, vma_pages(vma)))
                return -ENOMEM;
@@ -3285,6 +3602,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
        struct vm_area_struct *new_vma, *prev;
        struct rb_node **rb_link, *rb_parent;
        bool faulted_in_anon_vma = true;
+       unsigned long index = addr;
 
        /*
         * If anonymous vma has not yet been faulted, update new pgoff
@@ -3297,6 +3615,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 
        if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
                return NULL;    /* should never get here */
+       if (mt_find(&mm->mm_mt, &index, addr + len - 1, true))
+               BUG();
        new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
                            vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
                            vma->vm_userfaultfd_ctx);
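
The clean_overlaps() helper added above packages the maple tree lookup a
caller needs before installing a new mapping. A hypothetical caller,
sketched on the assumption that it sits where munmap_vma_range() is used
today and that @uf is the caller's userfaultfd unmap list:

	struct vm_area_struct *prev;

	/* Unmap anything overlapping [addr, addr + len) first. */
	if (clean_overlaps(mm, addr, len, &prev, uf))
		return -ENOMEM;
	/* prev is now the VMA below addr (or NULL), ready for vma_link(). */
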
index 56c02beb60414129b898416a244f8404cc546e9a..1505664dbbd4a1f4183e98d00b259e34653fe3a9 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -407,6 +407,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
        int error;
        int dirty_accountable = 0;
 
+       printk("vma area %lu-%lu\n", vma->vm_start, vma->vm_end);
        if (newflags == oldflags) {
                *pprev = vma;
                return 0;