www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
mm: Remove vma linked list.
author    Liam R. Howlett <Liam.Howlett@Oracle.com>
          Mon, 4 Jan 2021 20:15:50 +0000 (15:15 -0500)
committer Liam R. Howlett <Liam.Howlett@Oracle.com>
          Wed, 19 May 2021 20:39:09 +0000 (16:40 -0400)
The vma linked list has been replaced by the maple tree iterators and
the vma_next() and vma_prev() functions.
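
For reference, a minimal sketch of how such helpers can be built on top
of the maple tree (illustrative only, assuming the mas_find()/mas_prev()
iterators; not necessarily the exact definitions from this series):

    static inline struct vm_area_struct *vma_next(struct mm_struct *mm,
                                                  struct vm_area_struct *vma)
    {
            /* VMAs occupy [vm_start, vm_end - 1]; search from vm_end. */
            MA_STATE(mas, &mm->mm_mt, vma->vm_end, vma->vm_end);

            return mas_find(&mas, ULONG_MAX);
    }

    static inline struct vm_area_struct *vma_prev(struct mm_struct *mm,
                                                  struct vm_area_struct *vma)
    {
            /* Step backwards from the first byte of this VMA. */
            MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_start);

            return mas_prev(&mas, 0);
    }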

As part of this change, free_pgd_range(), zap_page_range(), and
unmap_single_vma() are also converted to use the maple tree iterators.
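
The conversion pattern is the same throughout: an on-stack maple tree
state replaces the vm_next walk. A before/after sketch (do_something()
is a placeholder, not a real kernel function):

    /* Before: walk the VMA linked list. */
    struct vm_area_struct *vma;

    for (vma = mm->mmap; vma; vma = vma->vm_next)
            do_something(vma);

    /* After: iterate the maple tree over the full address range. */
    MA_STATE(mas, &mm->mm_mt, 0, 0);
    struct vm_area_struct *vma;

    mas_for_each(&mas, vma, ULONG_MAX)
            do_something(vma);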

The internal _vma_next() has been dropped.
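
Callers now open-code the first-vma case instead, as in the vma_merge()
hunk below:

    if (!prev)
            next = find_vma(mm, 0);
    else
            next = vma_next(mm, prev);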

Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
fs/proc/task_nommu.c
include/linux/mm.h
include/linux/mm_types.h
kernel/fork.c
mm/debug.c
mm/internal.h
mm/memory.c
mm/mmap.c
mm/nommu.c

index 8691a1216d1ccc84622ae7157d1e94597adfc6a1..be02e8997ddf1f95b51484d7dde5362bb4e6566e 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -246,7 +246,7 @@ static void *m_next(struct seq_file *m, void *_p, loff_t *pos)
        struct vm_area_struct *vma = _p;
 
        *pos = vma->vm_end;
-       return vma->vm_next;
+       return vma_next(vma->vm_mm, vma);
 }
 
 static const struct seq_operations proc_pid_maps_ops = {
index 82b0767875156aac3f6ebd52f21101ae5d18c3dd..e1f1ae32fa9d98114325e74c0ae3ba37d9db1052 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1731,7 +1731,7 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 void zap_page_range(struct vm_area_struct *vma, unsigned long address,
                    unsigned long size);
 void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
-               unsigned long start, unsigned long end);
+               struct ma_state *mas, unsigned long start, unsigned long end);
 
 struct mmu_notifier_range;
 
index 304692ada0241552c8ae7b8a1f488a895409f822..ca9fb13c2acaecc2841a1e9cbd8d84aa60ba1f73 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -303,14 +303,11 @@ struct vm_userfaultfd_ctx {};
  * library, the executable area etc).
  */
 struct vm_area_struct {
-       /* The first cache line has the info for VMA tree walking. */
-
        unsigned long vm_start;         /* Our start address within vm_mm. */
        unsigned long vm_end;           /* The first byte after our end address
                                           within vm_mm. */
 
        /* linked list of VM areas per task, sorted by address */
-       struct vm_area_struct *vm_next, *vm_prev;
        struct mm_struct *vm_mm;        /* The address space we belong to. */
 
        /*
@@ -324,7 +321,6 @@ struct vm_area_struct {
        unsigned long vm_pgoff;         /* Offset (within vm_file) in PAGE_SIZE
                                         * units
                                         */
-       /* Second cache line starts here. */
        struct file *vm_file;           /* File we map to (can be NULL). */
        /*
         * For areas with an address space and backing store,
@@ -378,7 +374,6 @@ struct core_state {
 struct kioctx_table;
 struct mm_struct {
        struct {
-               struct vm_area_struct *mmap;            /* list of VMAs */
                struct maple_tree mm_mt;
 #ifdef CONFIG_MMU
                unsigned long (*get_unmapped_area) (struct file *filp,
@@ -393,7 +388,6 @@ struct mm_struct {
                unsigned long mmap_compat_legacy_base;
 #endif
                unsigned long task_size;        /* size of task vm space */
-               unsigned long highest_vm_end;   /* highest vma end address */
                pgd_t * pgd;
 
 #ifdef CONFIG_MEMBARRIER
index a3504820c5427a85399272bdf6f009cfcbdc1180..fb7eb69fec6f23cc724cc8e0c979b3f31d9419e4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -364,7 +364,6 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
                 */
                *new = data_race(*orig);
                INIT_LIST_HEAD(&new->anon_vma_chain);
-               new->vm_next = new->vm_prev = NULL;
        }
        return new;
 }
@@ -473,7 +472,7 @@ EXPORT_SYMBOL(free_task);
 static __latent_entropy int dup_mmap(struct mm_struct *mm,
                                        struct mm_struct *oldmm)
 {
-       struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
+       struct vm_area_struct *mpnt, *tmp;
        int retval;
        unsigned long charge = 0;
        MA_STATE(old_mas, &oldmm->mm_mt, 0, 0);
@@ -500,7 +499,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
        mm->exec_vm = oldmm->exec_vm;
        mm->stack_vm = oldmm->stack_vm;
 
-       pprev = &mm->mmap;
        retval = ksm_fork(mm, oldmm);
        if (retval)
                goto out;
@@ -508,8 +506,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
        if (retval)
                goto out;
 
-       prev = NULL;
-
        retval = mas_entry_count(&mas, oldmm->map_count);
        if (retval)
                goto out;
@@ -585,14 +581,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                if (is_vm_hugetlb_page(tmp))
                        reset_vma_resv_huge_pages(tmp);
 
-               /*
-                * Link in the new vma and copy the page table entries.
-                */
-               *pprev = tmp;
-               pprev = &tmp->vm_next;
-               tmp->vm_prev = prev;
-               prev = tmp;
-
                /* Link the vma into the MT */
                mas.index = tmp->vm_start;
                mas.last = tmp->vm_end - 1;
@@ -1024,7 +1012,6 @@ static void mm_init_uprobes_state(struct mm_struct *mm)
 static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
        struct user_namespace *user_ns)
 {
-       mm->mmap = NULL;
        mt_init_flags(&mm->mm_mt, MAPLE_ALLOC_RANGE);
        atomic_set(&mm->mm_users, 1);
        atomic_set(&mm->mm_count, 1);
index f382d319722a5ce726254e1b989b8f30506161d7..55afc85c4fcc29d5d6232b26ad93e33d547ad0a4 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -203,8 +203,8 @@ void dump_vma(const struct vm_area_struct *vma)
                "prot %lx anon_vma %px vm_ops %px\n"
                "pgoff %lx file %px private_data %px\n"
                "flags: %#lx(%pGv)\n",
-               vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next,
-               vma->vm_prev, vma->vm_mm,
+               vma, (void *)vma->vm_start, (void *)vma->vm_end,
+               vma_next(vma->vm_mm, vma), vma_prev(vma->vm_mm, vma), vma->vm_mm,
                (unsigned long)pgprot_val(vma->vm_page_prot),
                vma->anon_vma, vma->vm_ops, vma->vm_pgoff,
                vma->vm_file, vma->vm_private_data,
@@ -214,11 +214,11 @@ EXPORT_SYMBOL(dump_vma);
 
 void dump_mm(const struct mm_struct *mm)
 {
-       pr_emerg("mm %px mmap %px task_size %lu\n"
+       pr_emerg("mm %px task_size %lu\n"
 #ifdef CONFIG_MMU
                "get_unmapped_area %px\n"
 #endif
-               "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
+               "mmap_base %lu mmap_legacy_base %lu\n"
                "pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
                "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
                "pinned_vm %llx data_vm %lx exec_vm %lx stack_vm %lx\n"
@@ -242,11 +242,11 @@ void dump_mm(const struct mm_struct *mm)
                "tlb_flush_pending %d\n"
                "def_flags: %#lx(%pGv)\n",
 
-               mm, mm->mmap, mm->task_size,
+               mm, mm->task_size,
 #ifdef CONFIG_MMU
                mm->get_unmapped_area,
 #endif
-               mm->mmap_base, mm->mmap_legacy_base, mm->highest_vm_end,
+               mm->mmap_base, mm->mmap_legacy_base,
                mm->pgd, atomic_read(&mm->mm_users),
                atomic_read(&mm->mm_count),
                mm_pgtables_bytes(mm),
index 8d414d009882d742d04b600974c3afcc954fed18..f26ef82f3a7b2bf6faeee9a0fc23d7875283e291 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -36,8 +36,8 @@ void page_writeback_init(void);
 
 vm_fault_t do_swap_page(struct vm_fault *vmf);
 
-void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
-               unsigned long floor, unsigned long ceiling);
+void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
+       struct vm_area_struct *vma, unsigned long floor, unsigned long ceiling);
 
 static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
 {
index e3b56903b1117991b7b997dfbd540b74a2061034..47af2029be8aa1a93371b49c3b1e7218aff29f4b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -399,13 +399,18 @@ void free_pgd_range(struct mmu_gather *tlb,
        } while (pgd++, addr = next, addr != end);
 }
 
-void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
-               unsigned long floor, unsigned long ceiling)
+void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
+       struct vm_area_struct *vma, unsigned long floor, unsigned long ceiling)
 {
-       while (vma) {
-               struct vm_area_struct *next = vma->vm_next;
+       struct vm_area_struct *next;
+       struct ma_state ma_next = *mas;
+
+       do {
                unsigned long addr = vma->vm_start;
 
+               next = mas_find(&ma_next, ceiling - 1);
+               BUG_ON(vma->vm_start < floor);
+               BUG_ON(vma->vm_end - 1 > ceiling - 1);
                /*
                 * Hide vma from rmap and truncate_pagecache before freeing
                 * pgtables
@@ -422,16 +427,20 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
                         */
                        while (next && next->vm_start <= vma->vm_end + PMD_SIZE
                               && !is_vm_hugetlb_page(next)) {
+                               *mas = ma_next;
                                vma = next;
-                               next = vma->vm_next;
+                               next = mas_find(&ma_next, ceiling - 1);
+                               BUG_ON(vma->vm_start < floor);
+                               BUG_ON(vma->vm_end - 1 > ceiling - 1);
                                unlink_anon_vmas(vma);
                                unlink_file_vma(vma);
                        }
                        free_pgd_range(tlb, addr, vma->vm_end,
                                floor, next ? next->vm_start : ceiling);
                }
+               *mas = ma_next;
                vma = next;
-       }
+       } while (vma);
 }
 
 int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
@@ -1510,16 +1519,19 @@ static void unmap_single_vma(struct mmu_gather *tlb,
  * drops the lock and schedules.
  */
 void unmap_vmas(struct mmu_gather *tlb,
-               struct vm_area_struct *vma, unsigned long start_addr,
-               unsigned long end_addr)
+               struct vm_area_struct *vma, struct ma_state *mas,
+               unsigned long start_addr, unsigned long end_addr)
 {
        struct mmu_notifier_range range;
 
        mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
                                start_addr, end_addr);
        mmu_notifier_invalidate_range_start(&range);
-       for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
+       do {
+               BUG_ON(vma->vm_start < start_addr);
+               BUG_ON(vma->vm_end > end_addr);
                unmap_single_vma(tlb, vma, start_addr, end_addr, NULL);
+       } while ((vma = mas_find(mas, end_addr - 1)) != NULL);
        mmu_notifier_invalidate_range_end(&range);
 }
 
@@ -1536,15 +1548,20 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
 {
        struct mmu_notifier_range range;
        struct mmu_gather tlb;
+       unsigned long end = start + size;
+       MA_STATE(mas, &vma->vm_mm->mm_mt, start, start);
 
        lru_add_drain();
        mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
-                               start, start + size);
+                               start, end);
        tlb_gather_mmu(&tlb, vma->vm_mm);
        update_hiwater_rss(vma->vm_mm);
        mmu_notifier_invalidate_range_start(&range);
-       for ( ; vma && vma->vm_start < range.end; vma = vma->vm_next)
-               unmap_single_vma(&tlb, vma, start, range.end, NULL);
+       rcu_read_lock();
+       mas_for_each(&mas, vma, end - 1)
+               unmap_single_vma(&tlb, vma, start, end, NULL);
+       rcu_read_unlock();
+
        mmu_notifier_invalidate_range_end(&range);
        tlb_finish_mmu(&tlb);
 }
index 80dd47cfeac125c432defba346998405ee02c5d4..994aa9a70fb71c57a8e79e75e06bec885a8c1c39 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -74,10 +74,6 @@ int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
 static bool ignore_rlimit_data;
 core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
 
-static void unmap_region(struct mm_struct *mm,
-               struct vm_area_struct *vma, struct vm_area_struct *prev,
-               unsigned long start, unsigned long end);
-
 /* description of effects of mapping type and prot in current implementation.
  * this is due to the limited x86 page protection hardware.  The expected
  * behavior is in parens:
@@ -175,10 +171,8 @@ void unlink_file_vma(struct vm_area_struct *vma)
 /*
  * Close a vm structure and free it, returning the next.
  */
-static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
+static void remove_vma(struct vm_area_struct *vma)
 {
-       struct vm_area_struct *next = vma->vm_next;
-
        might_sleep();
        if (vma->vm_ops && vma->vm_ops->close)
                vma->vm_ops->close(vma);
@@ -186,13 +180,13 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
                fput(vma->vm_file);
        mpol_put(vma_policy(vma));
        vm_area_free(vma);
-       return next;
 }
 
 static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
                         unsigned long newbrk, unsigned long oldbrk,
                         struct list_head *uf);
-static int do_brk_flags(struct ma_state *mas, struct vm_area_struct **brkvma,
+static int do_brk_flags(struct ma_state *mas, struct ma_state *ma_prev,
+                       struct vm_area_struct **brkvma,
                        unsigned long addr, unsigned long request,
                        unsigned long flags);
 SYSCALL_DEFINE1(brk, unsigned long, brk)
@@ -205,6 +199,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
        bool downgraded = false;
        LIST_HEAD(uf);
        MA_STATE(mas, &mm->mm_mt, 0, 0);
+       struct ma_state ma_neighbour;
 
        if (mmap_write_lock_killable(mm))
                return -EINTR;
@@ -261,7 +256,6 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
                 * before calling do_brk_munmap().
                 */
                mm->brk = brk;
-               mas.last = oldbrk - 1;
                ret = do_brk_munmap(&mas, brkvma, newbrk, oldbrk, &uf);
                if (ret == 1)  {
                        downgraded = true;
@@ -272,26 +266,26 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
                mm->brk = origbrk;
                goto out;
        }
+       ma_neighbour = mas;
+       next = mas_next(&ma_neighbour, newbrk + PAGE_SIZE + stack_guard_gap);
        /* Only check if the next VMA is within the stack_guard_gap of the
         * expansion area */
-       next = mas_next(&mas, newbrk + PAGE_SIZE + stack_guard_gap);
        /* Check against existing mmap mappings. */
        if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
                goto out;
 
-       brkvma = mas_prev(&mas, mm->start_brk);
+       brkvma = mas_prev(&ma_neighbour, mm->start_brk);
        if (brkvma) {
-               if(brkvma->vm_start >= oldbrk)
+               if (brkvma->vm_start >= oldbrk)
                        goto out; // Trying to map over another vma.
 
-               if (brkvma->vm_end <= min_brk) {
+               if (brkvma->vm_end <= min_brk)
                        brkvma = NULL;
-                       mas_reset(&mas);
-               }
        }
 
        /* Ok, looks good - let it rip. */
-       if (do_brk_flags(&mas, &brkvma, oldbrk, newbrk - oldbrk, 0) < 0)
+       if (do_brk_flags(&mas, &ma_neighbour, &brkvma, oldbrk,
+                        newbrk - oldbrk, 0) < 0)
                goto out;
 
        mm->brk = brk;
@@ -316,85 +310,17 @@ out:
 extern void mt_validate(struct maple_tree *mt);
 extern void mt_dump(const struct maple_tree *mt);
 
-/* Validate the maple tree */
-static void validate_mm_mt(struct mm_struct *mm)
-{
-       struct maple_tree *mt = &mm->mm_mt;
-       struct vm_area_struct *vma_mt, *vma = mm->mmap;
-
-       MA_STATE(mas, mt, 0, 0);
-       rcu_read_lock();
-       mas_for_each(&mas, vma_mt, ULONG_MAX) {
-               if (xa_is_zero(vma_mt))
-                       continue;
-
-               if (!vma)
-                       break;
-
-               if ((vma != vma_mt) ||
-                   (vma->vm_start != vma_mt->vm_start) ||
-                   (vma->vm_end != vma_mt->vm_end) ||
-                   (vma->vm_start != mas.index) ||
-                   (vma->vm_end - 1 != mas.last)) {
-                       pr_emerg("issue in %s\n", current->comm);
-                       dump_stack();
-#ifdef CONFIG_DEBUG_VM
-                       dump_vma(vma_mt);
-                       pr_emerg("and vm_next\n");
-                       dump_vma(vma->vm_next);
-#endif // CONFIG_DEBUG_VM
-                       pr_emerg("mt piv: %px %lu - %lu\n", vma_mt,
-                                mas.index, mas.last);
-                       pr_emerg("mt vma: %px %lu - %lu\n", vma_mt,
-                                vma_mt->vm_start, vma_mt->vm_end);
-                       if (vma->vm_prev) {
-                               pr_emerg("ll prev: %px %lu - %lu\n",
-                                        vma->vm_prev, vma->vm_prev->vm_start,
-                                        vma->vm_prev->vm_end);
-                       }
-                       pr_emerg("ll vma: %px %lu - %lu\n", vma,
-                                vma->vm_start, vma->vm_end);
-                       if (vma->vm_next) {
-                               pr_emerg("ll next: %px %lu - %lu\n",
-                                        vma->vm_next, vma->vm_next->vm_start,
-                                        vma->vm_next->vm_end);
-                       }
-
-                       mt_dump(mas.tree);
-                       if (vma_mt->vm_end != mas.last + 1) {
-                               pr_err("vma: %px vma_mt %lu-%lu\tmt %lu-%lu\n",
-                                               mm, vma_mt->vm_start, vma_mt->vm_end,
-                                               mas.index, mas.last);
-                               mt_dump(mas.tree);
-                       }
-                       VM_BUG_ON_MM(vma_mt->vm_end != mas.last + 1, mm);
-                       if (vma_mt->vm_start != mas.index) {
-                               pr_err("vma: %px vma_mt %px %lu - %lu doesn't match\n",
-                                               mm, vma_mt, vma_mt->vm_start, vma_mt->vm_end);
-                               mt_dump(mas.tree);
-                       }
-                       VM_BUG_ON_MM(vma_mt->vm_start != mas.index, mm);
-               }
-               VM_BUG_ON(vma != vma_mt);
-               vma = vma->vm_next;
-
-       }
-       VM_BUG_ON(vma);
-
-       rcu_read_unlock();
-       mt_validate(&mm->mm_mt);
-}
-
 static void validate_mm(struct mm_struct *mm)
 {
        int bug = 0;
        int i = 0;
-       unsigned long highest_address = 0;
-       struct vm_area_struct *vma = mm->mmap;
+       struct vm_area_struct *vma;
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
 
-       validate_mm_mt(mm);
+       mmap_assert_locked(mm);
 
-       while (vma) {
+       mt_validate(&mm->mm_mt);
+       mas_for_each(&mas, vma, ULONG_MAX) {
 #ifdef CONFIG_DEBUG_VM_RB
                struct anon_vma *anon_vma = vma->anon_vma;
                struct anon_vma_chain *avc;
@@ -405,24 +331,18 @@ static void validate_mm(struct mm_struct *mm)
                        anon_vma_unlock_read(anon_vma);
                }
 #endif
-               highest_address = vm_end_gap(vma);
-               vma = vma->vm_next;
+               VM_BUG_ON(mas.index != vma->vm_start);
+               VM_BUG_ON(mas.last != vma->vm_end - 1);
                i++;
        }
        if (i != mm->map_count) {
-               pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
-               bug = 1;
-       }
-       if (highest_address != mm->highest_vm_end) {
-               pr_emerg("mm->highest_vm_end %lx, found %lx\n",
-                         mm->highest_vm_end, highest_address);
+               pr_emerg("map_count %d mas_for_each %d\n", mm->map_count, i);
                bug = 1;
        }
        VM_BUG_ON_MM(bug, mm);
 }
 
 #else // !CONFIG_DEBUG_MAPLE_TREE
-#define validate_mm_mt(root) do { } while (0)
 #define validate_mm(mm) do { } while (0)
 #endif // CONFIG_DEBUG_MAPLE_TREE
 
@@ -481,24 +401,6 @@ bool range_has_overlap(struct mm_struct *mm, unsigned long start,
        return existing ? true : false;
 }
 
-/*
- * _vma_next() - Get the next VMA or the first.
- * @mm: The mm_struct.
- * @vma: The current vma.
- *
- * If @vma is NULL, return the first vma in the mm.
- *
- * Returns: The next VMA after @vma.
- */
-static inline struct vm_area_struct *_vma_next(struct mm_struct *mm,
-                                        struct vm_area_struct *vma)
-{
-       if (!vma)
-               return mm->mmap;
-
-       return vma->vm_next;
-}
-
 static unsigned long count_vma_pages_range(struct mm_struct *mm,
                unsigned long addr, unsigned long end)
 {
@@ -575,7 +477,7 @@ void vma_mt_store(struct mm_struct *mm, struct vm_area_struct *vma)
 }
 
 static void vma_mas_link(struct mm_struct *mm, struct vm_area_struct *vma,
-                        struct ma_state *mas, struct vm_area_struct *prev)
+                        struct ma_state *mas)
 {
        struct address_space *mapping = NULL;
 
@@ -594,36 +496,26 @@ static void vma_mas_link(struct mm_struct *mm, struct vm_area_struct *vma,
        validate_mm(mm);
 }
 
-static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
-                       struct vm_area_struct *prev)
+static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-       struct address_space *mapping = NULL;
-
-       if (vma->vm_file) {
-               mapping = vma->vm_file->f_mapping;
-               i_mmap_lock_write(mapping);
-       }
-
-       vma_mt_store(mm, vma);
-       __vma_link_file(vma);
+       MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end - 1);
 
-       if (mapping)
-               i_mmap_unlock_write(mapping);
-
-       mm->map_count++;
-       validate_mm(mm);
+       vma_mas_link(mm, vma, &mas);
 }
 
 /*
  * Helper for vma_adjust() in the split_vma insert case: insert a vma into the
  * mm's list and the mm tree.  It has already been inserted into the interval tree.
  */
-static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
+static inline void __insert_vm_struct(struct mm_struct *mm,
+                                     struct vm_area_struct *vma)
 {
-       struct vm_area_struct *prev;
+       MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end - 1);
+
+       BUG_ON(mas_find(&mas, vma->vm_end - 1));
+       mas_reset(&mas);
 
-       BUG_ON(range_has_overlap(mm, vma->vm_start, vma->vm_end, &prev));
-       vma_mt_store(mm, vma);
+       vma_mas_store(vma, &mas);
        mm->map_count++;
 }
 
@@ -639,9 +531,11 @@ inline int vma_expand(struct ma_state *mas, struct vm_area_struct *vma,
        bool remove_next = false;
        int error;
 
+
        if (next && (vma != next) && (end == next->vm_end)) {
+               /* Expanding existing VMA over a gap and next */
                remove_next = true;
-               if (next->anon_vma && !vma->anon_vma) {
+               if (next->anon_vma && !anon_vma) {
                        vma->anon_vma = next->anon_vma;
                        error = anon_vma_clone(vma, next);
                        if (error)
@@ -671,7 +565,6 @@ inline int vma_expand(struct ma_state *mas, struct vm_area_struct *vma,
        vma->vm_start = start;
        vma->vm_end = end;
        vma->vm_pgoff = pgoff;
-       /* Note: mas must be pointing to the expanding VMA */
        vma_mas_store(vma, mas);
 
        if (file) {
@@ -680,11 +573,8 @@ inline int vma_expand(struct ma_state *mas, struct vm_area_struct *vma,
        }
 
        /* Expanding over the next vma */
-       if (remove_next && file) {
+       if (remove_next && file)
                __remove_shared_vm_struct(next, file, mapping);
-       } else if (!next) {
-               mm->highest_vm_end = vm_end_gap(vma);
-       }
 
        if (anon_vma) {
                anon_vma_interval_tree_post_update_vma(vma);
@@ -723,7 +613,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
        struct vm_area_struct *expand)
 {
        struct mm_struct *mm = vma->vm_mm;
-       struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
+       struct vm_area_struct *next = vma_next(mm, vma), *orig_vma = vma;
        struct address_space *mapping = NULL;
        struct rb_root_cached *root = NULL;
        struct anon_vma *anon_vma = NULL;
@@ -732,6 +622,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
        long adjust_next = 0;
        int remove_next = 0;
 
+       validate_mm(mm);
        if (next && !insert) {
                struct vm_area_struct *exporter = NULL, *importer = NULL;
 
@@ -764,7 +655,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
                                 */
                                remove_next = 1 + (end > next->vm_end);
                                VM_WARN_ON(remove_next == 2 &&
-                                          end != next->vm_next->vm_end);
+                                          end != vma_next(mm, next)->vm_end);
                                /* trim end to next, for case 6 first pass */
                                end = next->vm_end;
                        }
@@ -777,7 +668,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
                         * next, if the vma overlaps with it.
                         */
                        if (remove_next == 2 && !next->anon_vma)
-                               exporter = next->vm_next;
+                               exporter = vma_next(mm, next);
 
                } else if (end > next->vm_start) {
                        /*
@@ -869,8 +760,6 @@ again:
                else
                        vma_changed = true;
                vma->vm_end = end;
-               if (!next)
-                       mm->highest_vm_end = vm_end_gap(vma);
        }
 
        if (vma_changed)
@@ -938,7 +827,7 @@ again:
                         * "next->vm_prev->vm_end" changed and the
                         * "vma->vm_next" gap must be updated.
                         */
-                       next = vma->vm_next;
+                       next = vma_next(mm, vma);
                } else {
                        /*
                         * For the scope of the comment "next" and
@@ -956,27 +845,6 @@ again:
                        remove_next = 1;
                        end = next->vm_end;
                        goto again;
-               } else if (!next) {
-                       /*
-                        * If remove_next == 2 we obviously can't
-                        * reach this path.
-                        *
-                        * If remove_next == 3 we can't reach this
-                        * path because pre-swap() next is always not
-                        * NULL. pre-swap() "next" is not being
-                        * removed and its next->vm_end is not altered
-                        * (and furthermore "end" already matches
-                        * next->vm_end in remove_next == 3).
-                        *
-                        * We reach this only in the remove_next == 1
-                        * case if the "next" vma that was removed was
-                        * the highest vma of the mm. However in such
-                        * case next->vm_end == "end" and the extended
-                        * "vma" has vma->vm_end == next->vm_end so
-                        * mm->highest_vm_end doesn't need any update
-                        * in remove_next == 1 case.
-                        */
-                       VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
                }
        }
        if (insert && file)
@@ -1136,10 +1004,14 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
        if (vm_flags & VM_SPECIAL)
                return NULL;
 
-       next = _vma_next(mm, prev);
+       if (!prev)
+               next = find_vma(mm, 0);
+       else
+               next = vma_next(mm, prev);
+
        area = next;
        if (area && area->vm_end == end)                /* cases 6, 7, 8 */
-               next = next->vm_next;
+               next = vma_next(mm, next);
 
        /* verify some invariant that must be enforced by the caller */
        VM_WARN_ON(prev && addr <= prev->vm_start);
@@ -1205,6 +1077,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                return area;
        }
 
+       validate_mm(mm);
        return NULL;
 }
 
@@ -1274,17 +1147,20 @@ static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_
 struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
 {
        struct anon_vma *anon_vma = NULL;
+       struct vm_area_struct *next, *prev;
 
        /* Try next first. */
-       if (vma->vm_next) {
-               anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
+       next = vma_next(vma->vm_mm, vma);
+       if (next) {
+               anon_vma = reusable_anon_vma(next, vma, next);
                if (anon_vma)
                        return anon_vma;
        }
 
        /* Try prev next. */
-       if (vma->vm_prev)
-               anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);
+       prev = vma_prev(vma->vm_mm, vma);
+       if (prev)
+               anon_vma = reusable_anon_vma(prev, prev, vma);
 
        /*
         * We might reach here with anon_vma == NULL if we can't find
@@ -2057,7 +1933,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
        if (gap_addr < address || gap_addr > TASK_SIZE)
                gap_addr = TASK_SIZE;
 
-       next = vma->vm_next;
+       next = vma_next(mm, vma);
        if (next && next->vm_start < gap_addr && vma_is_accessible(next)) {
                if (!(next->vm_flags & VM_GROWSUP))
                        return -ENOMEM;
@@ -2103,8 +1979,6 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                vma->vm_end = address;
                                vma_mt_store(mm, vma);
                                anon_vma_interval_tree_post_update_vma(vma);
-                               if (!vma->vm_next)
-                                       mm->highest_vm_end = vm_end_gap(vma);
                                spin_unlock(&mm->page_table_lock);
 
                                perf_event_mmap(vma);
@@ -2131,7 +2005,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
                return -EPERM;
 
        /* Enforce stack_guard_gap */
-       prev = vma->vm_prev;
+       prev = vma_prev(mm, vma);
        /* Check that both stack segments have the same anon_vma? */
        if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
                        vma_is_accessible(prev)) {
@@ -2266,20 +2140,21 @@ EXPORT_SYMBOL_GPL(find_extend_vma);
  *
  * Called with the mm semaphore held.
  */
-static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
+static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas)
 {
+       struct vm_area_struct *vma;
        unsigned long nr_accounted = 0;
 
        /* Update high watermark before we lower total_vm */
        update_hiwater_vm(mm);
-       do {
+       mas_for_each(mas, vma, ULONG_MAX) {
                long nrpages = vma_pages(vma);
 
                if (vma->vm_flags & VM_ACCOUNT)
                        nr_accounted += nrpages;
                vm_stat_account(mm, vma->vm_flags, -nrpages);
-               vma = remove_vma(vma);
-       } while (vma);
+               remove_vma(vma);
+       }
        vm_unacct_memory(nr_accounted);
        validate_mm(mm);
 }
@@ -2290,21 +2165,22 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
  * Called with the mm semaphore held.
  */
 static void unmap_region(struct mm_struct *mm,
-               struct vm_area_struct *vma, struct vm_area_struct *prev,
-               unsigned long start, unsigned long end)
+                    struct vm_area_struct *vma, struct ma_state *mas,
+                    unsigned long start, unsigned long end,
+                    struct vm_area_struct *prev, unsigned long max)
 {
-       struct vm_area_struct *next = _vma_next(mm, prev);
        struct mmu_gather tlb;
+       struct ma_state ma_pgtb = *mas;
 
        lru_add_drain();
        tlb_gather_mmu(&tlb, mm);
        update_hiwater_rss(mm);
-       unmap_vmas(&tlb, vma, start, end);
-       free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
-                                next ? next->vm_start : USER_PGTABLES_CEILING);
+       unmap_vmas(&tlb, vma, mas, start, end);
+       free_pgtables(&tlb, &ma_pgtb, vma,
+                     prev ? prev->vm_end : FIRST_USER_ADDRESS,
+                     max);
        tlb_finish_mmu(&tlb);
 }
-
 /*
  * __split_vma() bypasses sysctl_max_map_count checking.  We use this where it
  * has already been checked or doesn't make sense to fail.
@@ -2314,7 +2190,6 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        struct vm_area_struct *new;
        int err;
-       validate_mm_mt(mm);
 
        if (vma->vm_ops && vma->vm_ops->may_split) {
                err = vma->vm_ops->may_split(vma, addr);
@@ -2367,7 +2242,6 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
        mpol_put(vma_policy(new));
  out_free_vma:
        vm_area_free(new);
-       validate_mm_mt(mm);
        return err;
 }
 
@@ -2384,24 +2258,30 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
        return __split_vma(mm, vma, addr, new_below);
 }
 
-static inline int unlock_range(struct vm_area_struct *start,
-                       struct vm_area_struct **tail, unsigned long limit)
+static inline void detach_range(struct mm_struct *mm, struct ma_state *mas,
+                       struct ma_state *dst, struct vm_area_struct **vma)
 {
-       struct mm_struct *mm = start->vm_mm;
-       struct vm_area_struct *tmp = start;
+       unsigned long start = dst->index;
+       unsigned long end = dst->last;
        int count = 0;
 
-       while (tmp && tmp->vm_start < limit) {
-               *tail = tmp;
+       do {
                count++;
-               if (tmp->vm_flags & VM_LOCKED) {
-                       mm->locked_vm -= vma_pages(tmp);
-                       munlock_vma_pages_all(tmp);
+               *vma = mas_prev(mas, start);
+               BUG_ON((*vma)->vm_start < start);
+               BUG_ON((*vma)->vm_end > end + 1);
+               vma_mas_store(*vma, dst);
+               if ((*vma)->vm_flags & VM_LOCKED) {
+                       mm->locked_vm -= vma_pages(*vma);
+                       munlock_vma_pages_all(*vma);
                }
-               tmp = tmp->vm_next;
-       }
+       } while ((*vma)->vm_start > start);
 
-       return count;
+       /* Drop removed area from the tree */
+       mas->last = end;
+       mas_store_gfp(mas, NULL, GFP_KERNEL);
+       /* Decrement map_count */
+       mm->map_count -= count;
 }
 
 /* do_mas_align_munmap() - munmap the aligned region from @start to @end.
@@ -2420,9 +2300,17 @@ static int do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
               struct mm_struct *mm, unsigned long start, unsigned long end,
               struct list_head *uf, bool downgrade)
 {
-       struct vm_area_struct *prev, *last;
+       struct vm_area_struct *prev, *last, *next = NULL;
+       struct maple_tree mt_detach;
+       unsigned long max = USER_PGTABLES_CEILING;
+       MA_STATE(dst, NULL, start, end - 1);
+       struct ma_state tmp;
        /* we have start < vma->vm_end  */
 
+       validate_mm(mm);
+       /* arch_unmap() might do unmaps itself.  */
+       arch_unmap(mm, start, end);
+
        /*
         * If we need to split any vma, do it now to save pain later.
         *
@@ -2443,28 +2331,33 @@ static int do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
                error = __split_vma(mm, vma, start, 0);
                if (error)
                        return error;
+
                prev = vma;
-               vma = _vma_next(mm, prev);
-               mas->index = start;
-               mas_reset(mas);
+               mas_set_range(mas, start, end - 1);
+               vma = mas_walk(mas);
+
        } else {
-               prev = vma->vm_prev;
+               tmp = *mas;
+               prev = mas_prev(&tmp, 0);
        }
 
-       if (vma->vm_end >= end)
+       if (end < vma->vm_end) {
                last = vma;
-       else
-               last = find_vma_intersection(mm, end - 1, end);
+       } else {
+               mas_set(mas, end - 1);
+               last = mas_walk(mas);
+       }
 
        /* Does it split the last one? */
        if (last && end < last->vm_end) {
-               int error = __split_vma(mm, last, end, 1);
+               int error;
+               error = __split_vma(mm, last, end, 1);
                if (error)
                        return error;
-               vma = _vma_next(mm, prev);
-               mas_reset(mas);
+               mas_set(mas, end - 1);
+               last = mas_walk(mas);
        }
-
+       next = mas_next(mas, ULONG_MAX);
 
        if (unlikely(uf)) {
                /*
@@ -2482,27 +2375,15 @@ static int do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
                        return error;
        }
 
-       /*
-        * unlock any mlock()ed ranges before detaching vmas, count the number
-        * of VMAs to be dropped, and return the tail entry of the affected
-        * area.
-        */
-       mm->map_count -= unlock_range(vma, &last, end);
-       /* Drop removed area from the tree */
-       mas_store_gfp(mas, NULL, GFP_KERNEL);
+       /* Point of no return */
+       mas_lock(mas);
+       if (next)
+               max = next->vm_start;
 
-       /* Detach vmas from the MM linked list */
-       vma->vm_prev = NULL;
-       if (prev)
-               prev->vm_next = last->vm_next;
-       else
-               mm->mmap = last->vm_next;
-
-       if (last->vm_next) {
-               last->vm_next->vm_prev = prev;
-               last->vm_next = NULL;
-       } else
-               mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
+       mtree_init(&mt_detach, MAPLE_ALLOC_RANGE);
+       dst.tree = &mt_detach;
+       detach_range(mm, mas, &dst, &vma);
+       mas_unlock(mas);
 
        /*
         * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
@@ -2510,7 +2391,7 @@ static int do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
         * down_read(mmap_lock) and collide with the VMA we are about to unmap.
         */
        if (downgrade) {
-               if (last && (last->vm_flags & VM_GROWSDOWN))
+               if (next && (next->vm_flags & VM_GROWSDOWN))
                        downgrade = false;
                else if (prev && (prev->vm_flags & VM_GROWSUP))
                        downgrade = false;
@@ -2518,11 +2399,16 @@ static int do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
                        mmap_write_downgrade(mm);
        }
 
-       unmap_region(mm, vma, prev, start, end);
+       /* Unmap the region */
+       unmap_region(mm, vma, &dst, start, end, prev, max);
+
+       /* Statistics and freeing VMAs */
+       mas_set(&dst, start);
+       remove_mt(mm, &dst);
 
-       /* Fix up all other VM information */
-       remove_vma_list(mm, vma);
+       mtree_destroy(&mt_detach);
 
+       validate_mm(mm);
        return downgrade ? 1 : 0;
 }
 
@@ -2547,16 +2433,14 @@ int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm,
        unsigned long end;
        struct vm_area_struct *vma;
 
-       if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
+       if ((offset_in_page(start)) || (start > TASK_SIZE) ||
+           (len > TASK_SIZE - start))
                return -EINVAL;
 
        end = start + PAGE_ALIGN(len);
        if (end == start)
                return -EINVAL;
 
-        /* arch_unmap() might do unmaps itself.  */
-       arch_unmap(mm, start, end);
-
        /* Find the first overlapping VMA */
        vma = mas_find(mas, end - 1);
        if (!vma)
@@ -2575,8 +2459,11 @@ int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm,
 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
              struct list_head *uf)
 {
+       int ret;
        MA_STATE(mas, &mm->mm_mt, start, start);
-       return do_mas_munmap(&mas, mm, start, len, uf, false);
+
+       ret = do_mas_munmap(&mas, mm, start, len, uf, false);
+       return ret;
 }
 
 unsigned long mmap_region(struct file *file, unsigned long addr,
@@ -2585,15 +2472,18 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma = NULL;
-       struct vm_area_struct *prev, *next;
+       struct vm_area_struct *prev, *next = NULL;
        pgoff_t pglen = len >> PAGE_SHIFT;
        unsigned long charged = 0;
        unsigned long end = addr + len;
        unsigned long merge_start = addr, merge_end = end;
+       unsigned long max = USER_PGTABLES_CEILING;
        pgoff_t vm_pgoff;
        int error;
+       struct ma_state ma_prev, tmp;
        MA_STATE(mas, &mm->mm_mt, addr, end - 1);
 
+
        /* Check against address space limit. */
        if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
                unsigned long nr_pages;
@@ -2609,57 +2499,66 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
                        return -ENOMEM;
        }
 
+       validate_mm(mm);
        /* Unmap any existing mapping in the area */
-       if (do_mas_munmap(&mas, mm, addr, len, uf, false))
+       if (do_mas_munmap(&mas, mm, addr, len, uf, false)) {
                return -ENOMEM;
+       }
 
        /*
         * Private writable mapping: check memory availability
         */
        if (accountable_mapping(file, vm_flags)) {
                charged = len >> PAGE_SHIFT;
-               if (security_vm_enough_memory_mm(mm, charged))
+               if (security_vm_enough_memory_mm(mm, charged)) {
                        return -ENOMEM;
+               }
                vm_flags |= VM_ACCOUNT;
        }
 
+       mas_set_range(&mas, addr, end - 1);
+       mas_walk(&mas);  // Walk to the empty area (munmapped above)
+       ma_prev = mas;
+       prev = mas_prev(&ma_prev, 0);
 
-       if (vm_flags & VM_SPECIAL) {
-               prev = mas_prev(&mas, 0);
+       if (vm_flags & VM_SPECIAL)
                goto cannot_expand;
-       }
 
        /* Attempt to expand an old mapping */
 
        /* Check next */
-       next = mas_next(&mas, ULONG_MAX);
-       if (next && next->vm_start == end && vma_policy(next) &&
-           can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen,
-                                NULL_VM_UFFD_CTX)) {
-               merge_end = next->vm_end;
-               vma = next;
-               vm_pgoff = next->vm_pgoff - pglen;
+       tmp = mas;
+       next = mas_next(&tmp, ULONG_MAX);
+       if (next) {
+               max = next->vm_start;
+               if (next->vm_start == end && vma_policy(next) &&
+                   can_vma_merge_before(next, vm_flags, NULL, file,
+                                        pgoff + pglen, NULL_VM_UFFD_CTX)) {
+                       /* Try to expand next back over the requested area */
+                       merge_end = next->vm_end;
+                       vma = next;
+                       vm_pgoff = next->vm_pgoff - pglen;
+               }
        }
 
        /* Check prev */
-       prev = mas_prev(&mas, 0);
        if (prev && prev->vm_end == addr && !vma_policy(prev) &&
            can_vma_merge_after(prev, vm_flags, NULL, file, pgoff,
                                NULL_VM_UFFD_CTX)) {
+               /* Try to expand the prev over the requested area */
                merge_start = prev->vm_start;
                vma = prev;
+               mas = ma_prev;
                vm_pgoff = prev->vm_pgoff;
        }
 
-
        /* Actually expand, if possible */
        if (vma &&
            !vma_expand(&mas, vma, merge_start, merge_end, vm_pgoff, next)) {
-               khugepaged_enter_vma_merge(prev, vm_flags);
+               khugepaged_enter_vma_merge(vma, vm_flags);
                goto expanded;
        }
 
-       mas_set_range(&mas, addr, end - 1);
 cannot_expand:
        /*
         * Determine the object being mapped and call the appropriate
@@ -2705,9 +2604,12 @@ cannot_expand:
                 * Answer: Yes, several device drivers can do it in their
                 *         f_op->mmap method. -DaveM
                 */
-               WARN_ON_ONCE(addr != vma->vm_start);
+               if (addr != vma->vm_start) {
+                       WARN_ON_ONCE(addr != vma->vm_start);
+                       addr = vma->vm_start;
+                       mas_set_range(&mas, addr, end - 1);
+               }
 
-               addr = vma->vm_start;
 
                /* If vm_flags changed after call_mmap(), we should try merge vma again
                 * as we may succeed this time.
@@ -2718,7 +2620,7 @@ cannot_expand:
                                                 pgoff, NULL_VM_UFFD_CTX))) {
                        merge_start = prev->vm_start;
                        vm_pgoff = prev->vm_pgoff;
-                       if (!vma_expand(&mas, prev, merge_start, merge_end,
+                       if (!vma_expand(&ma_prev, prev, merge_start, merge_end,
                                        vm_pgoff, next)) {
                                /* ->mmap() can change vma->vm_file and fput the original file. So
                                 * fput the vma->vm_file here or we would add an extra fput for file
@@ -2743,6 +2645,7 @@ cannot_expand:
                        goto free_vma;
        } else {
                vma_set_anonymous(vma);
+               vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
        }
 
        /* Allow architectures to sanity-check the vm_flags */
@@ -2754,9 +2657,8 @@ cannot_expand:
                        goto free_vma;
        }
 
-       mas.index = mas.last = addr;
-       mas_walk(&mas);
-       vma_mas_link(mm, vma, &mas, prev);
+       vma_mas_link(mm, vma, &mas);
+
        /* Once vma denies write, undo our temporary denial count */
        if (file) {
 unmap_writable:
@@ -2792,6 +2694,7 @@ expanded:
        vma->vm_flags |= VM_SOFTDIRTY;
 
        vma_set_page_prot(vma);
+       validate_mm(mm);
 
        return addr;
 
@@ -2800,7 +2703,8 @@ unmap_and_free_vma:
        vma->vm_file = NULL;
 
        /* Undo any partial mapping done by a device driver. */
-       unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
+       mas_set_range(&mas, addr, end - 1);
+       unmap_region(mm, vma, &mas, vma->vm_start, vma->vm_end, prev, max);
        charged = 0;
        if (vm_flags & VM_SHARED)
                mapping_unmap_writable(file->f_mapping);
@@ -2824,7 +2728,6 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
 
        if (mmap_write_lock_killable(mm))
                return -EINTR;
-
        ret = do_mas_munmap(&mas, mm, start, len, &uf, downgrade);
        /*
         * Returning 1 indicates mmap_lock is downgraded.
@@ -2867,15 +2770,16 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
        unsigned long populate = 0;
        unsigned long ret = -EINVAL;
        struct file *file;
+       MA_STATE(mas, &mm->mm_mt, start, start);
 
        pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n",
                     current->comm, current->pid);
 
        if (prot)
                return ret;
+
        start = start & PAGE_MASK;
        size = size & PAGE_MASK;
-
        if (start + size <= start)
                return ret;
 
@@ -2886,20 +2790,22 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
        if (mmap_write_lock_killable(mm))
                return -EINTR;
 
-       vma = find_vma(mm, start);
+       mas_set(&mas, start);
+       vma = mas_walk(&mas);
 
        if (!vma || !(vma->vm_flags & VM_SHARED))
                goto out;
 
-       if (start < vma->vm_start)
+       if (!vma->vm_file)
                goto out;
 
        if (start + size > vma->vm_end) {
-               struct vm_area_struct *next;
+               struct vm_area_struct *prev, *next;
 
-               for (next = vma->vm_next; next; next = next->vm_next) {
+               prev = vma;
+               mas_for_each(&mas, next, start + size) {
                        /* hole between vmas ? */
-                       if (next->vm_start != next->vm_prev->vm_end)
+                       if (next->vm_start != prev->vm_end)
                                goto out;
 
                        if (next->vm_file != vma->vm_file)
@@ -2910,6 +2816,8 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 
                        if (start + size <= next->vm_end)
                                break;
+
+                       prev = next;
                }
 
                if (!next)
@@ -2955,9 +2863,10 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
                         struct list_head *uf)
 {
        struct mm_struct *mm = vma->vm_mm;
-       struct vm_area_struct unmap;
+       struct vm_area_struct unmap, *next;
        unsigned long unmap_pages;
        int ret;
+       struct ma_state ma_next;
 
        arch_unmap(mm, newbrk, oldbrk);
 
@@ -2973,11 +2882,17 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
        vma_init(&unmap, mm);
        unmap.vm_start = newbrk;
        unmap.vm_end = oldbrk;
+       unmap.vm_pgoff = newbrk >> PAGE_SHIFT;
+       if (vma->anon_vma)
+               vma_set_anonymous(&unmap);
+
        ret = userfaultfd_unmap_prep(&unmap, newbrk, oldbrk, uf);
        if (ret)
                return ret;
-       ret = 1;
 
+       ret = 1;
+       ma_next = *mas;
+       next = mas_next(&ma_next, ULONG_MAX);
        // Change the oldbrk of vma to the newbrk of the munmap area
        vma_adjust_trans_huge(vma, vma->vm_start, newbrk, 0);
        if (vma->anon_vma) {
@@ -2985,15 +2900,16 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
                anon_vma_interval_tree_pre_update_vma(vma);
        }
 
-       vma->vm_end = newbrk;
        if (vma_mas_remove(&unmap, mas))
                goto mas_store_fail;
 
+       vma->vm_end = newbrk;
        if (vma->anon_vma) {
                anon_vma_interval_tree_post_update_vma(vma);
                anon_vma_unlock_write(vma->anon_vma);
        }
 
+       validate_mm(mm);
        unmap_pages = vma_pages(&unmap);
        if (unmap.vm_flags & VM_LOCKED) {
                mm->locked_vm -= unmap_pages;
@@ -3001,14 +2917,15 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
        }
 
        mmap_write_downgrade(mm);
-       unmap_region(mm, &unmap, vma, newbrk, oldbrk);
+       unmap_region(mm, &unmap, mas, newbrk, oldbrk, vma,
+                    next ? next->vm_start : 0);
        /* Statistics */
        vm_stat_account(mm, unmap.vm_flags, -unmap_pages);
        if (unmap.vm_flags & VM_ACCOUNT)
                vm_unacct_memory(unmap_pages);
 
 munmap_full_vma:
-       validate_mm_mt(mm);
+       validate_mm(mm);
        return ret;
 
 mas_store_fail:
@@ -3032,15 +2949,15 @@ mas_store_fail:
  * do not match then create a new anonymous VMA.  Eventually we may be able to
  * do some brk-specific accounting here.
  */
-static int do_brk_flags(struct ma_state *mas, struct vm_area_struct **brkvma,
+static int do_brk_flags(struct ma_state *mas, struct ma_state *ma_prev,
+                       struct vm_area_struct **brkvma,
                        unsigned long addr, unsigned long len,
                        unsigned long flags)
 {
        struct mm_struct *mm = current->mm;
-       struct vm_area_struct *prev = NULL, *vma;
+       struct vm_area_struct *vma;
        int error;
        unsigned long mapped_addr;
-       validate_mm_mt(mm);
 
        /* Until we need other flags, refuse anything except VM_EXEC. */
        if ((flags & (~VM_EXEC)) != 0)
@@ -3065,7 +2982,6 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct **brkvma,
        if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
                return -ENOMEM;
 
-       mas->last = addr + len - 1;
        if (*brkvma) {
                vma = *brkvma;
                /* Expand the existing vma if possible; almost never a singular
@@ -3074,17 +2990,21 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct **brkvma,
                if ((!vma->anon_vma ||
                     list_is_singular(&vma->anon_vma_chain)) &&
                     ((vma->vm_flags & ~VM_SOFTDIRTY) == flags)){
-                       mas->index = vma->vm_start;
+                       ma_prev->index = vma->vm_start;
+                       ma_prev->last = addr + len - 1;
 
                        vma_adjust_trans_huge(vma, addr, addr + len, 0);
                        if (vma->anon_vma) {
                                anon_vma_lock_write(vma->anon_vma);
                                anon_vma_interval_tree_pre_update_vma(vma);
                        }
+                       mas_lock(ma_prev);
                        vma->vm_end = addr + len;
                        vma->vm_flags |= VM_SOFTDIRTY;
-                       if (mas_store_gfp(mas, vma, GFP_KERNEL))
+                       if (mas_store_gfp(ma_prev, vma, GFP_KERNEL)) {
+                               mas_unlock(ma_prev);
                                goto mas_mod_fail;
+                       }
 
                        if (vma->anon_vma) {
                                anon_vma_interval_tree_post_update_vma(vma);
@@ -3093,11 +3013,9 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct **brkvma,
                        khugepaged_enter_vma_merge(vma, flags);
                        goto out;
                }
-               prev = vma;
        }
-       mas->index = addr;
-       mas_walk(mas);
 
+       mas->last = addr + len - 1;
        /* create a vma struct for an anonymous mapping */
        vma = vm_area_alloc(mm);
        if (!vma)
@@ -3112,9 +3030,6 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct **brkvma,
        if (vma_mas_store(vma, mas))
                goto mas_store_fail;
 
-       if (!prev)
-               prev = mas_prev(mas, 0);
-
        mm->map_count++;
        *brkvma = vma;
 out:
@@ -3124,7 +3039,6 @@ out:
        if (flags & VM_LOCKED)
                mm->locked_vm += (len >> PAGE_SHIFT);
        vma->vm_flags |= VM_SOFTDIRTY;
-       validate_mm_mt(mm);
        return 0;
 
 mas_store_fail:
@@ -3163,7 +3077,7 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
 
        // This vma left intentionally blank.
        mas_walk(&mas);
-       ret = do_brk_flags(&mas, &vma, addr, len, flags);
+       ret = do_brk_flags(&mas, &mas, &vma, addr, len, flags);
        populate = ((mm->def_flags & VM_LOCKED) != 0);
        mmap_write_unlock(mm);
        if (populate && !ret)
@@ -3184,6 +3098,8 @@ void exit_mmap(struct mm_struct *mm)
        struct mmu_gather tlb;
        struct vm_area_struct *vma;
        unsigned long nr_accounted = 0;
+       struct ma_state mas2;
+       MA_STATE(mas, &mm->mm_mt, FIRST_USER_ADDRESS, FIRST_USER_ADDRESS);
 
        /* mm's last user has gone, and its about to be pulled down */
        mmu_notifier_release(mm);
@@ -3212,32 +3128,43 @@ void exit_mmap(struct mm_struct *mm)
                mmap_write_unlock(mm);
        }
 
-       if (mm->locked_vm)
-               unlock_range(mm->mmap, &vma, ULONG_MAX);
+       if (mm->locked_vm) {
+               mas_for_each(&mas, vma, ULONG_MAX) {
+                       if (vma->vm_flags & VM_LOCKED) {
+                               mm->locked_vm -= vma_pages(vma);
+                               munlock_vma_pages_all(vma);
+                       }
+               }
+               mas_set(&mas, FIRST_USER_ADDRESS);
+       }
 
        arch_exit_mmap(mm);
 
-       vma = mm->mmap;
+       vma = mas_find(&mas, ULONG_MAX);
        if (!vma)       /* Can happen if dup_mmap() received an OOM */
                return;
 
        lru_add_drain();
        flush_cache_mm(mm);
        tlb_gather_mmu_fullmm(&tlb, mm);
        /* update_hiwater_rss(mm) here? but nobody should be looking */
        /* Use -1 here to ensure all VMAs in the mm are unmapped */
-       unmap_vmas(&tlb, vma, 0, -1);
-       free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
+       mas2 = mas;
+       unmap_vmas(&tlb, vma, &mas, 0, -1);
+       free_pgtables(&tlb, &mas2, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
        tlb_finish_mmu(&tlb);
 
        /*
-        * Walk the list again, actually closing and freeing it,
+        * Walk the tree again, actually closing and freeing it,
         * with preemption enabled, without holding any MM locks.
         */
-       while (vma) {
+       mas_set(&mas, 0);
+       mas_for_each(&mas, vma, ULONG_MAX) {
                if (vma->vm_flags & VM_ACCOUNT)
                        nr_accounted += vma_pages(vma);
-               vma = remove_vma(vma);
+               remove_vma(vma);
                cond_resched();
        }
 
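
One detail worth noting in the hunk above: unmap_vmas() now advances the caller's maple state, so exit_mmap() snapshots it into mas2 for free_pgtables(). A hedged sketch of the idiom, relying on the fact that struct ma_state is a plain value that can be copied by assignment:

        struct ma_state mas2;
        MA_STATE(mas, &mm->mm_mt, FIRST_USER_ADDRESS, FIRST_USER_ADDRESS);

        vma = mas_find(&mas, ULONG_MAX);        /* position at the first vma */
        mas2 = mas;                             /* snapshot before mas advances */
        unmap_vmas(&tlb, vma, &mas, 0, -1);     /* consumes mas */
        free_pgtables(&tlb, &mas2, vma, FIRST_USER_ADDRESS,
                      USER_PGTABLES_CEILING);   /* replays from the snapshot */
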
@@ -3252,9 +3179,7 @@ void exit_mmap(struct mm_struct *mm)
  */
 int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-       struct vm_area_struct *prev;
-
-       if (range_has_overlap(mm, vma->vm_start, vma->vm_end, &prev))
+       if (find_vma_intersection(mm, vma->vm_start, vma->vm_end))
                return -ENOMEM;
 
        if ((vma->vm_flags & VM_ACCOUNT) &&
@@ -3278,7 +3203,7 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
                vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
        }
 
-       vma_link(mm, vma, prev);
+       vma_link(mm, vma);
        return 0;
 }
 
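
insert_vm_struct() no longer needs a predecessor as a side effect of the overlap check; the tree answers the question directly. As an illustration only (not this series' definition), find_vma_intersection()'s semantics can be expressed against the tree roughly as:

        /* Illustrative sketch: first vma overlapping [start, end), or NULL. */
        static inline struct vm_area_struct *
        find_vma_intersection_sketch(struct mm_struct *mm,
                                     unsigned long start, unsigned long end)
        {
                MA_STATE(mas, &mm->mm_mt, start, start);

                return mas_find(&mas, end - 1); /* NULL if the range is unmapped */
        }
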
@@ -3296,7 +3221,6 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
        struct vm_area_struct *new_vma, *prev;
        bool faulted_in_anon_vma = true;
 
-       validate_mm_mt(mm);
        /*
         * If anonymous vma has not yet been faulted, update new pgoff
         * to match new location, to increase its chance of merging.
@@ -3349,10 +3273,9 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                        get_file(new_vma->vm_file);
                if (new_vma->vm_ops && new_vma->vm_ops->open)
                        new_vma->vm_ops->open(new_vma);
-               vma_link(mm, new_vma, prev);
+               vma_link(mm, new_vma);
                *need_rmap_locks = false;
        }
-       validate_mm_mt(mm);
        return new_vma;
 
 out_free_mempol:
@@ -3360,7 +3283,6 @@ out_free_mempol:
 out_free_vma:
        vm_area_free(new_vma);
 out:
-       validate_mm_mt(mm);
        return NULL;
 }
 
@@ -3497,7 +3419,6 @@ static struct vm_area_struct *__install_special_mapping(
        int ret;
        struct vm_area_struct *vma;
 
-       validate_mm_mt(mm);
        vma = vm_area_alloc(mm);
        if (unlikely(vma == NULL))
                return ERR_PTR(-ENOMEM);
@@ -3519,12 +3440,10 @@ static struct vm_area_struct *__install_special_mapping(
 
        perf_event_mmap(vma);
 
-       validate_mm_mt(mm);
        return vma;
 
 out:
        vm_area_free(vma);
-       validate_mm_mt(mm);
        return ERR_PTR(ret);
 }
 
@@ -3649,12 +3568,14 @@ int mm_take_all_locks(struct mm_struct *mm)
 {
        struct vm_area_struct *vma;
        struct anon_vma_chain *avc;
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
 
        BUG_ON(mmap_read_trylock(mm));
 
        mutex_lock(&mm_all_locks_mutex);
 
-       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+       mas_for_each(&mas, vma, ULONG_MAX) {
                if (signal_pending(current))
                        goto out_unlock;
                if (vma->vm_file && vma->vm_file->f_mapping &&
@@ -3662,7 +3583,8 @@ int mm_take_all_locks(struct mm_struct *mm)
                        vm_lock_mapping(mm, vma->vm_file->f_mapping);
        }
 
-       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+       mas_set(&mas, 0);
+       mas_for_each(&mas, vma, ULONG_MAX) {
                if (signal_pending(current))
                        goto out_unlock;
                if (vma->vm_file && vma->vm_file->f_mapping &&
@@ -3670,7 +3592,8 @@ int mm_take_all_locks(struct mm_struct *mm)
                        vm_lock_mapping(mm, vma->vm_file->f_mapping);
        }
 
-       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+       mas_set(&mas, 0);
+       mas_for_each(&mas, vma, ULONG_MAX) {
                if (signal_pending(current))
                        goto out_unlock;
                if (vma->anon_vma)
@@ -3678,9 +3601,11 @@ int mm_take_all_locks(struct mm_struct *mm)
                                vm_lock_anon_vma(mm, avc->anon_vma);
        }
 
        return 0;
 
 out_unlock:
        mm_drop_all_locks(mm);
        return -EINTR;
 }
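
mm_take_all_locks() makes three ordered passes over the same tree; with no list head to re-read, each restart is a mas_set(). A minimal sketch of the multi-pass idiom, with take_file_locks()/take_anon_locks() as hypothetical stand-ins for the vm_lock_* calls, and the mmap write lock assumed held, which is what keeps the walk stable:

        MA_STATE(mas, &mm->mm_mt, 0, 0);
        struct vm_area_struct *vma;

        mas_for_each(&mas, vma, ULONG_MAX)      /* pass 1 */
                take_file_locks(vma);           /* hypothetical helper */

        mas_set(&mas, 0);                       /* rewind to the lowest address */
        mas_for_each(&mas, vma, ULONG_MAX)      /* pass 2, same state reused */
                take_anon_locks(vma);           /* hypothetical helper */
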
@@ -3729,17 +3654,21 @@ void mm_drop_all_locks(struct mm_struct *mm)
 {
        struct vm_area_struct *vma;
        struct anon_vma_chain *avc;
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
 
        BUG_ON(mmap_read_trylock(mm));
        BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
 
-       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+       mas_for_each(&mas, vma, ULONG_MAX) {
                if (vma->anon_vma)
                        list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
                                vm_unlock_anon_vma(avc->anon_vma);
                if (vma->vm_file && vma->vm_file->f_mapping)
                        vm_unlock_mapping(vma->vm_file->f_mapping);
        }
 
        mutex_unlock(&mm_all_locks_mutex);
 }
index 916038bafc65c63f62dd3885b60632fdb89d0d56..fa1e387c2a33c1832076089fef2bb96d087728d6 100644 (file)
@@ -1428,7 +1428,8 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list
                                return -EINVAL;
                        if (end == vma->vm_end)
                                goto erase_whole_vma;
-                       vma =  vma_next(mm, vma);
+
+                       vma = vma_next(mm, vma);
                } while (vma);
                return -EINVAL;
        } else {
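
The loop above leans on vma_next(), one of the helpers this series introduces in place of the vm_next pointer. Its real definition lives in an earlier patch of the series; purely as an illustration, the lookup could be expressed over the tree as:

        /* Illustrative sketch, not the series' definition. */
        static inline struct vm_area_struct *
        vma_next_sketch(struct mm_struct *mm, struct vm_area_struct *vma)
        {
                MA_STATE(mas, &mm->mm_mt, vma->vm_end, vma->vm_end);

                return mas_find(&mas, ULONG_MAX); /* first vma at or after vm_end */
        }
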
@@ -1479,14 +1480,13 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
 void exit_mmap(struct mm_struct *mm)
 {
        struct vm_area_struct *vma;
+       unsigned long addr = 0;
 
        if (!mm)
                return;
 
        mm->total_vm = 0;
-
-       while ((vma = mm->mmap)) {
-               mm->mmap = vma_next(mm, vma);
+       mt_for_each(&mm->mm_mt, vma, addr, ULONG_MAX) {
                delete_vma_from_mm(vma);
                delete_vma(mm, vma);
                cond_resched();
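
mt_for_each() used here is the tree-level counterpart of mas_for_each(): it keeps its cursor in a caller-provided unsigned long rather than an explicit ma_state, which suits this short teardown loop. A minimal sketch of the shape, with the nommu helpers as in the hunk above:

        struct vm_area_struct *vma;
        unsigned long addr = 0;         /* cursor advanced by the macro */

        mt_for_each(&mm->mm_mt, vma, addr, ULONG_MAX) {
                delete_vma_from_mm(vma);        /* unlink the vma's bookkeeping */
                delete_vma(mm, vma);            /* then free it */
                cond_resched();
        }
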