mm: Remove the vma linked list
author Liam R. Howlett <Liam.Howlett@Oracle.com>
Mon, 4 Jan 2021 20:10:54 +0000 (15:10 -0500)
committer Liam R. Howlett <Liam.Howlett@oracle.com>
Mon, 14 Mar 2022 18:49:48 +0000 (14:49 -0400)
Replace any vm_next use with vma_find().

Update free_pgtables(), unmap_vmas(), and zap_page_range() to use the
maple tree.

Use the new free_pgtables() and unmap_vmas() in do_mas_align_munmap().
At the same time, alter the loop to be more compact.

Now that free_pgtables() and unmap_vmas() take a maple tree as an
argument, rearrange do_mas_align_munmap() to use the new tree to hold
the vmas to remove.

Remove __vma_link_list() and __vma_unlink_list() as they are exclusively
used to update the linked list.

Drop linked list update from __insert_vm_struct().

Rework validation of the tree as it depended on the linked list.

Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
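
Not part of the patch: a minimal sketch of the conversion pattern used
throughout the series, assuming an mm_struct *mm with the mmap_lock held;
addr is a placeholder and the loop body stands in for whatever per-VMA work
a caller does.

	struct vm_area_struct *vma, *next;
	MA_STATE(mas, &mm->mm_mt, 0, 0);

	/* A full walk, "for (vma = mm->mmap; vma; vma = vma->vm_next)", becomes: */
	mas_for_each(&mas, vma, ULONG_MAX) {
		/* per-VMA work previously done in the vm_next loop */
	}

	/* A single "next = vma->vm_next" becomes a lookup past the VMA's end: */
	vma = find_vma(mm, addr);
	if (vma)
		next = find_vma(mm, vma->vm_end);
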
include/linux/mm.h
include/linux/mm_types.h
kernel/fork.c
mm/debug.c
mm/gup.c
mm/internal.h
mm/memory.c
mm/mmap.c
mm/nommu.c
mm/util.c

index c720f60e233977fb2351e3846edee1f7e918eb06..49bfd0eb87920beb7f950159dbdd549e7fafacb3 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1859,8 +1859,9 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
                  unsigned long size);
 void zap_page_range(struct vm_area_struct *vma, unsigned long address,
                    unsigned long size);
-void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
-               unsigned long start, unsigned long end);
+void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
+               struct vm_area_struct *start_vma, unsigned long start,
+               unsigned long end);
 
 struct mmu_notifier_range;
 
index c36a3c4fc086a3ae50fd7bd995399ef94709e1a6..0cbc5facf57446c4c11c39ea47da9d1418494fce 100644 (file)
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -380,8 +380,6 @@ struct vm_area_struct {
        unsigned long vm_end;           /* The first byte after our end address
                                           within vm_mm. */
 
-       /* linked list of VM areas per task, sorted by address */
-       struct vm_area_struct *vm_next, *vm_prev;
        struct mm_struct *vm_mm;        /* The address space we belong to. */
 
        /*
@@ -442,7 +440,6 @@ struct vm_area_struct {
 struct kioctx_table;
 struct mm_struct {
        struct {
-               struct vm_area_struct *mmap;            /* list of VMAs */
                struct maple_tree mm_mt;
 #ifdef CONFIG_MMU
                unsigned long (*get_unmapped_area) (struct file *filp,
@@ -457,7 +454,6 @@ struct mm_struct {
                unsigned long mmap_compat_legacy_base;
 #endif
                unsigned long task_size;        /* size of task vm space */
-               unsigned long highest_vm_end;   /* highest vma end address */
                pgd_t * pgd;
 
 #ifdef CONFIG_MEMBARRIER
index 00d646803bd6fb14ab08f4e02623b4d7864bc509..c166bd30f8f8d6fe299abde8758ddf4f066b24e9 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -364,7 +364,6 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
                 */
                *new = data_race(*orig);
                INIT_LIST_HEAD(&new->anon_vma_chain);
-               new->vm_next = new->vm_prev = NULL;
                dup_vma_anon_name(orig, new);
        }
        return new;
@@ -490,7 +489,7 @@ static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm)
 static __latent_entropy int dup_mmap(struct mm_struct *mm,
                                        struct mm_struct *oldmm)
 {
-       struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
+       struct vm_area_struct *mpnt, *tmp;
        int retval;
        unsigned long charge = 0;
        MA_STATE(old_mas, &oldmm->mm_mt, 0, 0);
@@ -517,7 +516,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
        mm->exec_vm = oldmm->exec_vm;
        mm->stack_vm = oldmm->stack_vm;
 
-       pprev = &mm->mmap;
        retval = ksm_fork(mm, oldmm);
        if (retval)
                goto out;
@@ -525,8 +523,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
        if (retval)
                goto out;
 
-       prev = NULL;
-
        retval = mas_expected_entries(&mas, oldmm->map_count);
        if (retval)
                goto out;
@@ -598,14 +594,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                if (is_vm_hugetlb_page(tmp))
                        reset_vma_resv_huge_pages(tmp);
 
-               /*
-                * Link in the new vma and copy the page table entries.
-                */
-               *pprev = tmp;
-               pprev = &tmp->vm_next;
-               tmp->vm_prev = prev;
-               prev = tmp;
-
                /* Link the vma into the MT */
                mas.index = tmp->vm_start;
                mas.last = tmp->vm_end - 1;
@@ -1043,7 +1031,6 @@ static void mm_init_uprobes_state(struct mm_struct *mm)
 static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
        struct user_namespace *user_ns)
 {
-       mm->mmap = NULL;
        mt_init_flags(&mm->mm_mt, MM_MT_FLAGS);
        mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock);
        atomic_set(&mm->mm_users, 1);
index ef3555d071791bab8601156039b4635a5ed9290d..ce8aa17d44f8c008387f3e02518e6564a2242a1f 100644 (file)
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -143,13 +143,11 @@ EXPORT_SYMBOL(dump_page);
 
 void dump_vma(const struct vm_area_struct *vma)
 {
-       pr_emerg("vma %px start %px end %px\n"
-               "next %px prev %px mm %px\n"
+       pr_emerg("vma %px start %px end %px mm %px\n"
                "prot %lx anon_vma %px vm_ops %px\n"
                "pgoff %lx file %px private_data %px\n"
                "flags: %#lx(%pGv)\n",
-               vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next,
-               vma->vm_prev, vma->vm_mm,
+               vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_mm,
                (unsigned long)pgprot_val(vma->vm_page_prot),
                vma->anon_vma, vma->vm_ops, vma->vm_pgoff,
                vma->vm_file, vma->vm_private_data,
@@ -159,11 +157,11 @@ EXPORT_SYMBOL(dump_vma);
 
 void dump_mm(const struct mm_struct *mm)
 {
-       pr_emerg("mm %px mmap %px task_size %lu\n"
+       pr_emerg("mm %px task_size %lu\n"
 #ifdef CONFIG_MMU
                "get_unmapped_area %px\n"
 #endif
-               "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
+               "mmap_base %lu mmap_legacy_base %lu\n"
                "pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
                "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
                "pinned_vm %llx data_vm %lx exec_vm %lx stack_vm %lx\n"
@@ -187,11 +185,11 @@ void dump_mm(const struct mm_struct *mm)
                "tlb_flush_pending %d\n"
                "def_flags: %#lx(%pGv)\n",
 
-               mm, mm->mmap, mm->task_size,
+               mm, mm->task_size,
 #ifdef CONFIG_MMU
                mm->get_unmapped_area,
 #endif
-               mm->mmap_base, mm->mmap_legacy_base, mm->highest_vm_end,
+               mm->mmap_base, mm->mmap_legacy_base,
                mm->pgd, atomic_read(&mm->mm_users),
                atomic_read(&mm->mm_count),
                mm_pgtables_bytes(mm),
index 2d12b423fea6dbf7cb0729d04297aa85df469976..c39ddf2b1e48816532cb88a1a0ccb8e9e4a8748d 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1764,7 +1764,7 @@ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
                        mmap_read_lock(mm);
                        vma = find_vma(mm, nstart);
                } else if (nstart >= vma->vm_end)
-                       vma = vma->vm_next;
+                       vma = find_vma(mm, vma->vm_end);
                if (!vma || vma->vm_start >= end)
                        break;
                nend = end ? min(end, vma->vm_end) : vma->vm_end;
index 473bc832a2b9281185bd48eeeb2aa6f2ff552457..2d3ede05cd7bf71eb6ee6d19a7d88f5ffc7d4628 100644 (file)
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -67,8 +67,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf);
 void folio_rotate_reclaimable(struct folio *folio);
 bool __folio_end_writeback(struct folio *folio);
 
-void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
-               unsigned long floor, unsigned long ceiling);
+void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
+                  struct vm_area_struct *start_vma, unsigned long floor,
+                  unsigned long ceiling);
 void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
 
 static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
@@ -456,11 +457,6 @@ static inline int vma_mas_remove(struct vm_area_struct *vma, struct ma_state *ma
        return ret;
 }
 
-/* mm/util.c */
-void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
-               struct vm_area_struct *prev);
-void __vma_unlink_list(struct mm_struct *mm, struct vm_area_struct *vma);
-
 #ifdef CONFIG_MMU
 void unmap_mapping_folio(struct folio *folio);
 extern long populate_vma_page_range(struct vm_area_struct *vma,
index c125c4969913ac7ec0b221cc010bdcc6ad378cf8..ee2d6394d3b660578d86ef129ef19d0cae59f38e 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -399,12 +399,21 @@ void free_pgd_range(struct mmu_gather *tlb,
        } while (pgd++, addr = next, addr != end);
 }
 
-void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
-               unsigned long floor, unsigned long ceiling)
+void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
+                  struct vm_area_struct *vma, unsigned long floor,
+                  unsigned long ceiling)
 {
-       while (vma) {
-               struct vm_area_struct *next = vma->vm_next;
+       MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
+
+       do {
                unsigned long addr = vma->vm_start;
+               struct vm_area_struct *next;
+
+               /*
+                * Note: USER_PGTABLES_CEILING may be passed as ceiling and may
+                * be 0.  This will underflow and is okay.
+                */
+               next = mas_find(&mas, ceiling - 1);
 
                /*
                 * Hide vma from rmap and truncate_pagecache before freeing
@@ -423,7 +432,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
                        while (next && next->vm_start <= vma->vm_end + PMD_SIZE
                               && !is_vm_hugetlb_page(next)) {
                                vma = next;
-                               next = vma->vm_next;
+                               next = mas_find(&mas, ceiling - 1);
                                unlink_anon_vmas(vma);
                                unlink_file_vma(vma);
                        }
@@ -431,7 +440,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
                                floor, next ? next->vm_start : ceiling);
                }
                vma = next;
-       }
+       } while (vma);
 }
 
 void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte)
@@ -1625,17 +1634,19 @@ static void unmap_single_vma(struct mmu_gather *tlb,
  * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
  * drops the lock and schedules.
  */
-void unmap_vmas(struct mmu_gather *tlb,
+void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
                struct vm_area_struct *vma, unsigned long start_addr,
                unsigned long end_addr)
 {
        struct mmu_notifier_range range;
+       MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
 
        mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
                                start_addr, end_addr);
        mmu_notifier_invalidate_range_start(&range);
-       for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
+       do {
                unmap_single_vma(tlb, vma, start_addr, end_addr, NULL);
+       } while ((vma = mas_find(&mas, end_addr - 1)) != NULL);
        mmu_notifier_invalidate_range_end(&range);
 }
 
@@ -1650,8 +1661,11 @@ void unmap_vmas(struct mmu_gather *tlb,
 void zap_page_range(struct vm_area_struct *vma, unsigned long start,
                unsigned long size)
 {
+       struct maple_tree *mt = &vma->vm_mm->mm_mt;
+       unsigned long end = start + size;
        struct mmu_notifier_range range;
        struct mmu_gather tlb;
+       MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
 
        lru_add_drain();
        mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
@@ -1659,8 +1673,9 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
        tlb_gather_mmu(&tlb, vma->vm_mm);
        update_hiwater_rss(vma->vm_mm);
        mmu_notifier_invalidate_range_start(&range);
-       for ( ; vma && vma->vm_start < range.end; vma = vma->vm_next)
+       do {
                unmap_single_vma(&tlb, vma, start, range.end, NULL);
+       } while ((vma = mas_find(&mas, end - 1)) != NULL);
        mmu_notifier_invalidate_range_end(&range);
        tlb_finish_mmu(&tlb);
 }
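
Not part of the patch: with the maple tree now passed explicitly, a teardown
path calls the reworked interfaces roughly the way unmap_region() does in
mm/mmap.c below; mm, mt, vma, prev, next, start and end are assumed to be in
scope and the mmap_lock held.

	struct mmu_gather tlb;

	lru_add_drain();
	tlb_gather_mmu(&tlb, mm);
	update_hiwater_rss(mm);
	unmap_vmas(&tlb, mt, vma, start, end);
	free_pgtables(&tlb, mt, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
		      next ? next->vm_start : USER_PGTABLES_CEILING);
	tlb_finish_mmu(&tlb);
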
index 7f8e623950100f039f16c9c2bb9d3b48306aa165..47939fe1526df1db2285dd10c4d63aa08b8408a9 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -75,9 +75,10 @@ int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
 static bool ignore_rlimit_data;
 core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
 
-static void unmap_region(struct mm_struct *mm,
+static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
-               unsigned long start, unsigned long end);
+               struct vm_area_struct *next, unsigned long start,
+               unsigned long end);
 
 /* description of effects of mapping type and prot in current implementation.
  * this is due to the limited x86 page protection hardware.  The expected
@@ -172,12 +173,10 @@ void unlink_file_vma(struct vm_area_struct *vma)
 }
 
 /*
- * Close a vm structure and free it, returning the next.
+ * Close a vm structure and free it.
  */
-static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
+static void remove_vma(struct vm_area_struct *vma)
 {
-       struct vm_area_struct *next = vma->vm_next;
-
        might_sleep();
        if (vma->vm_ops && vma->vm_ops->close)
                vma->vm_ops->close(vma);
@@ -185,7 +184,6 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
                fput(vma->vm_file);
        mpol_put(vma_policy(vma));
        vm_area_free(vma);
-       return next;
 }
 
 /*
@@ -210,8 +208,7 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
                         unsigned long newbrk, unsigned long oldbrk,
                         struct list_head *uf);
 static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *brkvma,
-                       unsigned long addr, unsigned long request,
-                       unsigned long flags);
+               unsigned long addr, unsigned long request, unsigned long flags);
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
        unsigned long newbrk, oldbrk, origbrk;
@@ -280,7 +277,6 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
                 * before calling do_brk_munmap().
                 */
                mm->brk = brk;
-               mas.last = oldbrk - 1;
                ret = do_brk_munmap(&mas, brkvma, newbrk, oldbrk, &uf);
                if (ret == 1)  {
                        downgraded = true;
@@ -335,42 +331,20 @@ extern void mt_dump(const struct maple_tree *mt);
 static void validate_mm_mt(struct mm_struct *mm)
 {
        struct maple_tree *mt = &mm->mm_mt;
-       struct vm_area_struct *vma_mt, *vma = mm->mmap;
+       struct vm_area_struct *vma_mt;
 
        MA_STATE(mas, mt, 0, 0);
-       mas_for_each(&mas, vma_mt, ULONG_MAX) {
-               if (xa_is_zero(vma_mt))
-                       continue;
-
-               if (!vma)
-                       break;
 
-               if ((vma != vma_mt) ||
-                   (vma->vm_start != vma_mt->vm_start) ||
-                   (vma->vm_end != vma_mt->vm_end) ||
-                   (vma->vm_start != mas.index) ||
-                   (vma->vm_end - 1 != mas.last)) {
+       mas_for_each(&mas, vma_mt, ULONG_MAX) {
+               if ((vma_mt->vm_start != mas.index) ||
+                   (vma_mt->vm_end - 1 != mas.last)) {
                        pr_emerg("issue in %s\n", current->comm);
                        dump_stack();
                        dump_vma(vma_mt);
-                       pr_emerg("and vm_next\n");
-                       dump_vma(vma->vm_next);
                        pr_emerg("mt piv: %px %lu - %lu\n", vma_mt,
                                 mas.index, mas.last);
                        pr_emerg("mt vma: %px %lu - %lu\n", vma_mt,
                                 vma_mt->vm_start, vma_mt->vm_end);
-                       if (vma->vm_prev) {
-                               pr_emerg("ll prev: %px %lu - %lu\n",
-                                        vma->vm_prev, vma->vm_prev->vm_start,
-                                        vma->vm_prev->vm_end);
-                       }
-                       pr_emerg("ll vma: %px %lu - %lu\n", vma,
-                                vma->vm_start, vma->vm_end);
-                       if (vma->vm_next) {
-                               pr_emerg("ll next: %px %lu - %lu\n",
-                                        vma->vm_next, vma->vm_next->vm_start,
-                                        vma->vm_next->vm_end);
-                       }
 
                        mt_dump(mas.tree);
                        if (vma_mt->vm_end != mas.last + 1) {
@@ -387,11 +361,7 @@ static void validate_mm_mt(struct mm_struct *mm)
                        }
                        VM_BUG_ON_MM(vma_mt->vm_start != mas.index, mm);
                }
-               VM_BUG_ON(vma != vma_mt);
-               vma = vma->vm_next;
-
        }
-       VM_BUG_ON(vma);
        mt_validate(&mm->mm_mt);
 }
 
@@ -399,12 +369,12 @@ static void validate_mm(struct mm_struct *mm)
 {
        int bug = 0;
        int i = 0;
-       unsigned long highest_address = 0;
-       struct vm_area_struct *vma = mm->mmap;
+       struct vm_area_struct *vma;
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
 
        validate_mm_mt(mm);
 
-       while (vma) {
+       mas_for_each(&mas, vma, ULONG_MAX) {
 #ifdef CONFIG_DEBUG_VM_RB
                struct anon_vma *anon_vma = vma->anon_vma;
                struct anon_vma_chain *avc;
@@ -416,18 +386,10 @@ static void validate_mm(struct mm_struct *mm)
                        anon_vma_unlock_read(anon_vma);
                }
 #endif
-
-               highest_address = vm_end_gap(vma);
-               vma = vma->vm_next;
                i++;
        }
        if (i != mm->map_count) {
-               pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
-               bug = 1;
-       }
-       if (highest_address != mm->highest_vm_end) {
-               pr_emerg("mm->highest_vm_end %lx, found %lx\n",
-                         mm->highest_vm_end, highest_address);
+               pr_emerg("map_count %d mas_for_each %d\n", mm->map_count, i);
                bug = 1;
        }
        VM_BUG_ON_MM(bug, mm);
@@ -487,29 +449,13 @@ bool range_has_overlap(struct mm_struct *mm, unsigned long start,
        struct vm_area_struct *existing;
 
        MA_STATE(mas, &mm->mm_mt, start, start);
+       rcu_read_lock();
        existing = mas_find(&mas, end - 1);
        *pprev = mas_prev(&mas, 0);
+       rcu_read_unlock();
        return existing ? true : false;
 }
 
-/*
- * __vma_next() - Get the next VMA.
- * @mm: The mm_struct.
- * @vma: The current vma.
- *
- * If @vma is NULL, return the first vma in the mm.
- *
- * Returns: The next VMA after @vma.
- */
-static inline struct vm_area_struct *__vma_next(struct mm_struct *mm,
-                                        struct vm_area_struct *vma)
-{
-       if (!vma)
-               return mm->mmap;
-
-       return vma->vm_next;
-}
-
 static unsigned long count_vma_pages_range(struct mm_struct *mm,
                unsigned long addr, unsigned long end)
 {
@@ -574,8 +520,7 @@ static void vma_store(struct mm_struct *mm, struct vm_area_struct *vma)
        vma_mas_store(vma, &mas);
 }
 
-static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
-                       struct vm_area_struct *prev)
+static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
 {
        struct address_space *mapping = NULL;
 
@@ -585,7 +530,6 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
        }
 
        vma_store(mm, vma);
-       __vma_link_list(mm, vma, prev);
        __vma_link_file(vma);
 
        if (mapping)
@@ -599,15 +543,9 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
  * Helper for vma_adjust() in the split_vma insert case: insert a vma into the
  * mm's list and the mm tree.  It has already been inserted into the interval tree.
  */
-static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma,
-                              unsigned long location)
+static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-       struct vm_area_struct *prev;
-       MA_STATE(mas, &mm->mm_mt, location, location);
-
-       prev = mas_prev(&mas, 0);
        vma_store(mm, vma);
-       __vma_link_list(mm, vma, prev);
        mm->map_count++;
 }
 
@@ -682,15 +620,8 @@ inline int vma_expand(struct ma_state *mas, struct vm_area_struct *vma,
        }
 
        /* Expanding over the next vma */
-       if (remove_next) {
-               /* Remove from mm linked list - also updates highest_vm_end */
-               __vma_unlink_list(mm, next);
-
-               if (file)
-                       __remove_shared_vm_struct(next, file, mapping);
-
-       } else if (!next) {
-               mm->highest_vm_end = vm_end_gap(vma);
+       if (remove_next && file) {
+               __remove_shared_vm_struct(next, file, mapping);
        }
 
        if (anon_vma) {
@@ -731,7 +662,8 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
        struct vm_area_struct *expand)
 {
        struct mm_struct *mm = vma->vm_mm;
-       struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
+       struct vm_area_struct *next = find_vma(mm, vma->vm_end);
+       struct vm_area_struct *orig_vma = vma;
        struct address_space *mapping = NULL;
        struct rb_root_cached *root = NULL;
        struct anon_vma *anon_vma = NULL;
@@ -739,7 +671,6 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
        bool vma_changed = false;
        long adjust_next = 0;
        int remove_next = 0;
-       unsigned long ll_prev = vma->vm_start; /* linked list prev. */
 
        if (next && !insert) {
                struct vm_area_struct *exporter = NULL, *importer = NULL;
@@ -773,7 +704,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
                                 */
                                remove_next = 1 + (end > next->vm_end);
                                VM_WARN_ON(remove_next == 2 &&
-                                          end != next->vm_next->vm_end);
+                                          end != find_vma(mm, next->vm_end)->vm_end);
                                /* trim end to next, for case 6 first pass */
                                end = next->vm_end;
                        }
@@ -786,7 +717,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
                         * next, if the vma overlaps with it.
                         */
                        if (remove_next == 2 && !next->anon_vma)
-                               exporter = next->vm_next;
+                               exporter = find_vma(mm, next->vm_end);
 
                } else if (end > next->vm_start) {
                        /*
@@ -881,15 +812,11 @@ again:
                                vma_mt_szero(mm, end, vma->vm_end);
                                VM_WARN_ON(insert &&
                                           insert->vm_end < vma->vm_end);
-                       } else if (insert->vm_start == end) {
-                               ll_prev = vma->vm_end;
                        }
                } else {
                        vma_changed = true;
                }
                vma->vm_end = end;
-               if (!next)
-                       mm->highest_vm_end = vm_end_gap(vma);
        }
 
        if (vma_changed)
@@ -909,17 +836,15 @@ again:
                flush_dcache_mmap_unlock(mapping);
        }
 
-       if (remove_next) {
-               __vma_unlink_list(mm, next);
-               if (file)
-                       __remove_shared_vm_struct(next, file, mapping);
+       if (remove_next && file) {
+               __remove_shared_vm_struct(next, file, mapping);
        } else if (insert) {
                /*
                 * split_vma has split insert from vma, and needs
                 * us to insert it before dropping the locks
                 * (it may either follow vma or precede it).
                 */
-               __insert_vm_struct(mm, insert, ll_prev);
+               __insert_vm_struct(mm, insert);
        }
 
        if (anon_vma) {
@@ -956,10 +881,10 @@ again:
                        /*
                         * If "next" was removed and vma->vm_end was
                         * expanded (up) over it, in turn
-                        * "next->vm_prev->vm_end" changed and the
-                        * "vma->vm_next" gap must be updated.
+                        * "next->prev->vm_end" changed and the
+                        * "vma->next" gap must be updated.
                         */
-                       next = vma->vm_next;
+                       next = find_vma(mm, vma->vm_end);
                } else {
                        /*
                         * For the scope of the comment "next" and
@@ -977,33 +902,14 @@ again:
                        remove_next = 1;
                        end = next->vm_end;
                        goto again;
-               } else if (!next) {
-                       /*
-                        * If remove_next == 2 we obviously can't
-                        * reach this path.
-                        *
-                        * If remove_next == 3 we can't reach this
-                        * path because pre-swap() next is always not
-                        * NULL. pre-swap() "next" is not being
-                        * removed and its next->vm_end is not altered
-                        * (and furthermore "end" already matches
-                        * next->vm_end in remove_next == 3).
-                        *
-                        * We reach this only in the remove_next == 1
-                        * case if the "next" vma that was removed was
-                        * the highest vma of the mm. However in such
-                        * case next->vm_end == "end" and the extended
-                        * "vma" has vma->vm_end == next->vm_end so
-                        * mm->highest_vm_end doesn't need any update
-                        * in remove_next == 1 case.
-                        */
-                       VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
                }
        }
-       if (insert && file)
+       if (insert && file) {
                uprobe_mmap(insert);
+       }
 
        validate_mm(mm);
+
        return 0;
 }
 
@@ -1163,10 +1069,10 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
        if (vm_flags & VM_SPECIAL)
                return NULL;
 
-       next = __vma_next(mm, prev);
+       next = find_vma(mm, prev ? prev->vm_end : 0);
        area = next;
        if (area && area->vm_end == end)                /* cases 6, 7, 8 */
-               next = next->vm_next;
+               next = find_vma(mm, next->vm_end);
 
        /* verify some invariant that must be enforced by the caller */
        VM_WARN_ON(prev && addr <= prev->vm_start);
@@ -1300,18 +1206,24 @@ static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_
  */
 struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
 {
+       MA_STATE(mas, &vma->vm_mm->mm_mt, vma->vm_end, vma->vm_end);
        struct anon_vma *anon_vma = NULL;
+       struct vm_area_struct *prev, *next;
 
        /* Try next first. */
-       if (vma->vm_next) {
-               anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
+       next = mas_walk(&mas);
+       if (next) {
+               anon_vma = reusable_anon_vma(next, vma, next);
                if (anon_vma)
                        return anon_vma;
        }
 
+       prev = mas_prev(&mas, 0);
+       VM_BUG_ON_VMA(prev != vma, vma);
+       prev = mas_prev(&mas, 0);
        /* Try prev next. */
-       if (vma->vm_prev)
-               anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);
+       if (prev)
+               anon_vma = reusable_anon_vma(prev, prev, vma);
 
        /*
         * We might reach here with anon_vma == NULL if we can't find
@@ -2076,8 +1988,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
        if (gap_addr < address || gap_addr > TASK_SIZE)
                gap_addr = TASK_SIZE;
 
-       next = vma->vm_next;
-       if (next && next->vm_start < gap_addr && vma_is_accessible(next)) {
+       next = find_vma_intersection(mm, vma->vm_end, gap_addr);
+       if (next && vma_is_accessible(next)) {
                if (!(next->vm_flags & VM_GROWSUP))
                        return -ENOMEM;
                /* Check that both stack segments have the same anon_vma? */
@@ -2123,8 +2035,6 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                /* Overwrite old entry in mtree. */
                                vma_store(mm, vma);
                                anon_vma_interval_tree_post_update_vma(vma);
-                               if (!vma->vm_next)
-                                       mm->highest_vm_end = vm_end_gap(vma);
                                spin_unlock(&mm->page_table_lock);
 
                                perf_event_mmap(vma);
@@ -2143,6 +2053,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 int expand_downwards(struct vm_area_struct *vma, unsigned long address)
 {
        struct mm_struct *mm = vma->vm_mm;
+       MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_start);
        struct vm_area_struct *prev;
        int error = 0;
 
@@ -2151,7 +2062,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
                return -EPERM;
 
        /* Enforce stack_guard_gap */
-       prev = vma->vm_prev;
+       prev = mas_prev(&mas, 0);
        /* Check that both stack segments have the same anon_vma? */
        if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
                        vma_is_accessible(prev)) {
@@ -2281,25 +2192,26 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 EXPORT_SYMBOL_GPL(find_extend_vma);
 
 /*
- * Ok - we have the memory areas we should free on the vma list,
- * so release them, and do the vma updates.
+ * Ok - we have the memory areas we should free on a maple tree so release them,
+ * and do the vma updates.
  *
  * Called with the mm semaphore held.
  */
-static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
+static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas)
 {
        unsigned long nr_accounted = 0;
+       struct vm_area_struct *vma;
 
        /* Update high watermark before we lower total_vm */
        update_hiwater_vm(mm);
-       do {
+       mas_for_each(mas, vma, ULONG_MAX) {
                long nrpages = vma_pages(vma);
 
                if (vma->vm_flags & VM_ACCOUNT)
                        nr_accounted += nrpages;
                vm_stat_account(mm, vma->vm_flags, -nrpages);
-               vma = remove_vma(vma);
-       } while (vma);
+               remove_vma(vma);
+       }
        vm_unacct_memory(nr_accounted);
        validate_mm(mm);
 }
@@ -2309,18 +2221,18 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
  *
  * Called with the mm semaphore held.
  */
-static void unmap_region(struct mm_struct *mm,
+static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
+               struct vm_area_struct *next,
                unsigned long start, unsigned long end)
 {
-       struct vm_area_struct *next = __vma_next(mm, prev);
        struct mmu_gather tlb;
 
        lru_add_drain();
        tlb_gather_mmu(&tlb, mm);
        update_hiwater_rss(mm);
-       unmap_vmas(&tlb, vma, start, end);
-       free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
+       unmap_vmas(&tlb, mt, vma, start, end);
+       free_pgtables(&tlb, mt, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
                                 next ? next->vm_start : USER_PGTABLES_CEILING);
        tlb_finish_mmu(&tlb);
 }
@@ -2361,8 +2273,9 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
        if (err)
                goto out_free_mpol;
 
-       if (new->vm_file)
+       if (new->vm_file) {
                get_file(new->vm_file);
+       }
 
        if (new->vm_ops && new->vm_ops->open)
                new->vm_ops->open(new);
@@ -2404,26 +2317,14 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
        return __split_vma(mm, vma, addr, new_below);
 }
 
-static inline int
-unlock_range(struct vm_area_struct *start, struct vm_area_struct **tail,
-            unsigned long limit)
+static inline void munmap_sidetree(struct vm_area_struct *vma,
+                                  struct ma_state *mas_detach)
 {
-       struct mm_struct *mm = start->vm_mm;
-       struct vm_area_struct *tmp = start;
-       int count = 0;
-
-       while (tmp && tmp->vm_start < limit) {
-               *tail = tmp;
-               count++;
-               if (tmp->vm_flags & VM_LOCKED) {
-                       mm->locked_vm -= vma_pages(tmp);
-                       munlock_vma_pages_all(tmp);
-               }
-
-               tmp = tmp->vm_next;
+       vma_mas_store(vma, mas_detach);
+       if (vma->vm_flags & VM_LOCKED) {
+               vma->vm_mm->locked_vm -= vma_pages(vma);
+               munlock_vma_pages_all(vma);
        }
-
-       return count;
 }
 
 /*
@@ -2443,8 +2344,12 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
                    struct mm_struct *mm, unsigned long start,
                    unsigned long end, struct list_head *uf, bool downgrade)
 {
-       struct vm_area_struct *prev, *last;
-       /* we have start < vma->vm_end  */
+       struct vm_area_struct *prev, *next = NULL;
+       struct maple_tree mt_detach;
+       int count = 0;
+       MA_STATE(mas_detach, &mt_detach, start, end - 1);
+       mt_init_flags(&mt_detach, MM_MT_FLAGS);
+       mt_set_external_lock(&mt_detach, &mm->mmap_lock);
 
        mas->last = end - 1;
        /*
@@ -2454,6 +2359,8 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
         * unmapped vm_area_struct will remain in use: so lower split_vma
         * places tmp vma above, and higher split_vma places tmp vma below.
         */
+
+       /* Does it split the first one? */
        if (start > vma->vm_start) {
                int error;
 
@@ -2465,34 +2372,55 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
                if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
                        return -ENOMEM;
 
+               /*
+                * mas_pause() is not needed since mas->index needs to be set
+                * differently than vma->vm_end anyways.
+                */
                error = __split_vma(mm, vma, start, 0);
                if (error)
                        return error;
-               prev = vma;
-               vma = __vma_next(mm, prev);
-               mas->index = start;
-               mas_reset(mas);
-       } else {
-               prev = vma->vm_prev;
+
+               mas_set(mas, start);
+               vma = mas_walk(mas);
        }
 
-       if (vma->vm_end >= end)
-               last = vma;
-       else
-               last = find_vma_intersection(mm, end - 1, end);
+       prev = mas_prev(mas, 0);
+       if (unlikely((!prev)))
+               mas_set(mas, start);
 
-       /* Does it split the last one? */
-       if (last && end < last->vm_end) {
-               int error = __split_vma(mm, last, end, 1);
+       /*
+        * Detach a range of VMAs from the mm. Using next as a temp variable as
+        * it is always overwritten.
+        */
+       mas_for_each(mas, next, end - 1) {
+               /* Does it split the end? */
+               if (next->vm_end > end) {
+                       struct vm_area_struct *split;
+                       int error;
 
-               if (error)
-                       return error;
+                       error = __split_vma(mm, next, end, 1);
+                       if (error)
+                               return error;
 
-               if (vma == last)
-                       vma = __vma_next(mm, prev);
-               mas_reset(mas);
+                       mas_set(mas, end);
+                       split = mas_prev(mas, 0);
+                       munmap_sidetree(split, &mas_detach);
+                       count++;
+                       if (vma == next)
+                               vma = split;
+                       break;
+               }
+               count++;
+               munmap_sidetree(next, &mas_detach);
+#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
+               BUG_ON(next->vm_start < start);
+               BUG_ON(next->vm_start > end);
+#endif
        }
 
+       if (!next)
+               next = mas_next(mas, ULONG_MAX);
+
        if (unlikely(uf)) {
                /*
                 * If userfaultfd_unmap_prep returns an error the vmas
@@ -2509,35 +2437,36 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
                        return error;
        }
 
-       /*
-        * unlock any mlock()ed ranges before detaching vmas, count the number
-        * of VMAs to be dropped, and return the tail entry of the affected
-        * area.
-        */
-       mm->map_count -= unlock_range(vma, &last, end);
-       /* Drop removed area from the tree */
+       /* Point of no return */
+       mas_set_range(mas, start, end - 1);
+#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
+       /* Make sure no VMAs are about to be lost. */
+       {
+               MA_STATE(test, &mt_detach, start, end - 1);
+               struct vm_area_struct *vma_mas, *vma_test;
+               int test_count = 0;
+
+               rcu_read_lock();
+               vma_test = mas_find(&test, end - 1);
+               mas_for_each(mas, vma_mas, end - 1) {
+                       BUG_ON(vma_mas != vma_test);
+                       test_count++;
+                       vma_test = mas_next(&test, end - 1);
+               }
+               rcu_read_unlock();
+               BUG_ON(count != test_count);
+               mas_set_range(mas, start, end - 1);
+       }
+#endif
        mas_store_gfp(mas, NULL, GFP_KERNEL);
-
-       /* Detach vmas from the MM linked list */
-       vma->vm_prev = NULL;
-       if (prev)
-               prev->vm_next = last->vm_next;
-       else
-               mm->mmap = last->vm_next;
-
-       if (last->vm_next) {
-               last->vm_next->vm_prev = prev;
-               last->vm_next = NULL;
-       } else
-               mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
-
+       mm->map_count -= count;
        /*
         * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
         * VM_GROWSUP VMA. Such VMAs can change their size under
         * down_read(mmap_lock) and collide with the VMA we are about to unmap.
         */
        if (downgrade) {
-               if (last && (last->vm_flags & VM_GROWSDOWN))
+               if (next && (next->vm_flags & VM_GROWSDOWN))
                        downgrade = false;
                else if (prev && (prev->vm_flags & VM_GROWSUP))
                        downgrade = false;
@@ -2545,10 +2474,12 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
                        mmap_write_downgrade(mm);
        }
 
-       unmap_region(mm, vma, prev, start, end);
-
-       /* Fix up all other VM information */
-       remove_vma_list(mm, vma);
+       unmap_region(mm, &mt_detach, vma, prev, next, start, end);
+       /* Statistics and freeing VMAs */
+       mas_set(&mas_detach, start);
+       remove_mt(mm, &mas_detach);
+       validate_mm(mm);
+       __mt_destroy(&mt_detach);
 
        return downgrade ? 1 : 0;
 }
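
Not part of the patch: condensed to its core, the reworked
do_mas_align_munmap() above detaches the affected VMAs into a private side
tree and then wipes the range from the mm with a single NULL store; the
splits at start/end, userfaultfd handling and lock downgrade are omitted
from this sketch.

	struct maple_tree mt_detach;
	MA_STATE(mas_detach, &mt_detach, start, end - 1);
	struct vm_area_struct *next;
	int count = 0;

	mt_init_flags(&mt_detach, MM_MT_FLAGS);
	mt_set_external_lock(&mt_detach, &mm->mmap_lock);

	/* mas is the caller's state, positioned on the first VMA in the range */
	mas_for_each(mas, next, end - 1) {
		munmap_sidetree(next, &mas_detach);	/* store into mt_detach, munlock */
		count++;
	}

	mas_set_range(mas, start, end - 1);	/* drop the range from the mm's tree */
	mas_store_gfp(mas, NULL, GFP_KERNEL);
	mm->map_count -= count;

	/* mt_detach then drives unmap_region() and remove_mt() */
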
@@ -2773,7 +2704,6 @@ cannot_expand:
                i_mmap_lock_write(vma->vm_file->f_mapping);
 
        vma_mas_store(vma, &mas);
-       __vma_link_list(mm, vma, prev);
        mm->map_count++;
        if (vma->vm_file) {
                if (vma->vm_flags & VM_SHARED)
@@ -2825,7 +2755,7 @@ unmap_and_free_vma:
        vma->vm_file = NULL;
 
        /* Undo any partial mapping done by a device driver. */
-       unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
+       unmap_region(mm, mas.tree, vma, prev, next, vma->vm_start, vma->vm_end);
        charged = 0;
        if (vm_flags & VM_SHARED)
                mapping_unmap_writable(file->f_mapping);
@@ -2914,11 +2844,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
                goto out;
 
        if (start + size > vma->vm_end) {
-               struct vm_area_struct *next;
+               VMA_ITERATOR(vmi, mm, vma->vm_end);
+               struct vm_area_struct *next, *prev = vma;
 
-               for (next = vma->vm_next; next; next = next->vm_next) {
+               for_each_vma_range(vmi, next, start + size) {
                        /* hole between vmas ? */
-                       if (next->vm_start != next->vm_prev->vm_end)
+                       if (next->vm_start != prev->vm_end)
                                goto out;
 
                        if (next->vm_file != vma->vm_file)
@@ -2927,8 +2858,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
                        if (next->vm_flags != vma->vm_flags)
                                goto out;
 
-                       if (start + size <= next->vm_end)
-                               break;
+                       prev = next;
                }
 
                if (!next)
@@ -2974,7 +2904,7 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
                         struct list_head *uf)
 {
        struct mm_struct *mm = vma->vm_mm;
-       struct vm_area_struct unmap;
+       struct vm_area_struct unmap, *next;
        unsigned long unmap_pages;
        int ret;
 
@@ -2991,6 +2921,7 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
        ret = userfaultfd_unmap_prep(mm, newbrk, oldbrk, uf);
        if (ret)
                return ret;
+
        ret = 1;
 
        /* Change the oldbrk of vma to the newbrk of the munmap area */
@@ -3004,9 +2935,12 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
        vma_init(&unmap, mm);
        unmap.vm_start = newbrk;
        unmap.vm_end = oldbrk;
+       if (vma->anon_vma)
+               vma_set_anonymous(&unmap);
        if (vma_mas_remove(&unmap, mas))
                goto mas_store_fail;
 
+       vma->vm_end = newbrk;
        if (vma->anon_vma) {
                anon_vma_interval_tree_post_update_vma(vma);
                anon_vma_unlock_write(vma->anon_vma);
@@ -3018,8 +2952,9 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
                munlock_vma_pages_range(&unmap, newbrk, oldbrk);
        }
 
+       next = mas_next(mas, ULONG_MAX);
        mmap_write_downgrade(mm);
-       unmap_region(mm, &unmap, vma, newbrk, oldbrk);
+       unmap_region(mm, mas->tree, &unmap, vma, next, newbrk, oldbrk);
        /* Statistics */
        vm_stat_account(mm, vma->vm_flags, -unmap_pages);
        if (vma->vm_flags & VM_ACCOUNT)
@@ -3030,6 +2965,7 @@ munmap_full_vma:
        return ret;
 
 mas_store_fail:
+       mas_unlock(mas);
        vma->vm_end = oldbrk;
        if (vma->anon_vma) {
                anon_vma_interval_tree_post_update_vma(vma);
@@ -3051,11 +2987,9 @@ mas_store_fail:
  * do some brk-specific accounting here.
  */
 static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
-                       unsigned long addr, unsigned long len,
-                       unsigned long flags)
+               unsigned long addr, unsigned long len, unsigned long flags)
 {
        struct mm_struct *mm = current->mm;
-       struct vm_area_struct *prev = NULL;
        validate_mm_mt(mm);
 
 
@@ -3098,7 +3032,6 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
                khugepaged_enter_vma_merge(vma, flags);
                goto out;
        }
-       prev = vma;
 
        /* create a vma struct for an anonymous mapping */
        vma = vm_area_alloc(mm);
@@ -3111,13 +3044,15 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
        vma->vm_pgoff = addr >> PAGE_SHIFT;
        vma->vm_flags = flags;
        vma->vm_page_prot = vm_get_page_prot(flags);
+       if (vma->vm_file)
+               i_mmap_lock_write(vma->vm_file->f_mapping);
        vma_mas_store(vma, mas);
-
-       if (!prev)
-               prev = mas_prev(mas, 0);
-
-       __vma_link_list(mm, vma, prev);
        mm->map_count++;
+       if (vma->vm_file) {
+               __vma_link_file(vma);
+               i_mmap_unlock_write(vma->vm_file->f_mapping);
+       }
+
 out:
        perf_event_mmap(vma);
        mm->total_vm += len >> PAGE_SHIFT;
@@ -3125,7 +3060,7 @@ out:
        if (flags & VM_LOCKED)
                mm->locked_vm += (len >> PAGE_SHIFT);
        vma->vm_flags |= VM_SOFTDIRTY;
-       validate_mm_mt(mm);
+       validate_mm(mm);
        return 0;
 
 vma_alloc_fail:
@@ -3197,6 +3132,8 @@ void exit_mmap(struct mm_struct *mm)
        struct mmu_gather tlb;
        struct vm_area_struct *vma;
        unsigned long nr_accounted = 0;
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
+       int count = 0;
 
        /* mm's last user has gone, and its about to be pulled down */
        mmu_notifier_release(mm);
@@ -3214,8 +3151,8 @@ void exit_mmap(struct mm_struct *mm)
                 * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
                 * __oom_reap_task_mm() will not block.
                 *
-                * This needs to be done before calling unlock_range(),
-                * which clears VM_LOCKED, otherwise the oom reaper cannot
+                * This needs to be done before unlocking the VMAs in the loop
+                * below which clears VM_LOCKED, otherwise the oom reaper cannot
                 * reliably test it.
                 */
                (void)__oom_reap_task_mm(mm);
@@ -3224,12 +3161,19 @@ void exit_mmap(struct mm_struct *mm)
        }
 
        mmap_write_lock(mm);
-       if (mm->locked_vm)
-               unlock_range(mm->mmap, &vma, ULONG_MAX);
+       if (mm->locked_vm) {
+               mas_for_each(&mas, vma, ULONG_MAX) {
+                       if (vma->vm_flags & VM_LOCKED) {
+                               mm->locked_vm -= vma_pages(vma);
+                               munlock_vma_pages_all(vma);
+                       }
+               }
+               mas_set(&mas, 0);
+       }
 
        arch_exit_mmap(mm);
 
-       vma = mm->mmap;
+       vma = mas_find(&mas, ULONG_MAX);
        if (!vma) {
                /* Can happen if dup_mmap() received an OOM */
                mmap_write_unlock(mm);
@@ -3241,17 +3185,24 @@ void exit_mmap(struct mm_struct *mm)
        tlb_gather_mmu_fullmm(&tlb, mm);
        /* update_hiwater_rss(mm) here? but nobody should be looking */
        /* Use -1 here to ensure all VMAs in the mm are unmapped */
-       unmap_vmas(&tlb, vma, 0, -1);
-       free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
+       unmap_vmas(&tlb, &mm->mm_mt, vma, 0, ULONG_MAX);
+       free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
        tlb_finish_mmu(&tlb);
 
-       /* Walk the list again, actually closing and freeing it. */
-       while (vma) {
+       /*
+        * Walk the list again, actually closing and freeing it, with preemption
+        * enabled, without holding any MM locks besides the unreachable
+        * mmap_write_lock.
+        */
+       do {
                if (vma->vm_flags & VM_ACCOUNT)
                        nr_accounted += vma_pages(vma);
-               vma = remove_vma(vma);
+               remove_vma(vma);
+               count++;
                cond_resched();
-       }
+       } while ((vma = mas_find(&mas, ULONG_MAX)) != NULL);
+
+       BUG_ON(count != mm->map_count);
 
        mmap_write_unlock(mm);
        trace_exit_mmap(mm);
@@ -3291,7 +3242,7 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
                vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
        }
 
-       vma_link(mm, vma, prev);
+       vma_link(mm, vma);
        return 0;
 }
 
@@ -3319,7 +3270,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                faulted_in_anon_vma = false;
        }
 
-       if (range_has_overlap(mm, addr, addr + len, &prev))
+       new_vma = find_vma_prev(mm, addr, &prev);
+       if (new_vma->vm_start < addr + len)
                return NULL;    /* should never get here */
 
        new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
@@ -3362,7 +3314,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                        get_file(new_vma->vm_file);
                if (new_vma->vm_ops && new_vma->vm_ops->open)
                        new_vma->vm_ops->open(new_vma);
-               vma_link(mm, new_vma, prev);
+               vma_link(mm, new_vma);
                *need_rmap_locks = false;
        }
        validate_mm_mt(mm);
@@ -3662,12 +3614,13 @@ int mm_take_all_locks(struct mm_struct *mm)
 {
        struct vm_area_struct *vma;
        struct anon_vma_chain *avc;
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
 
        BUG_ON(mmap_read_trylock(mm));
 
        mutex_lock(&mm_all_locks_mutex);
 
-       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+       mas_for_each(&mas, vma, ULONG_MAX) {
                if (signal_pending(current))
                        goto out_unlock;
                if (vma->vm_file && vma->vm_file->f_mapping &&
@@ -3675,7 +3628,8 @@ int mm_take_all_locks(struct mm_struct *mm)
                        vm_lock_mapping(mm, vma->vm_file->f_mapping);
        }
 
-       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+       mas_set(&mas, 0);
+       mas_for_each(&mas, vma, ULONG_MAX) {
                if (signal_pending(current))
                        goto out_unlock;
                if (vma->vm_file && vma->vm_file->f_mapping &&
@@ -3683,7 +3637,8 @@ int mm_take_all_locks(struct mm_struct *mm)
                        vm_lock_mapping(mm, vma->vm_file->f_mapping);
        }
 
-       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+       mas_set(&mas, 0);
+       mas_for_each(&mas, vma, ULONG_MAX) {
                if (signal_pending(current))
                        goto out_unlock;
                if (vma->anon_vma)
@@ -3742,11 +3697,12 @@ void mm_drop_all_locks(struct mm_struct *mm)
 {
        struct vm_area_struct *vma;
        struct anon_vma_chain *avc;
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
 
        BUG_ON(mmap_read_trylock(mm));
        BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
 
-       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+       mas_for_each(&mas, vma, ULONG_MAX) {
                if (vma->anon_vma)
                        list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
                                vm_unlock_anon_vma(avc->anon_vma);
index 645d11d3a8ab613784a155db687ce037f5c7cb0a..8b801f5c9ef9c54e3ebe58f8675a68624818d40c 100644 (file)
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -551,7 +551,6 @@ static void put_nommu_region(struct vm_region *region)
 static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
 {
        struct address_space *mapping;
-       struct vm_area_struct *prev;
        MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end);
 
        BUG_ON(!vma->vm_region);
@@ -570,11 +569,8 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
                i_mmap_unlock_write(mapping);
        }
 
-       prev = mas_prev(&mas, 0);
-       mas_reset(&mas);
        /* add the VMA to the tree */
        vma_mas_store(vma, &mas);
-       __vma_link_list(mm, vma, prev);
 }
 
 /*
@@ -599,7 +595,6 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
 
        /* remove from the MM's tree and list */
        vma_mas_remove(vma, &mas);
-       __vma_unlink_list(vma->vm_mm, vma);
 }
 
 /*
index fcad6c87f9a0798cb116c84e4ca0b6a5bae7d4ad..9b969da621f60c25a93a56188f21956fab358ac2 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -271,46 +271,6 @@ void *memdup_user_nul(const void __user *src, size_t len)
 }
 EXPORT_SYMBOL(memdup_user_nul);
 
-void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
-               struct vm_area_struct *prev)
-{
-       struct vm_area_struct *next;
-
-       vma->vm_prev = prev;
-       if (prev) {
-               next = prev->vm_next;
-               prev->vm_next = vma;
-       } else {
-               next = mm->mmap;
-               mm->mmap = vma;
-       }
-       vma->vm_next = next;
-       if (next)
-               next->vm_prev = vma;
-       else
-               mm->highest_vm_end = vm_end_gap(vma);
-}
-
-void __vma_unlink_list(struct mm_struct *mm, struct vm_area_struct *vma)
-{
-       struct vm_area_struct *prev, *next;
-
-       next = vma->vm_next;
-       prev = vma->vm_prev;
-       if (prev)
-               prev->vm_next = next;
-       else
-               mm->mmap = next;
-       if (next) {
-               next->vm_prev = prev;
-       } else {
-               if (prev)
-                       mm->highest_vm_end = vm_end_gap(prev);
-               else
-                       mm->highest_vm_end = 0;
-       }
-}
-
 /* Check if the vma is being used as a stack by this task */
 int vma_is_stack_for_current(struct vm_area_struct *vma)
 {