mmap/fork/mm.h: Add maple tree support.
author    Liam R. Howlett <Liam.Howlett@Oracle.com>
          Fri, 14 Feb 2020 17:04:34 +0000 (12:04 -0500)
committer Liam R. Howlett <Liam.Howlett@Oracle.com>
          Fri, 30 Oct 2020 18:59:00 +0000 (14:59 -0400)
When forking, initialize the maple tree and add an entry for each VMA.

mmap: Clean up prints.

mm: Export vma_store so fork() can add VMAs to the maple tree.

Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
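
Note on the indexing convention: the tree stores each VMA over the inclusive
byte range [vm_start, vm_end - 1], since vm_end is exclusive. A minimal sketch
of the two operations this series builds on (illustrative only, not part of
the patch; mtree_store_range() and mtree_erase() are the maple tree entry
points used below):

    /* Store the VMA over its inclusive range; replaces whatever was there. */
    mtree_store_range(&mm->mm_mt, vma->vm_start, vma->vm_end - 1, vma,
                      GFP_KERNEL);

    /* Erase by any index inside the range; vm_start is enough. */
    mtree_erase(&mm->mm_mt, vma->vm_start);
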
include/linux/mm.h
kernel/fork.c
mm/mmap.c

index ef360fe70aafcf6d2e40bc5b89a4cd0fc82a1bc9..78d5bc3bbf345922fa48f9d8106a177cc18797dd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2467,6 +2467,8 @@ extern bool arch_has_descending_max_zone_pfns(void);
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
 extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
+/* maple_tree */
+void vma_store(struct mm_struct *mm, struct vm_area_struct *vma);
 
 /* interval_tree.c */
 void vma_interval_tree_insert(struct vm_area_struct *node,
index 32083db7a2a23e93bf2ed9786c92769a0e9c7136..1174565a3b96fe478231346d3af6e3e134270364 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -474,6 +474,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
        int retval;
        unsigned long charge;
        LIST_HEAD(uf);
+       MA_STATE(old_mas, &oldmm->mm_mt, 0, 0);
+       struct vm_area_struct *old_vma;
 
        uprobe_start_dup_mmap();
        if (mmap_write_lock_killable(oldmm)) {
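
The MA_STATE() macro above declares an on-stack cursor (struct ma_state) over
oldmm's tree, starting at index 0; the disabled experimental block in the next
hunk uses it to walk every VMA. Roughly equivalent to the following sketch
(field layout is from the maple tree header and may differ slightly in this
tree's version):

    struct ma_state old_mas = {
            .tree  = &oldmm->mm_mt,
            .index = 0,
            .last  = 0,
            .node  = MAS_START,
            .min   = 0,
            .max   = ULONG_MAX,
    };
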
@@ -506,6 +508,98 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                goto out;
 
        prev = NULL;
+#if 0
+       mas_for_each(&old_mas, old_vma, ULONG_MAX) {
+               struct file *file;
+
+               if (mas_retry(&old_mas, old_vma))
+                       continue;
+
+               mas_pause(&old_mas);
+               mas_unlock(&old_mas);
+
+               if (old_vma->vm_flags & VM_DONTCOPY) {
+                       vm_stat_account(mm, old_vma->vm_flags,
+                                       -vma_pages(old_vma));
+                       continue;
+               }
+               charge = 0;
+               /*
+                * Don't duplicate many vmas if we've been oom-killed (for
+                * example)
+                */
+               if (fatal_signal_pending(current)) {
+                       retval = -EINTR;
+                       goto out;
+               }
+               if (old_vma->vm_flags & VM_ACCOUNT) {
+                       unsigned long len = vma_pages(old_vma);
+
+                       if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
+                               goto fail_nomem;
+                       charge = len;
+               }
+               tmp = vm_area_dup(old_vma);
+               if (!tmp)
+                       goto fail_nomem;
+               retval = vma_dup_policy(old_vma, tmp);
+               if (retval)
+                       goto fail_nomem_policy;
+               tmp->vm_mm = mm;
+               retval = dup_userfaultfd(tmp, &uf);
+               if (retval)
+                       goto fail_nomem_anon_vma_fork;
+               if (tmp->vm_flags & VM_WIPEONFORK) {
+                       /* VM_WIPEONFORK gets a clean slate in the child. */
+                       tmp->anon_vma = NULL;
+                       if (anon_vma_prepare(tmp))
+                               goto fail_nomem_anon_vma_fork;
+               } else if (anon_vma_fork(tmp, old_vma))
+                       goto fail_nomem_anon_vma_fork;
+               tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
+               tmp->vm_next = tmp->vm_prev = NULL;
+               file = tmp->vm_file;
+               if (file) {
+                       struct inode *inode = file_inode(file);
+                       struct address_space *mapping = file->f_mapping;
+
+                       get_file(file);
+                       if (tmp->vm_flags & VM_DENYWRITE)
+                               atomic_dec(&inode->i_writecount);
+                       i_mmap_lock_write(mapping);
+                       if (tmp->vm_flags & VM_SHARED)
+                               atomic_inc(&mapping->i_mmap_writable);
+                       flush_dcache_mmap_lock(mapping);
+                       /* insert tmp into the share list, just after old_vma */
+                       vma_interval_tree_insert_after(tmp, old_vma,
+                                       &mapping->i_mmap);
+                       flush_dcache_mmap_unlock(mapping);
+                       i_mmap_unlock_write(mapping);
+               }
+
+               /*
+                * Clear hugetlb-related page reserves for children. This only
+                * affects MAP_PRIVATE mappings. Faults generated by the child
+                * are not guaranteed to succeed, even if read-only
+                */
+               if (is_vm_hugetlb_page(tmp))
+                       reset_vma_resv_huge_pages(tmp);
+
+               /* Actually insert the vma into the new mm. */
+               vma_store(mm, tmp);
+
+               mm->map_count++;
+               if (!(tmp->vm_flags & VM_WIPEONFORK))
+                       retval = copy_page_range(mm, oldmm, old_vma);
+
+               if (tmp->vm_ops && tmp->vm_ops->open)
+                       tmp->vm_ops->open(tmp);
+
+               if (retval)
+                       goto out;
+               mas_lock(&old_mas);
+       }
+#endif
        for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
                struct file *file;
 
@@ -588,6 +682,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                rb_link = &tmp->vm_rb.rb_right;
                rb_parent = &tmp->vm_rb;
 
+               /* Link the vma into the MT */
+               vma_store(mm, tmp);
+
                mm->map_count++;
                if (!(tmp->vm_flags & VM_WIPEONFORK))
                        retval = copy_page_range(tmp, mpnt);
@@ -1004,6 +1101,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 {
        mm->mmap = NULL;
        mm->mm_rb = RB_ROOT;
+       mt_init_flags(&mm->mm_mt, MAPLE_ALLOC_RANGE);
        mm->vmacache_seqnum = 0;
        atomic_set(&mm->mm_users, 1);
        atomic_set(&mm->mm_count, 1);
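
Two notes on the fork side. First, mt_init_flags(&mm->mm_mt, MAPLE_ALLOC_RANGE)
presumably selects the allocation-range tree variant, which maintains gap
information in each node so the unmapped-area searches added to mm/mmap.c below
can be answered from the tree alone; a plain tree would store the entries but
support no gap search. Second, the disabled dup_mmap() walk above follows the
usual RCU read pattern for tree iteration; extracted as a sketch (mas_retry()
is this tree's helper for skipping retry entries):

    MA_STATE(mas, &oldmm->mm_mt, 0, 0);
    struct vm_area_struct *vma;

    rcu_read_lock();
    mas_for_each(&mas, vma, ULONG_MAX) {
            if (mas_retry(&mas, vma))       /* skip retry entries */
                    continue;
            /* entry spans [mas.index, mas.last] == [vm_start, vm_end - 1] */
    }
    rcu_read_unlock();
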
index 12cdf82fd7fe32a025e3f0004aa5dd100f9a03fe..5b8dd97f195a8f4fc22f7eeb8c597abacd9f461a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -312,9 +312,7 @@ static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
        }
        return gap;
 }
-#define CONFIG_DEBUG_VM_RB
-#define CONFIG_DEBUG_MAPLE_TREE
-#if defined(CONFIG_DEBUG_VM_RB) || defined(CONFIG_DEBUG_MAPLE_TREE)
+#if defined(CONFIG_DEBUG_VM_RB)
 static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
 {
        unsigned long max = vma_compute_gap(vma), subtree_gap;
@@ -380,8 +378,23 @@ static int browse_rb(struct mm_struct *mm)
        }
        return bug ? -1 : i;
 }
+#if defined(CONFIG_DEBUG_MAPLE_TREE)
 extern void mt_dump(const struct maple_tree *mt);
 
+static void __vma_mt_dump(struct mm_struct *mm)
+{
+       MA_STATE(mas, &mm->mm_mt, 0, 0);
+       struct vm_area_struct *entry = NULL;
+       rcu_read_lock();
+       mas_for_each(&mas, entry, ULONG_MAX) {
+               if (mas_retry(&mas, entry))
+                       continue;
+
+               pr_debug("vma: %lu-%lu\t%lu-%lu\n", entry->vm_start,
+                               entry->vm_end, mas.index, mas.last);
+       }
+       rcu_read_unlock();
+}
 /* Validate the maple tree
  *
  */
@@ -412,8 +425,8 @@ static void validate_mm_mt(struct mm_struct *mm,
        VM_BUG_ON(vma);
 
        rcu_read_unlock();
-       //printk("%s: done\n", __func__);
 }
+#endif
 static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
 {
        struct rb_node *nd;
@@ -440,40 +453,41 @@ static void validate_mm(struct mm_struct *mm)
        int mt_i = 0;
 
 
-       //printk("%s: vma linked list\n", __func__);
        while (vma) {
                struct anon_vma *anon_vma = vma->anon_vma;
                struct anon_vma_chain *avc;
 
-               //pr_cont("vma: %lu-%lu", vma->vm_start, vma->vm_end);
+               pr_cont("vma: %lu-%lu", vma->vm_start, vma->vm_end);
                if (anon_vma) {
-               //      pr_cont(" anon");
+                       pr_cont(" anon");
                        anon_vma_lock_read(anon_vma);
-//                     list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-//                             anon_vma_interval_tree_verify(avc);
+                       list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+                               anon_vma_interval_tree_verify(avc);
                        anon_vma_unlock_read(anon_vma);
                }
-               //pr_cont("\n");
+               pr_cont("\n");
 
                highest_address = vm_end_gap(vma);
                vma = vma->vm_next;
                i++;
        }
-       //printk("%s: mas for each\n", __func__);
        rcu_read_lock();
        mas_for_each(&mas, entry, ULONG_MAX) {
                if (mas_retry(&mas, entry))
                        continue;
 
-       //      printk("vma: %lu-%lu\t%lu-%lu\n", entry->vm_start, entry->vm_end,
-       //                      mas.index, mas.last);
+               if (entry->vm_end != mas.last + 1) {
+                       pr_emerg("vma: entry %lu-%lu\tmt %lu-%lu\n",
+                                       entry->vm_start, entry->vm_end,
+                                       mas.index, mas.last);
+                       mt_dump(mas.tree);
+               }
                VM_BUG_ON_MM(entry->vm_end != mas.last + 1, mm);
                VM_BUG_ON_MM(entry->vm_start != mas.index, mm);
                mt_highest_address = vm_end_gap(entry);
                mt_i++;
        }
        rcu_read_unlock();
-       //printk("%s: mas for each done\n", __func__);
        if (i != mt_i) {
                pr_emerg("%s: %d != %d\n", __func__, i, mt_i);
                mt_dump(mas.tree);
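
The two VM_BUG_ON_MM() checks above encode the storage convention directly: an
entry must span exactly [vm_start, vm_end - 1]. Worked example: a VMA covering
0x1000-0x2000 (vm_end exclusive) is stored at indices 0x1000-0x1fff, so
mas.index == 0x1000 == vm_start and mas.last + 1 == 0x2000 == vm_end.
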
@@ -487,7 +501,7 @@ static void validate_mm(struct mm_struct *mm)
        }
        if (highest_address != mm->highest_vm_end) {
                pr_emerg("mm->highest_vm_end %lx, found %lx\n",
-                         mm->highest_vm_end, highest_address);
+                               mm->highest_vm_end, highest_address);
                bug = 1;
        }
        i = browse_rb(mm);
@@ -500,6 +514,7 @@ static void validate_mm(struct mm_struct *mm)
 }
 #else
 #define validate_mm_rb(root, ignore) do { } while (0)
+#define validate_mm_mt(root, ignore) do { } while (0)
 #define validate_mm(mm) do { } while (0)
 #endif
 
@@ -529,7 +544,6 @@ static inline void vma_rb_insert(struct vm_area_struct *vma,
        /* All rb_subtree_gap values must be consistent prior to insertion */
        validate_mm_rb(root, NULL);
 
-       //printk("insert augmented %lu-%lu\n", vma->vm_start, vma->vm_end);
        rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
 }
 
@@ -616,8 +630,6 @@ static int find_vma_links(struct mm_struct *mm, unsigned long addr,
                __rb_parent = *__rb_link;
                vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
 
-               //printk("%s: checking %lu-%lu\n", __func__,
-               //              vma_tmp->vm_start, vma_tmp->vm_end);
                if (vma_tmp->vm_end > addr) {
                        /* Fail if an existing vma overlaps the area */
                        if (vma_tmp->vm_start < end)
@@ -789,34 +801,19 @@ static void __vma_link_file(struct vm_area_struct *vma)
                flush_dcache_mmap_unlock(mapping);
        }
 }
-static void __vma_mt_dump(struct mm_struct *mm)
-{
-       MA_STATE(mas, &mm->mm_mt, 0, 0);
-       struct vm_area_struct *entry = NULL;
-       rcu_read_lock();
-       mas_for_each(&mas, entry, ULONG_MAX) {
-               if (mas_retry(&mas, entry))
-                       continue;
-
-               printk("vma: %lu-%lu\t%lu-%lu\n", entry->vm_start, entry->vm_end,
-                               mas.index, mas.last);
-       }
-       rcu_read_unlock();
-
-}
 static void __vma_mt_erase(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-       printk("mt_mod %px ERASE, %lu, %lu,\n",
-                       mm, vma->vm_start, vma->vm_start);
        mtree_erase(&mm->mm_mt, vma->vm_start);
 }
 static void __vma_mt_store(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-       printk("mt_mod %px STORE, %lu, %lu,\n",
-                       mm, vma->vm_start, vma->vm_end - 1);
        mtree_store_range(&mm->mm_mt, vma->vm_start, vma->vm_end - 1, vma,
                GFP_KERNEL);
 }
+void vma_store(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+       __vma_mt_store(mm, vma);
+}
 // LRH: Needed - update linked list, should be fine.
 static void
 __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -858,25 +855,11 @@ static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
        struct vm_area_struct *prev;
        struct rb_node **rb_link, *rb_parent;
-       unsigned long vm_start = vma->vm_start;
-       struct vm_area_struct *overlap = NULL;
 
        if (find_vma_links(mm, vma->vm_start, vma->vm_end,
                           &prev, &rb_link, &rb_parent))
                BUG();
 
-       //printk("going to insert %lx: vma %lu-%lu\n", (unsigned long) current, vma->vm_start, vma->vm_end);
-       if ((overlap = mt_find(&mm->mm_mt, &vm_start, vma->vm_end - 1, true)) != NULL) {
-               /*
-               printk("Found vma ending at %lu\n", vm_start - 1);
-               printk("vma : %lu => %lu-%lu\n", (unsigned long)overlap,
-                               overlap->vm_start, overlap->vm_end);
-               printk("rbtree:\n");
-               */
-               browse_rb(mm);
-#define CONFIG_DEBUG_MAPLE_TREE
-               //mt_dump(&mm->mm_mt);
-       }
        __vma_link(mm, vma, prev, rb_link, rb_parent);
        mm->map_count++;
 }
@@ -1028,14 +1011,12 @@ again:
        if (!anon_vma && adjust_next)
                anon_vma = next->anon_vma;
        if (anon_vma) {
-               browse_rb(mm);
                VM_WARN_ON(adjust_next && next->anon_vma &&
                           anon_vma != next->anon_vma);
                anon_vma_lock_write(anon_vma);
                anon_vma_interval_tree_pre_update_vma(vma);
                if (adjust_next)
                        anon_vma_interval_tree_pre_update_vma(next);
-               browse_rb(mm);
        }
 
        if (file) {
@@ -1375,7 +1356,6 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
        if (area && area->vm_end == end)                /* cases 6, 7, 8 */
                next = next->vm_next;
 
-       //printk("%s: %lx: vma %lu-%lu\n", __func__, (unsigned long) current, addr, end);
        /* verify some invariant that must be enforced by the caller */
        VM_WARN_ON(prev && addr <= prev->vm_start);
        VM_WARN_ON(area && end > area->vm_end);
@@ -2130,6 +2110,7 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long length, low_limit, high_limit, gap_start, gap_end;
+       unsigned long gap;
        MA_STATE(mas, &mm->mm_mt, 0, 0);
 
        /* Adjust search length to account for worst case alignment overhead */
@@ -2137,6 +2118,17 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
        if (length < info->length)
                return -ENOMEM;
 
+       /* The maple tree is self-contained. */
+       rcu_read_lock();
+       if (mas_get_unmapped_area(&mas, info->low_limit,
+                               info->high_limit - 1, length)) {
+               rcu_read_unlock();
+               return -ENOMEM;
+       }
+       rcu_read_unlock();
+       gap = mas.index;
+       gap += (info->align_offset - gap) & info->align_mask;
+
        /* Adjust search limits by the desired length */
        if (info->high_limit < length)
                return -ENOMEM;
@@ -2146,12 +2137,6 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
                return -ENOMEM;
        low_limit = info->low_limit + length;
 
-       // Maple tree is self contained.
-       rcu_read_lock();
-       if (mas_get_unmapped_area(&mas, low_limit, high_limit, length))
-               return -ENOMEM;
-       rcu_read_unlock();
-
        /* Check if rbtree root looks promising */
        if (RB_EMPTY_ROOT(&mm->mm_rb))
                goto check_highest;
@@ -2225,7 +2210,7 @@ found:
        VM_BUG_ON(gap_start + info->length > info->high_limit);
        VM_BUG_ON(gap_start + info->length > gap_end);
 
-       //VM_BUG_ON(mas.index != gap_start);
+       VM_BUG_ON(gap != gap_start);
        return gap_start;
 }
 
@@ -2234,13 +2219,24 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long length, low_limit, high_limit, gap_start, gap_end;
+       unsigned long gap;
        MA_STATE(mas, &mm->mm_mt, 0, 0);
 
+       validate_mm_mt(mm, NULL);
+
        /* Adjust search length to account for worst case alignment overhead */
        length = info->length + info->align_mask;
        if (length < info->length)
                return -ENOMEM;
 
+       rcu_read_lock();
+       if (mas_get_unmapped_area_rev(&mas, info->low_limit,
+                       info->high_limit - 1, length)) {
+               rcu_read_unlock();
+               return -ENOMEM;
+       }
+       rcu_read_unlock();
+       gap = (mas.index & ~info->align_mask) + info->align_offset;
        /*
         * Adjust search limits by the desired length.
         * See implementation comment at top of unmapped_area().
@@ -2254,12 +2248,6 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
                return -ENOMEM;
        low_limit = info->low_limit + length;
 
-
-       rcu_read_lock();
-       if (mas_get_unmapped_area_rev(&mas, low_limit, high_limit, length))
-               return -ENOMEM;
-       rcu_read_unlock();
-
        /* Check highest gap, which does not precede any rbtree node */
        gap_start = mm->highest_vm_end;
        if (gap_start <= high_limit)
@@ -2332,8 +2320,13 @@ found_highest:
 
        VM_BUG_ON(gap_end < info->low_limit);
        VM_BUG_ON(gap_end < gap_start);
-       
-       VM_BUG_ON(mas.last + 1  != gap_end);
+
+       if (gap != gap_end) {
+               pr_err("Gap mismatch: mt %lu gap_end %lu\n", gap, gap_end);
+               mt_dump(&mm->mm_mt);
+       }
+       VM_BUG_ON(gap != gap_end);
+
        return gap_end;
 }
 
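
The alignment arithmetic in the two searches rounds in opposite directions:
bottom-up rounds the candidate up to the next address that equals align_offset
modulo the alignment, while top-down rounds it down. A standalone userspace
sketch with assumed values (mask and offset are illustrative, not from the
patch):

    #include <stdio.h>

    int main(void)
    {
            unsigned long align_mask = 0xffffUL;    /* 64KiB alignment - 1 */
            unsigned long align_offset = 0x1000UL;  /* assumed offset */
            unsigned long gap = 0x2a000UL;          /* candidate from the tree */

            /* Bottom-up: round up, as in unmapped_area(). */
            unsigned long up = gap + ((align_offset - gap) & align_mask);

            /* Top-down: round down, as in unmapped_area_topdown(). */
            unsigned long down = (gap & ~align_mask) + align_offset;

            /* prints up=0x31000 down=0x21000 */
            printf("up=%#lx down=%#lx\n", up, down);
            return 0;
    }
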
@@ -2528,14 +2521,12 @@ struct vm_area_struct *mt_find_vma(struct mm_struct *mm, unsigned long addr)
 {
        struct vm_area_struct *vma;
 
-       //printk("%s: looking up %lu\n", __func__, addr);
        /* Check the cache first. */
        vma = vmacache_find(mm, addr);
        if (likely(vma))
                return vma;
 
        vma = mt_find(&mm->mm_mt, &addr, ULONG_MAX, 0);
-       //printk("Found %lu\n", (unsigned long)vma);
 
        if (vma)
                vmacache_update(addr, vma);
@@ -2577,10 +2568,13 @@ struct vm_area_struct *rb_find_vma(struct mm_struct *mm, unsigned long addr)
 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 {
        struct vm_area_struct *ret = rb_find_vma(mm, addr);
-       struct vm_area_struct *ret2 = mt_find_vma(mm, addr);
-       if (ret != ret2)
-               printk("%px %lu: ret %px ret2 %px\n", mm, addr, ret, ret2);
-       VM_BUG_ON_VMA((unsigned long)ret != (unsigned long)ret2 , ret);
+       struct vm_area_struct *mt_ret = mt_find_vma(mm, addr);
+       if (ret != mt_ret) {
+               pr_err("Looking for %lu\n", addr);
+               mt_dump(&mm->mm_mt);
+               pr_err("%px %lu: ret %px mt_ret %px\n", mm, addr, ret, mt_ret);
+       }
+       VM_BUG_ON_VMA((unsigned long)ret != (unsigned long)mt_ret, ret);
        return ret;
 }
 
@@ -2707,8 +2701,6 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
        if (!(vma->vm_flags & VM_GROWSUP))
                return -EFAULT;
 
-       printk("%s: %lu-%lu expand to %lu-%lu\n", __func__, vma->vm_start, vma->vm_end,
-                       vma->vm_start, address);
        /* Guard against exceeding limits of the address space. */
        address &= PAGE_MASK;
        if (address >= (TASK_SIZE & PAGE_MASK))
@@ -2855,8 +2847,7 @@ int expand_downwards(struct vm_area_struct *vma,
                vma->vm_start = address;
                vma->vm_pgoff -= grow;
                // Overwrite old entry in mtree.
-               mtree_store_range(&mm->mm_mt, vma->vm_start, vma->vm_end - 1,
-                                 vma, GFP_KERNEL);
+               __vma_mt_store(mm, vma);
                anon_vma_interval_tree_post_update_vma(vma);
                vma_gap_update(vma);
                spin_unlock(&mm->page_table_lock);
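
The switch to __vma_mt_store() works here because a maple range store is a
plain overwrite: storing the VMA over its grown range [vm_start, vm_end - 1]
replaces the old, shorter entry, so no separate erase is needed. The effect,
sketched with hypothetical addresses:

    /* before: one entry [0x3000, 0x3fff] -> vma */
    vma->vm_start = 0x2000;         /* VMA grew downward */
    __vma_mt_store(mm, vma);        /* stores [0x2000, 0x3fff] */
    /* after: one entry [0x2000, 0x3fff] -> vma, no stale remnant */
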
@@ -3556,10 +3547,9 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
                           &prev, &rb_link, &rb_parent))
                return -ENOMEM;
 
-       //printk("%s: insert %lx: vma %lu-%lu\n", __func__, (unsigned long) current, vma->vm_start, vma->vm_end);
        if ((overlap = mt_find(&mm->mm_mt, &start, vma->vm_end - 1, true)) != NULL) {
-               printk("Found vma ending at %lu\n", start - 1);
-               printk("vma : %lu => %lu-%lu\n", (unsigned long)overlap,
+               pr_err("Found vma ending at %lu\n", start - 1);
+               pr_err("vma : %lu => %lu-%lu\n", (unsigned long)overlap,
                                overlap->vm_start, overlap->vm_end - 1);
                mt_dump(&mm->mm_mt);
                BUG();