From 18f5a8a6d9db2c4722571cd1aa6825467cf39da4 Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett"
Date: Fri, 14 Feb 2020 12:04:34 -0500
Subject: [PATCH] mmap/fork/mm.h: Add maple tree support.

When forking, initialize the maple tree and add an entry for each VMA.

mmap: Clean up debug prints.

mm: Export vma_store() for adding VMAs to the maple tree in fork.

Signed-off-by: Liam R. Howlett
---
 include/linux/mm.h |   2 +
 kernel/fork.c      |  98 ++++++++++++++++++++++++++++++
 mm/mmap.c          | 154 +++++++++++++++++++++------------------------
 3 files changed, 172 insertions(+), 82 deletions(-)
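
Editor's note, placed below the '---' cut so it stays out of the git
history: the maple tree indexes inclusive ranges, while vma->vm_end is
exclusive. That is why every store in this patch covers vm_start
through vm_end - 1, and why the unmapped-area helpers pass
info->high_limit - 1. The sketch below restates that contract using
only calls that appear in the diff (mtree_store_range(), MA_STATE(),
mas_for_each(), mas_retry()); the helper names example_store() and
example_dump() are illustrative and not kernel API.

	/* Sketch: a VMA occupies the inclusive slot [vm_start, vm_end - 1]
	 * in mm->mm_mt. Storing through vm_end itself would collide with
	 * the first byte of the next adjacent VMA.
	 */
	static void example_store(struct mm_struct *mm,
				  struct vm_area_struct *vma)
	{
		mtree_store_range(&mm->mm_mt, vma->vm_start, vma->vm_end - 1,
				  vma, GFP_KERNEL);
	}

	/* Sketch: read-side walk under RCU, mirroring __vma_mt_dump() in
	 * this patch. mas_retry() restarts a step that raced with a
	 * concurrent writer instead of handing back a stale entry.
	 */
	static void example_dump(struct mm_struct *mm)
	{
		MA_STATE(mas, &mm->mm_mt, 0, 0);
		struct vm_area_struct *entry;

		rcu_read_lock();
		mas_for_each(&mas, entry, ULONG_MAX) {
			if (mas_retry(&mas, entry))
				continue;
			pr_debug("vma: %lu-%lu\t%lu-%lu\n", entry->vm_start,
				 entry->vm_end, mas.index, mas.last);
		}
		rcu_read_unlock();
	}

Because the tree stores the VMA pointer itself over its whole range,
dup_mmap() only needs the single exported vma_store() call per VMA to
make the child's tree searchable; there is no separate index structure
to rebalance by hand.
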
diff --git a/include/linux/mm.h b/include/linux/mm.h
index db6ae4d3fb4e..9cbbbbc3043a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2467,6 +2467,8 @@ extern bool arch_has_descending_max_zone_pfns(void);
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
 extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
+/* maple_tree */
+void vma_store(struct mm_struct *mm, struct vm_area_struct *vma);
 
 /* interval_tree.c */
 void vma_interval_tree_insert(struct vm_area_struct *node,
diff --git a/kernel/fork.c b/kernel/fork.c
index 6d266388d380..bd53ef38e298 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -474,6 +474,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 	int retval;
 	unsigned long charge;
 	LIST_HEAD(uf);
+	MA_STATE(old_mas, &oldmm->mm_mt, 0, 0);
+	struct vm_area_struct *old_vma;
 
 	uprobe_start_dup_mmap();
 	if (mmap_write_lock_killable(oldmm)) {
@@ -506,6 +508,98 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		goto out;
 
 	prev = NULL;
+#if 0
+	mas_for_each(&old_mas, old_vma, ULONG_MAX) {
+		struct file *file;
+
+		if (mas_retry(&old_mas, old_vma))
+			continue;
+
+		mas_pause(&old_mas);
+		mas_unlock(&old_mas);
+
+		if (old_vma->vm_flags & VM_DONTCOPY) {
+			vm_stat_account(mm, old_vma->vm_flags,
+					-vma_pages(old_vma));
+			continue;
+		}
+		charge = 0;
+		/*
+		 * Don't duplicate many vmas if we've been oom-killed (for
+		 * example)
+		 */
+		if (fatal_signal_pending(current)) {
+			retval = -EINTR;
+			goto out;
+		}
+		if (old_vma->vm_flags & VM_ACCOUNT) {
+			unsigned long len = vma_pages(old_vma);
+
+			if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
+				goto fail_nomem;
+			charge = len;
+		}
+		tmp = vm_area_dup(old_vma);
+		if (!tmp)
+			goto fail_nomem;
+		retval = vma_dup_policy(old_vma, tmp);
+		if (retval)
+			goto fail_nomem_policy;
+		tmp->vm_mm = mm;
+		retval = dup_userfaultfd(tmp, &uf);
+		if (retval)
+			goto fail_nomem_anon_vma_fork;
+		if (tmp->vm_flags & VM_WIPEONFORK) {
+			/* VM_WIPEONFORK gets a clean slate in the child. */
+			tmp->anon_vma = NULL;
+			if (anon_vma_prepare(tmp))
+				goto fail_nomem_anon_vma_fork;
+		} else if (anon_vma_fork(tmp, old_vma))
+			goto fail_nomem_anon_vma_fork;
+		tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
+		tmp->vm_next = tmp->vm_prev = NULL;
+		file = tmp->vm_file;
+		if (file) {
+			struct inode *inode = file_inode(file);
+			struct address_space *mapping = file->f_mapping;
+
+			get_file(file);
+			if (tmp->vm_flags & VM_DENYWRITE)
+				atomic_dec(&inode->i_writecount);
+			i_mmap_lock_write(mapping);
+			if (tmp->vm_flags & VM_SHARED)
+				atomic_inc(&mapping->i_mmap_writable);
+			flush_dcache_mmap_lock(mapping);
+			/* insert tmp into the share list, just after old_vma */
+			vma_interval_tree_insert_after(tmp, old_vma,
+						       &mapping->i_mmap);
+			flush_dcache_mmap_unlock(mapping);
+			i_mmap_unlock_write(mapping);
+		}
+
+		/*
+		 * Clear hugetlb-related page reserves for children. This only
+		 * affects MAP_PRIVATE mappings. Faults generated by the child
+		 * are not guaranteed to succeed, even if read-only
+		 */
+		if (is_vm_hugetlb_page(tmp))
+			reset_vma_resv_huge_pages(tmp);
+
+		/* Actually insert the vma into the new mm. */
+		vma_store(mm, tmp);
+
+		mm->map_count++;
+		if (!(tmp->vm_flags & VM_WIPEONFORK))
+			retval = copy_page_range(mm, oldmm, old_vma);
+
+		if (tmp->vm_ops && tmp->vm_ops->open)
+			tmp->vm_ops->open(tmp);
+
+		if (retval)
+			goto out;
+		mas_lock(&old_mas);
+	}
+#endif
 	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
 		struct file *file;
 
@@ -588,6 +682,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		rb_link = &tmp->vm_rb.rb_right;
 		rb_parent = &tmp->vm_rb;
 
+		/* Link the vma into the MT */
+		vma_store(mm, tmp);
+
 		mm->map_count++;
 		if (!(tmp->vm_flags & VM_WIPEONFORK))
 			retval = copy_page_range(tmp, mpnt);
@@ -1004,6 +1101,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 {
 	mm->mmap = NULL;
 	mm->mm_rb = RB_ROOT;
+	mt_init_flags(&mm->mm_mt, MAPLE_ALLOC_RANGE);
 	mm->vmacache_seqnum = 0;
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
diff --git a/mm/mmap.c b/mm/mmap.c
index 2d04d0a948b7..8bd40be2e8f0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -312,9 +312,7 @@ static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
 	}
 	return gap;
 }
-#define CONFIG_DEBUG_VM_RB
-#define CONFIG_DEBUG_MAPLE_TREE
-#if defined(CONFIG_DEBUG_VM_RB) || defined(CONFIG_DEBUG_MAPLE_TREE)
+#if defined(CONFIG_DEBUG_VM_RB)
 static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
 {
 	unsigned long max = vma_compute_gap(vma), subtree_gap;
@@ -380,8 +378,23 @@ static int browse_rb(struct mm_struct *mm)
 	}
 	return bug ? -1 : i;
 }
+#if defined(CONFIG_DEBUG_MAPLE_TREE)
 extern void mt_dump(const struct maple_tree *mt);
 
+static void __vma_mt_dump(struct mm_struct *mm)
+{
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
+	struct vm_area_struct *entry = NULL;
+
+	rcu_read_lock();
+	mas_for_each(&mas, entry, ULONG_MAX) {
+		if (mas_retry(&mas, entry))
+			continue;
+
+		pr_debug("vma: %lu-%lu\t%lu-%lu\n", entry->vm_start,
+			 entry->vm_end, mas.index, mas.last);
+	}
+	rcu_read_unlock();
+}
 /* Validate the maple tree
  *
  */
@@ -412,8 +425,8 @@ static void validate_mm_mt(struct mm_struct *mm,
 
 	VM_BUG_ON(vma);
 	rcu_read_unlock();
-	//printk("%s: done\n", __func__);
 }
+#endif
 
 static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
 {
 	struct rb_node *nd;
@@ -440,40 +453,41 @@ static void validate_mm(struct mm_struct *mm)
 	int mt_i = 0;
 
-	//printk("%s: vma linked list\n", __func__);
 	while (vma) {
 		struct anon_vma *anon_vma = vma->anon_vma;
 		struct anon_vma_chain *avc;
 
-		//pr_cont("vma: %lu-%lu", vma->vm_start, vma->vm_end);
+		pr_cont("vma: %lu-%lu", vma->vm_start, vma->vm_end);
 		if (anon_vma) {
-			// pr_cont(" anon");
+			pr_cont(" anon");
 			anon_vma_lock_read(anon_vma);
-//			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-//				anon_vma_interval_tree_verify(avc);
+			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+				anon_vma_interval_tree_verify(avc);
 			anon_vma_unlock_read(anon_vma);
 		}
-		//pr_cont("\n");
+		pr_cont("\n");
 		highest_address = vm_end_gap(vma);
 		vma = vma->vm_next;
 		i++;
 	}
-	//printk("%s: mas for each\n", __func__);
 	rcu_read_lock();
 	mas_for_each(&mas, entry, ULONG_MAX) {
 		if (mas_retry(&mas, entry))
 			continue;
 
-		// printk("vma: %lu-%lu\t%lu-%lu\n", entry->vm_start, entry->vm_end,
-		//		mas.index, mas.last);
+		if (entry->vm_end != mas.last + 1) {
+			pr_emerg("vma: entry %lu-%lu\tmt %lu-%lu\n",
+				 entry->vm_start, entry->vm_end,
+				 mas.index, mas.last);
+			mt_dump(mas.tree);
+		}
 		VM_BUG_ON_MM(entry->vm_end != mas.last + 1, mm);
 		VM_BUG_ON_MM(entry->vm_start != mas.index, mm);
 		mt_highest_address = vm_end_gap(entry);
 		mt_i++;
 	}
 	rcu_read_unlock();
-	//printk("%s: mas for each done\n", __func__);
 	if (i != mt_i) {
 		pr_emerg("%s: %d != %d\n", __func__, i, mt_i);
 		mt_dump(mas.tree);
 	}
@@ -487,7 +501,7 @@ static void validate_mm(struct mm_struct *mm)
 	}
 	if (highest_address != mm->highest_vm_end) {
 		pr_emerg("mm->highest_vm_end %lx, found %lx\n",
-			  mm->highest_vm_end, highest_address);
+			 mm->highest_vm_end, highest_address);
 		bug = 1;
 	}
 	i = browse_rb(mm);
@@ -500,6 +514,7 @@ static void validate_mm(struct mm_struct *mm)
 }
 #else
 #define validate_mm_rb(root, ignore) do { } while (0)
+#define validate_mm_mt(root, ignore) do { } while (0)
 #define validate_mm(mm) do { } while (0)
 #endif
@@ -529,7 +544,6 @@ static inline void vma_rb_insert(struct vm_area_struct *vma,
 
 	/* All rb_subtree_gap values must be consistent prior to insertion */
 	validate_mm_rb(root, NULL);
-	//printk("insert augmented %lu-%lu\n", vma->vm_start, vma->vm_end);
 	rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
 }
@@ -616,8 +630,6 @@ static int find_vma_links(struct mm_struct *mm, unsigned long addr,
 		__rb_parent = *__rb_link;
 		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
 
-		//printk("%s: checking %lu-%lu\n", __func__,
-		//	 vma_tmp->vm_start, vma_tmp->vm_end);
 		if (vma_tmp->vm_end > addr) {
 			/* Fail if an existing vma overlaps the area */
 			if (vma_tmp->vm_start < end)
@@ -789,34 +801,19 @@ static void __vma_link_file(struct vm_area_struct *vma)
 		flush_dcache_mmap_unlock(mapping);
 	}
 }
-static void __vma_mt_dump(struct mm_struct *mm)
-{
-	MA_STATE(mas, &mm->mm_mt, 0, 0);
-	struct vm_area_struct *entry = NULL;
-
-	rcu_read_lock();
-	mas_for_each(&mas, entry, ULONG_MAX) {
-		if (mas_retry(&mas, entry))
-			continue;
-
-		printk("vma: %lu-%lu\t%lu-%lu\n", entry->vm_start, entry->vm_end,
-				mas.index, mas.last);
-	}
-	rcu_read_unlock();
-
-}
 static void __vma_mt_erase(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-	printk("mt_mod %px ERASE, %lu, %lu,\n",
-			mm, vma->vm_start, vma->vm_start);
 	mtree_erase(&mm->mm_mt, vma->vm_start);
 }
 static void __vma_mt_store(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-	printk("mt_mod %px STORE, %lu, %lu,\n",
-			mm, vma->vm_start, vma->vm_end - 1);
 	mtree_store_range(&mm->mm_mt, vma->vm_start, vma->vm_end - 1,
 			vma, GFP_KERNEL);
 }
+void vma_store(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	__vma_mt_store(mm, vma);
+}
 
 // LRH: Needed - update linked list, should be fine.
 static void __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -858,25 +855,11 @@ static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
 	struct vm_area_struct *prev;
 	struct rb_node **rb_link, *rb_parent;
-	unsigned long vm_start = vma->vm_start;
-	struct vm_area_struct *overlap = NULL;
 
 	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
 			   &prev, &rb_link, &rb_parent))
 		BUG();
 
-	//printk("going to insert %lx: vma %lu-%lu\n", (unsigned long) current, vma->vm_start, vma->vm_end);
-	if ((overlap = mt_find(&mm->mm_mt, &vm_start, vma->vm_end - 1, true)) != NULL) {
-		/*
-		printk("Found vma ending at %lu\n", vm_start - 1);
-		printk("vma : %lu => %lu-%lu\n", (unsigned long)overlap,
-				overlap->vm_start, overlap->vm_end);
-		printk("rbtree:\n");
-		*/
-		browse_rb(mm);
-#define CONFIG_DEBUG_MAPLE_TREE
-		//mt_dump(&mm->mm_mt);
-	}
 	__vma_link(mm, vma, prev, rb_link, rb_parent);
 	mm->map_count++;
 }
@@ -1028,14 +1011,12 @@ again:
 	if (!anon_vma && adjust_next)
 		anon_vma = next->anon_vma;
 	if (anon_vma) {
-		browse_rb(mm);
 		VM_WARN_ON(adjust_next && next->anon_vma &&
 			   anon_vma != next->anon_vma);
 		anon_vma_lock_write(anon_vma);
 		anon_vma_interval_tree_pre_update_vma(vma);
 		if (adjust_next)
 			anon_vma_interval_tree_pre_update_vma(next);
-		browse_rb(mm);
 	}
 
 	if (file) {
@@ -1375,7 +1356,6 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 	if (area && area->vm_end == end)		/* cases 6, 7, 8 */
 		next = next->vm_next;
 
-	//printk("%s: %lx: vma %lu-%lu\n", __func__, (unsigned long) current, addr, end);
 	/* verify some invariant that must be enforced by the caller */
 	VM_WARN_ON(prev && addr <= prev->vm_start);
 	VM_WARN_ON(area && end > area->vm_end);
@@ -2128,6 +2108,7 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+	unsigned long gap;
 	MA_STATE(mas, &mm->mm_mt, 0, 0);
 
 	/* Adjust search length to account for worst case alignment overhead */
@@ -2135,6 +2116,16 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 	if (length < info->length)
 		return -ENOMEM;
 
+	// Maple tree is self contained.
+	rcu_read_lock();
+	if (mas_get_unmapped_area(&mas, info->low_limit,
+				  info->high_limit - 1, length)) {
+		rcu_read_unlock();
+		return -ENOMEM;
+	}
+	rcu_read_unlock();
+	gap = mas.index;
+	gap += (info->align_offset - gap) & info->align_mask;
 	/* Adjust search limits by the desired length */
 	if (info->high_limit < length)
 		return -ENOMEM;
@@ -2144,12 +2135,6 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 		return -ENOMEM;
 	low_limit = info->low_limit + length;
 
-	// Maple tree is self contained.
-	rcu_read_lock();
-	if (mas_get_unmapped_area(&mas, low_limit, high_limit, length))
-		return -ENOMEM;
-	rcu_read_unlock();
-
 	/* Check if rbtree root looks promising */
 	if (RB_EMPTY_ROOT(&mm->mm_rb))
 		goto check_highest;
@@ -2223,7 +2208,7 @@ found:
 	VM_BUG_ON(gap_start + info->length > info->high_limit);
 	VM_BUG_ON(gap_start + info->length > gap_end);
 
-	//VM_BUG_ON(mas.index != gap_start);
+	VM_BUG_ON(gap != gap_start);
 	return gap_start;
 }
@@ -2232,13 +2217,22 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+	unsigned long gap;
 	MA_STATE(mas, &mm->mm_mt, 0, 0);
 
+	validate_mm_mt(mm, NULL);
+
 	/* Adjust search length to account for worst case alignment overhead */
 	length = info->length + info->align_mask;
 	if (length < info->length)
 		return -ENOMEM;
 
+	rcu_read_lock();
+	if (mas_get_unmapped_area_rev(&mas, info->low_limit,
+				      info->high_limit - 1, length)) {
+		rcu_read_unlock();
+		return -ENOMEM;
+	}
+	rcu_read_unlock();
+	gap = (mas.index & ~info->align_mask) + info->align_offset;
 	/*
	 * Adjust search limits by the desired length.
	 * See implementation comment at top of unmapped_area().
@@ -2252,12 +2246,6 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 		return -ENOMEM;
 	low_limit = info->low_limit + length;
 
-
-	rcu_read_lock();
-	if (mas_get_unmapped_area_rev(&mas, low_limit, high_limit, length))
-		return -ENOMEM;
-	rcu_read_unlock();
-
 	/* Check highest gap, which does not precede any rbtree node */
 	gap_start = mm->highest_vm_end;
 	if (gap_start <= high_limit)
@@ -2330,8 +2318,13 @@ found_highest:
 	VM_BUG_ON(gap_end < info->low_limit);
 	VM_BUG_ON(gap_end < gap_start);
-
-	VM_BUG_ON(mas.last + 1 != gap_end);
+
+	if (gap != gap_end) {
+		pr_err("Gap mismatch: mt %lu gap_end %lu\n", gap, gap_end);
+		mt_dump(&mm->mm_mt);
+	}
+	VM_BUG_ON(gap != gap_end);
+
 	return gap_end;
 }
@@ -2526,14 +2519,12 @@ struct vm_area_struct *mt_find_vma(struct mm_struct *mm, unsigned long addr)
 {
 	struct vm_area_struct *vma;
 
-	//printk("%s: looking up %lu\n", __func__, addr);
 	/* Check the cache first. */
 	vma = vmacache_find(mm, addr);
 	if (likely(vma))
 		return vma;
 
 	vma = mt_find(&mm->mm_mt, &addr, ULONG_MAX, 0);
-	//printk("Found %lu\n", (unsigned long)vma);
 
 	if (vma)
 		vmacache_update(addr, vma);
@@ -2575,10 +2566,13 @@ struct vm_area_struct *rb_find_vma(struct mm_struct *mm, unsigned long addr)
 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 {
 	struct vm_area_struct *ret = rb_find_vma(mm, addr);
-	struct vm_area_struct *ret2 = mt_find_vma(mm, addr);
-	if (ret != ret2)
-		printk("%px %lu: ret %px ret2 %px\n", mm, addr, ret, ret2);
-	VM_BUG_ON_VMA((unsigned long)ret != (unsigned long)ret2 , ret);
+	struct vm_area_struct *mt_ret = mt_find_vma(mm, addr);
+	if (ret != mt_ret) {
+		pr_err("Looking for %lu\n", addr);
+		mt_dump(&mm->mm_mt);
+		pr_err("%px %lu: ret %px mt_ret %px\n", mm, addr, ret, mt_ret);
+	}
+	VM_BUG_ON_VMA((unsigned long)ret != (unsigned long)mt_ret, ret);
 	return ret;
 }
@@ -2705,8 +2699,6 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 	if (!(vma->vm_flags & VM_GROWSUP))
 		return -EFAULT;
 
-	printk("%s: %lu-%lu expand to %lu-%lu\n", __func__, vma->vm_start, vma->vm_end,
-			vma->vm_start, address);
 	/* Guard against exceeding limits of the address space. */
 	address &= PAGE_MASK;
 	if (address >= (TASK_SIZE & PAGE_MASK))
 		return -ENOMEM;
@@ -2853,8 +2845,7 @@ int expand_downwards(struct vm_area_struct *vma,
 				vma->vm_start = address;
 				vma->vm_pgoff -= grow;
 				// Overwrite old entry in mtree.
-				mtree_store_range(&mm->mm_mt, vma->vm_start, vma->vm_end - 1,
-						vma, GFP_KERNEL);
+				__vma_mt_store(mm, vma);
 				anon_vma_interval_tree_post_update_vma(vma);
 				vma_gap_update(vma);
 				spin_unlock(&mm->page_table_lock);
@@ -3554,10 +3545,9 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 			   &prev, &rb_link, &rb_parent))
 		return -ENOMEM;
 
-	//printk("%s: insert %lx: vma %lu-%lu\n", __func__, (unsigned long) current, vma->vm_start, vma->vm_end);
 	if ((overlap = mt_find(&mm->mm_mt, &start, vma->vm_end - 1, true)) != NULL) {
-		printk("Found vma ending at %lu\n", start - 1);
-		printk("vma : %lu => %lu-%lu\n", (unsigned long)overlap,
+		pr_err("Found vma ending at %lu\n", start - 1);
+		pr_err("vma : %lu => %lu-%lu\n", (unsigned long)overlap,
 			overlap->vm_start, overlap->vm_end - 1);
 		mt_dump(&mm->mm_mt);
 		BUG();
-- 
2.50.1