From: Peter Xu Date: Thu, 14 Apr 2022 19:16:50 +0000 (-0700) Subject: mm/shmem: handle uffd-wp during fork() X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=36f142b098a96bb9a82c241b033a06ad8686183a;p=users%2Fjedix%2Flinux-maple.git mm/shmem: handle uffd-wp during fork() Normally we skip copy page when fork() for VM_SHARED shmem, but we can't skip it anymore if uffd-wp is enabled on dst vma. This should only happen when the src uffd has UFFD_FEATURE_EVENT_FORK enabled on uffd-wp shmem vma, so that VM_UFFD_WP will be propagated onto dst vma too, then we should copy the pgtables with uffd-wp bit and pte markers, because these information will be lost otherwise. Since the condition checks will become even more complicated for deciding "whether a vma needs to copy the pgtable during fork()", introduce a helper vma_needs_copy() for it, so everything will be clearer. Link: https://lkml.kernel.org/r/20220405014855.14468-1-peterx@redhat.com Signed-off-by: Peter Xu Cc: Alistair Popple Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jerome Glisse Cc: "Kirill A . Shutemov" Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Mike Rapoport Cc: Nadav Amit Signed-off-by: Andrew Morton --- diff --git a/mm/memory.c b/mm/memory.c index 09f7e79847b8..1df7fda07c86 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -876,6 +876,14 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (try_restore_exclusive_pte(src_pte, src_vma, addr)) return -EBUSY; return -ENOENT; + } else if (is_pte_marker_entry(entry)) { + /* + * We're copying the pgtable should only because dst_vma has + * uffd-wp enabled, do sanity check. + */ + WARN_ON_ONCE(!userfaultfd_wp(dst_vma)); + set_pte_at(dst_mm, addr, dst_pte, pte); + return 0; } if (!userfaultfd_wp(dst_vma)) pte = pte_swp_clear_uffd_wp(pte); @@ -1230,6 +1238,38 @@ copy_p4d_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, return 0; } +/* + * Return true if the vma needs to copy the pgtable during this fork(). Return + * false when we can speed up fork() by allowing lazy page faults later until + * when the child accesses the memory range. + */ +bool +vma_needs_copy(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) +{ + /* + * Always copy pgtables when dst_vma has uffd-wp enabled even if it's + * file-backed (e.g. shmem). Because when uffd-wp is enabled, pgtable + * contains uffd-wp protection information, that's something we can't + * retrieve from page cache, and skip copying will lose those info. + */ + if (userfaultfd_wp(dst_vma)) + return true; + + if (src_vma->vm_flags & (VM_HUGETLB | VM_PFNMAP | VM_MIXEDMAP)) + return true; + + if (src_vma->anon_vma) + return true; + + /* + * Don't copy ptes where a page fault will fill them correctly. Fork + * becomes much lighter when there are big shared or private readonly + * mappings. The tradeoff is that copy_page_range is more efficient + * than faulting. + */ + return false; +} + int copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) { @@ -1243,14 +1283,7 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) bool is_cow; int ret; - /* - * Don't copy ptes where a page fault will fill them correctly. - * Fork becomes much lighter when there are big shared or private - * readonly mappings. The tradeoff is that copy_page_range is more - * efficient than faulting. - */ - if (!(src_vma->vm_flags & (VM_HUGETLB | VM_PFNMAP | VM_MIXEDMAP)) && - !src_vma->anon_vma) + if (!vma_needs_copy(dst_vma, src_vma)) return 0; if (is_vm_hugetlb_page(src_vma))