From: Liam R. Howlett
Date: Thu, 23 Oct 2025 17:51:06 +0000 (-0400)
Subject: mm/userfaultfd: Inline mfill_atomic_pte() and move pmd internal to callers
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=2166e91882eb195677717ac2f8fbfc58171196ce;p=users%2Fjedix%2Flinux-maple.git

mm/userfaultfd: Inline mfill_atomic_pte() and move pmd internal to callers

mfill_atomic_pte() does nothing more than decode the requested mode and
memory type and dispatch to the matching helper.  Now that mfill_atomic()
is smaller and less complex, that dispatch can be inlined into its only
caller so the logic for each case is visible in one place.  Each helper
now resolves its own destination pmd through uffd_get_dst_pmd(), which
loses its static qualifier so that shmem_mfill_atomic_pte() can call it
as well.

Signed-off-by: Liam R. Howlett
---
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 5b368f9549d6..89f1ace2dd18 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -198,14 +198,13 @@ extern void shmem_uncharge(struct inode *inode, long pages);
 
 #ifdef CONFIG_USERFAULTFD
 #ifdef CONFIG_SHMEM
-extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
-				  struct vm_area_struct *dst_vma,
+extern int shmem_mfill_atomic_pte(struct vm_area_struct *dst_vma,
 				  unsigned long dst_addr,
 				  unsigned long src_addr,
 				  uffd_flags_t flags,
 				  struct folio **foliop);
 #else /* !CONFIG_SHMEM */
-#define shmem_mfill_atomic_pte(dst_pmd, dst_vma, dst_addr, \
+#define shmem_mfill_atomic_pte(dst_vma, dst_addr, \
 			       src_addr, flags, foliop) ({ BUG(); 0; })
 #endif /* CONFIG_SHMEM */
 #endif /* CONFIG_USERFAULTFD */
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index c0e716aec26a..af2059864ef3 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -243,6 +243,9 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
 	    vma_is_shmem(vma);
 }
 
+ssize_t uffd_get_dst_pmd(struct vm_area_struct *dst_vma, unsigned long dst_addr,
+		pmd_t **dst_pmd);
+
 static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
 {
 	struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx;
diff --git a/mm/shmem.c b/mm/shmem.c
index eb8161136a7f..a602705c4aae 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3150,8 +3150,7 @@ static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
 #endif /* CONFIG_TMPFS_QUOTA */
 
 #ifdef CONFIG_USERFAULTFD
-int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
-			   struct vm_area_struct *dst_vma,
+int shmem_mfill_atomic_pte(struct vm_area_struct *dst_vma,
 			   unsigned long dst_addr,
 			   unsigned long src_addr,
 			   uffd_flags_t flags,
@@ -3166,6 +3165,11 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
 	struct folio *folio;
 	int ret;
 	pgoff_t max_off;
+	pmd_t *dst_pmd;
+
+	ret = uffd_get_dst_pmd(dst_vma, dst_addr, &dst_pmd);
+	if (ret)
+		return ret;
 
 	if (shmem_inode_acct_blocks(inode, 1)) {
 		/*
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 2a81b4982f74..e8341a45e7e8 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -234,8 +234,7 @@ out:
 	return ret;
 }
 
-static int mfill_atomic_pte_copy(pmd_t *dst_pmd,
-				 struct vm_area_struct *dst_vma,
+static int mfill_atomic_pte_copy(struct vm_area_struct *dst_vma,
 				 unsigned long dst_addr,
 				 unsigned long src_addr,
 				 uffd_flags_t flags,
@@ -244,6 +243,11 @@ static int mfill_atomic_pte_copy(pmd_t *dst_pmd,
 	void *kaddr;
 	int ret;
 	struct folio *folio;
+	pmd_t *dst_pmd;
+
+	ret = uffd_get_dst_pmd(dst_vma, dst_addr, &dst_pmd);
+	if (ret)
+		return ret;
 
 	if (!*foliop) {
 		ret = -ENOMEM;
@@ -342,14 +346,18 @@ out_put:
 	return ret;
 }
 
-static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd,
-				     struct vm_area_struct *dst_vma,
+static int mfill_atomic_pte_zeropage(struct vm_area_struct *dst_vma,
 				     unsigned long dst_addr)
 {
 	pte_t _dst_pte, *dst_pte;
+	pmd_t *dst_pmd;
 	spinlock_t *ptl;
 	int ret;
 
+	ret = uffd_get_dst_pmd(dst_vma, dst_addr, &dst_pmd);
+	if (ret)
+		return ret;
+
 	if (mm_forbids_zeropage(dst_vma->vm_mm))
 		return mfill_atomic_pte_zeroed_folio(dst_pmd, dst_vma, dst_addr);
 
@@ -377,17 +385,21 @@ out:
 }
 
 /* Handles UFFDIO_CONTINUE for all shmem VMAs (shared or private). */
-static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
-				     struct vm_area_struct *dst_vma,
+static int mfill_atomic_pte_continue(struct vm_area_struct *dst_vma,
 				     unsigned long dst_addr,
 				     uffd_flags_t flags)
 {
 	struct inode *inode = file_inode(dst_vma->vm_file);
 	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
+	pmd_t *dst_pmd;
 	struct folio *folio;
 	struct page *page;
 	int ret;
 
+	ret = uffd_get_dst_pmd(dst_vma, dst_addr, &dst_pmd);
+	if (ret)
+		return ret;
+
 	ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC);
 	/* Our caller expects us to return -EFAULT if we failed to find folio */
 	if (ret == -ENOENT)
@@ -421,16 +433,20 @@ out_release:
 }
 
 /* Handles UFFDIO_POISON for all non-hugetlb VMAs. */
-static int mfill_atomic_pte_poison(pmd_t *dst_pmd,
-				   struct vm_area_struct *dst_vma,
+static int mfill_atomic_pte_poison(struct vm_area_struct *dst_vma,
 				   unsigned long dst_addr,
 				   uffd_flags_t flags)
 {
 	int ret;
 	struct mm_struct *dst_mm = dst_vma->vm_mm;
+	pmd_t *dst_pmd;
 	pte_t _dst_pte, *dst_pte;
 	spinlock_t *ptl;
 
+	ret = uffd_get_dst_pmd(dst_vma, dst_addr, &dst_pmd);
+	if (ret)
+		return ret;
+
 	_dst_pte = make_pte_marker(PTE_MARKER_POISONED);
 	ret = -EAGAIN;
 	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
@@ -649,51 +665,6 @@ extern ssize_t mfill_atomic_hugetlb(struct userfaultfd_ctx *ctx,
 					uffd_flags_t flags);
 #endif /* CONFIG_HUGETLB_PAGE */
 
-static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd,
-						struct vm_area_struct *dst_vma,
-						unsigned long dst_addr,
-						unsigned long src_addr,
-						uffd_flags_t flags,
-						struct folio **foliop)
-{
-	ssize_t err;
-
-	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) {
-		return mfill_atomic_pte_continue(dst_pmd, dst_vma,
-						 dst_addr, flags);
-	} else if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) {
-		return mfill_atomic_pte_poison(dst_pmd, dst_vma,
-					       dst_addr, flags);
-	}
-
-	/*
-	 * The normal page fault path for a shmem will invoke the
-	 * fault, fill the hole in the file and COW it right away. The
-	 * result generates plain anonymous memory. So when we are
-	 * asked to fill an hole in a MAP_PRIVATE shmem mapping, we'll
-	 * generate anonymous memory directly without actually filling
-	 * the hole. For the MAP_PRIVATE case the robustness check
-	 * only happens in the pagetable (to verify it's still none)
-	 * and not in the radix tree.
-	 */
-	if (!(dst_vma->vm_flags & VM_SHARED)) {
-		if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY))
-			err = mfill_atomic_pte_copy(dst_pmd, dst_vma,
-						    dst_addr, src_addr,
-						    flags, foliop);
-		else
-			err = mfill_atomic_pte_zeropage(dst_pmd,
-							dst_vma, dst_addr);
-	} else {
-		err = shmem_mfill_atomic_pte(dst_pmd, dst_vma,
-					     dst_addr, src_addr,
-					     flags, foliop);
-	}
-
-	return err;
-}
-
-
 static inline ssize_t
 uffd_ctx_lock_and_validate_dst(struct userfaultfd_ctx *ctx,
 			       struct vm_area_struct *dst_vma)
@@ -739,19 +710,19 @@ uffd_failed_do_unlock(struct userfaultfd_ctx *ctx, struct vm_area_struct *dst,
 	return 0;
 }
 
-static inline ssize_t
-uffd_get_dst_pmd(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma,
-		 unsigned long dst_addr, pmd_t **dst_pmd)
+ssize_t
+uffd_get_dst_pmd(struct vm_area_struct *dst_vma, unsigned long dst_addr,
+		 pmd_t **dst_pmd)
 {
 	pmd_t dst_pmdval;
 
-	*dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
+	*dst_pmd = mm_alloc_pmd(dst_vma->vm_mm, dst_addr);
 	if (unlikely(!*dst_pmd))
 		return -ENOMEM;
 
 	dst_pmdval = pmdp_get_lockless(*dst_pmd);
 	if (unlikely(pmd_none(dst_pmdval)) &&
-	    unlikely(__pte_alloc(dst_mm, *dst_pmd)))
+	    unlikely(__pte_alloc(dst_vma->vm_mm, *dst_pmd)))
 		return -ENOMEM;
 
 	dst_pmdval = pmdp_get_lockless(*dst_pmd);
@@ -779,7 +750,6 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
 	struct mm_struct *dst_mm = ctx->mm;
 	struct vm_area_struct *dst_vma;
 	ssize_t err;
-	pmd_t *dst_pmd;
 	unsigned long src_addr, dst_addr;
 	long copied;
 	struct folio *folio;
@@ -845,17 +815,39 @@ retry:
 	while (src_addr < src_start + len) {
 		VM_WARN_ON_ONCE(dst_addr >= dst_start + len);
 
-		err = uffd_get_dst_pmd(dst_mm, dst_vma, dst_addr, &dst_pmd);
-		if (err)
-			break;
-
 		/*
 		 * For shmem mappings, khugepaged is allowed to remove page
 		 * tables under us; pte_offset_map_lock() will deal with that.
 		 */
+		if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) {
+			err = mfill_atomic_pte_continue(dst_vma, dst_addr,
+							flags);
+		} else if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) {
+			err = mfill_atomic_pte_poison(dst_vma, dst_addr, flags);
+		} else if (!(dst_vma->vm_flags & VM_SHARED)) {
+			/*
+			 * The normal page fault path for a shmem will invoke
+			 * the fault, fill the hole in the file and COW it right
+			 * away. The result generates plain anonymous memory. So
+			 * when we are asked to fill an hole in a MAP_PRIVATE
+			 * shmem mapping, we'll generate anonymous memory
+			 * directly without actually filling the hole. For the
+			 * MAP_PRIVATE case the robustness check only happens in
+			 * the pagetable (to verify it's still none) and not in
+			 * the radix tree.
+			 */
+			if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY))
+				err = mfill_atomic_pte_copy(dst_vma, dst_addr,
+							    src_addr, flags,
+							    &folio);
+			else
+				err = mfill_atomic_pte_zeropage(dst_vma,
+								dst_addr);
+		} else {
+			err = shmem_mfill_atomic_pte(dst_vma, dst_addr,
+						     src_addr, flags, &folio);
+		}
 
-		err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr,
-				       src_addr, flags, &folio);
 		cond_resched();
 
 		if (unlikely(err == -ENOENT)) {
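For reference, the per-address dispatch that this patch moves into mfill_atomic()
boils down to the condensed sketch below (taken from the final mm/userfaultfd.c
hunk above; the surrounding locking, the -ENOENT retry path, the copied/err
accounting and the address advancement are omitted, and the local variables all
belong to mfill_atomic() itself):

	while (src_addr < src_start + len) {
		if (uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) {
			/* UFFDIO_CONTINUE: map the folio already in the page cache */
			err = mfill_atomic_pte_continue(dst_vma, dst_addr, flags);
		} else if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) {
			/* UFFDIO_POISON: install a poisoned PTE marker */
			err = mfill_atomic_pte_poison(dst_vma, dst_addr, flags);
		} else if (!(dst_vma->vm_flags & VM_SHARED)) {
			/* MAP_PRIVATE: fill with plain anonymous memory */
			if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY))
				err = mfill_atomic_pte_copy(dst_vma, dst_addr,
							    src_addr, flags, &folio);
			else
				err = mfill_atomic_pte_zeropage(dst_vma, dst_addr);
		} else {
			/* Shared shmem mapping: fill the backing page cache */
			err = shmem_mfill_atomic_pte(dst_vma, dst_addr,
						     src_addr, flags, &folio);
		}
		cond_resched();
		/* ... error handling and loop advancement as in the hunk above ... */
	}

Every helper reached from this dispatch now begins by resolving its own
destination pmd via uffd_get_dst_pmd(dst_vma, dst_addr, &dst_pmd), so the pmd no
longer has to be threaded through the call chain.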