From 1fa9377e57eb16d7fa579ea7f8eb832164d209ac Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett"
Date: Thu, 23 Oct 2025 14:10:38 -0400
Subject: [PATCH] mm/userfaultfd: Introduce userfaultfd ops and use it for
 destination validation

Extract the destination vma validation into separate functions for anon
vmas, shmem, and hugetlb.  Calling the validation through the
is_dst_valid uffd_ops function pointer abstracts the per-memory-type
checks behind a common interface.

Introduce a default operation that runs on any vma which does not have
a userfaultfd op set, so that the infrastructure can support anon vmas.

Signed-off-by: Liam R. Howlett
---
 include/linux/mm.h            | 11 +++++
 include/linux/userfaultfd_k.h |  6 +++
 mm/hugetlb.c                  | 28 +++++++++++
 mm/shmem.c                    | 24 +++++++++
 mm/userfaultfd.c              | 93 +++++++++++++++++-------------------
 5 files changed, 114 insertions(+), 48 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 41e04c00891b..183ac51dcef2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -620,6 +620,9 @@ struct vm_fault {
 					 */
 };
 
+/* userfaultfd ops defined in userfaultfd_k.h */
+struct vm_uffd_ops;
+
 /*
  * These are the virtual MM functions - opening of an area, closing and
  * unmapping it (needed to keep files on disk up-to-date etc), pointer
@@ -705,6 +708,14 @@ struct vm_operations_struct {
 	struct page *(*find_normal_page)(struct vm_area_struct *vma,
 					 unsigned long addr);
 #endif /* CONFIG_FIND_NORMAL_PAGE */
+#ifdef CONFIG_USERFAULTFD
+	/*
+	 * Userfaultfd operations.  Memory types need to define their own
+	 * operations to support different features.  anon vmas use the
+	 * default_uffd_ops.
+	 */
+	const struct vm_uffd_ops *userfaultfd_ops;
+#endif /* CONFIG_USERFAULTFD */
 };
 
 #ifdef CONFIG_NUMA_BALANCING
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index af2059864ef3..cefdae4d6c02 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -92,6 +92,12 @@ enum mfill_atomic_mode {
 	NR_MFILL_ATOMIC_MODES,
 };
 
+struct vm_uffd_ops {
+	/* Required features below */
+	ssize_t (*is_dst_valid)(struct vm_area_struct *dst_vma,
+				unsigned long dst_start, unsigned long len);
+};
+
 #define MFILL_ATOMIC_MODE_BITS	(const_ilog2(NR_MFILL_ATOMIC_MODES - 1) + 1)
 #define MFILL_ATOMIC_BIT(nr)	BIT(MFILL_ATOMIC_MODE_BITS + (nr))
 #define MFILL_ATOMIC_FLAG(nr)	((__force uffd_flags_t) MFILL_ATOMIC_BIT(nr))
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 86e672fcb305..d8c30f401615 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5503,6 +5503,14 @@ static vm_fault_t hugetlb_vm_op_fault(struct vm_fault *vmf)
 	return 0;
 }
 
+#ifdef CONFIG_USERFAULTFD
+static ssize_t hugetlb_is_dst_valid(struct vm_area_struct *dst_vma,
+		unsigned long dst_start, unsigned long len);
+static const struct vm_uffd_ops hugetlb_uffd_ops = {
+	.is_dst_valid = hugetlb_is_dst_valid,
+};
+#endif
+
 /*
  * When a new function is introduced to vm_operations_struct and added
  * to hugetlb_vm_ops, please consider adding the function to shm_vm_ops.
@@ -5516,6 +5524,9 @@ const struct vm_operations_struct hugetlb_vm_ops = {
 	.close = hugetlb_vm_op_close,
 	.may_split = hugetlb_vm_op_split,
 	.pagesize = hugetlb_vm_op_pagesize,
+#ifdef CONFIG_USERFAULTFD
+	.userfaultfd_ops = &hugetlb_uffd_ops,
+#endif /* CONFIG_USERFAULTFD */
 };
 
 static pte_t make_huge_pte(struct vm_area_struct *vma, struct folio *folio,
@@ -6910,6 +6921,23 @@ static struct folio *alloc_hugetlb_folio_vma(struct hstate *h,
 	return folio;
 }
 
+static ssize_t hugetlb_is_dst_valid(struct vm_area_struct *dst_vma,
+		unsigned long dst_start, unsigned long len)
+{
+	unsigned long vma_hpagesize;
+
+	vma_hpagesize = vma_kernel_pagesize(dst_vma);
+	if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
+		return -EINVAL;
+
+	if (!is_vm_hugetlb_page(dst_vma))
+		return -ENOENT;
+
+	if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
+		return -EINVAL;
+
+	return 0;
+}
 /*
  * Used by userfaultfd UFFDIO_* ioctls. Based on userfaultfd's mfill_atomic_pte
  * with modifications for hugetlb pages.
diff --git a/mm/shmem.c b/mm/shmem.c
index a602705c4aae..f407786b621c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3150,6 +3150,20 @@ static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
 #endif /* CONFIG_TMPFS_QUOTA */
 
 #ifdef CONFIG_USERFAULTFD
+static ssize_t shmem_is_dst_valid(struct vm_area_struct *dst_vma,
+		unsigned long dst_start, unsigned long len)
+{
+	/*
+	 * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
+	 * it will overwrite vm_ops, so vma_is_anonymous must return false.
+	 */
+	if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
+			 dst_vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	return 0;
+}
+
 int shmem_mfill_atomic_pte(struct vm_area_struct *dst_vma,
 			   unsigned long dst_addr,
 			   unsigned long src_addr,
@@ -5197,6 +5211,12 @@ static int shmem_error_remove_folio(struct address_space *mapping,
 	return 0;
 }
 
+#ifdef CONFIG_USERFAULTFD
+static const struct vm_uffd_ops shmem_uffd_ops = {
+	.is_dst_valid = shmem_is_dst_valid,
+};
+#endif
+
 static const struct address_space_operations shmem_aops = {
 	.dirty_folio = noop_dirty_folio,
 #ifdef CONFIG_TMPFS
@@ -5308,6 +5328,10 @@ static const struct vm_operations_struct shmem_anon_vm_ops = {
 	.set_policy = shmem_set_policy,
 	.get_policy = shmem_get_policy,
 #endif
+#ifdef CONFIG_USERFAULTFD
+	.userfaultfd_ops = &shmem_uffd_ops,
+#endif
+
 };
 
 int shmem_init_fs_context(struct fs_context *fc)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index e8341a45e7e8..e2570e72242e 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -495,6 +495,48 @@ static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
 	return pmd_alloc(mm, pud, address);
 }
 
+
+/* 0 for success, error code otherwise */
+static ssize_t uffd_def_is_dst_valid(struct vm_area_struct *dst_vma,
+		unsigned long dst_start, unsigned long len)
+{
+	return 0;
+}
+
+/* Anon vma ops */
+static const struct vm_uffd_ops default_uffd_ops = {
+	.is_dst_valid = uffd_def_is_dst_valid,
+};
+
+static inline const struct vm_uffd_ops *vma_get_uffd_ops(struct vm_area_struct *vma)
+{
+	if (vma->vm_ops && vma->vm_ops->userfaultfd_ops)
+		return vma->vm_ops->userfaultfd_ops;
+
+	return &default_uffd_ops;
+}
+
+static inline ssize_t
+uffd_ctx_lock_and_validate_dst(struct userfaultfd_ctx *ctx,
+		struct vm_area_struct *dst_vma, unsigned long dst_start,
+		unsigned long len)
+{
+	const struct vm_uffd_ops *uffd_ops;
+
+	/*
+	 * If memory mappings are changing because of non-cooperative
+	 * operation (e.g. mremap) running in parallel, bail out and
+	 * request the user to retry later
+	 */
+	down_read(&ctx->map_changing_lock);
+	if (atomic_read(&ctx->mmap_changing))
+		return -EAGAIN;
+
+	uffd_ops = vma_get_uffd_ops(dst_vma);
+	WARN_ON_ONCE(!uffd_ops || !uffd_ops->is_dst_valid);
+	return uffd_ops->is_dst_valid(dst_vma, dst_start, len);
+}
+
 #ifdef CONFIG_HUGETLB_PAGE
 /*
  * mfill_atomic processing for HUGETLB vmas. Note that this routine is
@@ -538,13 +580,6 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
 	folio = NULL;
 	vma_hpagesize = vma_kernel_pagesize(dst_vma);
 
-	/*
-	 * Validate alignment based on huge page size
-	 */
-	err = -EINVAL;
-	if (dst_start & (vma_hpagesize - 1) || len & (vma_hpagesize - 1))
-		goto out_unlock;
-
 retry:
 	/*
 	 * On routine entry dst_vma is set. If we had to drop mmap_lock and
@@ -557,22 +592,8 @@ retry:
 			goto out;
 		}
 
-		/*
-		 * If memory mappings are changing because of non-cooperative
-		 * operation (e.g. mremap) running in parallel, bail out and
-		 * request the user to retry later
-		 */
-		down_read(&ctx->map_changing_lock);
-		err = -EAGAIN;
-		if (atomic_read(&ctx->mmap_changing))
-			goto out_unlock;
-
-		err = -ENOENT;
-		if (!is_vm_hugetlb_page(dst_vma))
-			goto out_unlock;
-
-		err = -EINVAL;
-		if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
+		err = uffd_ctx_lock_and_validate_dst(ctx, dst_vma, dst_start, len);
+		if (err)
 			goto out_unlock;
 	}
 
@@ -665,30 +686,6 @@ extern ssize_t mfill_atomic_hugetlb(struct userfaultfd_ctx *ctx,
 				    uffd_flags_t flags);
 #endif /* CONFIG_HUGETLB_PAGE */
 
-static inline ssize_t
-uffd_ctx_lock_and_validate_dst(struct userfaultfd_ctx *ctx,
-		struct vm_area_struct *dst_vma)
-{
-	/*
-	 * If memory mappings are changing because of non-cooperative
-	 * operation (e.g. mremap) running in parallel, bail out and
-	 * request the user to retry later
-	 */
-	down_read(&ctx->map_changing_lock);
-	if (atomic_read(&ctx->mmap_changing))
-		return -EAGAIN;
-
-	/*
-	 * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but
-	 * it will overwrite vm_ops, so vma_is_anonymous must return false.
-	 */
-	if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) &&
-			 dst_vma->vm_flags & VM_SHARED))
-		return -EINVAL;
-
-	return 0;
-}
-
 static inline ssize_t uffd_failed_do_unlock(struct userfaultfd_ctx *ctx,
 		struct vm_area_struct *dst, struct folio *folio,
 		unsigned long src_addr)
@@ -779,7 +776,7 @@ retry:
 		goto out;
 	}
 
-	err = uffd_ctx_lock_and_validate_dst(ctx, dst_vma);
+	err = uffd_ctx_lock_and_validate_dst(ctx, dst_vma, dst_start, len);
 	if (err)
 		goto out_unlock;
 
-- 
2.51.0
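
For reviewers, a minimal sketch of how some other memory type could wire
up the new interface.  All examplefs_* names are hypothetical and not
part of this patch; the alignment check is an example policy only:

/* Illustrative only: a hypothetical memory type providing its own ops */
#ifdef CONFIG_USERFAULTFD
static ssize_t examplefs_is_dst_valid(struct vm_area_struct *dst_vma,
		unsigned long dst_start, unsigned long len)
{
	/* Example policy: reject ranges that are not page aligned */
	if (dst_start & ~PAGE_MASK || len & ~PAGE_MASK)
		return -EINVAL;

	return 0;
}

static const struct vm_uffd_ops examplefs_uffd_ops = {
	.is_dst_valid = examplefs_is_dst_valid,
};
#endif /* CONFIG_USERFAULTFD */

static const struct vm_operations_struct examplefs_vm_ops = {
	/* ... usual vm_ops (fault handlers etc.) ... */
#ifdef CONFIG_USERFAULTFD
	.userfaultfd_ops = &examplefs_uffd_ops,
#endif
};

Any vma whose vm_ops does not set userfaultfd_ops falls back to
default_uffd_ops through vma_get_uffd_ops(), which is how anon vmas are
covered without per-type code.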