From fc870f1882f92b62990b37db966db8ce1ffb8fc4 Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett"
Date: Thu, 23 Oct 2025 14:39:13 -0400
Subject: [PATCH] mm/userfaultfd: Add increment uffd_ops

Different memory types step across a vma in different size increments
(e.g. hugetlb uses vma_kernel_pagesize(dst_vma)).  Creating a uffd_ops
callback to return the increment size moves the memory types in the
direction of sharing the same looping code.

Each memory type gets its own uffd_ops->increment().

Signed-off-by: Liam R. Howlett
---
 include/linux/userfaultfd_k.h |  6 ++++++
 mm/hugetlb.c                  |  8 ++++++++
 mm/shmem.c                    |  1 +
 mm/userfaultfd.c              | 32 +++++++++++++++++++++++---------
 4 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index cefdae4d6c02..f65073ae8d1c 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -96,6 +96,7 @@ struct vm_uffd_ops {
 	/* Required features below */
 	ssize_t (*is_dst_valid)(struct vm_area_struct *dst_vma,
 				unsigned long dst_start, unsigned long len);
+	unsigned long (*increment)(struct vm_area_struct *vma);
 };
 
 #define MFILL_ATOMIC_MODE_BITS (const_ilog2(NR_MFILL_ATOMIC_MODES - 1) + 1)
@@ -214,6 +215,11 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
 	return vma->vm_flags & __VM_UFFD_FLAGS;
 }
 
+static inline unsigned long mfill_size(struct vm_area_struct *vma)
+{
+	return PAGE_SIZE;
+}
+
 static inline bool vma_can_userfault(struct vm_area_struct *vma,
 				     vm_flags_t vm_flags,
 				     bool wp_async)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d8c30f401615..f27ce58bf6cc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5504,10 +5504,18 @@ static vm_fault_t hugetlb_vm_op_fault(struct vm_fault *vmf)
 }
 
 #ifdef CONFIG_USERFAULTFD
+
+static inline unsigned long hugetlb_mfill_size(struct vm_area_struct *vma)
+{
+	return vma_kernel_pagesize(vma);
+}
+
 static ssize_t hugetlb_is_dst_valid(struct vm_area_struct *dst_vma,
 		unsigned long dst_start, unsigned long len);
+
 static const struct vm_uffd_ops hugetlb_uffd_ops = {
 	.is_dst_valid = hugetlb_is_dst_valid,
+	.increment = hugetlb_mfill_size,
 };
 #endif
 
diff --git a/mm/shmem.c b/mm/shmem.c
index f407786b621c..a08311bebfa3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -5214,6 +5214,7 @@ static int shmem_error_remove_folio(struct address_space *mapping,
 
 #ifdef CONFIG_USERFAULTFD
 static const struct vm_uffd_ops shmem_uffd_ops = {
 	.is_dst_valid = shmem_is_dst_valid,
+	.increment = mfill_size,
 };
 #endif
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index e2570e72242e..cf1722a62ae2 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -506,6 +506,7 @@ static ssize_t uffd_def_is_dst_valid(struct vm_area_struct *dst_vma,
 /* Anon vma ops */
 static const struct vm_uffd_ops default_uffd_ops = {
 	.is_dst_valid = uffd_def_is_dst_valid,
+	.increment = mfill_size,
 };
 
 static inline const struct vm_uffd_ops *vma_get_uffd_ops(struct vm_area_struct *vma)
@@ -557,10 +558,11 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
 	unsigned long src_addr, dst_addr;
 	long copied;
 	struct folio *folio;
-	unsigned long vma_hpagesize;
 	pgoff_t idx;
 	u32 hash;
 	struct address_space *mapping;
+	const struct vm_uffd_ops *uffd_ops;
+	unsigned long increment;
 
 	/*
 	 * There is no default zero huge page for all huge page sizes as
@@ -578,7 +580,6 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
 	dst_addr = dst_start;
 	copied = 0;
 	folio = NULL;
-	vma_hpagesize = vma_kernel_pagesize(dst_vma);
 
 retry:
 	/*
@@ -597,6 +598,10 @@ retry:
 		goto out_unlock;
 	}
 
+	uffd_ops = vma_get_uffd_ops(dst_vma);
+	increment = vma_kernel_pagesize(dst_vma);
+	if (uffd_ops->increment)
+		increment = uffd_ops->increment(dst_vma);
 	while (src_addr < src_start + len) {
 		VM_WARN_ON_ONCE(dst_addr >= dst_start + len);
 
@@ -613,7 +618,7 @@ retry:
 		hugetlb_vma_lock_read(dst_vma);
 
 		err = -ENOMEM;
-		dst_pte = huge_pte_alloc(dst_mm, dst_vma, dst_addr, vma_hpagesize);
+		dst_pte = huge_pte_alloc(dst_mm, dst_vma, dst_addr, increment);
 		if (!dst_pte) {
 			hugetlb_vma_unlock_read(dst_vma);
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -654,9 +659,9 @@ retry:
 		VM_WARN_ON_ONCE(folio);
 
 		if (!err) {
-			dst_addr += vma_hpagesize;
-			src_addr += vma_hpagesize;
-			copied += vma_hpagesize;
+			dst_addr += increment;
+			src_addr += increment;
+			copied += increment;
 
 			if (fatal_signal_pending(current))
 				err = -EINTR;
@@ -750,6 +755,8 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
 	unsigned long src_addr, dst_addr;
 	long copied;
 	struct folio *folio;
+	const struct vm_uffd_ops *uffd_ops;
+	unsigned long increment;
 
 	/*
 	 * Sanitize the command parameters:
@@ -765,6 +772,7 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
 	dst_addr = dst_start;
 	copied = 0;
 	folio = NULL;
+
 retry:
 	/*
 	 * Make sure the vma is not shared, that the dst range is
@@ -775,6 +783,12 @@ retry:
 		err = PTR_ERR(dst_vma);
 		goto out;
 	}
+	uffd_ops = vma_get_uffd_ops(dst_vma);
+	if (WARN_ON_ONCE(!uffd_ops->increment))
+		increment = PAGE_SIZE;
+	else
+		increment = uffd_ops->increment(dst_vma);
+
 	err = uffd_ctx_lock_and_validate_dst(ctx, dst_vma, dst_start, len);
 	if (err)
 		goto out_unlock;
@@ -859,9 +873,9 @@ retry:
 		}
 
 		if (!err) {
-			dst_addr += PAGE_SIZE;
-			src_addr += PAGE_SIZE;
-			copied += PAGE_SIZE;
+			dst_addr += increment;
+			src_addr += increment;
+			copied += increment;
 
 			if (fatal_signal_pending(current))
 				err = -EINTR;
-- 
2.51.0
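
For a new memory type built on top of this, wiring up the callback
would look something like the sketch below (illustrative only; the
example_* names are hypothetical, and a type that steps in PAGE_SIZE
units can simply reuse mfill_size() as shmem and the anon default do):

	/* Hypothetical memory type stepping in PAGE_SIZE units. */
	static unsigned long example_mfill_size(struct vm_area_struct *vma)
	{
		return PAGE_SIZE;
	}

	static const struct vm_uffd_ops example_uffd_ops = {
		.is_dst_valid = example_is_dst_valid,	/* required */
		.increment = example_mfill_size,	/* step per iteration */
	};

The mfill loops in mm/userfaultfd.c then advance dst_addr, src_addr and
copied by ->increment(dst_vma) on each iteration rather than by a
hard-coded step size.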