mm/userfaultfd: don't lock anon_vma when performing UFFDIO_MOVE

author Lokesh Gidra <lokeshgidra@google.com>

Tue, 23 Sep 2025 07:10:19 +0000 (00:10 -0700)

committer Andrew Morton <akpm@linux-foundation.org>

Wed, 15 Oct 2025 04:28:49 +0000 (21:28 -0700)
author Lokesh Gidra <lokeshgidra@google.com>
Tue, 23 Sep 2025 07:10:19 +0000 (00:10 -0700)
committer Andrew Morton <akpm@linux-foundation.org>
Wed, 15 Oct 2025 04:28:49 +0000 (21:28 -0700)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index 2764613a9b3dc03fda7b004d27c1fba18c487c2e..068e20711c4363cefb15c6aa381eeb4e1eab91ad 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2615,7 +2615,6 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
         pmd_t _dst_pmd, src_pmdval;
         struct page *src_page;
         struct folio *src_folio;
-       struct anon_vma *src_anon_vma;
         spinlock_t *src_ptl, *dst_ptl;
         pgtable_t src_pgtable;
         struct mmu_notifier_range range;
@@ -2664,23 +2663,9 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
                                 src_addr + HPAGE_PMD_SIZE);
         mmu_notifier_invalidate_range_start(&range);
  
-       if (src_folio) {
+       if (src_folio)
                 folio_lock(src_folio);
  
-               /*
-                * split_huge_page walks the anon_vma chain without the page
-                * lock. Serialize against it with the anon_vma lock, the page
-                * lock is not enough.
-                */
-               src_anon_vma = folio_get_anon_vma(src_folio);
-               if (!src_anon_vma) {
-                       err = -EAGAIN;
-                       goto unlock_folio;
-               }
-               anon_vma_lock_write(src_anon_vma);
-       } else
-               src_anon_vma = NULL;
-
         dst_ptl = pmd_lockptr(mm, dst_pmd);
         double_pt_lock(src_ptl, dst_ptl);
         if (unlikely(!pmd_same(*src_pmd, src_pmdval) ||
@@ -2725,11 +2710,6 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
         pgtable_trans_huge_deposit(mm, dst_pmd, src_pgtable);
  unlock_ptls:
         double_pt_unlock(src_ptl, dst_ptl);
-       if (src_anon_vma) {
-               anon_vma_unlock_write(src_anon_vma);
-               put_anon_vma(src_anon_vma);
-       }
-unlock_folio:
         /* unblock rmap walks */
         if (src_folio)
                 folio_unlock(src_folio);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c

index 0630f188c847c3d34b3680d81f18fdab4bcb0e80..00122f42718ccce265e5c78017ae1ddffa89c06a 100644 (file)
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1035,8 +1035,7 @@ static inline bool is_pte_pages_stable(pte_t *dst_pte, pte_t *src_pte,
   */
  static struct folio *check_ptes_for_batched_move(struct vm_area_struct *src_vma,
                                                  unsigned long src_addr,
-                                                pte_t *src_pte, pte_t *dst_pte,
-                                                struct anon_vma *src_anon_vma)
+                                                pte_t *src_pte, pte_t *dst_pte)
  {
         pte_t orig_dst_pte, orig_src_pte;
         struct folio *folio;
@@ -1052,8 +1051,7 @@ static struct folio *check_ptes_for_batched_move(struct vm_area_struct *src_vma,
         folio = vm_normal_folio(src_vma, src_addr, orig_src_pte);
         if (!folio || !folio_trylock(folio))
                 return NULL;
-       if (!PageAnonExclusive(&folio->page) || folio_test_large(folio) ||
-           folio_anon_vma(folio) != src_anon_vma) {
+       if (!PageAnonExclusive(&folio->page) || folio_test_large(folio)) {
                 folio_unlock(folio);
                 return NULL;
         }
@@ -1061,9 +1059,8 @@ static struct folio *check_ptes_for_batched_move(struct vm_area_struct *src_vma,
  }
  
  /*
- * Moves src folios to dst in a batch as long as they share the same
- * anon_vma as the first folio, are not large, and can successfully
- * take the lock via folio_trylock().
+ * Moves src folios to dst in a batch as long as they are not large, and can
+ * successfully take the lock via folio_trylock().
   */
  static long move_present_ptes(struct mm_struct *mm,
                               struct vm_area_struct *dst_vma,
@@ -1073,8 +1070,7 @@ static long move_present_ptes(struct mm_struct *mm,
                               pte_t orig_dst_pte, pte_t orig_src_pte,
                               pmd_t *dst_pmd, pmd_t dst_pmdval,
                               spinlock_t *dst_ptl, spinlock_t *src_ptl,
-                             struct folio **first_src_folio, unsigned long len,
-                             struct anon_vma *src_anon_vma)
+                             struct folio **first_src_folio, unsigned long len)
  {
         int err = 0;
         struct folio *src_folio = *first_src_folio;
@@ -1132,8 +1128,8 @@ static long move_present_ptes(struct mm_struct *mm,
                 src_pte++;
  
                 folio_unlock(src_folio);
-               src_folio = check_ptes_for_batched_move(src_vma, src_addr, src_pte,
-                                                       dst_pte, src_anon_vma);
+               src_folio = check_ptes_for_batched_move(src_vma, src_addr,
+                                                       src_pte, dst_pte);
                 if (!src_folio)
                         break;
         }
@@ -1263,7 +1259,6 @@ static long move_pages_ptes(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd
         pmd_t dummy_pmdval;
         pmd_t dst_pmdval;
         struct folio *src_folio = NULL;
-       struct anon_vma *src_anon_vma = NULL;
         struct mmu_notifier_range range;
         long ret = 0;
  
@@ -1347,9 +1342,9 @@ retry:
                 }
  
                 /*
-                * Pin and lock both source folio and anon_vma. Since we are in
-                * RCU read section, we can't block, so on contention have to
-                * unmap the ptes, obtain the lock and retry.
+                * Pin and lock source folio. Since we are in RCU read section,
+                * we can't block, so on contention have to unmap the ptes,
+                * obtain the lock and retry.
                  */
                 if (!src_folio) {
                         struct folio *folio;
@@ -1423,33 +1418,11 @@ retry:
                         goto retry;
                 }
  
-               if (!src_anon_vma) {
-                       /*
-                        * folio_referenced walks the anon_vma chain
-                        * without the folio lock. Serialize against it with
-                        * the anon_vma lock, the folio lock is not enough.
-                        */
-                       src_anon_vma = folio_get_anon_vma(src_folio);
-                       if (!src_anon_vma) {
-                               /* page was unmapped from under us */
-                               ret = -EAGAIN;
-                               goto out;
-                       }
-                       if (!anon_vma_trylock_write(src_anon_vma)) {
-                               pte_unmap(src_pte);
-                               pte_unmap(dst_pte);
-                               src_pte = dst_pte = NULL;
-                               /* now we can block and wait */
-                               anon_vma_lock_write(src_anon_vma);
-                               goto retry;
-                       }
-               }
-
                 ret = move_present_ptes(mm, dst_vma, src_vma,
                                         dst_addr, src_addr, dst_pte, src_pte,
                                         orig_dst_pte, orig_src_pte, dst_pmd,
                                         dst_pmdval, dst_ptl, src_ptl, &src_folio,
-                                       len, src_anon_vma);
+                                       len);
         } else {
                 struct folio *folio = NULL;
  
@@ -1515,10 +1488,6 @@ retry:
         }
  
  out:
-       if (src_anon_vma) {
-               anon_vma_unlock_write(src_anon_vma);
-               put_anon_vma(src_anon_vma);
-       }
         if (src_folio) {
                 folio_unlock(src_folio);
                 folio_put(src_folio);
@@ -1792,15 +1761,6 @@ static void uffd_move_unlock(struct vm_area_struct *dst_vma,
   * virtual regions without knowing if there are transparent hugepage
   * in the regions or not, but preventing the risk of having to split
   * the hugepmd during the remap.
- *
- * If there's any rmap walk that is taking the anon_vma locks without
- * first obtaining the folio lock (the only current instance is
- * folio_referenced), they will have to verify if the folio->mapping
- * has changed after taking the anon_vma lock. If it changed they
- * should release the lock and retry obtaining a new anon_vma, because
- * it means the anon_vma was changed by move_pages() before the lock
- * could be obtained. This is the only additional complexity added to
- * the rmap code to provide this anonymous page remapping functionality.
   */
  ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
                    unsigned long src_start, unsigned long len, __u64 mode)
author	Lokesh Gidra <lokeshgidra@google.com>
	Tue, 23 Sep 2025 07:10:19 +0000 (00:10 -0700)
committer	Andrew Morton <akpm@linux-foundation.org>
	Wed, 15 Oct 2025 04:28:49 +0000 (21:28 -0700)
mm/huge_memory.c		patch | blob | history
mm/userfaultfd.c		patch | blob | history