mm/rmap: fix soft-dirty and uffd-wp bit loss when remapping zero-filled mTHP subpage...
author     Lance Yang <lance.yang@linux.dev>
           Tue, 30 Sep 2025 08:10:40 +0000 (16:10 +0800)
committer  Andrew Morton <akpm@linux-foundation.org>
           Tue, 7 Oct 2025 21:01:11 +0000 (14:01 -0700)
When splitting an mTHP and replacing a zero-filled subpage with the shared
zeropage, try_to_map_unused_to_zeropage() currently drops several
important PTE bits.

For userspace tools like CRIU, which rely on the soft-dirty mechanism for
incremental snapshots, losing the soft-dirty bit means modified pages are
missed, leading to inconsistent memory state after restore.
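
As a reference only (not part of this patch), a minimal userspace sketch of
how such a tool consumes the soft-dirty bit via /proc/self/clear_refs and
bit 55 of a /proc/self/pagemap entry; the helper names are illustrative and
error handling is trimmed:

    #include <fcntl.h>
    #include <stdint.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Returns 1 if the page containing @addr was written since the last clear. */
    static int page_is_soft_dirty(void *addr)
    {
            uint64_t entry;
            long psize = sysconf(_SC_PAGESIZE);
            off_t off = ((uintptr_t)addr / psize) * sizeof(entry);
            int fd = open("/proc/self/pagemap", O_RDONLY);

            if (fd < 0)
                    return -1;
            if (pread(fd, &entry, sizeof(entry), off) != sizeof(entry)) {
                    close(fd);
                    return -1;
            }
            close(fd);
            return (int)((entry >> 55) & 1);        /* bit 55: soft-dirty */
    }

    static void clear_soft_dirty(void)
    {
            int fd = open("/proc/self/clear_refs", O_WRONLY);

            if (fd < 0)
                    return;
            write(fd, "4", 1);      /* "4" clears soft-dirty for the whole mm */
            close(fd);
    }

If the kernel silently drops the soft-dirty bit, page_is_soft_dirty() above
reports 0 for a page that was in fact modified between snapshots.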

As pointed out by David, the more critical uffd-wp bit is also dropped.
This breaks the userfaultfd write-protection mechanism, causing writes to
be silently missed by monitoring applications, which can lead to data
corruption.
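
For context, a rough userspace sketch (again, not part of this patch) of the
userfaultfd write-protect setup whose notifications go missing when the
uffd-wp bit is lost; the function name and range are placeholders and
error handling is trimmed:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Arm write-protect tracking on [addr, addr + len) and return the uffd. */
    static int wp_protect_range(void *addr, unsigned long len)
    {
            struct uffdio_api api = { .api = UFFD_API };
            struct uffdio_register reg = {
                    .range = { .start = (unsigned long)addr, .len = len },
                    .mode  = UFFDIO_REGISTER_MODE_WP,
            };
            struct uffdio_writeprotect wp = {
                    .range = { .start = (unsigned long)addr, .len = len },
                    .mode  = UFFDIO_WRITEPROTECT_MODE_WP,
            };
            int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

            if (uffd < 0)
                    return -1;
            if (ioctl(uffd, UFFDIO_API, &api) ||
                ioctl(uffd, UFFDIO_REGISTER, &reg) ||
                ioctl(uffd, UFFDIO_WRITEPROTECT, &wp)) {
                    close(uffd);
                    return -1;
            }
            return uffd;    /* the monitor then read()s uffd_msg events here */
    }

With the uffd-wp bit dropped from the new mapping, writes to that page no
longer fault, so the monitor reading the returned fd never sees them.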

Preserve both the soft-dirty and uffd-wp bits from the old PTE when
creating the new zeropage mapping to ensure they are correctly tracked.
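
Concretely, the new zeropage PTE now inherits both markers from the old
swap PTE, as the mm/migrate.c hunk below shows:

    if (pte_swp_soft_dirty(old_pte))
            newpte = pte_mksoft_dirty(newpte);
    if (pte_swp_uffd_wp(old_pte))
            newpte = pte_mkuffd_wp(newpte);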

Link: https://lkml.kernel.org/r/20250930081040.80926-1-lance.yang@linux.dev
Fixes: b1f202060afe ("mm: remap unused subpages to shared zeropage when splitting isolated thp")
Signed-off-by: Lance Yang <lance.yang@linux.dev>
Suggested-by: David Hildenbrand <david@redhat.com>
Suggested-by: Dev Jain <dev.jain@arm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Dev Jain <dev.jain@arm.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Jann Horn <jannh@google.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mariano Pache <npache@redhat.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yu Zhao <yuzhao@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/migrate.c

index ce83c2c3c2870dc6a84075a3a19db12e0388b0e9..e3065c9edb55ab238f0bdae74acd6cf8692e70bb 100644 (file)
@@ -296,8 +296,7 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list)
 }
 
 static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
-                                         struct folio *folio,
-                                         unsigned long idx)
+               struct folio *folio, pte_t old_pte, unsigned long idx)
 {
        struct page *page = folio_page(folio, idx);
        pte_t newpte;
@@ -306,7 +305,7 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
                return false;
        VM_BUG_ON_PAGE(!PageAnon(page), page);
        VM_BUG_ON_PAGE(!PageLocked(page), page);
-       VM_BUG_ON_PAGE(pte_present(ptep_get(pvmw->pte)), page);
+       VM_BUG_ON_PAGE(pte_present(old_pte), page);
 
        if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags & VM_LOCKED) ||
            mm_forbids_zeropage(pvmw->vma->vm_mm))
@@ -322,6 +321,12 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
 
        newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address),
                                        pvmw->vma->vm_page_prot));
+
+       if (pte_swp_soft_dirty(old_pte))
+               newpte = pte_mksoft_dirty(newpte);
+       if (pte_swp_uffd_wp(old_pte))
+               newpte = pte_mkuffd_wp(newpte);
+
        set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
 
        dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio));
@@ -364,13 +369,13 @@ static bool remove_migration_pte(struct folio *folio,
                        continue;
                }
 #endif
+               old_pte = ptep_get(pvmw.pte);
                if (rmap_walk_arg->map_unused_to_zeropage &&
-                   try_to_map_unused_to_zeropage(&pvmw, folio, idx))
+                   try_to_map_unused_to_zeropage(&pvmw, folio, old_pte, idx))
                        continue;
 
                folio_get(folio);
                pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
-               old_pte = ptep_get(pvmw.pte);
 
                entry = pte_to_swp_entry(old_pte);
                if (!is_migration_entry_young(entry))