www.infradead.org Git - nvme.git/commitdiff
mm: pgtable: fix incorrect reclaim of non-empty PTE pages
author: Qi Zheng <zhengqi.arch@bytedance.com>
Tue, 11 Feb 2025 07:26:25 +0000 (15:26 +0800)
committer: Andrew Morton <akpm@linux-foundation.org>
Tue, 18 Feb 2025 06:40:03 +0000 (22:40 -0800)
In zap_pte_range(), if the pte lock was released midway, the pte entries
may be refilled with physical pages by another thread, which may cause a
non-empty PTE page to be reclaimed and eventually cause the system to
crash.

To fix it, fall back to the slow path in this case to recheck if all pte
entries are still none.

Link: https://lkml.kernel.org/r/20250211072625.89188-1-zhengqi.arch@bytedance.com
Fixes: 6375e95f381e ("mm: pgtable: reclaim empty PTE page in madvise(MADV_DONTNEED)")
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reported-by: Christian Brauner <brauner@kernel.org>
Closes: https://lore.kernel.org/all/20250207-anbot-bankfilialen-acce9d79a2c7@brauner/
Reported-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
Closes: https://lore.kernel.org/all/152296f3-5c81-4a94-97f3-004108fba7be@gmx.com/
Tested-by: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/memory.c

index 539c0f7c6d5458791e723ac58e25e5b6b9f73c89..b4d3d4893267c8c9007a2b6e5419136ece85b59a 100644 (file)
@@ -1719,7 +1719,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
        pmd_t pmdval;
        unsigned long start = addr;
        bool can_reclaim_pt = reclaim_pt_is_enabled(start, end, details);
-       bool direct_reclaim = false;
+       bool direct_reclaim = true;
        int nr;
 
 retry:
@@ -1734,8 +1734,10 @@ retry:
        do {
                bool any_skipped = false;
 
-               if (need_resched())
+               if (need_resched()) {
+                       direct_reclaim = false;
                        break;
+               }
 
                nr = do_zap_pte_range(tlb, vma, pte, addr, end, details, rss,
                                      &force_flush, &force_break, &any_skipped);
@@ -1743,11 +1745,20 @@ retry:
                        can_reclaim_pt = false;
                if (unlikely(force_break)) {
                        addr += nr * PAGE_SIZE;
+                       direct_reclaim = false;
                        break;
                }
        } while (pte += nr, addr += PAGE_SIZE * nr, addr != end);
 
-       if (can_reclaim_pt && addr == end)
+       /*
+        * Fast path: try to hold the pmd lock and unmap the PTE page.
+        *
+        * If the pte lock was released midway (retry case), or if the attempt
+        * to hold the pmd lock failed, then we need to recheck all pte entries
+        * to ensure they are still none, thereby preventing the pte entries
+        * from being repopulated by another thread.
+        */
+       if (can_reclaim_pt && direct_reclaim && addr == end)
                direct_reclaim = try_get_and_clear_pmd(mm, pmd, &pmdval);
 
        add_mm_rss_vec(mm, rss);