www.infradead.org Git - users/willy/pagecache.git/commitdiff
mm/mglru: fix PTE-mapped large folios
author Yu Zhao <yuzhao@google.com>
Tue, 31 Dec 2024 04:35:38 +0000 (21:35 -0700)
committer Andrew Morton <akpm@linux-foundation.org>
Sun, 26 Jan 2025 04:22:39 +0000 (20:22 -0800)
Count the accessed bits from PTEs mapping the same large folio as one
access rather than multiple accesses.

The previous patch changed how folios accessed through page tables are
promoted: instead of being promoted when its accessed bit is cleared for
the first time, a folio is now promoted only on a later access, i.e.,
when the accessed bit is found set again after that first clearing.
Counting the accessed bits from the same large folio as multiple accesses
can cause that folio to be promoted prematurely, which in turn can cause
overprotection of single-use large folios.

This patch reduced the sys time of the kernel compilation by [2, 5]%
(95% CI) on Altra M128-30 with 3GB DRAM, 12GB zram, 16KB THPs and -j32.

Link: https://lkml.kernel.org/r/20241231043538.4075764-8-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reported-by: Barry Song <v-songbaohua@oppo.com>
Tested-by: Kalesh Singh <kaleshsingh@google.com>
Cc: Bharata B Rao <bharata@amd.com>
Cc: David Stevens <stevensd@chromium.org>
Cc: Kairui Song <kasong@tencent.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/vmscan.c

index 9b8e0a9fc9d529a73a4e4becf3f4e5d3510c57c4..f5970fdd759c0b866884c6049fd158bca1e96685 100644 (file)
@@ -3423,29 +3423,55 @@ static bool suitable_to_scan(int total, int young)
        return young * n >= total;
 }
 
+static void walk_update_folio(struct lru_gen_mm_walk *walk, struct folio *folio,
+                             int new_gen, bool dirty)
+{
+       int old_gen;
+
+       if (!folio)
+               return;
+
+       if (dirty && !folio_test_dirty(folio) &&
+           !(folio_test_anon(folio) && folio_test_swapbacked(folio) &&
+             !folio_test_swapcache(folio)))
+               folio_mark_dirty(folio);
+
+       if (walk) {
+               old_gen = folio_update_gen(folio, new_gen);
+               if (old_gen >= 0 && old_gen != new_gen)
+                       update_batch_size(walk, folio, old_gen, new_gen);
+       } else if (lru_gen_set_refs(folio)) {
+               old_gen = folio_lru_gen(folio);
+               if (old_gen >= 0 && old_gen != new_gen)
+                       folio_activate(folio);
+       }
+}
+
 static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
                           struct mm_walk *args)
 {
        int i;
+       bool dirty;
        pte_t *pte;
        spinlock_t *ptl;
        unsigned long addr;
        int total = 0;
        int young = 0;
+       struct folio *last = NULL;
        struct lru_gen_mm_walk *walk = args->private;
        struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec);
        struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
        DEFINE_MAX_SEQ(walk->lruvec);
-       int old_gen, new_gen = lru_gen_from_seq(max_seq);
+       int gen = lru_gen_from_seq(max_seq);
        pmd_t pmdval;
 
-       pte = pte_offset_map_rw_nolock(args->mm, pmd, start & PMD_MASK, &pmdval,
-                                      &ptl);
+       pte = pte_offset_map_rw_nolock(args->mm, pmd, start & PMD_MASK, &pmdval, &ptl);
        if (!pte)
                return false;
+
        if (!spin_trylock(ptl)) {
                pte_unmap(pte);
-               return false;
+               return true;
        }
 
        if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
@@ -3474,19 +3500,23 @@ restart:
                if (!ptep_clear_young_notify(args->vma, addr, pte + i))
                        continue;
 
-               young++;
-               walk->mm_stats[MM_LEAF_YOUNG]++;
+               if (last != folio) {
+                       walk_update_folio(walk, last, gen, dirty);
 
-               if (pte_dirty(ptent) && !folio_test_dirty(folio) &&
-                   !(folio_test_anon(folio) && folio_test_swapbacked(folio) &&
-                     !folio_test_swapcache(folio)))
-                       folio_mark_dirty(folio);
+                       last = folio;
+                       dirty = false;
+               }
 
-               old_gen = folio_update_gen(folio, new_gen);
-               if (old_gen >= 0 && old_gen != new_gen)
-                       update_batch_size(walk, folio, old_gen, new_gen);
+               if (pte_dirty(ptent))
+                       dirty = true;
+
+               young++;
+               walk->mm_stats[MM_LEAF_YOUNG]++;
        }
 
+       walk_update_folio(walk, last, gen, dirty);
+       last = NULL;
+
        if (i < PTRS_PER_PTE && get_next_vma(PMD_MASK, PAGE_SIZE, args, &start, &end))
                goto restart;
 
@@ -3500,13 +3530,15 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area
                                  struct mm_walk *args, unsigned long *bitmap, unsigned long *first)
 {
        int i;
+       bool dirty;
        pmd_t *pmd;
        spinlock_t *ptl;
+       struct folio *last = NULL;
        struct lru_gen_mm_walk *walk = args->private;
        struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec);
        struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
        DEFINE_MAX_SEQ(walk->lruvec);
-       int old_gen, new_gen = lru_gen_from_seq(max_seq);
+       int gen = lru_gen_from_seq(max_seq);
 
        VM_WARN_ON_ONCE(pud_leaf(*pud));
 
@@ -3559,20 +3591,23 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area
                if (!pmdp_clear_young_notify(vma, addr, pmd + i))
                        goto next;
 
-               walk->mm_stats[MM_LEAF_YOUNG]++;
+               if (last != folio) {
+                       walk_update_folio(walk, last, gen, dirty);
 
-               if (pmd_dirty(pmd[i]) && !folio_test_dirty(folio) &&
-                   !(folio_test_anon(folio) && folio_test_swapbacked(folio) &&
-                     !folio_test_swapcache(folio)))
-                       folio_mark_dirty(folio);
+                       last = folio;
+                       dirty = false;
+               }
 
-               old_gen = folio_update_gen(folio, new_gen);
-               if (old_gen >= 0 && old_gen != new_gen)
-                       update_batch_size(walk, folio, old_gen, new_gen);
+               if (pmd_dirty(pmd[i]))
+                       dirty = true;
+
+               walk->mm_stats[MM_LEAF_YOUNG]++;
 next:
                i = i > MIN_LRU_BATCH ? 0 : find_next_bit(bitmap, MIN_LRU_BATCH, i) + 1;
        } while (i <= MIN_LRU_BATCH);
 
+       walk_update_folio(walk, last, gen, dirty);
+
        arch_leave_lazy_mmu_mode();
        spin_unlock(ptl);
 done:
@@ -4107,9 +4142,11 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
 bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 {
        int i;
+       bool dirty;
        unsigned long start;
        unsigned long end;
        struct lru_gen_mm_walk *walk;
+       struct folio *last = NULL;
        int young = 1;
        pte_t *pte = pvmw->pte;
        unsigned long addr = pvmw->address;
@@ -4120,7 +4157,7 @@ bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
        struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
        struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
        DEFINE_MAX_SEQ(lruvec);
-       int old_gen, new_gen = lru_gen_from_seq(max_seq);
+       int gen = lru_gen_from_seq(max_seq);
 
        lockdep_assert_held(pvmw->ptl);
        VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio);
@@ -4174,24 +4211,21 @@ bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
                if (!ptep_clear_young_notify(vma, addr, pte + i))
                        continue;
 
-               young++;
+               if (last != folio) {
+                       walk_update_folio(walk, last, gen, dirty);
 
-               if (pte_dirty(ptent) && !folio_test_dirty(folio) &&
-                   !(folio_test_anon(folio) && folio_test_swapbacked(folio) &&
-                     !folio_test_swapcache(folio)))
-                       folio_mark_dirty(folio);
-
-               if (walk) {
-                       old_gen = folio_update_gen(folio, new_gen);
-                       if (old_gen >= 0 && old_gen != new_gen)
-                               update_batch_size(walk, folio, old_gen, new_gen);
-               } else if (lru_gen_set_refs(folio)) {
-                       old_gen = folio_lru_gen(folio);
-                       if (old_gen >= 0 && old_gen != new_gen)
-                               folio_activate(folio);
+                       last = folio;
+                       dirty = false;
                }
+
+               if (pte_dirty(ptent))
+                       dirty = true;
+
+               young++;
        }
 
+       walk_update_folio(walk, last, gen, dirty);
+
        arch_leave_lazy_mmu_mode();
 
        /* feedback from rmap walkers to page table walkers */