mm: introduce local state for lazy_mmu sections
author    Kevin Brodsky <kevin.brodsky@arm.com>
Mon, 8 Sep 2025 07:39:26 +0000 (08:39 +0100)
committer Andrew Morton <akpm@linux-foundation.org>
Fri, 12 Sep 2025 00:26:02 +0000 (17:26 -0700)
arch_{enter,leave}_lazy_mmu_mode() currently have a stateless API (taking
and returning no value).  This is proving problematic in situations where
leave() needs to restore some context to its original state (i.e. the
state before enter() was called).  In particular, this makes it
difficult to support the nesting of lazy_mmu sections - leave() does not
know whether the matching enter() call occurred while lazy_mmu was
already enabled, and therefore whether lazy_mmu should be disabled or
left enabled.
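
For illustration (this sketch is not taken from the patch itself), the
problem with the stateless API shows up as soon as sections nest:

void inner(void)
{
	arch_enter_lazy_mmu_mode();	/* lazy_mmu may already be enabled */
	/* ... batched page table updates ... */
	arch_leave_lazy_mmu_mode();	/* old API: cannot tell whether
					 * lazy_mmu must stay enabled for
					 * the caller */
}

void outer(void)
{
	arch_enter_lazy_mmu_mode();
	inner();
	/* is lazy_mmu still enabled here? */
	arch_leave_lazy_mmu_mode();
}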

This patch gives all architectures the chance to store local state while
inside a lazy_mmu section by making enter() return some value, storing it
in a local variable, and having leave() take that value.  That value is
typed lazy_mmu_state_t - each architecture defining
__HAVE_ARCH_ENTER_LAZY_MMU_MODE is free to define it as it sees fit.  For
now we define it as int everywhere, which is sufficient to support
nesting.
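
With the new API, a converted call site typically ends up with the
following shape (this is the pattern the Coccinelle script below
generates):

	lazy_mmu_state_t lazy_mmu_state;

	lazy_mmu_state = arch_enter_lazy_mmu_mode();
	/* ... batched page table updates ... */
	arch_leave_lazy_mmu_mode(lazy_mmu_state);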

The diff is unfortunately rather large as all the API changes need to be
done atomically.  Main parts:

* Changing the prototypes of arch_{enter,leave}_lazy_mmu_mode()
  in generic and arch code, and introducing lazy_mmu_state_t.

* Introducing LAZY_MMU_{DEFAULT,NESTED} for future support of
  nesting. enter() always returns LAZY_MMU_DEFAULT for now.
  (linux/mm_types.h is not the most natural location for defining
  those constants, but there is no other obvious header that is
  accessible where architectures implement the helpers.)

* Changing all lazy_mmu sections to introduce a lazy_mmu_state
  local variable, having enter() set it and leave() take it. Most of
  these changes were generated using the following Coccinelle script:

@@
@@
{
+ lazy_mmu_state_t lazy_mmu_state;
...
- arch_enter_lazy_mmu_mode();
+ lazy_mmu_state = arch_enter_lazy_mmu_mode();
...
- arch_leave_lazy_mmu_mode();
+ arch_leave_lazy_mmu_mode(lazy_mmu_state);
...
}

* In a few cases (e.g. xen_flush_lazy_mmu()), a function knows that
  lazy_mmu is already enabled, and it temporarily disables it by
  calling leave() and then enter() again. Here we want to ensure
  that any operation between the leave() and enter() calls is
  completed immediately; for that reason we pass LAZY_MMU_DEFAULT to
  leave() to fully disable lazy_mmu. enter() will then re-enable it
  - this achieves the expected behaviour, whether nesting occurred
  before that function was called or not.
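
To see why LAZY_MMU_DEFAULT is the right value to pass in that case,
consider one possible shape of a nesting-aware implementation (a purely
hypothetical sketch with made-up helper names - as noted above, enter()
only ever returns LAZY_MMU_DEFAULT for now):

static inline lazy_mmu_state_t arch_enter_lazy_mmu_mode(void)
{
	if (lazy_mmu_is_active())	/* hypothetical helper */
		return LAZY_MMU_NESTED;

	lazy_mmu_activate();		/* hypothetical helper */
	return LAZY_MMU_DEFAULT;
}

static inline void arch_leave_lazy_mmu_mode(lazy_mmu_state_t state)
{
	if (state == LAZY_MMU_NESTED)
		return;			/* an outer section is still active */

	lazy_mmu_flush_and_deactivate();	/* hypothetical helper */
}

With such an implementation, leave(LAZY_MMU_DEFAULT) unconditionally
flushes and disables lazy_mmu, and the enter() that follows re-enables
it, whatever the nesting depth of the caller.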

Note: it is difficult to provide a default definition of lazy_mmu_state_t
for architectures implementing lazy_mmu, because that definition would
need to be available in arch/x86/include/asm/paravirt_types.h and adding a
new generic #include there is very tricky due to the existing header soup.

Link: https://lkml.kernel.org/r/20250908073931.4159362-3-kevin.brodsky@arm.com
Signed-off-by: Kevin Brodsky <kevin.brodsky@arm.com>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Yeoreum Yun <yeoreum.yun@arm.com>
Reviewed-by: Juergen Gross <jgross@suse.com> # arch/x86
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: David Hildenbrand <david@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
22 files changed:
arch/arm64/include/asm/pgtable.h
arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
arch/powerpc/mm/book3s64/hash_tlb.c
arch/powerpc/mm/book3s64/subpage_prot.c
arch/sparc/include/asm/tlbflush_64.h
arch/sparc/mm/tlb.c
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/xen/enlighten_pv.c
arch/x86/xen/mmu_pv.c
fs/proc/task_mmu.c
include/linux/mm_types.h
include/linux/pgtable.h
mm/kasan/shadow.c
mm/madvise.c
mm/memory.c
mm/migrate_device.c
mm/mprotect.c
mm/mremap.c
mm/userfaultfd.c
mm/vmalloc.c
mm/vmscan.c

arch/arm64/include/asm/pgtable.h
index 728d7b6ed20aef1152f7c1d46681013b1790c46e..816197d08165891c82610603f83345ba3d8a8a30 100644
@@ -81,7 +81,9 @@ static inline void queue_pte_barriers(void)
 }
 
 #define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
-static inline void arch_enter_lazy_mmu_mode(void)
+typedef int lazy_mmu_state_t;
+
+static inline lazy_mmu_state_t arch_enter_lazy_mmu_mode(void)
 {
        /*
         * lazy_mmu_mode is not supposed to permit nesting. But in practice this
@@ -96,12 +98,14 @@ static inline void arch_enter_lazy_mmu_mode(void)
         */
 
        if (in_interrupt())
-               return;
+               return LAZY_MMU_DEFAULT;
 
        set_thread_flag(TIF_LAZY_MMU);
+
+       return LAZY_MMU_DEFAULT;
 }
 
-static inline void arch_leave_lazy_mmu_mode(void)
+static inline void arch_leave_lazy_mmu_mode(lazy_mmu_state_t state)
 {
        if (in_interrupt())
                return;
arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index 176d7fd79eeb0c4f0d4473cdd89449ab02b8845a..c9f1e819e567738578e4c5cab023c5fc21f4a795 100644
@@ -25,13 +25,14 @@ DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
 extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);
 
 #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+typedef int lazy_mmu_state_t;
 
-static inline void arch_enter_lazy_mmu_mode(void)
+static inline lazy_mmu_state_t arch_enter_lazy_mmu_mode(void)
 {
        struct ppc64_tlb_batch *batch;
 
        if (radix_enabled())
-               return;
+               return LAZY_MMU_DEFAULT;
        /*
         * apply_to_page_range can call us this preempt enabled when
         * operating on kernel page tables.
@@ -39,9 +40,11 @@ static inline void arch_enter_lazy_mmu_mode(void)
        preempt_disable();
        batch = this_cpu_ptr(&ppc64_tlb_batch);
        batch->active = 1;
+
+       return LAZY_MMU_DEFAULT;
 }
 
-static inline void arch_leave_lazy_mmu_mode(void)
+static inline void arch_leave_lazy_mmu_mode(lazy_mmu_state_t state)
 {
        struct ppc64_tlb_batch *batch;
 
arch/powerpc/mm/book3s64/hash_tlb.c
index 21fcad97ae80dd46fe28b6f195150d82f52979d8..ee664f88e6797b99439794c7849348456dac6d31 100644
@@ -189,6 +189,7 @@ void hash__tlb_flush(struct mmu_gather *tlb)
  */
 void __flush_hash_table_range(unsigned long start, unsigned long end)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        int hugepage_shift;
        unsigned long flags;
 
@@ -205,7 +206,7 @@ void __flush_hash_table_range(unsigned long start, unsigned long end)
         * way to do things but is fine for our needs here.
         */
        local_irq_save(flags);
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
        for (; start < end; start += PAGE_SIZE) {
                pte_t *ptep = find_init_mm_pte(start, &hugepage_shift);
                unsigned long pte;
@@ -217,12 +218,13 @@ void __flush_hash_table_range(unsigned long start, unsigned long end)
                        continue;
                hpte_need_flush(&init_mm, start, ptep, pte, hugepage_shift);
        }
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
        local_irq_restore(flags);
 }
 
 void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        pte_t *pte;
        pte_t *start_pte;
        unsigned long flags;
@@ -237,7 +239,7 @@ void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long
         * way to do things but is fine for our needs here.
         */
        local_irq_save(flags);
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
        start_pte = pte_offset_map(pmd, addr);
        if (!start_pte)
                goto out;
@@ -249,6 +251,6 @@ void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long
        }
        pte_unmap(start_pte);
 out:
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
        local_irq_restore(flags);
 }
arch/powerpc/mm/book3s64/subpage_prot.c
index ec98e526167e543a4f4e4b9342020ad1f0a1a826..4720f9f321af082e9ab2780af4adbd5d7e825866 100644
@@ -53,6 +53,7 @@ void subpage_prot_free(struct mm_struct *mm)
 static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
                             int npages)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
@@ -73,13 +74,13 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
        pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        if (!pte)
                return;
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
        for (; npages > 0; --npages) {
                pte_update(mm, addr, pte, 0, 0, 0);
                addr += PAGE_SIZE;
                ++pte;
        }
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
        pte_unmap_unlock(pte - 1, ptl);
 }
 
arch/sparc/include/asm/tlbflush_64.h
index cd144eb31bdd21dbf5efd1be28e273690deae12d..02c93a4e6af5cfdfd4deef3add374ef34fba5955 100644
@@ -40,10 +40,11 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
 #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+typedef int lazy_mmu_state_t;
 
 void flush_tlb_pending(void);
-void arch_enter_lazy_mmu_mode(void);
-void arch_leave_lazy_mmu_mode(void);
+lazy_mmu_state_t arch_enter_lazy_mmu_mode(void);
+void arch_leave_lazy_mmu_mode(lazy_mmu_state_t state);
 
 /* Local cpu only.  */
 void __flush_tlb_all(void);
arch/sparc/mm/tlb.c
index a35ddcca5e7668834dc187031b0fec4c6ae094b6..bf5094b770af0a935743fe3d3c79d6392bc5b378 100644
@@ -50,16 +50,18 @@ out:
        put_cpu_var(tlb_batch);
 }
 
-void arch_enter_lazy_mmu_mode(void)
+lazy_mmu_state_t arch_enter_lazy_mmu_mode(void)
 {
        struct tlb_batch *tb;
 
        preempt_disable();
        tb = this_cpu_ptr(&tlb_batch);
        tb->active = 1;
+
+       return LAZY_MMU_DEFAULT;
 }
 
-void arch_leave_lazy_mmu_mode(void)
+void arch_leave_lazy_mmu_mode(lazy_mmu_state_t state)
 {
        struct tlb_batch *tb = this_cpu_ptr(&tlb_batch);
 
arch/x86/include/asm/paravirt.h
index b5e59a7ba0d0e206e01a2f63b2dfa3dbbe6ef81d..65a0d394fba1864e629588074a2e064c6b5bebb7 100644
@@ -527,12 +527,14 @@ static inline void arch_end_context_switch(struct task_struct *next)
 }
 
 #define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
-static inline void arch_enter_lazy_mmu_mode(void)
+static inline lazy_mmu_state_t arch_enter_lazy_mmu_mode(void)
 {
        PVOP_VCALL0(mmu.lazy_mode.enter);
+
+       return LAZY_MMU_DEFAULT;
 }
 
-static inline void arch_leave_lazy_mmu_mode(void)
+static inline void arch_leave_lazy_mmu_mode(lazy_mmu_state_t state)
 {
        PVOP_VCALL0(mmu.lazy_mode.leave);
 }
arch/x86/include/asm/paravirt_types.h
index 37a8627d8277fbf2c1f9f1a4f4e3f2180596de1c..bc1af86868a3f7a6e71247493df12eec23d4a50d 100644
@@ -41,6 +41,8 @@ struct pv_info {
 };
 
 #ifdef CONFIG_PARAVIRT_XXL
+typedef int lazy_mmu_state_t;
+
 struct pv_lazy_ops {
        /* Set deferred update mode, used for batching operations. */
        void (*enter)(void);
arch/x86/xen/enlighten_pv.c
index 26bbaf4b7330b4981e59467e9b05192b56b04d70..a245ba47a63149044b6916e26167ebcc2e2abe5a 100644
@@ -426,7 +426,7 @@ static void xen_start_context_switch(struct task_struct *prev)
        BUG_ON(preemptible());
 
        if (this_cpu_read(xen_lazy_mode) == XEN_LAZY_MMU) {
-               arch_leave_lazy_mmu_mode();
+               arch_leave_lazy_mmu_mode(LAZY_MMU_DEFAULT);
                set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
        }
        enter_lazy(XEN_LAZY_CPU);
arch/x86/xen/mmu_pv.c
index 2a4a8deaf612f5211defacc22dfed850a8e4889c..2039d5132ca37400ab3acfe3220ef16b053e2200 100644
@@ -2140,7 +2140,7 @@ static void xen_flush_lazy_mmu(void)
        preempt_disable();
 
        if (xen_get_lazy_mode() == XEN_LAZY_MMU) {
-               arch_leave_lazy_mmu_mode();
+               arch_leave_lazy_mmu_mode(LAZY_MMU_DEFAULT);
                arch_enter_lazy_mmu_mode();
        }
 
fs/proc/task_mmu.c
index ced01cf3c5ab3f520fb1e42a8058c263674e03dd..02aa55f83bae5af6a66f856947709d7d788da739 100644
@@ -2682,6 +2682,7 @@ out_unlock:
 static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
                                  unsigned long end, struct mm_walk *walk)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        struct pagemap_scan_private *p = walk->private;
        struct vm_area_struct *vma = walk->vma;
        unsigned long addr, flush_end = 0;
@@ -2700,7 +2701,7 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
                return 0;
        }
 
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
 
        if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) {
                /* Fast path for performing exclusive WP */
@@ -2770,7 +2771,7 @@ flush_and_return:
        if (flush_end)
                flush_tlb_range(vma, start, addr);
 
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
        pte_unmap_unlock(start_pte, ptl);
 
        cond_resched();
include/linux/mm_types.h
index 3dbdddb83c0293574a8c2a203662445a477d29e0..4a441f78340d1cac94eb67f69e04fa1706dddde3 100644
@@ -1484,6 +1484,9 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
 extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
 extern void tlb_finish_mmu(struct mmu_gather *tlb);
 
+#define LAZY_MMU_DEFAULT       0
+#define LAZY_MMU_NESTED                1
+
 struct vm_fault;
 
 /**
include/linux/pgtable.h
index 8d6007123cdfd86b499cb6a9ea79cc38bf8f892e..df0eb898b3fc7d65f9bd10c442d2d3147c64ae81 100644
@@ -232,8 +232,10 @@ static inline int pmd_dirty(pmd_t pmd)
  * and the mode cannot be used in interrupt context.
  */
 #ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
-#define arch_enter_lazy_mmu_mode()     do {} while (0)
-#define arch_leave_lazy_mmu_mode()     do {} while (0)
+typedef int lazy_mmu_state_t;
+
+#define arch_enter_lazy_mmu_mode()     (LAZY_MMU_DEFAULT)
+#define arch_leave_lazy_mmu_mode(state)        ((void)(state))
 #endif
 
 #ifndef pte_batch_hint
mm/kasan/shadow.c
index 5d2a876035d656809244a5a3e75a852aa2ba1de1..60b1b72f5ce1a74ef03bdc14b00c960111e1a7ba 100644
@@ -305,7 +305,7 @@ static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
        pte_t pte;
        int index;
 
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(LAZY_MMU_DEFAULT);
 
        index = PFN_DOWN(addr - data->start);
        page = data->pages[index];
@@ -482,7 +482,7 @@ static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
        pte_t pte;
        int none;
 
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(LAZY_MMU_DEFAULT);
 
        spin_lock(&init_mm.page_table_lock);
        pte = ptep_get(ptep);
mm/madvise.c
index 35ed4ab0d7c53b94db11eb12e57d8b1b59134656..72c032f2cf567cf72738b5ee01c3a214b3b399b7 100644
@@ -357,6 +357,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
                                unsigned long addr, unsigned long end,
                                struct mm_walk *walk)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        struct madvise_walk_private *private = walk->private;
        struct mmu_gather *tlb = private->tlb;
        bool pageout = private->pageout;
@@ -455,7 +456,7 @@ restart:
        if (!start_pte)
                return 0;
        flush_tlb_batched_pending(mm);
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
        for (; addr < end; pte += nr, addr += nr * PAGE_SIZE) {
                nr = 1;
                ptent = ptep_get(pte);
@@ -463,7 +464,7 @@ restart:
                if (++batch_count == SWAP_CLUSTER_MAX) {
                        batch_count = 0;
                        if (need_resched()) {
-                               arch_leave_lazy_mmu_mode();
+                               arch_leave_lazy_mmu_mode(lazy_mmu_state);
                                pte_unmap_unlock(start_pte, ptl);
                                cond_resched();
                                goto restart;
@@ -499,7 +500,7 @@ restart:
                                if (!folio_trylock(folio))
                                        continue;
                                folio_get(folio);
-                               arch_leave_lazy_mmu_mode();
+                               arch_leave_lazy_mmu_mode(lazy_mmu_state);
                                pte_unmap_unlock(start_pte, ptl);
                                start_pte = NULL;
                                err = split_folio(folio);
@@ -510,7 +511,7 @@ restart:
                                if (!start_pte)
                                        break;
                                flush_tlb_batched_pending(mm);
-                               arch_enter_lazy_mmu_mode();
+                               lazy_mmu_state = arch_enter_lazy_mmu_mode();
                                if (!err)
                                        nr = 0;
                                continue;
@@ -558,7 +559,7 @@ restart:
        }
 
        if (start_pte) {
-               arch_leave_lazy_mmu_mode();
+               arch_leave_lazy_mmu_mode(lazy_mmu_state);
                pte_unmap_unlock(start_pte, ptl);
        }
        if (pageout)
@@ -657,6 +658,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 
 {
        const cydp_t cydp_flags = CYDP_CLEAR_YOUNG | CYDP_CLEAR_DIRTY;
+       lazy_mmu_state_t lazy_mmu_state;
        struct mmu_gather *tlb = walk->private;
        struct mm_struct *mm = tlb->mm;
        struct vm_area_struct *vma = walk->vma;
@@ -677,7 +679,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
        if (!start_pte)
                return 0;
        flush_tlb_batched_pending(mm);
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
        for (; addr != end; pte += nr, addr += PAGE_SIZE * nr) {
                nr = 1;
                ptent = ptep_get(pte);
@@ -727,7 +729,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
                                if (!folio_trylock(folio))
                                        continue;
                                folio_get(folio);
-                               arch_leave_lazy_mmu_mode();
+                               arch_leave_lazy_mmu_mode(lazy_mmu_state);
                                pte_unmap_unlock(start_pte, ptl);
                                start_pte = NULL;
                                err = split_folio(folio);
@@ -738,7 +740,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
                                if (!start_pte)
                                        break;
                                flush_tlb_batched_pending(mm);
-                               arch_enter_lazy_mmu_mode();
+                               lazy_mmu_state = arch_enter_lazy_mmu_mode();
                                if (!err)
                                        nr = 0;
                                continue;
@@ -778,7 +780,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
        if (nr_swap)
                add_mm_counter(mm, MM_SWAPENTS, nr_swap);
        if (start_pte) {
-               arch_leave_lazy_mmu_mode();
+               arch_leave_lazy_mmu_mode(lazy_mmu_state);
                pte_unmap_unlock(start_pte, ptl);
        }
        cond_resched();
mm/memory.c
index 41e641823558cfecd5ec83b65f9fc3f11f07c661..9ec83417affc4215f7dae80b3eef07142a8bddfe 100644
@@ -1207,6 +1207,7 @@ copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
               pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
               unsigned long end)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        struct mm_struct *dst_mm = dst_vma->vm_mm;
        struct mm_struct *src_mm = src_vma->vm_mm;
        pte_t *orig_src_pte, *orig_dst_pte;
@@ -1254,7 +1255,7 @@ again:
        spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
        orig_src_pte = src_pte;
        orig_dst_pte = dst_pte;
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
 
        do {
                nr = 1;
@@ -1323,7 +1324,7 @@ again:
        } while (dst_pte += nr, src_pte += nr, addr += PAGE_SIZE * nr,
                 addr != end);
 
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
        pte_unmap_unlock(orig_src_pte, src_ptl);
        add_mm_rss_vec(dst_mm, rss);
        pte_unmap_unlock(orig_dst_pte, dst_ptl);
@@ -1822,6 +1823,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                                unsigned long addr, unsigned long end,
                                struct zap_details *details)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        bool force_flush = false, force_break = false;
        struct mm_struct *mm = tlb->mm;
        int rss[NR_MM_COUNTERS];
@@ -1842,7 +1844,7 @@ retry:
                return addr;
 
        flush_tlb_batched_pending(mm);
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
        do {
                bool any_skipped = false;
 
@@ -1874,7 +1876,7 @@ retry:
                direct_reclaim = try_get_and_clear_pmd(mm, pmd, &pmdval);
 
        add_mm_rss_vec(mm, rss);
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
 
        /* Do the actual TLB flush before dropping ptl */
        if (force_flush) {
@@ -2811,6 +2813,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
                        unsigned long addr, unsigned long end,
                        unsigned long pfn, pgprot_t prot)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        pte_t *pte, *mapped_pte;
        spinlock_t *ptl;
        int err = 0;
@@ -2818,7 +2821,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
        mapped_pte = pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
        if (!pte)
                return -ENOMEM;
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
        do {
                BUG_ON(!pte_none(ptep_get(pte)));
                if (!pfn_modify_allowed(pfn, prot)) {
@@ -2828,7 +2831,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
                set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
                pfn++;
        } while (pte++, addr += PAGE_SIZE, addr != end);
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
        pte_unmap_unlock(mapped_pte, ptl);
        return err;
 }
@@ -3117,6 +3120,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
                                     pte_fn_t fn, void *data, bool create,
                                     pgtbl_mod_mask *mask)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        pte_t *pte, *mapped_pte;
        int err = 0;
        spinlock_t *ptl;
@@ -3135,7 +3139,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
                        return -EINVAL;
        }
 
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
 
        if (fn) {
                do {
@@ -3148,7 +3152,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
        }
        *mask |= PGTBL_PTE_MODIFIED;
 
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
 
        if (mm != &init_mm)
                pte_unmap_unlock(mapped_pte, ptl);
mm/migrate_device.c
index abd9f6850db651a7efec09f8d955668cb0e12f95..833ce5eafa4063b963e89c4f04f96032ffd05771 100644
@@ -59,6 +59,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
                                   unsigned long end,
                                   struct mm_walk *walk)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        struct migrate_vma *migrate = walk->private;
        struct folio *fault_folio = migrate->fault_page ?
                page_folio(migrate->fault_page) : NULL;
@@ -110,7 +111,7 @@ again:
        ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
        if (!ptep)
                goto again;
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
 
        for (; addr < end; addr += PAGE_SIZE, ptep++) {
                struct dev_pagemap *pgmap;
@@ -287,7 +288,7 @@ next:
        if (unmapped)
                flush_tlb_range(walk->vma, start, end);
 
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
        pte_unmap_unlock(ptep - 1, ptl);
 
        return 0;
mm/mprotect.c
index 113b489858341ff1d68af3596c1a695453493551..7bba651e5aa3215fd6ff0c7c6db7ef6479871cdd 100644
@@ -273,6 +273,7 @@ static long change_pte_range(struct mmu_gather *tlb,
                struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
                unsigned long end, pgprot_t newprot, unsigned long cp_flags)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        pte_t *pte, oldpte;
        spinlock_t *ptl;
        long pages = 0;
@@ -293,7 +294,7 @@ static long change_pte_range(struct mmu_gather *tlb,
                target_node = numa_node_id();
 
        flush_tlb_batched_pending(vma->vm_mm);
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
        do {
                nr_ptes = 1;
                oldpte = ptep_get(pte);
@@ -439,7 +440,7 @@ static long change_pte_range(struct mmu_gather *tlb,
                        }
                }
        } while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end);
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
        pte_unmap_unlock(pte - 1, ptl);
 
        return pages;
mm/mremap.c
index 35de0a7b910e085c3a2565c40aaa61d9e4a392de..a562d8cf1eee3120e39627487dba621d985abbb4 100644
@@ -193,6 +193,7 @@ static int mremap_folio_pte_batch(struct vm_area_struct *vma, unsigned long addr
 static int move_ptes(struct pagetable_move_control *pmc,
                unsigned long extent, pmd_t *old_pmd, pmd_t *new_pmd)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        struct vm_area_struct *vma = pmc->old;
        bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
        struct mm_struct *mm = vma->vm_mm;
@@ -256,7 +257,7 @@ static int move_ptes(struct pagetable_move_control *pmc,
        if (new_ptl != old_ptl)
                spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
        flush_tlb_batched_pending(vma->vm_mm);
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
 
        for (; old_addr < old_end; old_ptep += nr_ptes, old_addr += nr_ptes * PAGE_SIZE,
                new_ptep += nr_ptes, new_addr += nr_ptes * PAGE_SIZE) {
@@ -301,7 +302,7 @@ static int move_ptes(struct pagetable_move_control *pmc,
                }
        }
 
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
        if (force_flush)
                flush_tlb_range(vma, old_end - len, old_end);
        if (new_ptl != old_ptl)
mm/userfaultfd.c
index af61b95c89e4ecb5bfb757d0fbe5d859918a8cce..2b0e26adc5cd2dc3e87c8f1254c9dfeee58cd601 100644
@@ -1076,6 +1076,7 @@ static long move_present_ptes(struct mm_struct *mm,
                              struct folio **first_src_folio, unsigned long len,
                              struct anon_vma *src_anon_vma)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        int err = 0;
        struct folio *src_folio = *first_src_folio;
        unsigned long src_start = src_addr;
@@ -1100,7 +1101,7 @@ static long move_present_ptes(struct mm_struct *mm,
        /* It's safe to drop the reference now as the page-table is holding one. */
        folio_put(*first_src_folio);
        *first_src_folio = NULL;
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
 
        while (true) {
                orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte);
@@ -1138,7 +1139,7 @@ static long move_present_ptes(struct mm_struct *mm,
                        break;
        }
 
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
        if (src_addr > src_start)
                flush_tlb_range(src_vma, src_start, src_addr);
 
mm/vmalloc.c
index 4249e1e019479c29f8d348204663d6c5a0ecd39f..9fc86ddf17118aa396e81a30ac93d3d5c7a500e8 100644
@@ -95,6 +95,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                        phys_addr_t phys_addr, pgprot_t prot,
                        unsigned int max_page_shift, pgtbl_mod_mask *mask)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        pte_t *pte;
        u64 pfn;
        struct page *page;
@@ -105,7 +106,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        if (!pte)
                return -ENOMEM;
 
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
 
        do {
                if (unlikely(!pte_none(ptep_get(pte)))) {
@@ -131,7 +132,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                pfn++;
        } while (pte += PFN_DOWN(size), addr += size, addr != end);
 
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
        *mask |= PGTBL_PTE_MODIFIED;
        return 0;
 }
@@ -354,12 +355,13 @@ int ioremap_page_range(unsigned long addr, unsigned long end,
 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                             pgtbl_mod_mask *mask)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        pte_t *pte;
        pte_t ptent;
        unsigned long size = PAGE_SIZE;
 
        pte = pte_offset_kernel(pmd, addr);
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
 
        do {
 #ifdef CONFIG_HUGETLB_PAGE
@@ -378,7 +380,7 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                WARN_ON(!pte_none(ptent) && !pte_present(ptent));
        } while (pte += (size >> PAGE_SHIFT), addr += size, addr != end);
 
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
        *mask |= PGTBL_PTE_MODIFIED;
 }
 
@@ -514,6 +516,7 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
                unsigned long end, pgprot_t prot, struct page **pages, int *nr,
                pgtbl_mod_mask *mask)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        int err = 0;
        pte_t *pte;
 
@@ -526,7 +529,7 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
        if (!pte)
                return -ENOMEM;
 
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
 
        do {
                struct page *page = pages[*nr];
@@ -548,7 +551,7 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
                (*nr)++;
        } while (pte++, addr += PAGE_SIZE, addr != end);
 
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
        *mask |= PGTBL_PTE_MODIFIED;
 
        return err;
mm/vmscan.c
index b2fc8b626d3dff9d34c2d747c4798e67a344305e..6a44289975bc8aac52a39ad0864568ffcd73614c 100644
@@ -3522,6 +3522,7 @@ static void walk_update_folio(struct lru_gen_mm_walk *walk, struct folio *folio,
 static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
                           struct mm_walk *args)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        int i;
        bool dirty;
        pte_t *pte;
@@ -3551,7 +3552,7 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
                return false;
        }
 
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
 restart:
        for (i = pte_index(start), addr = start; addr != end; i++, addr += PAGE_SIZE) {
                unsigned long pfn;
@@ -3592,7 +3593,7 @@ restart:
        if (i < PTRS_PER_PTE && get_next_vma(PMD_MASK, PAGE_SIZE, args, &start, &end))
                goto restart;
 
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
        pte_unmap_unlock(pte, ptl);
 
        return suitable_to_scan(total, young);
@@ -3601,6 +3602,7 @@ restart:
 static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area_struct *vma,
                                  struct mm_walk *args, unsigned long *bitmap, unsigned long *first)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        int i;
        bool dirty;
        pmd_t *pmd;
@@ -3633,7 +3635,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area
        if (!spin_trylock(ptl))
                goto done;
 
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
 
        do {
                unsigned long pfn;
@@ -3680,7 +3682,7 @@ next:
 
        walk_update_folio(walk, last, gen, dirty);
 
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
        spin_unlock(ptl);
 done:
        *first = -1;
@@ -4228,6 +4230,7 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
  */
 bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 {
+       lazy_mmu_state_t lazy_mmu_state;
        int i;
        bool dirty;
        unsigned long start;
@@ -4279,7 +4282,7 @@ bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
                }
        }
 
-       arch_enter_lazy_mmu_mode();
+       lazy_mmu_state = arch_enter_lazy_mmu_mode();
 
        pte -= (addr - start) / PAGE_SIZE;
 
@@ -4313,7 +4316,7 @@ bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
 
        walk_update_folio(walk, last, gen, dirty);
 
-       arch_leave_lazy_mmu_mode();
+       arch_leave_lazy_mmu_mode(lazy_mmu_state);
 
        /* feedback from rmap walkers to page table walkers */
        if (mm_state && suitable_to_scan(i, young))