www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
lazy tlb: introduce lazy mm refcount helper functions
author: Nicholas Piggin <npiggin@gmail.com>
Mon, 23 Aug 2021 23:59:18 +0000 (09:59 +1000)
committer: Stephen Rothwell <sfr@canb.auug.org.au>
Wed, 25 Aug 2021 23:33:55 +0000 (09:33 +1000)
Patch series "shoot lazy tlbs", v4.

On a 16-socket 192-core POWER8 system, running a context switching
benchmark with as many software threads as CPUs (so each switch will go in
and out of idle), upstream can achieve a rate of about 1 million context
switches per second.  After this series it goes up to 118 million.

This patch (of 4):

Add explicit _lazy_tlb annotated functions for lazy mm refcounting.  This
makes lazy mm references more obvious, and allows explicit refcounting to
be removed if it is not used.

If a kernel thread's current lazy tlb mm happens to be the one it wants to
use, then kthread_use_mm() cleverly transfers the mm refcount from the
lazy tlb mm reference to the returned reference.  If the lazy tlb mm
reference is no longer identical to a normal reference, this trick does
not work, so kthread_use_mm() and kthread_unuse_mm() are changed to be
explicit about the two references.

Link: https://lkml.kernel.org/r/20210605014216.446867-1-npiggin@gmail.com
Link: https://lkml.kernel.org/r/20210605014216.446867-2-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Anton Blanchard <anton@ozlabs.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
arch/arm/mach-rpc/ecard.c
arch/powerpc/kernel/smp.c
arch/powerpc/mm/book3s64/radix_tlb.c
fs/exec.c
include/linux/sched/mm.h
kernel/cpu.c
kernel/exit.c
kernel/kthread.c
kernel/sched/core.c

index 827b50f1c73e6dac648f8023f3dc009a69b8e287..1b4a41aad7932f3a25aed4824fedcaa22e6cf3ff 100644 (file)
--- a/arch/arm/mach-rpc/ecard.c
+++ b/arch/arm/mach-rpc/ecard.c
@@ -253,7 +253,7 @@ static int ecard_init_mm(void)
        current->mm = mm;
        current->active_mm = mm;
        activate_mm(active_mm, mm);
-       mmdrop(active_mm);
+       mmdrop_lazy_tlb(active_mm);
        ecard_init_pgtables(mm);
        return 0;
 }
index 447b78a87c8f2adcb5f46c0073b7cdbea84ec275..a492170b5ba0b2941bb4f7541468b18f99c5833c 100644 (file)
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1556,7 +1556,7 @@ void start_secondary(void *unused)
        if (IS_ENABLED(CONFIG_PPC32))
                setup_kup();
 
-       mmgrab(&init_mm);
+       mmgrab_lazy_tlb(&init_mm);
        current->active_mm = &init_mm;
 
        smp_store_cpu_info(cpu);
index aefc100d79a7c0f01ef0fbe645a44a9d2d62e126..2710a61d7ef20cfd906bf4fb70ee1b4b27ed556f 100644 (file)
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -785,10 +785,10 @@ void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
        if (current->active_mm == mm) {
                WARN_ON_ONCE(current->mm != NULL);
                /* Is a kernel thread and is using mm as the lazy tlb */
-               mmgrab(&init_mm);
+               mmgrab_lazy_tlb(&init_mm);
                current->active_mm = &init_mm;
                switch_mm_irqs_off(mm, &init_mm, current);
-               mmdrop(mm);
+               mmdrop_lazy_tlb(mm);
        }
 
        /*
index 38f63451b92827cef1aebaf948848d286b04900a..17ddaad5462fe0edbff55f7deea0f3ee04304ef1 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1026,9 +1026,9 @@ static int exec_mmap(struct mm_struct *mm)
                setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
                mm_update_next_owner(old_mm);
                mmput(old_mm);
-               return 0;
+       } else {
+               mmdrop_lazy_tlb(active_mm);
        }
-       mmdrop(active_mm);
        return 0;
 }
 
index 5561486fddef7a31675958319a240a9369e2bdd7..f7a0b347fecbd0c47e6af8b7ae642bfe2f7046b9 100644 (file)
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,6 +49,17 @@ static inline void mmdrop(struct mm_struct *mm)
                __mmdrop(mm);
 }
 
+/* Helpers for lazy TLB mm refcounting */
+static inline void mmgrab_lazy_tlb(struct mm_struct *mm)
+{
+       mmgrab(mm);
+}
+
+static inline void mmdrop_lazy_tlb(struct mm_struct *mm)
+{
+       mmdrop(mm);
+}
+
 /**
  * mmget() - Pin the address space associated with a &struct mm_struct.
  * @mm: The address space to pin.
index 804b847912dc03228ff9e28330fcb0fd02149c92..79882ce1f2b5354cb7e1d4a33c8be95f5d306738 100644 (file)
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -603,7 +603,7 @@ static int finish_cpu(unsigned int cpu)
         */
        if (mm != &init_mm)
                idle->active_mm = &init_mm;
-       mmdrop(mm);
+       mmdrop_lazy_tlb(mm);
        return 0;
 }
 
index 9a89e7f36acb4382efca6855b4c01b74820a8dd5..3e9ec041a4e596524864cf3607829bc13ada933a 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -475,7 +475,7 @@ static void exit_mm(void)
                __set_current_state(TASK_RUNNING);
                mmap_read_lock(mm);
        }
-       mmgrab(mm);
+       mmgrab_lazy_tlb(mm);
        BUG_ON(mm != current->active_mm);
        /* more a memory barrier than a real lock */
        task_lock(current);
index 5b37a8567168b37ae0d67087d75e49020f1986b0..e82a17863b098d54e7ed00a78d5a322ee4c6645b 100644 (file)
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1350,14 +1350,14 @@ void kthread_use_mm(struct mm_struct *mm)
        WARN_ON_ONCE(!(tsk->flags & PF_KTHREAD));
        WARN_ON_ONCE(tsk->mm);
 
+       mmgrab(mm);
+
        task_lock(tsk);
        /* Hold off tlb flush IPIs while switching mm's */
        local_irq_disable();
        active_mm = tsk->active_mm;
-       if (active_mm != mm) {
-               mmgrab(mm);
+       if (active_mm != mm)
                tsk->active_mm = mm;
-       }
        tsk->mm = mm;
        membarrier_update_current_mm(mm);
        switch_mm_irqs_off(active_mm, mm, tsk);
@@ -1377,7 +1377,7 @@ void kthread_use_mm(struct mm_struct *mm)
         * mmdrop(), or explicitly with smp_mb().
         */
        if (active_mm != mm)
-               mmdrop(active_mm);
+               mmdrop_lazy_tlb(active_mm);
        else
                smp_mb();
 
@@ -1411,10 +1411,13 @@ void kthread_unuse_mm(struct mm_struct *mm)
        local_irq_disable();
        tsk->mm = NULL;
        membarrier_update_current_mm(NULL);
+       mmgrab_lazy_tlb(mm);
        /* active_mm is still 'mm' */
        enter_lazy_tlb(mm, tsk);
        local_irq_enable();
        task_unlock(tsk);
+
+       mmdrop(mm);
 }
 EXPORT_SYMBOL_GPL(kthread_unuse_mm);
 
index 20ffcc04413449d6eedc709e1dc8096f50a1cc65..030348d3e6d290e8bbd9dc39c119caabaf5136c3 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4572,13 +4572,14 @@ static struct rq *finish_task_switch(struct task_struct *prev)
         * rq->curr, before returning to userspace, so provide them here:
         *
         * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly
-        *   provided by mmdrop(),
+        *   provided by mmdrop_lazy_tlb(),
         * - a sync_core for SYNC_CORE.
         */
        if (mm) {
                membarrier_mm_sync_core_before_usermode(mm);
-               mmdrop(mm);
+               mmdrop_lazy_tlb(mm);
        }
+
        if (unlikely(prev_state == TASK_DEAD)) {
                if (prev->sched_class->task_dead)
                        prev->sched_class->task_dead(prev);
@@ -4641,9 +4642,9 @@ context_switch(struct rq *rq, struct task_struct *prev,
 
        /*
         * kernel -> kernel   lazy + transfer active
-        *   user -> kernel   lazy + mmgrab() active
+        *   user -> kernel   lazy + mmgrab_lazy_tlb() active
         *
-        * kernel ->   user   switch + mmdrop() active
+        * kernel ->   user   switch + mmdrop_lazy_tlb() active
         *   user ->   user   switch
         */
        if (!next->mm) {                                // to kernel
@@ -4651,7 +4652,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 
                next->active_mm = prev->active_mm;
                if (prev->mm)                           // from user
-                       mmgrab(prev->active_mm);
+                       mmgrab_lazy_tlb(prev->active_mm);
                else
                        prev->active_mm = NULL;
        } else {                                        // to user
@@ -4667,7 +4668,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
                switch_mm_irqs_off(prev->active_mm, next->mm, next);
 
                if (!prev->mm) {                        // from kernel
-                       /* will mmdrop() in finish_task_switch(). */
+                       /* will mmdrop_lazy_tlb() in finish_task_switch(). */
                        rq->prev_mm = prev->active_mm;
                        prev->active_mm = NULL;
                }
@@ -9037,7 +9038,7 @@ void __init sched_init(void)
        /*
         * The boot idle thread does lazy MMU switching as well:
         */
-       mmgrab(&init_mm);
+       mmgrab_lazy_tlb(&init_mm);
        enter_lazy_tlb(&init_mm, current);
 
        /*