cpu/hotplug: Add synchronization state
author    Thomas Gleixner <tglx@linutronix.de>
          Wed, 29 Mar 2023 09:40:10 +0000 (11:40 +0200)
committer David Woodhouse <dwmw@amazon.co.uk>
          Thu, 30 Mar 2023 12:27:07 +0000 (14:27 +0200)
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
arch/Kconfig
include/linux/cpuhotplug.h
kernel/cpu.c
kernel/smpboot.c

diff --git a/arch/Kconfig b/arch/Kconfig
index e3511afbb7f2f493854242c16c2429a83800984a..acd39cdbb8ad30ee7e1b90107d83c059f23b11fa 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -34,6 +34,9 @@ config ARCH_HAS_SUBPAGE_FAULTS
 config HOTPLUG_SMT
        bool
 
+config HOTPLUG_CORE_SYNC
+       bool
+
 config GENERIC_ENTRY
        bool
 
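HOTPLUG_CORE_SYNC has no prompt: nothing changes until an architecture selects it, at which point the IS_ENABLED() checks in kernel/cpu.c below become active and the legacy polling code in kernel/smpboot.c is compiled out. As an illustration only (not part of this patch; ARCH_FOO is a placeholder), a port would opt in from its architecture Kconfig entry:

	config ARCH_FOO
		def_bool y
		select HOTPLUG_CORE_SYNC
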
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index c6fab004104a80199e65add70ba9a466068b0a20..46281352bba409c24b80ee6474c50f3a7683147c 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -133,6 +133,7 @@ enum cpuhp_state {
        CPUHP_MIPS_SOC_PREPARE,
        CPUHP_BP_PREPARE_DYN,
        CPUHP_BP_PREPARE_DYN_END                = CPUHP_BP_PREPARE_DYN + 20,
+
        CPUHP_BRINGUP_CPU,
 
        /*
@@ -519,4 +520,9 @@ void cpuhp_online_idle(enum cpuhp_state state);
 static inline void cpuhp_online_idle(enum cpuhp_state state) { }
 #endif
 
+void cpuhp_ap_sync_alive(void);
+void cpuhp_ap_report_dead(void);
+void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu);
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu);
+
 #endif
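
The header now exposes two AP-side calls (cpuhp_ap_sync_alive(), cpuhp_ap_report_dead()) and two weak BP-side cleanup hooks that architectures may override. A rough sketch of the bringup side, illustrative only (the arch_foo_* name is a placeholder, not part of this patch):

#include <linux/cpuhotplug.h>

/* AP side: early in the secondary startup path, before the CPU can
 * use completions. */
static void arch_foo_secondary_start(void)
{
	/*
	 * Mark this CPU alive. With CONFIG_HOTPLUG_CORE_SYNC=y this
	 * also spins until the control CPU clears the wait bit.
	 */
	cpuhp_ap_sync_alive();

	/* ... continue into the generic online path ... */
}

/* BP side: strong override of the weak hook, invoked once the AP has
 * reported alive, to release whatever was used to kick it (a boot
 * mailbox, trampoline page, etc.). */
void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu)
{
	/* Placeholder: nothing to release in this sketch. */
}
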
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 42621a99b9f345767f3461d84dceaa133eb4aa3d..15f08cef65338a8824dff36886d0ab1369aff5b5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -17,6 +17,7 @@
 #include <linux/cpu.h>
 #include <linux/oom.h>
 #include <linux/rcupdate.h>
+#include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/bug.h>
 #include <linux/kthread.h>
@@ -59,6 +60,7 @@
  * @last:      For multi-instance rollback, remember how far we got
  * @cb_state:  The state for a single callback (install/uninstall)
  * @result:    Result of the operation
+ * @ap_sync_state:     State for AP synchronization
  * @done_up:   Signal completion to the issuer of the task for cpu-up
  * @done_down: Signal completion to the issuer of the task for cpu-down
  */
@@ -76,6 +78,7 @@ struct cpuhp_cpu_state {
        struct hlist_node       *last;
        enum cpuhp_state        cb_state;
        int                     result;
+       atomic_t                ap_sync_state;
        struct completion       done_up;
        struct completion       done_down;
 #endif
@@ -276,6 +279,205 @@ static bool cpuhp_is_atomic_state(enum cpuhp_state state)
        return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
 }
 
+/* Synchronization state management */
+enum cpuhp_sync_state {
+       CPUHP_SYNC_STATE_DEAD,
+       CPUHP_SYNC_STATE_ALIVE,
+       CPUHP_SYNC_STATE_ONLINE,
+};
+
+enum cpuhp_sync_ctrl {
+       CPUHP_SYNC_CTRL_TIMEOUT = 0x100,
+       CPUHP_SYNC_CTRL_WAIT    = 0x200,
+       CPUHP_SYNC_CTRL_MASK    = CPUHP_SYNC_CTRL_TIMEOUT | CPUHP_SYNC_CTRL_WAIT,
+};
+
+/**
+ * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
+ * @state:     The synchronization state to set
+ *
+ * No synchronization point. Just update of the synchronization state.
+ */
+static void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
+{
+       atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
+       int new, cur = atomic_read(st);
+
+       do {
+               /* Preserve the control bits */
+               new = state | (cur & CPUHP_SYNC_CTRL_MASK);
+       } while (!atomic_try_cmpxchg(st, &cur, new));
+}
+
+/**
+ * cpuhp_ap_report_dead - Update synchronization state to DEAD
+ *
+ * No synchronization point. Just update of the synchronization state.
+ */
+void cpuhp_ap_report_dead(void)
+{
+       cpuhp_ap_update_sync_state(CPUHP_SYNC_STATE_DEAD);
+}
+
+/**
+ * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive
+ *
+ * Updates the AP synchronization state to CPUHP_SYNC_STATE_ALIVE and waits
+ * for the BP to release the CPUHP_SYNC_CTRL_WAIT bit.
+ */
+void cpuhp_ap_sync_alive(void)
+{
+       atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
+
+       cpuhp_ap_update_sync_state(CPUHP_SYNC_STATE_ALIVE);
+
+       if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC))
+               return;
+       /* Wait for the control CPU to clear the wait bit. */
+       while (atomic_read(st) & CPUHP_SYNC_CTRL_WAIT)
+               cpu_relax();
+}
+
+static int cpuhp_can_boot_ap(unsigned int cpu)
+{
+       atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+       int sync = atomic_read(st);
+
+       if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC))
+               return 0;
+
+again:
+       switch (sync & ~CPUHP_SYNC_CTRL_MASK) {
+       case CPUHP_SYNC_STATE_DEAD:
+               /*
+                * If the bringup or the dead reporting timed out, there
+                * is nothing to clean up.
+                */
+               break;
+       case CPUHP_SYNC_STATE_ALIVE:
+               /*
+                * AP bringup timed out. AP is stuck in cpuhp_ap_sync_alive().
+                * Nothing to clean up.
+                */
+               break;
+       default:
+               /*
+                * AP failed to reach the dead state. Let the caller try
+                * again.
+                */
+               return -EAGAIN;
+       }
+
+       /*
+        * Try to reset the state and set the wait control bit so the AP
+        * waits in cpuhp_ap_sync_alive() until the control CPU releases
+        * it.
+        */
+       if (!atomic_try_cmpxchg(st, &sync, CPUHP_SYNC_STATE_DEAD | CPUHP_SYNC_CTRL_WAIT))
+               goto again;
+
+       return 0;
+}
+
+static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state,
+                                     bool bringup, bool setwait)
+{
+       atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+       unsigned long left = 10 * HZ;
+       unsigned long tosleep = 0;
+       int sync, new;
+
+       sync = atomic_read(st);
+       while (1) {
+               if ((sync & ~CPUHP_SYNC_CTRL_MASK) == state) {
+                       new = state;
+                       if (setwait)
+                               new |= CPUHP_SYNC_CTRL_WAIT;
+                       if (!atomic_try_cmpxchg(st, &sync, new))
+                               continue;
+                       return true;
+               }
+
+               /* Timeout? */
+               if (!left) {
+                       new = state;
+                       /* Set timeout and wait bit */
+                       new |= CPUHP_SYNC_CTRL_TIMEOUT | CPUHP_SYNC_CTRL_WAIT;
+                       if (!atomic_try_cmpxchg(st, &sync, new))
+                               continue;
+                       return false;
+               }
+
+               /* First attempt? */
+               if (!tosleep) {
+                       udelay(50);
+                       tosleep = 1;
+               } else {
+                       /* Wait increasingly long */
+                       schedule_timeout_uninterruptible(tosleep);
+                       if (left <= tosleep)
+                               tosleep = left;
+                       left -= tosleep;
+                       tosleep = DIV_ROUND_UP(tosleep * 11, 10);
+               }
+               sync = atomic_read(st);
+       }
+}
+
+void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { }
+
+/*
+ * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up
+ * because the AP cannot issue complete() so early in the bringup.
+ */
+static int cpuhp_bp_sync_alive(unsigned int cpu)
+{
+       int ret = 0;
+
+       if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC))
+               return 0;
+
+       if (!cpuhp_wait_for_sync_state(cpu, CPUHP_SYNC_STATE_ALIVE, true, false)) {
+               pr_err("CPU%u failed to report alive state\n", cpu);
+               ret = -EIO;
+       }
+
+       /* Let the architecture clean up the kick-alive mechanics */
+       arch_cpuhp_cleanup_kick_cpu(cpu);
+       return ret;
+}
+
+void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { }
+
+/*
+ * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down
+ * because the AP cannot issue complete() at this stage.
+ */
+static void cpuhp_bp_sync_dead(unsigned int cpu)
+{
+       if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC))
+               return;
+
+       if (cpuhp_wait_for_sync_state(cpu, CPUHP_SYNC_STATE_DEAD, false, true)) {
+               /* CPU reached dead state. Invoke the cleanup function */
+               arch_cpuhp_cleanup_dead_cpu(cpu);
+               return;
+       }
+
+       /*
+        * There is nothing that can be done to undo this teardown
+        * operation other than not invoking the cleanup function. It might
+        * be invoked on the next bringup if the AP has reported dead state
+        * by then.
+        */
+       pr_err("CPU%u failed to report dead state\n", cpu);
+}
+
+static __init void cpuhp_bp_init_sync_state(void)
+{
+       atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), CPUHP_SYNC_STATE_ONLINE);
+}
+
 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
 static DEFINE_MUTEX(cpu_add_remove_lock);
 bool cpuhp_tasks_frozen;
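
The ap_sync_state word encodes one cpuhp_sync_state value in its low bits with the control bits OR'ed on top, so every transition has to preserve CPUHP_SYNC_CTRL_MASK; that is exactly what the cmpxchg loop in cpuhp_ap_update_sync_state() does. The same pattern as a standalone C11 program, illustrative only:

#include <stdatomic.h>
#include <stdio.h>

#define STATE_DEAD	0
#define STATE_ALIVE	1
#define CTRL_TIMEOUT	0x100
#define CTRL_WAIT	0x200
#define CTRL_MASK	(CTRL_TIMEOUT | CTRL_WAIT)

static _Atomic int sync_word;

static void update_state(int state)
{
	int cur = atomic_load(&sync_word);
	int new;

	do {
		/* Swap the state, keep the WAIT/TIMEOUT bits intact. */
		new = state | (cur & CTRL_MASK);
	} while (!atomic_compare_exchange_weak(&sync_word, &cur, new));
}

int main(void)
{
	atomic_store(&sync_word, STATE_DEAD | CTRL_WAIT);
	update_state(STATE_ALIVE);
	/* The wait bit survives the state change: prints 0x201. */
	printf("0x%x\n", (unsigned int)atomic_load(&sync_word));
	return 0;
}
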
@@ -588,6 +790,10 @@ static int bringup_cpu(unsigned int cpu)
        struct task_struct *idle = idle_thread_get(cpu);
        int ret;
 
+       ret = cpuhp_can_boot_ap(cpu);
+       if (ret)
+               return ret;
+
        /*
         * Reset stale stack state from the last time this CPU was online.
         */
@@ -606,6 +812,10 @@ static int bringup_cpu(unsigned int cpu)
        if (ret)
                goto out_unlock;
 
+       ret = cpuhp_bp_sync_alive(cpu);
+       if (ret)
+               goto out_unlock;
+
        ret = bringup_wait_for_ap_online(cpu);
        if (ret)
                goto out_unlock;
@@ -1109,6 +1319,8 @@ static int takedown_cpu(unsigned int cpu)
        /* This actually kills the CPU. */
        __cpu_die(cpu);
 
+       cpuhp_bp_sync_dead(cpu);
+
        tick_cleanup_dead_cpu(cpu);
        rcutree_migrate_callbacks(cpu);
        return 0;
@@ -1355,8 +1567,10 @@ void cpuhp_online_idle(enum cpuhp_state state)
        if (state != CPUHP_AP_ONLINE_IDLE)
                return;
 
+       cpuhp_ap_update_sync_state(CPUHP_SYNC_STATE_ONLINE);
+
        /*
-        * Unpart the stopper thread before we start the idle loop (and start
+        * Unpark the stopper thread before we start the idle loop (and start
         * scheduling); this ensures the stopper task is always available.
         */
        stop_machine_unpark(smp_processor_id());
@@ -1750,6 +1964,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
                .startup.single         = timers_prepare_cpu,
                .teardown.single        = timers_dead_cpu,
        },
+
        /* Kicks the plugged cpu into life */
        [CPUHP_BRINGUP_CPU] = {
                .name                   = "cpu:bringup",
@@ -2722,6 +2937,7 @@ void __init boot_cpu_hotplug_init(void)
 {
 #ifdef CONFIG_SMP
        cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
+       cpuhp_bp_init_sync_state();
 #endif
        this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
        this_cpu_write(cpuhp_state.target, CPUHP_ONLINE);
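
The wait loop in cpuhp_wait_for_sync_state() above busy-polls once for 50µs and then sleeps for periods that grow by roughly 10% per round (DIV_ROUND_UP(tosleep * 11, 10)) until the 10 * HZ jiffy budget is gone. A user-space model of that schedule, illustrative only (HZ=250 is an assumed config value):

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define HZ			250	/* assumption, config dependent */

int main(void)
{
	unsigned long left = 10 * HZ;	/* jiffies left in the budget */
	unsigned long tosleep = 1;	/* first sleep after the udelay() */
	unsigned int rounds = 0;

	while (left) {
		if (left <= tosleep)
			tosleep = left;
		left -= tosleep;
		/* Grow the next sleep by ~10%, as the kernel loop does. */
		tosleep = DIV_ROUND_UP(tosleep * 11, 10);
		rounds++;
	}
	printf("%u sleep rounds\n", rounds);	/* 40 at HZ=250 */
	return 0;
}
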
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 2c7396da470c5127fb8329d2d4a50d1a819b60cd..3dcfd3f04ed0f9a73059430c84c856bb106d1a8c 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -326,6 +326,7 @@ void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread)
 }
 EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
 
+#ifndef CONFIG_HOTPLUG_CORE_SYNC
 static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
 
 /*
@@ -488,3 +489,4 @@ bool cpu_report_death(void)
 }
 
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
+#endif /* !CONFIG_HOTPLUG_CORE_SYNC */
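
With this bracketing, the polling-based helpers cpu_wait_death() and cpu_report_death() are compiled out as soon as an architecture selects HOTPLUG_CORE_SYNC, so a converted port replaces them with the new hooks. A rough AP-side sketch, illustrative only (arch_foo_* is a placeholder):

#include <linux/cpu.h>
#include <linux/cpuhotplug.h>

/* Before: death is acknowledged via kernel/smpboot.c's helper and the
 * BP polls cpu_wait_death() from the arch's __cpu_die(). */
static void arch_foo_play_dead_legacy(void)
{
	(void)cpu_report_death();
	for (;;)
		;	/* arch-specific parked/low-power loop */
}

/* After: the AP just updates the sync state; the BP-side wait moves
 * into the core's cpuhp_bp_sync_dead(), which also invokes the weak
 * arch_cpuhp_cleanup_dead_cpu() hook on success. */
static void arch_foo_play_dead(void)
{
	cpuhp_ap_report_dead();
	for (;;)
		;
}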