From cbce9c181ba39885dd5765e259aebd4927ba36f0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 29 Mar 2023 11:40:10 +0200
Subject: [PATCH] cpu/hotplug: Add synchronization state

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/Kconfig               |   3 +
 include/linux/cpuhotplug.h |   6 +
 kernel/cpu.c               | 218 ++++++++++++++++++++++++++++++++++++-
 kernel/smpboot.c           |   2 +
 4 files changed, 228 insertions(+), 1 deletion(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index e3511afbb7f2f..acd39cdbb8ad3 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -34,6 +34,9 @@ config ARCH_HAS_SUBPAGE_FAULTS
 config HOTPLUG_SMT
 	bool
 
+config HOTPLUG_CORE_SYNC
+	bool
+
 config GENERIC_ENTRY
 	bool
 
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index c6fab004104a8..46281352bba40 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -133,6 +133,7 @@
 	CPUHP_MIPS_SOC_PREPARE,
 	CPUHP_BP_PREPARE_DYN,
 	CPUHP_BP_PREPARE_DYN_END	= CPUHP_BP_PREPARE_DYN + 20,
+	CPUHP_BRINGUP_CPU,
 
 	/*
@@ -519,4 +520,9 @@ void cpuhp_online_idle(enum cpuhp_state state);
 static inline void cpuhp_online_idle(enum cpuhp_state state) { }
 #endif
 
+void cpuhp_ap_sync_alive(void);
+void cpuhp_ap_report_dead(void);
+void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu);
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu);
+
 #endif
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 42621a99b9f34..15f08cef65338 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -17,6 +17,7 @@
 #include <linux/cpu.h>
 #include <linux/oom.h>
 #include <linux/rcupdate.h>
+#include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/bug.h>
 #include <linux/kthread.h>
@@ -59,6 +60,7 @@
  * @last:	For multi-instance rollback, remember how far we got
  * @cb_state:	The state for a single callback (install/uninstall)
  * @result:	Result of the operation
+ * @ap_sync_state:	State for AP synchronization
  * @done_up:	Signal completion to the issuer of the task for cpu-up
  * @done_down:	Signal completion to the issuer of the task for cpu-down
  */
@@ -76,6 +78,7 @@ struct cpuhp_cpu_state {
 	struct hlist_node	*last;
 	enum cpuhp_state	cb_state;
 	int			result;
+	atomic_t		ap_sync_state;
 	struct completion	done_up;
 	struct completion	done_down;
 #endif
@@ -276,6 +279,205 @@ static bool cpuhp_is_atomic_state(enum cpuhp_state state)
 	return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
 }
 
+/* Synchronization state management */
+enum cpuhp_sync_state {
+	CPUHP_SYNC_STATE_DEAD,
+	CPUHP_SYNC_STATE_ALIVE,
+	CPUHP_SYNC_STATE_ONLINE,
+};
+
+enum cpuhp_sync_ctrl {
+	CPUHP_SYNC_CTRL_TIMEOUT	= 0x100,
+	CPUHP_SYNC_CTRL_WAIT	= 0x200,
+	CPUHP_SYNC_CTRL_MASK	= CPUHP_SYNC_CTRL_TIMEOUT | CPUHP_SYNC_CTRL_WAIT,
+};
+
+/**
+ * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
+ * @state:	The synchronization state to set
+ *
+ * No synchronization point. Just update of the synchronization state.
+ */
+static void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
+{
+	atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
+	int new, cur = atomic_read(st);
+
+	do {
+		/* Preserve the control bits */
+		new = state | (cur & CPUHP_SYNC_CTRL_MASK);
+	} while (!atomic_try_cmpxchg(st, &cur, new));
+}
+
+/**
+ * cpuhp_ap_report_dead - Update synchronization state to DEAD
+ *
+ * No synchronization point. Just update of the synchronization state.
+ */
+void cpuhp_ap_report_dead(void)
+{
+	cpuhp_ap_update_sync_state(CPUHP_SYNC_STATE_DEAD);
+}
+
+/**
+ * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive
+ *
+ * Updates the AP synchronization state to CPUHP_SYNC_STATE_ALIVE and waits
+ * for the BP to release the CPUHP_SYNC_CTRL_WAIT bit.
+ */
+void cpuhp_ap_sync_alive(void)
+{
+	atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
+
+	cpuhp_ap_update_sync_state(CPUHP_SYNC_STATE_ALIVE);
+
+	if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC))
+		return;
+	/* Wait for the control CPU to clear the wait bit. */
+	while (atomic_read(st) & CPUHP_SYNC_CTRL_WAIT)
+		cpu_relax();
+}
+
+static int cpuhp_can_boot_ap(unsigned int cpu)
+{
+	atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+	int sync = atomic_read(st);
+
+	if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC))
+		return 0;
+
+again:
+	switch (sync & ~CPUHP_SYNC_CTRL_MASK) {
+	case CPUHP_SYNC_STATE_DEAD:
+		/*
+		 * In case the bringup or the dead reporting timed out,
+		 * there is nothing to clean up.
+		 */
+		break;
+	case CPUHP_SYNC_STATE_ALIVE:
+		/*
+		 * AP bringup timed out. AP is stuck in cpuhp_ap_sync_alive().
+		 * Nothing to clean up.
+		 */
+		break;
+	default:
+		/*
+		 * AP failed to reach the dead state. Let the caller try
+		 * again.
+		 */
+		return -EAGAIN;
+	}
+
+	/*
+	 * Try to reset the state and set the wait control bit so the AP
+	 * waits in cpuhp_ap_sync_alive() until the control CPU releases
+	 * it.
+	 */
+	if (!atomic_try_cmpxchg(st, &sync, CPUHP_SYNC_STATE_DEAD | CPUHP_SYNC_CTRL_WAIT))
+		goto again;
+
+	return 0;
+}
+
+static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state,
+				      bool bringup, bool setwait)
+{
+	atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+	unsigned long left = 10 * HZ;
+	unsigned long tosleep = 0;
+	int sync, new;
+
+	sync = atomic_read(st);
+	while (1) {
+		if ((sync & ~CPUHP_SYNC_CTRL_MASK) == state) {
+			new = state;
+			if (setwait)
+				new |= CPUHP_SYNC_CTRL_WAIT;
+			if (!atomic_try_cmpxchg(st, &sync, new))
+				continue;
+			return true;
+		}
+
+		/* Timeout? */
+		if (!left) {
+			new = state;
+			/* Set timeout and wait bit */
+			new |= CPUHP_SYNC_CTRL_TIMEOUT | CPUHP_SYNC_CTRL_WAIT;
+			if (!atomic_try_cmpxchg(st, &sync, new))
+				continue;
+			return false;
+		}
+
+		/* First attempt? */
+		if (!tosleep) {
+			udelay(50);
+			tosleep = 1;
+		} else {
+			/* Wait increasingly long */
+			schedule_timeout_uninterruptible(tosleep);
+			if (left <= tosleep)
+				tosleep = left;
+			left -= tosleep;
+			tosleep = DIV_ROUND_UP(tosleep * 11, 10);
+		}
+		sync = atomic_read(st);
+	}
+}
+
+void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { }
+
+/*
+ * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up
+ * because the AP cannot issue complete() so early in the bringup.
+ */
+static int cpuhp_bp_sync_alive(unsigned int cpu)
+{
+	int ret = 0;
+
+	if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC))
+		return 0;
+
+	if (!cpuhp_wait_for_sync_state(cpu, CPUHP_SYNC_STATE_ALIVE, true, false)) {
+		pr_err("CPU%u failed to report alive state\n", cpu);
+		ret = -EIO;
+	}
+
+	/* Let the architecture clean up the kick-alive mechanics */
+	arch_cpuhp_cleanup_kick_cpu(cpu);
+	return ret;
+}
+
+void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { }
+
+/*
+ * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down
+ * because the AP cannot issue complete() at this stage.
+ */
+static void cpuhp_bp_sync_dead(unsigned int cpu)
+{
+	if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC))
+		return;
+
+	if (cpuhp_wait_for_sync_state(cpu, CPUHP_SYNC_STATE_DEAD, false, true)) {
+		/* CPU reached dead state. Invoke the cleanup function */
+		arch_cpuhp_cleanup_dead_cpu(cpu);
+		return;
+	}
+
+	/*
+	 * There is nothing that can be done to undo this teardown
+	 * operation other than not invoking the cleanup function.
+	 * It might be invoked in the next bringup if the AP reported dead
+	 * state by then.
+	 */
+	pr_err("CPU%u failed to report dead state\n", cpu);
+}
+
+static __init void cpuhp_bp_init_sync_state(void)
+{
+	atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), CPUHP_SYNC_STATE_ONLINE);
+}
+
 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
 static DEFINE_MUTEX(cpu_add_remove_lock);
 bool cpuhp_tasks_frozen;
@@ -588,6 +790,10 @@ static int bringup_cpu(unsigned int cpu)
 	struct task_struct *idle = idle_thread_get(cpu);
 	int ret;
 
+	ret = cpuhp_can_boot_ap(cpu);
+	if (ret)
+		return ret;
+
 	/*
 	 * Reset stale stack state from the last time this CPU was online.
 	 */
@@ -606,6 +812,10 @@ static int bringup_cpu(unsigned int cpu)
 	if (ret)
 		goto out_unlock;
 
+	ret = cpuhp_bp_sync_alive(cpu);
+	if (ret)
+		goto out_unlock;
+
 	ret = bringup_wait_for_ap_online(cpu);
 	if (ret)
 		goto out_unlock;
@@ -1109,6 +1319,8 @@ static int takedown_cpu(unsigned int cpu)
 	/* This actually kills the CPU. */
 	__cpu_die(cpu);
 
+	cpuhp_bp_sync_dead(cpu);
+
 	tick_cleanup_dead_cpu(cpu);
 	rcutree_migrate_callbacks(cpu);
 	return 0;
@@ -1355,8 +1567,10 @@ void cpuhp_online_idle(enum cpuhp_state state)
 	if (state != CPUHP_AP_ONLINE_IDLE)
 		return;
 
+	cpuhp_ap_update_sync_state(CPUHP_SYNC_STATE_ONLINE);
+
 	/*
-	 * Unpart the stopper thread before we start the idle loop (and start
+	 * Unpark the stopper thread before we start the idle loop (and start
 	 * scheduling); this ensures the stopper task is always available.
 	 */
 	stop_machine_unpark(smp_processor_id());
@@ -1750,6 +1964,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.startup.single		= timers_prepare_cpu,
 		.teardown.single	= timers_dead_cpu,
 	},
+	/* Kicks the plugged cpu into life */
 	[CPUHP_BRINGUP_CPU] = {
 		.name			= "cpu:bringup",
@@ -2722,6 +2937,7 @@ void __init boot_cpu_hotplug_init(void)
 {
 #ifdef CONFIG_SMP
 	cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
+	cpuhp_bp_init_sync_state();
 #endif
 	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
 	this_cpu_write(cpuhp_state.target, CPUHP_ONLINE);
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 2c7396da470c5..3dcfd3f04ed0f 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -326,6 +326,7 @@ void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread)
 }
 EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
 
+#ifndef CONFIG_HOTPLUG_CORE_SYNC
 static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
 
 /*
@@ -488,3 +489,4 @@ bool cpu_report_death(void)
 }
 
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
+#endif /* !CONFIG_HOTPLUG_CORE_SYNC */
-- 
2.50.1
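
Notes for reviewers (illustrative material, not part of the patch):

The sketch below shows where an architecture that selects
HOTPLUG_CORE_SYNC would be expected to place the new calls. The names
arch_secondary_start() and arch_cpu_self_off() are hypothetical
placeholders; only the cpuhp_*/arch_cpuhp_* symbols come from the
patch.

/* AP side: early secondary startup, before complete() is usable */
void arch_secondary_start(void)
{
	/* ... stack, MMU and per-CPU setup ... */

	/* Flip ap_sync_state to ALIVE, then spin until the BP clears WAIT */
	cpuhp_ap_sync_alive();

	/* ... continue to cpu_startup_entry(CPUHP_AP_ONLINE_IDLE) ... */
}

/* AP side: final teardown step; the CPU is gone after this */
void arch_cpu_self_off(void)
{
	/* Flip ap_sync_state to DEAD so cpuhp_bp_sync_dead() can proceed */
	cpuhp_ap_report_dead();

	/* ... architecture specific self power-off, does not return ... */
}

/* BP side: optional override of the __weak hook in kernel/cpu.c */
void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu)
{
	/* Release mailbox/trampoline resources used to kick the AP alive */
}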
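The ap_sync_state word packs the synchronization state into the low
bits and the control flags above them, which is what lets a single
atomic_try_cmpxchg() update both at once. The helpers below are not in
the patch; they merely spell out the invariant the cmpxchg loops rely
on (sync_state() and sync_timed_out() are made-up names):

/* The states must stay below the control bits */
static_assert(CPUHP_SYNC_STATE_ONLINE < CPUHP_SYNC_CTRL_TIMEOUT);

static inline enum cpuhp_sync_state sync_state(int word)
{
	return word & ~CPUHP_SYNC_CTRL_MASK;	/* e.g. CPUHP_SYNC_STATE_ALIVE */
}

static inline bool sync_timed_out(int word)
{
	return word & CPUHP_SYNC_CTRL_TIMEOUT;
}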
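The timeout handling in cpuhp_wait_for_sync_state() busy-waits once for
50us and then sleeps in jiffy-granular steps that grow by roughly 10%
per round, with the bookkeeping clamped so the sleeps sum to the
10 * HZ budget. The standalone userspace program below (not kernel
code; HZ=250 is an assumption for the example) models that bookkeeping
and prints how many rounds fit into the ~10 second window:

#include <stdio.h>

#define HZ			250	/* assumed jiffy rate for the demo */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long left = 10 * HZ;	/* remaining budget in jiffies */
	unsigned long tosleep = 1;	/* first sleep after the udelay(50) */
	unsigned int rounds = 0;

	while (left) {
		/* models one schedule_timeout_uninterruptible(tosleep) */
		if (left <= tosleep)
			tosleep = left;	/* clamp the final sleep */
		left -= tosleep;
		rounds++;
		tosleep = DIV_ROUND_UP(tosleep * 11, 10);
	}
	printf("%u sleeps consume %d jiffies (~10s at HZ=%d)\n",
	       rounds, 10 * HZ, HZ);
	return 0;
}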