x86/smpboot: Switch to hotplug core state synchronization
author     Thomas Gleixner <tglx@linutronix.de>
           Wed, 29 Mar 2023 14:17:45 +0000 (16:17 +0200)
committer  David Woodhouse <dwmw@amazon.co.uk>
           Thu, 30 Mar 2023 12:27:07 +0000 (14:27 +0200)
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
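
The generic hotplug core now tracks the AP bringup/teardown state itself: the AP announces that it is alive via cpuhp_ap_sync_alive() in start_secondary()/cpu_bringup() and reports its death via cpuhp_ap_report_dead() in play_dead_common(), while the control side's waiting (formerly wait_cpu_initialized() and common_cpu_die()) moves into the hotplug core, with arch_cpuhp_cleanup_kick_cpu() and arch_cpuhp_cleanup_dead_cpu() left as the architecture cleanup hooks. A rough user-space model of the alive handshake (illustrative only; the state names and thread functions below are invented for this sketch, not the kernel API):

/* alive_handshake.c - toy model of the AP "alive" synchronization */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

enum ap_state { AP_DEAD, AP_ALIVE, AP_RELEASED, AP_ONLINE };

static _Atomic int ap_state = AP_DEAD;

/* AP side: roughly what cpuhp_ap_sync_alive() followed by onlining does. */
static void *ap_thread(void *unused)
{
	atomic_store(&ap_state, AP_ALIVE);		/* first sign of life */
	while (atomic_load(&ap_state) != AP_RELEASED)
		;					/* cpu_relax() stand-in */
	/* ... per-CPU initialization would run here ... */
	atomic_store(&ap_state, AP_ONLINE);
	return NULL;
}

int main(void)
{
	pthread_t ap;

	pthread_create(&ap, NULL, ap_thread, NULL);

	/* Control side: wait for ALIVE, release the AP, wait for ONLINE. */
	while (atomic_load(&ap_state) != AP_ALIVE)
		;
	atomic_store(&ap_state, AP_RELEASED);
	while (atomic_load(&ap_state) != AP_ONLINE)
		;

	pthread_join(ap, NULL);
	printf("AP reported online\n");
	return 0;
}

The kernel side additionally has to deal with bringup failures and timeouts, which is why do_boot_cpu() now returns the wakeup result and a failed kick is cleaned up through the new arch_cpuhp_cleanup_kick_cpu() hook.
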
arch/x86/Kconfig
arch/x86/include/asm/smp.h
arch/x86/kernel/smp.c
arch/x86/kernel/smpboot.c
arch/x86/xen/smp_hvm.c
arch/x86/xen/smp_pv.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a8d0b3fa2a726557f75cd76484f420c043cc0320..121fcae2bc2ceb8f2ce892baee16a59b6d39906e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -272,6 +272,7 @@ config X86
        select HAVE_UNSTABLE_SCHED_CLOCK
        select HAVE_USER_RETURN_NOTIFIER
        select HAVE_GENERIC_VDSO
+       select HOTPLUG_CORE_SYNC                if SMP
        select HOTPLUG_SMT                      if SMP
        select IRQ_FORCED_THREADING
        select NEED_PER_CPU_EMBED_FIRST_CHUNK
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 176244110d1e5a4bc212094b7b29b21cda54fec8..7687093a4ebb1a2b7f2b977c6ee66ab2664bd21c 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -38,6 +38,7 @@ struct smp_ops {
        void (*crash_stop_other_cpus)(void);
        void (*smp_send_reschedule)(int cpu);
 
+       void (*cleanup_dead_cpu)(unsigned cpu);
        int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
        int (*cpu_disable)(void);
        void (*cpu_die)(unsigned int cpu);
@@ -90,7 +91,8 @@ static inline int __cpu_disable(void)
 
 static inline void __cpu_die(unsigned int cpu)
 {
-       smp_ops.cpu_die(cpu);
+       if (smp_ops.cpu_die)
+               smp_ops.cpu_die(cpu);
 }
 
 static inline void play_dead(void)
@@ -122,8 +124,6 @@ void native_smp_cpus_done(unsigned int max_cpus);
 int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_disable(void);
-int common_cpu_die(unsigned int cpu);
-void native_cpu_die(unsigned int cpu);
 void hlt_play_dead(void);
 void native_play_dead(void);
 void play_dead_common(void);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 375b33ecafa27ac425486e9cddf79e77ddc20f34..4f6375b4ba9a181da8134b2bb16e6d7058a6b834 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -269,7 +269,6 @@ struct smp_ops smp_ops = {
        .smp_send_reschedule    = native_smp_send_reschedule,
 
        .cpu_up                 = native_cpu_up,
-       .cpu_die                = native_cpu_die,
        .cpu_disable            = native_cpu_disable,
        .play_dead              = native_play_dead,
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7a00092c8ba75a818a9ac6e7c66bfdc55f6c7492..0229636b607890fe09c356f42d73e5b0aeb33fff 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -57,6 +57,7 @@
 #include <linux/pgtable.h>
 #include <linux/overflow.h>
 #include <linux/stackprotector.h>
+#include <linux/cpuhotplug.h>
 
 #include <asm/acpi.h>
 #include <asm/cacheinfo.h>
@@ -101,9 +102,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map);
 DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
-/* All of these masks are initialized in setup_cpu_local_masks() */
-static cpumask_var_t cpu_initialized_mask;
-static cpumask_var_t cpu_callout_mask;
 /* Representing CPUs for which sibling maps can be computed */
 static cpumask_var_t cpu_sibling_setup_mask;
 
@@ -169,8 +167,8 @@ static void smp_callin(void)
        int cpuid = smp_processor_id();
 
        /*
-        * If waken up by an INIT in an 82489DX configuration
-        * cpu_callout_mask guarantees we don't get here before an
+        * If waken up by an INIT in an 82489DX configuration the alive
+        * synchronization guarantees we don't get here before an
         * INIT_deassert IPI reaches our local APIC, so it is now safe to
         * touch our local APIC.
         *
@@ -212,17 +210,6 @@ static void smp_callin(void)
        notify_cpu_starting(cpuid);
 }
 
-static void wait_for_master_cpu(int cpu)
-{
-       /*
-        * Wait for release by control CPU before continuing with AP
-        * initialization.
-        */
-       WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask));
-       while (!cpumask_test_cpu(cpu, cpu_callout_mask))
-               cpu_relax();
-}
-
 /*
  * Activate a secondary processor.
  */
@@ -243,11 +230,10 @@ static void notrace start_secondary(void *unused)
        cpu_init_exception_handling();
 
        /*
-        * Sync point with wait_cpu_initialized(). Sets AP in
-        * cpu_initialized_mask and then waits for the control CPU
-        * to release it.
+        * Sync point with the hotplug core. Sets the sync state to ALIVE
+        * and waits for the control CPU to release it.
         */
-       wait_for_master_cpu(raw_smp_processor_id());
+       cpuhp_ap_sync_alive();
 
        cpu_init();
        rcu_cpu_starting(raw_smp_processor_id());
@@ -274,7 +260,6 @@ static void notrace start_secondary(void *unused)
        set_cpu_online(smp_processor_id(), true);
        lapic_online();
        unlock_vector_lock();
-       cpu_set_state_online(smp_processor_id());
        x86_platform.nmi_init();
 
        /* enable local interrupts */
@@ -725,9 +710,10 @@ static void impress_friends(void)
         * Allow the user to impress friends.
         */
        pr_debug("Before bogomips\n");
-       for_each_possible_cpu(cpu)
-               if (cpumask_test_cpu(cpu, cpu_callout_mask))
+       for_each_possible_cpu(cpu) {
+               if (cpumask_test_cpu(cpu, cpu_online_mask))
                        bogosum += cpu_data(cpu).loops_per_jiffy;
+       }
        pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
                num_online_cpus(),
                bogosum/(500000/HZ),
@@ -999,6 +985,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
 static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 {
        unsigned long start_ip = real_mode_header->trampoline_start;
+       int ret;
 
 #ifdef CONFIG_X86_64
        /* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
@@ -1039,13 +1026,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
                }
        }
 
-       /*
-        * AP might wait on cpu_callout_mask in cpu_init() with
-        * cpu_initialized_mask set if previous attempt to online
-        * it timed-out. Clear cpu_initialized_mask so that after
-        * INIT/SIPI it could start with a clean state.
-        */
-       cpumask_clear_cpu(cpu, cpu_initialized_mask);
        smp_mb();
 
        /*
@@ -1056,47 +1036,16 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
         * - Use an INIT boot APIC message
         */
        if (apic->wakeup_secondary_cpu_64)
-               return apic->wakeup_secondary_cpu_64(apicid, start_ip);
+               ret = apic->wakeup_secondary_cpu_64(apicid, start_ip);
        else if (apic->wakeup_secondary_cpu)
-               return apic->wakeup_secondary_cpu(apicid, start_ip);
-
-       return wakeup_secondary_cpu_via_init(apicid, start_ip);
-}
-
-static int wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask)
-{
-       unsigned long timeout;
-
-       /*
-        * Wait up to 10s for the CPU to report in.
-        */
-       timeout = jiffies + 10*HZ;
-       while (time_before(jiffies, timeout)) {
-               if (cpumask_test_cpu(cpu, mask))
-                       return 0;
-
-               schedule();
-       }
-       return -1;
-}
-
-/*
- * Bringup step two: Wait for the target AP to reach cpu_init_secondary()
- * and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it
- * to proceed.  The AP will then proceed past setting its 'callin' bit
- * and end up waiting in check_tsc_sync_target() until we reach
- * wait_cpu_online() to tend to it.
- */
-static int wait_cpu_initialized(unsigned int cpu)
-{
-       /*
-        * Wait for first sign of life from AP.
-        */
-       if (wait_cpu_cpumask(cpu, cpu_initialized_mask))
-               return -1;
+               ret = apic->wakeup_secondary_cpu(apicid, start_ip);
+       else
+               ret = wakeup_secondary_cpu_via_init(apicid, start_ip);
 
-       cpumask_set_cpu(cpu, cpu_callout_mask);
-       return 0;
+       /* If the wakeup mechanism failed, cleanup the warm reset vector */
+       if (ret)
+               arch_cpuhp_cleanup_kick_cpu(cpu);
+       return ret;
 }
 
 static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
@@ -1121,11 +1070,6 @@ static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
         */
        mtrr_save_state();
 
-       /* x86 CPUs take themselves offline, so delayed offline is OK. */
-       err = cpu_check_up_prepare(cpu);
-       if (err && err != -EBUSY)
-               return err;
-
        /* the FPU context is blank, nobody can own it */
        per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
 
@@ -1142,17 +1086,20 @@ static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
 
 int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
-       int ret;
-
-       ret = native_kick_ap(cpu, tidle);
-       if (!ret)
-               ret = wait_cpu_initialized(cpu);
+       return native_kick_ap(cpu, tidle);
+}
 
+void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu)
+{
        /* Cleanup possible dangling ends... */
-       if (x86_platform.legacy.warm_reset)
+       if (smp_ops.cpu_up == native_cpu_up && x86_platform.legacy.warm_reset)
                smpboot_restore_warm_reset_vector();
+}
 
-       return ret;
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
+{
+       if (smp_ops.cleanup_dead_cpu)
+               smp_ops.cleanup_dead_cpu(cpu);
 }
 
 /**
@@ -1344,9 +1291,6 @@ void __init native_smp_prepare_boot_cpu(void)
        if (!IS_ENABLED(CONFIG_SMP))
                switch_gdt_and_percpu_base(me);
 
-       /* already set me in cpu_online_mask in boot_cpu_init() */
-       cpumask_set_cpu(me, cpu_callout_mask);
-       cpu_set_state_online(me);
        native_pv_lock_init();
 }
 
@@ -1473,8 +1417,6 @@ __init void prefill_possible_map(void)
 /* correctly size the local cpu masks */
 void __init setup_cpu_local_masks(void)
 {
-       alloc_bootmem_cpumask_var(&cpu_initialized_mask);
-       alloc_bootmem_cpumask_var(&cpu_callout_mask);
        alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
 }
 
@@ -1536,9 +1478,6 @@ static void remove_siblinginfo(int cpu)
 static void remove_cpu_from_maps(int cpu)
 {
        set_cpu_online(cpu, false);
-       cpumask_clear_cpu(cpu, cpu_callout_mask);
-       /* was set by cpu_init() */
-       cpumask_clear_cpu(cpu, cpu_initialized_mask);
        numa_remove_cpu(cpu);
 }
 
@@ -1589,36 +1528,11 @@ int native_cpu_disable(void)
        return 0;
 }
 
-int common_cpu_die(unsigned int cpu)
-{
-       int ret = 0;
-
-       /* We don't do anything here: idle task is faking death itself. */
-
-       /* They ack this in play_dead() by setting CPU_DEAD */
-       if (cpu_wait_death(cpu, 5)) {
-               if (system_state == SYSTEM_RUNNING)
-                       pr_info("CPU %u is now offline\n", cpu);
-       } else {
-               pr_err("CPU %u didn't die...\n", cpu);
-               ret = -1;
-       }
-
-       return ret;
-}
-
-void native_cpu_die(unsigned int cpu)
-{
-       common_cpu_die(cpu);
-}
-
 void play_dead_common(void)
 {
        idle_task_exit();
 
-       /* Ack it */
-       (void)cpu_report_death();
-
+       cpuhp_ap_report_dead();
        /*
         * With physical CPU hotplug, we should halt the cpu
         */
@@ -1720,12 +1634,6 @@ int native_cpu_disable(void)
        return -ENOSYS;
 }
 
-void native_cpu_die(unsigned int cpu)
-{
-       /* We said "no" in __cpu_disable */
-       BUG();
-}
-
 void native_play_dead(void)
 {
        BUG();
diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c
index b70afdff419ca3cc9591e7d5e180b765271ed34a..0a7d2e5c6b124123c6d568d20c77a66cfd7a1177 100644
--- a/arch/x86/xen/smp_hvm.c
+++ b/arch/x86/xen/smp_hvm.c
@@ -55,18 +55,16 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void xen_hvm_cpu_die(unsigned int cpu)
+static void xen_hvm_cleanup_dead_cpu(unsigned int cpu)
 {
-       if (common_cpu_die(cpu) == 0) {
-               if (xen_have_vector_callback) {
-                       xen_smp_intr_free(cpu);
-                       xen_uninit_lock_cpu(cpu);
-                       xen_teardown_timer(cpu);
-               }
+       if (xen_have_vector_callback) {
+               xen_smp_intr_free(cpu);
+               xen_uninit_lock_cpu(cpu);
+               xen_teardown_timer(cpu);
        }
 }
 #else
-static void xen_hvm_cpu_die(unsigned int cpu)
+static void xen_hvm_cleanup_dead_cpu(unsigned int cpu)
 {
        BUG();
 }
@@ -77,7 +75,7 @@ void __init xen_hvm_smp_init(void)
        smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
        smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
        smp_ops.smp_cpus_done = xen_smp_cpus_done;
-       smp_ops.cpu_die = xen_hvm_cpu_die;
+       smp_ops.cleanup_dead_cpu = xen_hvm_cleanup_dead_cpu;
 
        if (!xen_have_vector_callback) {
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index be40927667cf2dcecb18006a76bac9d108d88d4a..eae78946a5df4d91a63b6f53e560b2fa6a44f1d8 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -62,6 +62,7 @@ static void cpu_bringup(void)
        int cpu;
 
        cr4_init();
+       cpuhp_ap_sync_alive();
        cpu_init();
        touch_softlockup_watchdog();
 
@@ -83,8 +84,6 @@ static void cpu_bringup(void)
 
        set_cpu_online(cpu, true);
 
-       cpu_set_state_online(cpu);  /* Implies full memory barrier. */
-
        /* We can take interrupts now: we're officially "up". */
        local_irq_enable();
 }
@@ -323,14 +322,6 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
 
        xen_setup_runstate_info(cpu);
 
-       /*
-        * PV VCPUs are always successfully taken down (see 'while' loop
-        * in xen_cpu_die()), so -EBUSY is an error.
-        */
-       rc = cpu_check_up_prepare(cpu);
-       if (rc)
-               return rc;
-
        /* make sure interrupts start blocked */
        per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
 
@@ -364,18 +355,18 @@ static int xen_pv_cpu_disable(void)
 
 static void xen_pv_cpu_die(unsigned int cpu)
 {
-       while (HYPERVISOR_vcpu_op(VCPUOP_is_up,
-                                 xen_vcpu_nr(cpu), NULL)) {
+       while (HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), NULL)) {
                __set_current_state(TASK_UNINTERRUPTIBLE);
                schedule_timeout(HZ/10);
        }
+}
 
-       if (common_cpu_die(cpu) == 0) {
-               xen_smp_intr_free(cpu);
-               xen_uninit_lock_cpu(cpu);
-               xen_teardown_timer(cpu);
-               xen_pmu_finish(cpu);
-       }
+static void xen_pv_cleanup_dead_cpu(unsigned int cpu)
+{
+       xen_smp_intr_free(cpu);
+       xen_uninit_lock_cpu(cpu);
+       xen_teardown_timer(cpu);
+       xen_pmu_finish(cpu);
 }
 
 static void __noreturn xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
@@ -397,6 +388,11 @@ static void xen_pv_cpu_die(unsigned int cpu)
        BUG();
 }
 
+static void xen_pv_cleanup_dead_cpu(unsigned int cpu)
+{
+       BUG();
+}
+
 static void __noreturn xen_pv_play_dead(void)
 {
        BUG();
@@ -437,6 +433,7 @@ static const struct smp_ops xen_smp_ops __initconst = {
 
        .cpu_up = xen_pv_cpu_up,
        .cpu_die = xen_pv_cpu_die,
+       .cleanup_dead_cpu = xen_pv_cleanup_dead_cpu,
        .cpu_disable = xen_pv_cpu_disable,
        .play_dead = xen_pv_play_dead,