]> www.infradead.org Git - users/dwmw2/linux.git/commitdiff
x86/smpboot: Split up native_cpu_up into separate phases and document them
authorDavid Woodhouse <dwmw@amazon.co.uk>
Thu, 23 Feb 2023 19:11:33 +0000 (19:11 +0000)
committerDavid Woodhouse <dwmw@amazon.co.uk>
Sat, 25 Feb 2023 07:58:48 +0000 (07:58 +0000)
There are four logical parts to what native_cpu_up() does on the BSP (or
on the controlling CPU for a later hotplug):

 1) Wake the AP by sending the INIT/SIPI/SIPI sequence.

 2) Wait for the AP to make it as far as wait_for_master_cpu() which
    sets that CPU's bit in cpu_initialized_mask, then sets the bit in
    cpu_callout_mask to let the AP proceed through cpu_init().

 3) Wait for the AP to finish cpu_init() and get as far as the
    smp_callin() call, which sets that CPU's bit in cpu_callin_mask.

 4) Perform the TSC synchronization and wait for the AP to actually
    mark itself online in cpu_online_mask.

In preparation to allow these phases to operate in parallel on multiple
APs, split them out into separate functions and document the interactions
a little more clearly in both the BSP and AP code paths.

No functional change intended.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Usama Arif <usama.arif@bytedance.com>
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: Kim Phillips <kim.phillips@amd.com>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
arch/x86/kernel/smpboot.c

index 3a793772a2aa9acdb750669bcd1efa96aa687ce3..b18c1385e181b85e302569523bda20a6962aa9d5 100644 (file)
@@ -204,6 +204,10 @@ static void smp_callin(void)
 
        wmb();
 
+       /*
+        * This runs the AP through all the cpuhp states to its target
+        * state (CPUHP_ONLINE in the case of serial bringup).
+        */
        notify_cpu_starting(cpuid);
 
        /*
@@ -231,17 +235,32 @@ static void notrace start_secondary(void *unused)
        load_cr3(swapper_pg_dir);
        __flush_tlb_all();
 #endif
+       /*
+        * Sync point with do_wait_cpu_initialized(). Before proceeding through
+        * cpu_init(), the AP will call wait_for_master_cpu() which sets its
+        * own bit in cpu_initialized_mask and then waits for the BSP to set
+        * its bit in cpu_callout_mask to release it.
+        */
        cpu_init_secondary();
        rcu_cpu_starting(raw_smp_processor_id());
        x86_cpuinit.early_percpu_clock_init();
+
+       /*
+        * Sync point with do_wait_cpu_callin(). The AP doesn't wait here
+        * but just sets the bit to let the controlling CPU (BSP) know that
+        * it's got this far.
+        */
        smp_callin();
 
        enable_start_cpu0 = 0;
 
        /* otherwise gcc will move up smp_processor_id before the cpu_init */
        barrier();
+
        /*
-        * Check TSC synchronization with the boot CPU:
+        * Check TSC synchronization with the boot CPU (or whichever CPU
+        * is controlling the bringup). It will do its part of this from
+        * do_wait_cpu_online(), making it an implicit sync point.
         */
        check_tsc_sync_target();
 
@@ -254,6 +273,7 @@ static void notrace start_secondary(void *unused)
         * half valid vector space.
         */
        lock_vector_lock();
+       /* Sync point with do_wait_cpu_online() */
        set_cpu_online(smp_processor_id(), true);
        lapic_online();
        unlock_vector_lock();
@@ -1083,7 +1103,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
        unsigned long start_ip = real_mode_header->trampoline_start;
 
        unsigned long boot_error = 0;
-       unsigned long timeout;
 
 #ifdef CONFIG_X86_64
        /* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
@@ -1144,55 +1163,94 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
                boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
                                                     cpu0_nmi_registered);
 
-       if (!boot_error) {
-               /*
-                * Wait 10s total for first sign of life from AP
-                */
-               boot_error = -1;
-               timeout = jiffies + 10*HZ;
-               while (time_before(jiffies, timeout)) {
-                       if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
-                               /*
-                                * Tell AP to proceed with initialization
-                                */
-                               cpumask_set_cpu(cpu, cpu_callout_mask);
-                               boot_error = 0;
-                               break;
-                       }
-                       schedule();
-               }
-       }
+       return boot_error;
+}
 
-       if (!boot_error) {
-               /*
-                * Wait till AP completes initial initialization
-                */
-               while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
-                       /*
-                        * Allow other tasks to run while we wait for the
-                        * AP to come online. This also gives a chance
-                        * for the MTRR work(triggered by the AP coming online)
-                        * to be completed in the stop machine context.
-                        */
-                       schedule();
-               }
+static int do_wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask)
+{
+       unsigned long timeout;
+
+       /*
+        * Wait up to 10s for the CPU to report in.
+        */
+       timeout = jiffies + 10*HZ;
+       while (time_before(jiffies, timeout)) {
+               if (cpumask_test_cpu(cpu, mask))
+                       return 0;
+
+               schedule();
        }
+       return -1;
+}
 
-       if (x86_platform.legacy.warm_reset) {
-               /*
-                * Cleanup possible dangling ends...
-                */
-               smpboot_restore_warm_reset_vector();
+/*
+ * Bringup step two: Wait for the target AP to reach cpu_init_secondary()
+ * and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it
+ * to proceed.  The AP will then proceed past setting its 'callin' bit
+ * and end up waiting in check_tsc_sync_target() until we reach
+ * do_wait_cpu_online() to tend to it.
+ */
+static int do_wait_cpu_initialized(unsigned int cpu)
+{
+       /*
+        * Wait for first sign of life from AP.
+        */
+       if (do_wait_cpu_cpumask(cpu, cpu_initialized_mask))
+               return -1;
+
+       cpumask_set_cpu(cpu, cpu_callout_mask);
+       return 0;
+}
+
+/*
+ * Bringup step three: Wait for the target AP to reach smp_callin().
+ * The AP is not waiting for us here so we don't need to parallelise
+ * this step. Not entirely clear why we care about this, since we just
+ * proceed directly to TSC synchronization which is the next sync
+ * point with the AP anyway.
+ */
+static int do_wait_cpu_callin(unsigned int cpu)
+{
+       /*
+        * Wait till AP completes initial initialization.
+        */
+       return do_wait_cpu_cpumask(cpu, cpu_callin_mask);
+}
+
+/*
+ * Bringup step four: Synchronize the TSC and wait for the target AP
+ * to reach set_cpu_online() in start_secondary().
+ */
+static int do_wait_cpu_online(unsigned int cpu)
+{
+       unsigned long flags;
+
+       /*
+        * Check TSC synchronization with the AP (keep irqs disabled
+        * while doing so):
+        */
+       local_irq_save(flags);
+       check_tsc_sync_source(cpu);
+       local_irq_restore(flags);
+
+       /*
+        * Wait for the AP to mark itself online. Not entirely
+        * clear why we care, since the generic cpuhp code will
+        * wait for it to each CPUHP_AP_ONLINE_IDLE before going
+        * ahead with the rest of the bringup anyway.
+        */
+       while (!cpu_online(cpu)) {
+               cpu_relax();
+               touch_nmi_watchdog();
        }
 
-       return boot_error;
+       return 0;
 }
 
-int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
+static int do_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
        int apicid = apic->cpu_present_to_apicid(cpu);
        int cpu0_nmi_registered = 0;
-       unsigned long flags;
        int err, ret = 0;
 
        lockdep_assert_irqs_enabled();
@@ -1239,19 +1297,6 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
                goto unreg_nmi;
        }
 
-       /*
-        * Check TSC synchronization with the AP (keep irqs disabled
-        * while doing so):
-        */
-       local_irq_save(flags);
-       check_tsc_sync_source(cpu);
-       local_irq_restore(flags);
-
-       while (!cpu_online(cpu)) {
-               cpu_relax();
-               touch_nmi_watchdog();
-       }
-
 unreg_nmi:
        /*
         * Clean up the nmi handler. Do this after the callin and callout sync
@@ -1263,6 +1308,34 @@ unreg_nmi:
        return ret;
 }
 
+int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+       int ret;
+
+       ret = do_cpu_up(cpu, tidle);
+       if (ret)
+               return ret;
+
+       ret = do_wait_cpu_initialized(cpu);
+       if (ret)
+               return ret;
+
+       ret = do_wait_cpu_callin(cpu);
+       if (ret)
+               return ret;
+
+       ret = do_wait_cpu_online(cpu);
+
+       if (x86_platform.legacy.warm_reset) {
+               /*
+                * Cleanup possible dangling ends...
+                */
+               smpboot_restore_warm_reset_vector();
+       }
+
+       return ret;
+}
+
 /**
  * arch_disable_smp_support() - disables SMP support for x86 at runtime
  */