]> www.infradead.org Git - users/dwmw2/linux.git/commitdiff
cpu/hotplug: Add dynamic parallel bringup states before CPUHP_BRINGUP_CPU
authorDavid Woodhouse <dwmw@amazon.co.uk>
Thu, 28 Jan 2021 13:25:18 +0000 (13:25 +0000)
committerDavid Woodhouse <dwmw@amazon.co.uk>
Tue, 1 Feb 2022 12:38:54 +0000 (12:38 +0000)
If the platform registers these states, bring all CPUs to each registered
state in turn, before the final bringup to CPUHP_BRINGUP_CPU. This allows
the architecture to parallelise the slow asynchronous tasks like sending
INIT/SIPI and waiting for the AP to come to life.

There is a subtlety here: even with an empty CPUHP_BP_PARALLEL_DYN step,
this means that *all* CPUs are brought through the prepare states and to
CPUHP_BP_PREPARE_DYN before any of them are taken to CPUHP_BRINGUP_CPU
and then are allowed to run for themselves to CPUHP_ONLINE.

So any combination of prepare/start calls which depend on A-B ordering
for each CPU in turn, such as the X2APIC code which used to allocate a
cluster mask 'just in case' and store it in a global variable in the
prep stage, then potentially consume that preallocated structure from
the AP and set the global pointer to NULL to be reallocated in
CPUHP_X2APIC_PREPARE for the next CPU... would explode horribly.

We believe that X2APIC was the only such case, for x86. But this is why
it remains an architecture opt-in. For now.

Note that the new parallel stages do *not* yet bring each AP to the
CPUHP_BRINGUP_CPU state. The final loop in bringup_nonboot_cpus() is
untouched, bringing each AP in turn from the final PARALLEL_DYN state
(or all the way from CPUHP_OFFLINE) to CPUHP_BRINGUP_CPU and then
waiting for that AP to do its own processing and reach CPUHP_ONLINE
before releasing the next. Parallelising that part by bringing them all
to CPUHP_BRINGUP_CPU and then waiting for them all is an exercise for
the future.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
include/linux/cpuhotplug.h
kernel/cpu.c

index 411a428ace4d4bb0d631639fb9609bf4b1ff68fa..128b3b7c71f7f6820e621c682e85a960d0fb191c 100644 (file)
@@ -131,6 +131,8 @@ enum cpuhp_state {
        CPUHP_MIPS_SOC_PREPARE,
        CPUHP_BP_PREPARE_DYN,
        CPUHP_BP_PREPARE_DYN_END                = CPUHP_BP_PREPARE_DYN + 20,
+       CPUHP_BP_PARALLEL_DYN,
+       CPUHP_BP_PARALLEL_DYN_END               = CPUHP_BP_PARALLEL_DYN + 4,
        CPUHP_BRINGUP_CPU,
 
        /*
index 407a2568f35ebb77424320b10287dd499c9572b9..cc6f9bb91fb25dfa0f59c464ed1e0fa4c331d6a4 100644 (file)
@@ -1469,6 +1469,24 @@ int bringup_hibernate_cpu(unsigned int sleep_cpu)
 void bringup_nonboot_cpus(unsigned int setup_max_cpus)
 {
        unsigned int cpu;
+       int n = setup_max_cpus - num_online_cpus();
+
+       /* ∀ parallel pre-bringup state, bring N CPUs to it */
+       if (n > 0) {
+               enum cpuhp_state st = CPUHP_BP_PARALLEL_DYN;
+
+               while (st <= CPUHP_BP_PARALLEL_DYN_END &&
+                      cpuhp_hp_states[st].name) {
+                       int i = n;
+
+                       for_each_present_cpu(cpu) {
+                               cpu_up(cpu, st);
+                               if (!--i)
+                                       break;
+                       }
+                       st++;
+               }
+       }
 
        for_each_present_cpu(cpu) {
                if (num_online_cpus() >= setup_max_cpus)
@@ -1836,6 +1854,10 @@ static int cpuhp_reserve_state(enum cpuhp_state state)
                step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
                end = CPUHP_BP_PREPARE_DYN_END;
                break;
+       case CPUHP_BP_PARALLEL_DYN:
+               step = cpuhp_hp_states + CPUHP_BP_PARALLEL_DYN;
+               end = CPUHP_BP_PARALLEL_DYN_END;
+               break;
        default:
                return -EINVAL;
        }
@@ -1860,14 +1882,15 @@ static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
        /*
         * If name is NULL, then the state gets removed.
         *
-        * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
+        * CPUHP_AP_ONLINE_DYN and CPUHP_BP_P*_DYN are handed out on
         * the first allocation from these dynamic ranges, so the removal
         * would trigger a new allocation and clear the wrong (already
         * empty) state, leaving the callbacks of the to be cleared state
         * dangling, which causes wreckage on the next hotplug operation.
         */
        if (name && (state == CPUHP_AP_ONLINE_DYN ||
-                    state == CPUHP_BP_PREPARE_DYN)) {
+                    state == CPUHP_BP_PREPARE_DYN ||
+                    state == CPUHP_BP_PARALLEL_DYN)) {
                ret = cpuhp_reserve_state(state);
                if (ret < 0)
                        return ret;