As pointed out by commit
  de9b8f5dcbd9 ("sched: Fix crash trying to dequeue/enqueue the idle thread")
init_idle() can and will be invoked more than once on the same idle
task. At boot time, it is invoked for the boot CPU thread by
sched_init(). Then smp_init() creates the threads for all the secondary
CPUs and invokes init_idle() on them.
As the hotplug machinery brings the secondaries to life, it will issue
calls to idle_thread_get(), which itself invokes init_idle() yet again.
In this case it's invoked twice more per secondary: at _cpu_up(), and at
bringup_cpu().
Given smp_init() already initializes the idle tasks for all *possible*
CPUs, no further initialization should be required. Now, removing
init_idle() from idle_thread_get() exposes some interesting expectations
with regard to the idle task's preempt_count: the secondary startup always
issues a preempt_disable(), requiring some reset of the preempt count to 0
between hot-unplug and hotplug, which is currently served by
idle_thread_get() -> init_idle().
Given the idle task is supposed to have preemption disabled once and never
see it re-enabled, it seems that what we actually want is to initialize its
preempt_count to PREEMPT_DISABLED and leave it there. Do that, and remove
init_idle() from idle_thread_get().
Secondary startups were patched via coccinelle:
  @begone@
  @@
  -preempt_disable();
  ...
  cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210512094636.2958515-1-valentin.schneider@arm.com
 
        DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n",
              cpuid, current, current->active_mm));
 
-       preempt_disable();
        cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 
        pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu);
 
        local_irq_enable();
-       preempt_disable();
        cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 
 #endif
        pr_debug("CPU%u: Booted secondary processor\n", cpu);
 
-       preempt_disable();
        trace_hardirqs_off();
 
        /*
 
 } while (0)
 
 #define init_idle_preempt_count(p, cpu) do { \
-       task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \
+       task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
 } while (0)
 
 static inline void set_preempt_need_resched(void)
 
                init_gic_priority_masking();
 
        rcu_cpu_starting(cpu);
-       preempt_disable();
        trace_hardirqs_off();
 
        /*
 
        pr_info("CPU%u Online: %s...\n", cpu, __func__);
 
        local_irq_enable();
-       preempt_disable();
        cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 
 #endif
        efi_map_pal_code();
        cpu_init();
-       preempt_disable();
        smp_callin();
 
        cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 
         */
 
        calibrate_delay();
-       preempt_disable();
        cpu = smp_processor_id();
        cpu_data[cpu].udelay_val = loops_per_jiffy;
 
 
        set_cpu_online(cpu, true);
 
        local_irq_enable();
-
-       preempt_disable();
        /*
         * OK, it's off to the idle thread for us
         */
 
 #endif
 
        smp_cpu_init(slave_id);
-       preempt_disable();
 
        flush_cache_all_local(); /* start with known state */
        flush_tlb_all_local(NULL);
 
        smp_store_cpu_info(cpu);
        set_dec(tb_ticks_per_jiffy);
        rcu_cpu_starting(cpu);
-       preempt_disable();
        cpu_callin_map[cpu] = 1;
 
        if (smp_ops->setup_cpu)
 
         * Disable preemption before enabling interrupts, so we don't try to
         * schedule a CPU that hasn't actually started yet.
         */
-       preempt_disable();
        local_irq_enable();
        cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
 #define init_task_preempt_count(p)     do { } while (0)
 
 #define init_idle_preempt_count(p, cpu)        do { \
-       S390_lowcore.preempt_count = PREEMPT_ENABLED; \
+       S390_lowcore.preempt_count = PREEMPT_DISABLED; \
 } while (0)
 
 static inline void set_preempt_need_resched(void)
 #define init_task_preempt_count(p)     do { } while (0)
 
 #define init_idle_preempt_count(p, cpu)        do { \
-       S390_lowcore.preempt_count = PREEMPT_ENABLED; \
+       S390_lowcore.preempt_count = PREEMPT_DISABLED; \
 } while (0)
 
 static inline void set_preempt_need_resched(void)
 
        restore_access_regs(S390_lowcore.access_regs_save_area);
        cpu_init();
        rcu_cpu_starting(cpu);
-       preempt_disable();
        init_cpu_timer();
        vtime_init();
        vdso_getcpu_init();
 
 
        per_cpu_trap_init();
 
-       preempt_disable();
-
        notify_cpu_starting(cpu);
 
        local_irq_enable();
 
         */
        arch_cpu_pre_starting(arg);
 
-       preempt_disable();
        cpu = smp_processor_id();
 
        notify_cpu_starting(cpu);
 
 
        set_cpu_online(cpuid, true);
 
-       /* idle thread is expected to have preempt disabled */
-       preempt_disable();
-
        local_irq_enable();
 
        cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 
 #define init_task_preempt_count(p) do { } while (0)
 
 #define init_idle_preempt_count(p, cpu) do { \
-       per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \
+       per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \
 } while (0)
 
 /*
 
        cpu_init();
        rcu_cpu_starting(raw_smp_processor_id());
        x86_cpuinit.early_percpu_clock_init();
-       preempt_disable();
        smp_callin();
 
        enable_start_cpu0 = 0;
 
        cpumask_set_cpu(cpu, mm_cpumask(mm));
        enter_lazy_tlb(mm, current);
 
-       preempt_disable();
        trace_hardirqs_off();
 
        calibrate_delay();
 
 } while (0)
 
 #define init_idle_preempt_count(p, cpu) do { \
-       task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \
+       task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
 } while (0)
 
 static __always_inline void set_preempt_need_resched(void)
 
         * time - but meanwhile we still have a functioning scheduler.
         */
        sched_init();
-       /*
-        * Disable preemption - early bootup scheduling is extremely
-        * fragile until we cpu_idle() for the first time.
-        */
-       preempt_disable();
+
        if (WARN(!irqs_disabled(),
                 "Interrupts were enabled *very* early, fixing it\n"))
                local_irq_disable();
 
        }
 }
 
-struct task_struct *fork_idle(int cpu)
+struct task_struct * __init fork_idle(int cpu)
 {
        struct task_struct *task;
        struct kernel_clone_args args = {
 
  * NOTE: this function does not set the idle thread's NEED_RESCHED
  * flag, to make booting more robust.
  */
-void init_idle(struct task_struct *idle, int cpu)
+void __init init_idle(struct task_struct *idle, int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
        unsigned long flags;
 
 
        if (!tsk)
                return ERR_PTR(-ENOMEM);
-       init_idle(tsk, cpu);
        return tsk;
 }