We call arch_cpu_idle() with RCU disabled, but then use
local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.
Switch all arch_cpu_idle() implementations to use
raw_local_irq_{en,dis}able() and carefully manage the
lockdep,rcu,tracing state like we do in entry.
(XXX: we really should change arch_cpu_idle() to not return with
interrupts enabled)
Reported-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lkml.kernel.org/r/20201120114925.594122626@infradead.org
 void arch_cpu_idle(void)
 {
        wtint(0);
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 void arch_cpu_idle_dead(void)
 
                arm_pm_idle();
        else
                cpu_do_idle();
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 void arch_cpu_idle_prepare(void)
 
         * tricks
         */
        cpu_do_idle();
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
 
 #ifdef CONFIG_CPU_PM_STOP
        asm volatile("stop\n");
 #endif
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 #endif
 
  */
 void arch_cpu_idle(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
        __asm__("sleep");
 }
 
 
 {
        __vmwait();
        /*  interrupts wake us up, but irqs are still disabled */
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 /*
 
        if (mark_idle)
                (*mark_idle)(1);
 
-       safe_halt();
+       raw_safe_halt();
 
        if (mark_idle)
                (*mark_idle)(0);
 
 
 void arch_cpu_idle(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 {
        unsigned long cfg = read_c0_conf();
        write_c0_conf(cfg | R30XX_CONF_HALT);
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 static void __cpuidle r39xx_wait(void)
 {
        if (!need_resched())
                write_c0_conf(read_c0_conf() | TX39_CONF_HALT);
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 void __cpuidle r4k_wait(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
        __r4k_wait();
 }
 
                "       .set    arch=r4000      \n"
                "       wait                    \n"
                "       .set    pop             \n");
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 /*
                "       wait                                            \n"
                "       mtc0    $1, $12         # stalls until W stage  \n"
                "       .set    pop                                     \n");
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 /*
        if (cpu_wait)
                cpu_wait();
        else
-               local_irq_enable();
+               raw_local_irq_enable();
 }
 
 #ifdef CONFIG_CPU_IDLE
 
 
 void arch_cpu_idle(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 /*
 
  */
 void arch_cpu_idle(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
        if (mfspr(SPR_UPR) & SPR_UPR_PMP)
                mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
 }
 
 
 void __cpuidle arch_cpu_idle(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
 
        /* nop on real hardware, qemu will idle sleep. */
        asm volatile("or %%r10,%%r10,%%r10\n":::);
 
                 * interrupts enabled, some don't.
                 */
                if (irqs_disabled())
-                       local_irq_enable();
+                       raw_local_irq_enable();
        } else {
-               local_irq_enable();
+               raw_local_irq_enable();
                /*
                 * Go into low thread priority and possibly
                 * low power mode.
 
 void arch_cpu_idle(void)
 {
        wait_for_interrupt();
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 void show_regs(struct pt_regs *regs)
 
                PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
        clear_cpu_flag(CIF_NOHZ_DELAY);
 
-       local_irq_save(flags);
+       raw_local_irq_save(flags);
        /* Call the assembler magic in entry.S */
        psw_idle(idle, psw_mask);
-       local_irq_restore(flags);
+       raw_local_irq_restore(flags);
 
        /* Account time spent with enabled wait psw loaded as idle time. */
        raw_write_seqcount_begin(&idle->seqcount);
 void arch_cpu_idle(void)
 {
        enabled_wait();
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 void arch_cpu_idle_exit(void)
 
 void default_idle(void)
 {
        set_bl_bit();
-       local_irq_enable();
+       raw_local_irq_enable();
        /* Isn't this racy ? */
        cpu_sleep();
        clear_bl_bit();
 
        register unsigned int address = (unsigned int)leon3_irqctrl_regs;
 
        /* Interrupts need to be enabled to not hang the CPU */
-       local_irq_enable();
+       raw_local_irq_enable();
 
        __asm__ __volatile__ (
                "wr     %%g0, %%asr19\n"
 static void pmc_leon_idle(void)
 {
        /* Interrupts need to be enabled to not hang the CPU */
-       local_irq_enable();
+       raw_local_irq_enable();
 
        /* For systems without power-down, this will be no-op */
        __asm__ __volatile__ ("wr       %g0, %asr19\n\t");
 
 {
        if (sparc_idle)
                (*sparc_idle)();
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 /* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */
 
 {
        if (tlb_type != hypervisor) {
                touch_nmi_watchdog();
-               local_irq_enable();
+               raw_local_irq_enable();
        } else {
                unsigned long pstate;
 
-               local_irq_enable();
+               raw_local_irq_enable();
 
                 /* The sun4v sleeping code requires that we have PSTATE.IE cleared over
                  * the cpu sleep hypervisor call.
 
 {
        cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
        um_idle_sleep();
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 int __cant_sleep(void) {
 
 
 static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
 {
-       trace_hardirqs_on();
-
        mds_idle_clear_cpu_buffers();
        /* "mwait %eax, %ecx;" */
        asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
 
  */
 void __cpuidle default_idle(void)
 {
-       safe_halt();
+       raw_safe_halt();
 }
 #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
 EXPORT_SYMBOL(default_idle);
 /*
  * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
  * states (local apic timer and TSC stop).
+ *
+ * XXX this function is completely buggered vs RCU and tracing.
  */
 static void amd_e400_idle(void)
 {
         * The switch back from broadcast mode needs to be called with
         * interrupts disabled.
         */
-       local_irq_disable();
+       raw_local_irq_disable();
        tick_broadcast_exit();
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 /*
                if (!need_resched())
                        __sti_mwait(0, 0);
                else
-                       local_irq_enable();
+                       raw_local_irq_enable();
        } else {
-               local_irq_enable();
+               raw_local_irq_enable();
        }
        __current_clr_polling();
 }
 
 void __weak arch_cpu_idle(void)
 {
        cpu_idle_force_poll = 1;
-       local_irq_enable();
+       raw_local_irq_enable();
 }
 
 /**
 
                trace_cpu_idle(1, smp_processor_id());
                stop_critical_timings();
+
+               /*
+                * arch_cpu_idle() is supposed to enable IRQs, however
+                * we can't do that because of RCU and tracing.
+                *
+                * Trace IRQs enable here, then switch off RCU, and have
+                * arch_cpu_idle() use raw_local_irq_enable(). Note that
+                * rcu_idle_enter() relies on lockdep IRQ state, so switch that
+                * last -- this is very similar to the entry code.
+                */
+               trace_hardirqs_on_prepare();
+               lockdep_hardirqs_on_prepare(_THIS_IP_);
                rcu_idle_enter();
+               lockdep_hardirqs_on(_THIS_IP_);
+
                arch_cpu_idle();
+
+               /*
+                * OK, so IRQs are enabled here, but RCU needs them disabled to
+                * turn itself back on.. funny thing is that disabling IRQs
+                * will cause tracing, which needs RCU. Jump through hoops to
+                * make it 'work'.
+                */
+               raw_local_irq_disable();
+               lockdep_hardirqs_off(_THIS_IP_);
                rcu_idle_exit();
+               lockdep_hardirqs_on(_THIS_IP_);
+               raw_local_irq_enable();
+
                start_critical_timings();
                trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
        }