www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
panic, x86: Allow CPUs to save registers even if looping in NMI context
author Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Mon, 14 Dec 2015 10:19:10 +0000 (11:19 +0100)
committer Chuck Anderson <chuck.anderson@oracle.com>
Mon, 27 Feb 2017 19:26:17 +0000 (11:26 -0800)
Orabug: 25505543

Currently, kdump_nmi_shootdown_cpus(), a subroutine of crash_kexec(),
sends an NMI IPI to CPUs which haven't called panic() to stop them,
save their register information and do some cleanups for crash dumping.
However, if such a CPU is infinitely looping in NMI context, we fail to
save its register information into the crash dump.

For example, this can happen when unknown NMIs are broadcast to all
CPUs as follows:

  CPU 0                             CPU 1
  ===========================       ==========================
  receive an unknown NMI
  unknown_nmi_error()
    panic()                         receive an unknown NMI
      spin_trylock(&panic_lock)     unknown_nmi_error()
      crash_kexec()                   panic()
                                        spin_trylock(&panic_lock)
                                        panic_smp_self_stop()
                                          infinite loop
        kdump_nmi_shootdown_cpus()
          issue NMI IPI -----------> blocked until IRET
                                          infinite loop...

Here, since CPU 1 is in NMI context, the second NMI from CPU 0 is
blocked until CPU 1 executes IRET. However, CPU 1 never executes IRET,
so the NMI is not handled and the callback function to save registers is
never called.

In practice, this can happen on some servers which broadcast NMIs to all
CPUs when the NMI button is pushed.

To save registers in this case, we need to:

  a) Return from NMI handler instead of looping infinitely
  or
  b) Call the callback function directly from the infinite loop

Inherently, a) is risky because NMI is also used to prevent corrupted
data from being propagated to devices.  So, we chose b).

This patch does the following:

1. Move the infinite looping of CPUs which haven't called panic() in NMI
   context (actually done by panic_smp_self_stop()) outside of panic() to
   enable us to refer to pt_regs. Please note that panic_smp_self_stop() is
   still used in normal context.

2. Call the callback of kdump_nmi_shootdown_cpus() directly to save
   registers and do some cleanups after setting waiting_for_crash_ipi, which
   is used for counting down the number of CPUs which handled the callback.

Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Aaron Tomlin <atomlin@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: David Hildenbrand <dahi@linux.vnet.ibm.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Gobinda Charan Maji <gobinda.cemk07@gmail.com>
Cc: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Cc: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Javi Merino <javi.merino@arm.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: kexec@lists.infradead.org
Cc: linux-doc@vger.kernel.org
Cc: lkml <linux-kernel@vger.kernel.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Stefan Lippers-Hollmann <s.l-h@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Link: http://lkml.kernel.org/r/20151210014628.25437.75256.stgit@softrs
[ Cleanup comments, fixup formatting. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
(cherry picked from commit 58c5661f2144c089bbc2e5d87c9ec1dc1d2964fe)
Signed-off-by: Somasundaram Krishnasamy <somasundaram.krishnasamy@oracle.com>
Reviewed-by: Jack Vogel <jack.vogel@oracle.com>
arch/x86/kernel/nmi.c
arch/x86/kernel/reboot.c
include/linux/kernel.h
kernel/panic.c
kernel/watchdog.c

index 1bceb35140ef2d6deb3de37aec2841d23f07c781..26cf62a1345bfc968ca2939410dae87fddc736fc 100644 (file)
@@ -231,7 +231,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
 #endif
 
        if (panic_on_unrecovered_nmi)
-               nmi_panic("NMI: Not continuing");
+               nmi_panic(regs, "NMI: Not continuing");
 
        pr_emerg("Dazed and confused, but trying to continue\n");
 
@@ -256,7 +256,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
        show_regs(regs);
 
        if (panic_on_io_nmi) {
-               nmi_panic("NMI IOCK error: Not continuing");
+               nmi_panic(regs, "NMI IOCK error: Not continuing");
 
                /*
                 * If we end up here, it means we have received an NMI while
@@ -305,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 
        pr_emerg("Do you have a strange power saving mode enabled?\n");
        if (unknown_nmi_panic || panic_on_unrecovered_nmi)
-               nmi_panic("NMI: Not continuing");
+               nmi_panic(regs, "NMI: Not continuing");
 
        pr_emerg("Dazed and confused, but trying to continue\n");
 }
index 86db4bcd7ce52bcb74a5bf42efcd8e7152488cf1..30e22a4782cc162af2627ac08375f10848bb2700 100644 (file)
@@ -718,6 +718,7 @@ static int crashing_cpu;
 static nmi_shootdown_cb shootdown_callback;
 
 static atomic_t waiting_for_crash_ipi;
+static int crash_ipi_issued;
 
 static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
 {
@@ -780,6 +781,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
 
        smp_send_nmi_allbutself();
 
+       /* Kick CPUs looping in NMI context. */
+       WRITE_ONCE(crash_ipi_issued, 1);
+
        msecs = 1000; /* Wait at most a second for the other cpus to stop */
        while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
                mdelay(1);
@@ -788,6 +792,22 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
 
        /* Leave the nmi callback set */
 }
+
+/* Override the weak function in kernel/panic.c */
+void nmi_panic_self_stop(struct pt_regs *regs)
+{
+       while (1) {
+               /*
+                * Wait for the crash dumping IPI to be issued, and then
+                * call its callback directly.
+                */
+               if (READ_ONCE(crash_ipi_issued))
+                       crash_nmi_callback(0, regs); /* Don't return */
+
+               cpu_relax();
+       }
+}
+
 #else /* !CONFIG_SMP */
 void nmi_shootdown_cpus(nmi_shootdown_cb callback)
 {
index a4bcd4427e726f351de1df163412b9fe76767436..14616f6c18570df72fcf06d82c4e575ff39036df 100644 (file)
@@ -254,6 +254,7 @@ extern long (*panic_blink)(int state);
 __printf(1, 2)
 void panic(const char *fmt, ...)
        __noreturn __cold;
+void nmi_panic_self_stop(struct pt_regs *);
 extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
@@ -448,14 +449,21 @@ extern atomic_t panic_cpu;
 
 /*
  * A variant of panic() called from NMI context. We return if we've already
- * panicked on this CPU.
+ * panicked on this CPU. If another CPU already panicked, loop in
+ * nmi_panic_self_stop() which can provide architecture dependent code such
+ * as saving register state for crash dump.
  */
-#define nmi_panic(fmt, ...)                                            \
+#define nmi_panic(regs, fmt, ...)                                      \
 do {                                                                   \
-       int cpu = raw_smp_processor_id();                               \
+       int old_cpu, cpu;                                               \
                                                                        \
-       if (atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu) != cpu)  \
+       cpu = raw_smp_processor_id();                                   \
+       old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu);   \
+                                                                       \
+       if (old_cpu == PANIC_CPU_INVALID)                               \
                panic(fmt, ##__VA_ARGS__);                              \
+       else if (old_cpu != cpu)                                        \
+               nmi_panic_self_stop(regs);                              \
 } while (0)
 
 /*
index 1122115d21920685e24d41fbf8dc77027f058830..e7ed1a878872cc9dc60b80858f3ee0940dbd9ab4 100644 (file)
@@ -60,6 +60,15 @@ void __weak panic_smp_self_stop(void)
                cpu_relax();
 }
 
+/*
+ * Stop ourselves in NMI context if another CPU has already panicked. Arch code
+ * may override this to prepare for crash dumping, e.g. save regs info.
+ */
+void __weak nmi_panic_self_stop(struct pt_regs *regs)
+{
+       panic_smp_self_stop();
+}
+
 atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
 
 /**
index 5c329004116b531dbe15a460e8a0e2105fb05797..5b2702a6c1b541d81a4c96af72a0a199c1794702 100644 (file)
@@ -324,7 +324,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
                        trigger_allbutself_cpu_backtrace();
 
                if (hardlockup_panic)
-                       nmi_panic("Hard LOCKUP");
+                       nmi_panic(regs, "Hard LOCKUP");
 
                __this_cpu_write(hard_watchdog_warn, true);
                return;