panic, x86: Allow CPUs to save registers even if looping in NMI context

author Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>

Mon, 14 Dec 2015 10:19:10 +0000 (11:19 +0100)

committer Chuck Anderson <chuck.anderson@oracle.com>

Mon, 27 Feb 2017 19:26:17 +0000 (11:26 -0800)
author Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Mon, 14 Dec 2015 10:19:10 +0000 (11:19 +0100)
committer Chuck Anderson <chuck.anderson@oracle.com>
Mon, 27 Feb 2017 19:26:17 +0000 (11:26 -0800)
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c

index 1bceb35140ef2d6deb3de37aec2841d23f07c781..26cf62a1345bfc968ca2939410dae87fddc736fc 100644 (file)
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -231,7 +231,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
  #endif
  
         if (panic_on_unrecovered_nmi)
-               nmi_panic("NMI: Not continuing");
+               nmi_panic(regs, "NMI: Not continuing");
  
         pr_emerg("Dazed and confused, but trying to continue\n");
  
@@ -256,7 +256,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
         show_regs(regs);
  
         if (panic_on_io_nmi) {
-               nmi_panic("NMI IOCK error: Not continuing");
+               nmi_panic(regs, "NMI IOCK error: Not continuing");
  
                 /*
                  * If we end up here, it means we have received an NMI while
@@ -305,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
  
         pr_emerg("Do you have a strange power saving mode enabled?\n");
         if (unknown_nmi_panic || panic_on_unrecovered_nmi)
-               nmi_panic("NMI: Not continuing");
+               nmi_panic(regs, "NMI: Not continuing");
  
         pr_emerg("Dazed and confused, but trying to continue\n");
  }
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c

index 86db4bcd7ce52bcb74a5bf42efcd8e7152488cf1..30e22a4782cc162af2627ac08375f10848bb2700 100644 (file)
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -718,6 +718,7 @@ static int crashing_cpu;
  static nmi_shootdown_cb shootdown_callback;
  
  static atomic_t waiting_for_crash_ipi;
+static int crash_ipi_issued;
  
  static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
  {
@@ -780,6 +781,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
  
         smp_send_nmi_allbutself();
  
+       /* Kick CPUs looping in NMI context. */
+       WRITE_ONCE(crash_ipi_issued, 1);
+
         msecs = 1000; /* Wait at most a second for the other cpus to stop */
         while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) {
                 mdelay(1);
@@ -788,6 +792,22 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
  
         /* Leave the nmi callback set */
  }
+
+/* Override the weak function in kernel/panic.c */
+void nmi_panic_self_stop(struct pt_regs *regs)
+{
+       while (1) {
+               /*
+                * Wait for the crash dumping IPI to be issued, and then
+                * call its callback directly.
+                */
+               if (READ_ONCE(crash_ipi_issued))
+                       crash_nmi_callback(0, regs); /* Don't return */
+
+               cpu_relax();
+       }
+}
+
  #else /* !CONFIG_SMP */
  void nmi_shootdown_cpus(nmi_shootdown_cb callback)
  {
diff --git a/include/linux/kernel.h b/include/linux/kernel.h

index a4bcd4427e726f351de1df163412b9fe76767436..14616f6c18570df72fcf06d82c4e575ff39036df 100644 (file)
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -254,6 +254,7 @@ extern long (*panic_blink)(int state);
  __printf(1, 2)
  void panic(const char *fmt, ...)
         __noreturn __cold;
+void nmi_panic_self_stop(struct pt_regs *);
  extern void oops_enter(void);
  extern void oops_exit(void);
  void print_oops_end_marker(void);
@@ -448,14 +449,21 @@ extern atomic_t panic_cpu;
  
  /*
   * A variant of panic() called from NMI context. We return if we've already
- * panicked on this CPU.
+ * panicked on this CPU. If another CPU already panicked, loop in
+ * nmi_panic_self_stop() which can provide architecture dependent code such
+ * as saving register state for crash dump.
   */
-#define nmi_panic(fmt, ...)                                            \
+#define nmi_panic(regs, fmt, ...)                                      \
  do {                                                                   \
-       int cpu = raw_smp_processor_id();                               \
+       int old_cpu, cpu;                                               \
                                                                         \
-       if (atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu) != cpu)  \
+       cpu = raw_smp_processor_id();                                   \
+       old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu);   \
+                                                                       \
+       if (old_cpu == PANIC_CPU_INVALID)                               \
                 panic(fmt, ##__VA_ARGS__);                              \
+       else if (old_cpu != cpu)                                        \
+               nmi_panic_self_stop(regs);                              \
  } while (0)
  
  /*
diff --git a/kernel/panic.c b/kernel/panic.c

index 1122115d21920685e24d41fbf8dc77027f058830..e7ed1a878872cc9dc60b80858f3ee0940dbd9ab4 100644 (file)
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -60,6 +60,15 @@ void __weak panic_smp_self_stop(void)
                 cpu_relax();
  }
  
+/*
+ * Stop ourselves in NMI context if another CPU has already panicked. Arch code
+ * may override this to prepare for crash dumping, e.g. save regs info.
+ */
+void __weak nmi_panic_self_stop(struct pt_regs *regs)
+{
+       panic_smp_self_stop();
+}
+
  atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
  
  /**
diff --git a/kernel/watchdog.c b/kernel/watchdog.c

index 5c329004116b531dbe15a460e8a0e2105fb05797..5b2702a6c1b541d81a4c96af72a0a199c1794702 100644 (file)
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -324,7 +324,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
                         trigger_allbutself_cpu_backtrace();
  
                 if (hardlockup_panic)
-                       nmi_panic("Hard LOCKUP");
+                       nmi_panic(regs, "Hard LOCKUP");
  
                 __this_cpu_write(hard_watchdog_warn, true);
                 return;
author	Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
	Mon, 14 Dec 2015 10:19:10 +0000 (11:19 +0100)
committer	Chuck Anderson <chuck.anderson@oracle.com>
	Mon, 27 Feb 2017 19:26:17 +0000 (11:26 -0800)
arch/x86/kernel/nmi.c		patch | blob | history
arch/x86/kernel/reboot.c		patch | blob | history
include/linux/kernel.h		patch | blob | history
kernel/panic.c		patch | blob | history
kernel/watchdog.c		patch | blob | history