*
  * SGI UV MMR definitions
  *
- * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved.
  */
 
 #ifndef _ASM_X86_UV_UV_MMRS_H
     } s;
 };
 
+/* ========================================================================= */
+/*                               UVH_SCRATCH5                                */
+/* ========================================================================= */
+#define UVH_SCRATCH5 0x2d0200UL
+#define UVH_SCRATCH5_32 0x00778
+
+#define UVH_SCRATCH5_SCRATCH5_SHFT 0
+#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
+union uvh_scratch5_u {
+    unsigned long      v;
+    struct uvh_scratch5_s {
+       unsigned long   scratch5 : 64;  /* RW, W1CS */
+    } s;
+};
 
 #endif /* __ASM_UV_MMRS_X86_H__ */
 
 #include <asm/smp.h>
 #include <asm/x86_init.h>
 #include <asm/emergency-restart.h>
+#include <asm/nmi.h>
+
+/* BMC sets a bit this MMR non-zero before sending an NMI */
+#define UVH_NMI_MMR                            UVH_SCRATCH5
+#define UVH_NMI_MMR_CLEAR                      (UVH_NMI_MMR + 8)
+#define UV_NMI_PENDING_MASK                    (1UL << 63)
+DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
 
 DEFINE_PER_CPU(int, x2apic_extra_bits);
 
  */
 int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 {
+       unsigned long real_uv_nmi;
+       int bid;
+
        if (reason != DIE_NMIUNKNOWN)
                return NOTIFY_OK;
 
        if (in_crash_kexec)
                /* do nothing if entering the crash kernel */
                return NOTIFY_OK;
+
        /*
-        * Use a lock so only one cpu prints at a time
-        * to prevent intermixed output.
+        * Each blade has an MMR that indicates when an NMI has been sent
+        * to cpus on the blade. If an NMI is detected, atomically
+        * clear the MMR and update a per-blade NMI count used to
+        * cause each cpu on the blade to notice a new NMI.
+        */
+       bid = uv_numa_blade_id();
+       real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+
+       if (unlikely(real_uv_nmi)) {
+               spin_lock(&uv_blade_info[bid].nmi_lock);
+               real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
+               if (real_uv_nmi) {
+                       uv_blade_info[bid].nmi_count++;
+                       uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
+               }
+               spin_unlock(&uv_blade_info[bid].nmi_lock);
+       }
+
+       if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
+               return NOTIFY_DONE;
+
+       __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
+
+       /*
+        * Use a lock so only one cpu prints at a time.
+        * This prevents intermixed output.
         */
        spin_lock(&uv_nmi_lock);
-       pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
+       pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
        dump_stack();
        spin_unlock(&uv_nmi_lock);
 
 }
 
 static struct notifier_block uv_dump_stack_nmi_nb = {
-       .notifier_call  = uv_handle_nmi
+       .notifier_call  = uv_handle_nmi,
+       .priority = NMI_LOCAL_LOW_PRIOR - 1,
 };
 
 void uv_register_nmi_notifier(void)
        printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
 
        bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
-       uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+       uv_blade_info = kzalloc(bytes, GFP_KERNEL);
        BUG_ON(!uv_blade_info);
+
        for (blade = 0; blade < uv_num_possible_blades(); blade++)
                uv_blade_info[blade].memory_nid = -1;
 
                        uv_blade_info[blade].pnode = pnode;
                        uv_blade_info[blade].nr_possible_cpus = 0;
                        uv_blade_info[blade].nr_online_cpus = 0;
+                       spin_lock_init(&uv_blade_info[blade].nmi_lock);
                        max_pnode = max(pnode, max_pnode);
                        blade++;
                }