void setup_irq_regs(void);
 
+#ifdef __tilegx__
+void arch_trigger_all_cpu_backtrace(bool self);
+#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+#endif
+
 #endif /* _ASM_TILE_IRQ_H */
 
 /* kernel/messaging.c */
 void hv_message_intr(struct pt_regs *, int intnum);
 
+#define        TILE_NMI_DUMP_STACK     1       /* Dump stack for sysrq+'l' */
+
+/* kernel/process.c */
+void do_nmi_dump_stack(struct pt_regs *regs);
+
+/* kernel/traps.c */
+void do_nmi(struct pt_regs *, int fault_num, unsigned long reason);
+
 /* kernel/irq.c */
 void tile_dev_intr(struct pt_regs *, int intnum);
 
 
 /** hv_console_set_ipi */
 #define HV_DISPATCH_CONSOLE_SET_IPI               63
 
+/** hv_send_nmi */
+#define HV_DISPATCH_SEND_NMI                      65
+
 /** One more than the largest dispatch value */
-#define _HV_DISPATCH_END                          64
+#define _HV_DISPATCH_END                          66
 
 
 #ifndef __ASSEMBLER__
 #define INT_DMATLB_ACCESS_DWNCL  INT_DMA_CPL
 /** Device interrupt downcall interrupt vector */
 #define INT_DEV_INTR_DWNCL       INT_WORLD_ACCESS
+/** NMI downcall interrupt vector */
+#define INT_NMI_DWNCL            64
+
+#define HV_NMI_FLAG_FORCE    0x1  /**< Force an NMI downcall regardless of
+               the ICS bit of the client. */
 
 #ifndef __ASSEMBLER__
 
 int hv_dev_poll_cancel(int devhdl);
 
 
+/** NMI information */
+typedef struct
+{
+  /** Result: negative error, or HV_NMI_RESULT_xxx. */
+  int result;
+
+  /** PC from interrupted remote core (if result != HV_NMI_RESULT_FAIL_HV). */
+  HV_VirtAddr pc;
+
+} HV_NMI_Info;
+
+/** NMI issued successfully. */
+#define HV_NMI_RESULT_OK        0
+
+/** NMI not issued: remote tile running at client PL with ICS set. */
+#define HV_NMI_RESULT_FAIL_ICS  1
+
+/** NMI not issued: remote tile waiting in hypervisor. */
+#define HV_NMI_RESULT_FAIL_HV   2
+
+/** Force an NMI downcall regardless of the ICS bit of the client. */
+#define HV_NMI_FLAG_FORCE    0x1
+
+/** Send an NMI interrupt request to a particular tile.
+ *
+ *  This will cause the NMI to be issued on the remote tile regardless
+ *  of the state of the client interrupt mask.  However, if the remote
+ *  tile is in the hypervisor, it will not execute the NMI, and
+ *  HV_NMI_RESULT_FAIL_HV will be returned.  Similarly, if the remote
+ *  tile is in a client interrupt critical section at the time of the
+ *  NMI, it will not execute the NMI, and HV_NMI_RESULT_FAIL_ICS will
+ *  be returned.  In this second case, however, if HV_NMI_FLAG_FORCE
+ *  is set in flags, then the remote tile will enter its NMI interrupt
+ *  vector regardless.  Forcing the NMI vector during an interrupt
+ *  critical section will mean that the client can not safely continue
+ *  execution after handling the interrupt.
+ *
+ *  @param tile Tile to which the NMI request is sent.
+ *  @param info NMI information which is defined by and interpreted by the
+ *         supervisor, is passed to the specified tile, and is
+ *         stored in the SPR register SYSTEM_SAVE_{CLIENT_PL}_2 on the
+ *         specified tile when entering the NMI handler routine.
+ *         Typically, this parameter stores the NMI type, or an aligned
+ *         VA plus some special bits, etc.
+ *  @param flags Flags (HV_NMI_FLAG_xxx).
+ *  @return Information about the requested NMI.
+ */
+HV_NMI_Info hv_send_nmi(HV_Coord tile, unsigned long info, __hv64 flags);
+
+
 /** Scatter-gather list for preada/pwritea calls. */
 typedef struct
 #if CHIP_VA_WIDTH() <= 32
 
 gensym hv_get_ipi_pte, 0x700, 32
 gensym hv_set_pte_super_shift, 0x720, 32
 gensym hv_console_set_ipi, 0x7e0, 32
-gensym hv_glue_internals, 0x800, 30720
+gensym hv_send_nmi, 0x820, 32
+gensym hv_glue_internals, 0x820, 30688
 
 #define hv_get_ipi_pte _hv_get_ipi_pte
 #define hv_set_pte_super_shift _hv_set_pte_super_shift
 #define hv_console_set_ipi _hv_console_set_ipi
+#define hv_send_nmi _hv_send_nmi
 #include <hv/hypervisor.h>
 #undef hv_init
 #undef hv_install_context
 #undef hv_get_ipi_pte
 #undef hv_set_pte_super_shift
 #undef hv_console_set_ipi
+#undef hv_send_nmi
 
 /*
  * Provide macros based on <linux/syscalls.h> to provide a wrapper
         HV_VirtAddr, tlb_va, unsigned long, tlb_length,
         unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask,
         HV_Remote_ASID*, asids, int, asidcount)
+HV_WRAP3(HV_NMI_Info, hv_send_nmi, HV_Coord, tile, unsigned long, info,
+        __hv64, flags)
 
        .ifc \c_routine, handle_perf_interrupt
        mfspr   r2, AUX_PERF_COUNT_STS
        .endif
+       .ifc \c_routine, do_nmi
+       mfspr   r2, SPR_SYSTEM_SAVE_K_2   /* nmi type */
+       .else
+       .endif
        .endif
        .endif
        .endif
 
        /* Synthetic interrupt delivered only by the simulator */
        int_hand     INT_BREAKPOINT, BREAKPOINT, do_breakpoint
+       /* Synthetic interrupt delivered by hv */
+       int_hand     INT_NMI_DWNCL, NMI_DWNCL, do_nmi, handle_nmi
 
 #include <linux/kernel.h>
 #include <linux/tracehook.h>
 #include <linux/signal.h>
+#include <linux/delay.h>
 #include <linux/context_tracking.h>
 #include <asm/stack.h>
 #include <asm/switch_to.h>
 
        dump_stack_regs(regs);
 }
+
+/* To ensure stack dump on tiles occurs one by one. */
+static DEFINE_SPINLOCK(backtrace_lock);
+/* To ensure no backtrace occurs before all of the stack dump are done. */
+static atomic_t backtrace_cpus;
+/* The cpu mask to avoid reentrance. */
+static struct cpumask backtrace_mask;
+
+void do_nmi_dump_stack(struct pt_regs *regs)
+{
+       int is_idle = is_idle_task(current) && !in_interrupt();
+       int cpu;
+
+       nmi_enter();
+       cpu = smp_processor_id();
+       if (WARN_ON_ONCE(!cpumask_test_and_clear_cpu(cpu, &backtrace_mask)))
+               goto done;
+
+       spin_lock(&backtrace_lock);
+       if (is_idle)
+               pr_info("CPU: %d idle\n", cpu);
+       else
+               show_regs(regs);
+       spin_unlock(&backtrace_lock);
+       atomic_dec(&backtrace_cpus);
+done:
+       nmi_exit();
+}
+
+#ifdef __tilegx__
+void arch_trigger_all_cpu_backtrace(bool self)
+{
+       struct cpumask mask;
+       HV_Coord tile;
+       unsigned int timeout;
+       int cpu;
+       int ongoing;
+       HV_NMI_Info info[NR_CPUS];
+
+       ongoing = atomic_cmpxchg(&backtrace_cpus, 0, num_online_cpus() - 1);
+       if (ongoing != 0) {
+               pr_err("Trying to do all-cpu backtrace.\n");
+               pr_err("But another all-cpu backtrace is ongoing (%d cpus left)\n",
+                      ongoing);
+               if (self) {
+                       pr_err("Reporting the stack on this cpu only.\n");
+                       dump_stack();
+               }
+               return;
+       }
+
+       cpumask_copy(&mask, cpu_online_mask);
+       cpumask_clear_cpu(smp_processor_id(), &mask);
+       cpumask_copy(&backtrace_mask, &mask);
+
+       /* Backtrace for myself first. */
+       if (self)
+               dump_stack();
+
+       /* Tentatively dump stack on remote tiles via NMI. */
+       timeout = 100;
+       while (!cpumask_empty(&mask) && timeout) {
+               for_each_cpu(cpu, &mask) {
+                       tile.x = cpu_x(cpu);
+                       tile.y = cpu_y(cpu);
+                       info[cpu] = hv_send_nmi(tile, TILE_NMI_DUMP_STACK, 0);
+                       if (info[cpu].result == HV_NMI_RESULT_OK)
+                               cpumask_clear_cpu(cpu, &mask);
+               }
+
+               mdelay(10);
+               timeout--;
+       }
+
+       /* Warn about cpus stuck in ICS and decrement their counts here. */
+       if (!cpumask_empty(&mask)) {
+               for_each_cpu(cpu, &mask) {
+                       switch (info[cpu].result) {
+                       case HV_NMI_RESULT_FAIL_ICS:
+                               pr_warn("Skipping stack dump of cpu %d in ICS at pc %#llx\n",
+                                       cpu, info[cpu].pc);
+                               break;
+                       case HV_NMI_RESULT_FAIL_HV:
+                               pr_warn("Skipping stack dump of cpu %d in hypervisor\n",
+                                       cpu);
+                               break;
+                       case HV_ENOSYS:
+                               pr_warn("Hypervisor too old to allow remote stack dumps.\n");
+                               goto skip_for_each;
+                       default:  /* should not happen */
+                               pr_warn("Skipping stack dump of cpu %d [%d,%#llx]\n",
+                                       cpu, info[cpu].result, info[cpu].pc);
+                               break;
+                       }
+               }
+skip_for_each:
+               atomic_sub(cpumask_weight(&mask), &backtrace_cpus);
+       }
+}
+#endif /* __tilegx_ */
 
        exception_exit(prev_state);
 }
 
+void do_nmi(struct pt_regs *regs, int fault_num, unsigned long reason)
+{
+       switch (reason) {
+       case TILE_NMI_DUMP_STACK:
+               do_nmi_dump_stack(regs);
+               break;
+       default:
+               panic("Unexpected do_nmi type %ld", reason);
+               return;
+       }
+}
+
 void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
 {
        _dump_stack(dummy, pc, lr, sp, r52);