}
 
 /*
- * Dump stacks of all tasks running on stalled CPUs.
+ * Dump stacks of all tasks running on stalled CPUs.  First try using
+ * NMIs, but fall back to manual remote stack tracing on architectures
+ * that don't support NMI-based stack dumps.  The NMI-triggered stack
+ * traces are more accurate because they are printed by the target CPU.
  */
 static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
 {
 
        rcu_for_each_leaf_node(rsp, rnp) {
                raw_spin_lock_irqsave_rcu_node(rnp, flags);
-               if (rnp->qsmask != 0) {
-                       for_each_leaf_node_possible_cpu(rnp, cpu)
-                               if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))
+               for_each_leaf_node_possible_cpu(rnp, cpu)
+                       if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))
+                               if (!trigger_single_cpu_backtrace(cpu))
                                        dump_cpu_task(cpu);
-               }
                raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
        }
 }