rlwimi  r11,r12,0,MSR_EE;               \
        mtmsrd  r11,1
 
-#define STD_EXCEPTION_COMMON(trap, label, hdlr)                \
-       .align  7;                                      \
-       .globl label##_common;                          \
-label##_common:                                                \
-       EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);      \
-       DISABLE_INTS;                                   \
-       bl      .save_nvgprs;                           \
-       addi    r3,r1,STACK_FRAME_OVERHEAD;             \
-       bl      hdlr;                                   \
-       b       .ret_from_except
+/* Exception "addition" for EXCEPTION_COMMON: call .save_nvgprs. */
+#define ADD_NVGPRS                             \
+       bl      .save_nvgprs
+
+/*
+ * Exception "addition", wrapped in a feature section keyed on CPU_FTR_CTRL:
+ * load the local flags word at TI_LOCAL_FLAGS from the address obtained by
+ * clearing the low THREAD_SHIFT bits of r1, and branch-and-link to
+ * ppc64_runlatch_on_trampoline when _TLF_RUNLATCH is not set in it.
+ * Writes r0, r3, r4 and CR0 (andi.), plus LR when the call is taken.
+ */
+#define RUNLATCH_ON                            \
+BEGIN_FTR_SECTION                              \
+       clrrdi  r3,r1,THREAD_SHIFT;             \
+       ld      r4,TI_LOCAL_FLAGS(r3);          \
+       andi.   r0,r4,_TLF_RUNLATCH;            \
+       beql    ppc64_runlatch_on_trampoline;   \
+END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
+
+/*
+ * Emit the global label##_common entry point for an exception:
+ * run EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN), then the caller-supplied
+ * 'additions' instruction sequence, then call hdlr with
+ * r3 = r1 + STACK_FRAME_OVERHEAD, and finally branch to 'ret'.
+ */
+#define EXCEPTION_COMMON(trap, label, hdlr, ret, additions)    \
+       .align  7;                                              \
+       .globl label##_common;                                  \
+label##_common:                                                        \
+       EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);              \
+       additions;                                              \
+       addi    r3,r1,STACK_FRAME_OVERHEAD;                     \
+       bl      hdlr;                                           \
+       b       ret
+
+/*
+ * EXCEPTION_COMMON variant that runs ADD_NVGPRS (bl .save_nvgprs) and then
+ * DISABLE_INTS before calling hdlr, and exits through ret_from_except.
+ */
+#define STD_EXCEPTION_COMMON(trap, label, hdlr)                        \
+       EXCEPTION_COMMON(trap, label, hdlr, ret_from_except,    \
+                        ADD_NVGPRS;DISABLE_INTS)
 
 /*
  * Like STD_EXCEPTION_COMMON, but for exceptions that can occur
  * in the idle task and therefore need the special idle handling
  * (finish nap and runlatch)
  */
-#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr)  \
-       .align  7;                                      \
-       .globl label##_common;                          \
-label##_common:                                                \
-       EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);      \
-       FINISH_NAP;                                     \
-       DISABLE_INTS;                                   \
-BEGIN_FTR_SECTION                                      \
-       bl      .ppc64_runlatch_on;                     \
-END_FTR_SECTION_IFSET(CPU_FTR_CTRL)                    \
-       addi    r3,r1,STACK_FRAME_OVERHEAD;             \
-       bl      hdlr;                                   \
-       b       .ret_from_except_lite
+/*
+ * Built on EXCEPTION_COMMON: runs FINISH_NAP, RUNLATCH_ON and DISABLE_INTS,
+ * in that order, before calling hdlr, and exits through
+ * ret_from_except_lite.
+ */
+#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr)            \
+       EXCEPTION_COMMON(trap, label, hdlr, ret_from_except_lite, \
+                        FINISH_NAP;RUNLATCH_ON;DISABLE_INTS)
 
 /*
  * When the idle code in power4_idle puts the CPU into NAP mode,
 
 
 #define proc_trap()    asm volatile("trap")
 
-#ifdef CONFIG_PPC64
-
-extern void ppc64_runlatch_on(void);
-extern void __ppc64_runlatch_off(void);
-
-#define ppc64_runlatch_off()                                   \
-       do {                                                    \
-               if (cpu_has_feature(CPU_FTR_CTRL) &&            \
-                   test_thread_flag(TIF_RUNLATCH))             \
-                       __ppc64_runlatch_off();                 \
-       } while (0)
+/*
+ * Copy register r1 (the stack pointer, going by the macro name) into a
+ * local with "mr" and yield it as an unsigned long.
+ */
+#define __get_SP()     ({unsigned long sp; \
+                       asm volatile("mr %0,1": "=r" (sp)); sp;})
 
 extern unsigned long scom970_read(unsigned int address);
 extern void scom970_write(unsigned int address, unsigned long value);
 
-#else
-#define ppc64_runlatch_on()
-#define ppc64_runlatch_off()
-
-#endif /* CONFIG_PPC64 */
-
-#define __get_SP()     ({unsigned long sp; \
-                       asm volatile("mr %0,1": "=r" (sp)); sp;})
-
 struct pt_regs;
 
 extern void ppc_save_regs(struct pt_regs *regs);
 
 
 extern struct dentry *powerpc_debugfs_root;
 
+#ifdef CONFIG_PPC64
+
+extern void __ppc64_runlatch_on(void);
+extern void __ppc64_runlatch_off(void);
+
+/*
+ * Turn the runlatch off if CPU_FTR_CTRL is present and the current thread's
+ * _TLF_RUNLATCH local flag is set.
+ *
+ * We hard-disable interrupts by hand around the call and hard-enable them
+ * again only if MSR_EE was set on entry: this is called in the idle loop
+ * and we don't want to interfere with soft-disable/enable and interrupt
+ * replay.
+ */
+#define ppc64_runlatch_off()                                   \
+       do {                                                    \
+               if (cpu_has_feature(CPU_FTR_CTRL) &&            \
+                   test_thread_local_flags(_TLF_RUNLATCH)) {   \
+                       unsigned long msr = mfmsr();            \
+                       __hard_irq_disable();                   \
+                       __ppc64_runlatch_off();                 \
+                       if (msr & MSR_EE)                       \
+                               __hard_irq_enable();            \
+               }                                               \
+       } while (0)
+
+/*
+ * Counterpart of ppc64_runlatch_off(): when CPU_FTR_CTRL is present and
+ * _TLF_RUNLATCH is not yet set, call __ppc64_runlatch_on() with interrupts
+ * hard-disabled, then hard-enable them again only if MSR_EE was set on
+ * entry.
+ */
+#define ppc64_runlatch_on()                                    \
+       do {                                                    \
+               if (cpu_has_feature(CPU_FTR_CTRL) &&            \
+                   !test_thread_local_flags(_TLF_RUNLATCH)) {  \
+                       unsigned long msr = mfmsr();            \
+                       __hard_irq_disable();                   \
+                       __ppc64_runlatch_on();                  \
+                       if (msr & MSR_EE)                       \
+                               __hard_irq_enable();            \
+               }                                               \
+       } while (0)
+#else
+#define ppc64_runlatch_on()
+#define ppc64_runlatch_off()
+#endif /* CONFIG_PPC64 */
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SYSTEM_H */
 
 #define TIF_NOERROR            12      /* Force successful syscall return */
 #define TIF_NOTIFY_RESUME      13      /* callback before returning to user */
 #define TIF_SYSCALL_TRACEPOINT 15      /* syscall tracepoint instrumentation */
-#define TIF_RUNLATCH           16      /* Is the runlatch enabled? */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
 #define TLF_SLEEPING           1       /* suspend code enabled SLEEP mode */
 #define TLF_RESTORE_SIGMASK    2       /* Restore signal mask in do_signal */
 #define TLF_LAZY_MMU           3       /* tlb_batch is active */
+#define TLF_RUNLATCH           4       /* Is the runlatch enabled? (bit number; the mask is _TLF_RUNLATCH) */
 
 #define _TLF_NAPPING           (1 << TLF_NAPPING)
 #define _TLF_SLEEPING          (1 << TLF_SLEEPING)
 #define _TLF_RESTORE_SIGMASK   (1 << TLF_RESTORE_SIGMASK)
 #define _TLF_LAZY_MMU          (1 << TLF_LAZY_MMU)
+#define _TLF_RUNLATCH          (1 << TLF_RUNLATCH)
 
 #ifndef __ASSEMBLY__
 #define HAVE_SET_RESTORE_SIGMASK       1
        set_bit(TIF_SIGPENDING, &ti->flags);
 }
 
+/*
+ * Return true if any bit of the mask @flags is set in the local_flags
+ * word of the current thread's thread_info.
+ */
+static inline bool test_thread_local_flags(unsigned int flags)
+{
+       return (current_thread_info()->local_flags & flags) != 0;
+}
+
 #ifdef CONFIG_PPC64
 #define is_32bit_task()        (test_thread_flag(TIF_32BIT))
 #else
 
 system_call_entry:
        b       system_call_common
 
+/*
+ * Reached via "beql" from RUNLATCH_ON; forwards with a plain branch to
+ * .__ppc64_runlatch_on.
+ * NOTE(review): presumably exists because the conditional beql cannot
+ * reach the C function directly -- confirm.
+ */
+ppc64_runlatch_on_trampoline:
+       b       .__ppc64_runlatch_on
+
 /*
  * Here we have detected that the kernel stack pointer is bad.
  * R9 contains the saved CR, r13 points to the paca,
 
 EXPORT_SYMBOL(dump_stack);
 
 #ifdef CONFIG_PPC64
-void ppc64_runlatch_on(void)
+/* Called with hard IRQs off */
+void __ppc64_runlatch_on(void)
 {
+       struct thread_info *ti = current_thread_info();
        unsigned long ctrl;
 
-       if (cpu_has_feature(CPU_FTR_CTRL) && !test_thread_flag(TIF_RUNLATCH)) {
-               HMT_medium();
-
-               ctrl = mfspr(SPRN_CTRLF);
-               ctrl |= CTRL_RUNLATCH;
-               mtspr(SPRN_CTRLT, ctrl);
+       ctrl = mfspr(SPRN_CTRLF);
+       ctrl |= CTRL_RUNLATCH;
+       mtspr(SPRN_CTRLT, ctrl);
 
-               set_thread_flag(TIF_RUNLATCH);
-       }
+       ti->local_flags |= TLF_RUNLATCH;
 }
 
+/* Called with hard IRQs off */
 void __ppc64_runlatch_off(void)
 {
+       struct thread_info *ti = current_thread_info();
        unsigned long ctrl;
 
-       HMT_medium();
-
-       clear_thread_flag(TIF_RUNLATCH);
+       ti->local_flags &= ~TLF_RUNLATCH;
 
        ctrl = mfspr(SPRN_CTRLF);
        ctrl &= ~CTRL_RUNLATCH;
        mtspr(SPRN_CTRLT, ctrl);
 }
-#endif
+#endif /* CONFIG_PPC64 */
 
 #if THREAD_SHIFT < PAGE_SHIFT