#include <asm/segment.h>
 #include <asm/irqflags.h>
 #include <asm/asm.h>
+#include <asm/smap.h>
 #include <linux/linkage.h>
 #include <linux/err.h>
 
        SAVE_ARGS 0,1,0
        /* no need to do an access_ok check here because rbp has been
           32bit zero extended */ 
+       ASM_STAC
 1:     movl    (%rbp),%ebp
        _ASM_EXTABLE(1b,ia32_badarg)
+       ASM_CLAC
        orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        CFI_REMEMBER_STATE
        /* no need to do an access_ok check here because r8 has been
           32bit zero extended */ 
        /* hardware stack frame is complete now */      
+       ASM_STAC
 1:     movl    (%r8),%r9d
        _ASM_EXTABLE(1b,ia32_badarg)
+       ASM_CLAC
        orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        CFI_REMEMBER_STATE
 END(ia32_cstar_target)
                                
 ia32_badarg:
+       ASM_CLAC
        movq $-EFAULT,%rax
        jmp ia32_sysret
        CFI_ENDPROC
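
The shape of the change above — ASM_STAC immediately before the user-mode access, ASM_CLAC immediately after it and on the fault path — is the same pattern applied to every inline-asm user access in the hunks below. A minimal sketch of that pattern in isolation, assuming the kernel's <asm/asm.h> and <asm/smap.h> macros; the helper name and operand choices are illustrative only, not part of the patch:

        /* Read one 32-bit word from user space, holding EFLAGS.AC only
         * across the instruction that can fault. */
        static inline int user_read_u32(u32 *dst, const u32 __user *src)
        {
                int err = 0;

                asm volatile(ASM_STAC "\n"              /* open the AC window  */
                             "1:        movl %2,%1\n"   /* the user-mode load  */
                             "2: " ASM_CLAC "\n"        /* close it again      */
                             ".section .fixup,\"ax\"\n"
                             "3:        movl %3,%0\n"   /* report -EFAULT      */
                             "  xorl %1,%1\n"           /* zero the output     */
                             "  jmp 2b\n"               /* resume after CLAC   */
                             ".previous\n"
                             _ASM_EXTABLE(1b, 3b)
                             : "=r" (err), "=r" (*dst)
                             : "m" (*src), "i" (-EFAULT), "0" (err));
                return err;
        }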
 
 
        /* See comment in fxsave() below. */
 #ifdef CONFIG_AS_FXSAVEQ
-       asm volatile("1:  fxsaveq %[fx]\n\t"
-                    "2:\n"
+       asm volatile(ASM_STAC "\n"
+                    "1:  fxsaveq %[fx]\n\t"
+                    "2: " ASM_CLAC "\n"
                     ".section .fixup,\"ax\"\n"
                     "3:  movl $-1,%[err]\n"
                     "    jmp  2b\n"
                     : [err] "=r" (err), [fx] "=m" (*fx)
                     : "0" (0));
 #else
-       asm volatile("1:  rex64/fxsave (%[fx])\n\t"
-                    "2:\n"
+       asm volatile(ASM_STAC "\n"
+                    "1:  rex64/fxsave (%[fx])\n\t"
+                    "2: " ASM_CLAC "\n"
                     ".section .fixup,\"ax\"\n"
                     "3:  movl $-1,%[err]\n"
                     "    jmp  2b\n"
 
 #include <asm/asm.h>
 #include <asm/errno.h>
 #include <asm/processor.h>
+#include <asm/smap.h>
 
 #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg)    \
-       asm volatile("1:\t" insn "\n"                           \
-                    "2:\t.section .fixup,\"ax\"\n"             \
+       asm volatile("\t" ASM_STAC "\n"                         \
+                    "1:\t" insn "\n"                           \
+                    "2:\t" ASM_CLAC "\n"                       \
+                    "\t.section .fixup,\"ax\"\n"               \
                     "3:\tmov\t%3, %1\n"                        \
                     "\tjmp\t2b\n"                              \
                     "\t.previous\n"                            \
                     : "i" (-EFAULT), "0" (oparg), "1" (0))
 
 #define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg)    \
-       asm volatile("1:\tmovl  %2, %0\n"                       \
+       asm volatile("\t" ASM_STAC "\n"                         \
+                    "1:\tmovl  %2, %0\n"                       \
                     "\tmovl\t%0, %3\n"                         \
                     "\t" insn "\n"                             \
                     "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"     \
                     "\tjnz\t1b\n"                              \
-                    "3:\t.section .fixup,\"ax\"\n"             \
+                    "3:\t" ASM_CLAC "\n"                       \
+                    "\t.section .fixup,\"ax\"\n"               \
                     "4:\tmov\t%5, %1\n"                        \
                     "\tjmp\t3b\n"                              \
                     "\t.previous\n"                            \
        if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
                return -EFAULT;
 
-       asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
-                    "2:\t.section .fixup, \"ax\"\n"
+       asm volatile("\t" ASM_STAC "\n"
+                    "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
+                    "2:\t" ASM_CLAC "\n"
+                    "\t.section .fixup, \"ax\"\n"
                     "3:\tmov     %3, %0\n"
                     "\tjmp     2b\n"
                     "\t.previous\n"
 
 
 #ifdef CONFIG_X86_SMAP
 
-static inline void clac(void)
+static __always_inline void clac(void)
 {
        /* Note: a barrier is implicit in alternative() */
        alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
 }
 
-static inline void stac(void)
+static __always_inline void stac(void)
 {
        /* Note: a barrier is implicit in alternative() */
        alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP);
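
On a CPU without X86_FEATURE_SMAP the alternative() above leaves a 3-byte NOP in place, so stac()/clac() cost nothing; on SMAP-capable hardware the NOP is patched at boot into the real instruction (CLAC encodes as 0f 01 ca, STAC as 0f 01 cb), so no runtime feature test is needed. A sketch of how a C-level user access is meant to be bracketed, mirroring the usercopy hunks further down; the function and the inner copy routine are hypothetical names used only for illustration:

        static unsigned long example_copy_from_user_ll(void *to,
                                                       const void __user *from,
                                                       unsigned long n)
        {
                stac();                         /* EFLAGS.AC=1: user access permitted */
                n = raw_copy_loop(to, from, n); /* must carry its own fault fixups    */
                clac();                         /* EFLAGS.AC=0: user access faults    */
                return n;                       /* bytes left uncopied                */
        }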
 
 #include <linux/string.h>
 #include <asm/asm.h>
 #include <asm/page.h>
+#include <asm/smap.h>
 
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1
 
 #ifdef CONFIG_X86_32
 #define __put_user_asm_u64(x, addr, err, errret)                       \
-       asm volatile("1:        movl %%eax,0(%2)\n"                     \
+       asm volatile(ASM_STAC "\n"                                      \
+                    "1:        movl %%eax,0(%2)\n"                     \
                     "2:        movl %%edx,4(%2)\n"                     \
-                    "3:\n"                                             \
+                    "3: " ASM_CLAC "\n"                                \
                     ".section .fixup,\"ax\"\n"                         \
                     "4:        movl %3,%0\n"                           \
                     "  jmp 3b\n"                                       \
                     : "A" (x), "r" (addr), "i" (errret), "0" (err))
 
 #define __put_user_asm_ex_u64(x, addr)                                 \
-       asm volatile("1:        movl %%eax,0(%1)\n"                     \
+       asm volatile(ASM_STAC "\n"                                      \
+                    "1:        movl %%eax,0(%1)\n"                     \
                     "2:        movl %%edx,4(%1)\n"                     \
-                    "3:\n"                                             \
+                    "3: " ASM_CLAC "\n"                                \
                     _ASM_EXTABLE_EX(1b, 2b)                            \
                     _ASM_EXTABLE_EX(2b, 3b)                            \
                     : : "A" (x), "r" (addr))
 } while (0)
 
 #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret)      \
-       asm volatile("1:        mov"itype" %2,%"rtype"1\n"              \
-                    "2:\n"                                             \
+       asm volatile(ASM_STAC "\n"                                      \
+                    "1:        mov"itype" %2,%"rtype"1\n"              \
+                    "2: " ASM_CLAC "\n"                                \
                     ".section .fixup,\"ax\"\n"                         \
                     "3:        mov %3,%0\n"                            \
                     "  xor"itype" %"rtype"1,%"rtype"1\n"               \
 } while (0)
 
 #define __get_user_asm_ex(x, addr, itype, rtype, ltype)                        \
-       asm volatile("1:        mov"itype" %1,%"rtype"0\n"              \
-                    "2:\n"                                             \
+       asm volatile(ASM_STAC "\n"                                      \
+                    "1:        mov"itype" %1,%"rtype"0\n"              \
+                    "2: " ASM_CLAC "\n"                                \
                     _ASM_EXTABLE_EX(1b, 2b)                            \
                     : ltype(x) : "m" (__m(addr)))
 
  * aliasing issues.
  */
 #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret)      \
-       asm volatile("1:        mov"itype" %"rtype"1,%2\n"              \
-                    "2:\n"                                             \
+       asm volatile(ASM_STAC "\n"                                      \
+                    "1:        mov"itype" %"rtype"1,%2\n"              \
+                    "2: " ASM_CLAC "\n"                                \
                     ".section .fixup,\"ax\"\n"                         \
                     "3:        mov %3,%0\n"                            \
                     "  jmp 2b\n"                                       \
                     : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err))
 
 #define __put_user_asm_ex(x, addr, itype, rtype, ltype)                        \
-       asm volatile("1:        mov"itype" %"rtype"0,%1\n"              \
-                    "2:\n"                                             \
+       asm volatile(ASM_STAC "\n"                                      \
+                    "1:        mov"itype" %"rtype"0,%1\n"              \
+                    "2: " ASM_CLAC "\n"                                \
                     _ASM_EXTABLE_EX(1b, 2b)                            \
                     : : ltype(x), "m" (__m(addr)))
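
Callers of the uaccess macros do not change: get_user() and put_user() still return 0 on success and -EFAULT on a fault, with the STAC/CLAC bracketing now buried inside the __get_user_asm/__put_user_asm expansions above. A minimal usage sketch (kernel context assumed; the function itself is hypothetical):

        static int bump_user_counter(u32 __user *uptr)
        {
                u32 v;

                if (get_user(v, uptr))          /* faults are reported, never oops */
                        return -EFAULT;
                return put_user(v + 1, uptr);   /* 0 on success, -EFAULT on fault  */
        }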
 
 
        if (unlikely(err))
                return -EFAULT;
 
-       __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n"
-                            "2:\n"
+       __asm__ __volatile__(ASM_STAC "\n"
+                            "1: .byte " REX_PREFIX "0x0f,0xae,0x27\n"
+                            "2: " ASM_CLAC "\n"
                             ".section .fixup,\"ax\"\n"
                             "3:  movl $-1,%[err]\n"
                             "    jmp  2b\n"
        u32 lmask = mask;
        u32 hmask = mask >> 32;
 
-       __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
-                            "2:\n"
+       __asm__ __volatile__(ASM_STAC "\n"
+                            "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
+                            "2: " ASM_CLAC "\n"
                             ".section .fixup,\"ax\"\n"
                             "3:  movl $-1,%[err]\n"
                             "    jmp  2b\n"
 
 
        /* Flags to clear on syscall */
        wrmsrl(MSR_SYSCALL_MASK,
-              X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
+              X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
+              X86_EFLAGS_IOPL|X86_EFLAGS_AC);
 }
 
 unsigned long kernel_eflags;
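
Adding X86_EFLAGS_AC to MSR_SYSCALL_MASK makes the CPU clear EFLAGS.AC on every SYSCALL, which is why the 64-bit syscall entry path below gets no explicit ASM_CLAC (the comment added to entry_64.S says the same). For reference, the resulting mask value using the architectural EFLAGS bit assignments; this arithmetic is illustrative, not part of the patch:

        /* TF = 0x00000100, IF = 0x00000200, DF = 0x00000400, */
        /* IOPL = 0x00003000, AC = 0x00040000                 */
        /* => MSR_SYSCALL_MASK = 0x00043700                   */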
 
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
 #include <asm/asm.h>
+#include <asm/smap.h>
 #include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
  * System call entry. Up to 6 arguments in registers are supported.
  *
  * SYSCALL does not save anything on the stack and does not change the
- * stack pointer.
+ * stack pointer.  However, it does mask the flags register for us, so
+ * CLD and CLAC are not needed.
  */
 
 /*
         */
        .p2align CONFIG_X86_L1_CACHE_SHIFT
 common_interrupt:
+       ASM_CLAC
        XCPT_FRAME
        addq $-0x80,(%rsp)              /* Adjust vector to [-256,-1] range */
        interrupt do_IRQ
  */
 .macro apicinterrupt num sym do_sym
 ENTRY(\sym)
+       ASM_CLAC
        INTR_FRAME
        pushq_cfi $~(\num)
 .Lcommon_\sym:
  */
 .macro zeroentry sym do_sym
 ENTRY(\sym)
+       ASM_CLAC
        INTR_FRAME
        PARAVIRT_ADJUST_EXCEPTION_FRAME
        pushq_cfi $-1           /* ORIG_RAX: no syscall to restart */
 
 .macro paranoidzeroentry sym do_sym
 ENTRY(\sym)
+       ASM_CLAC
        INTR_FRAME
        PARAVIRT_ADJUST_EXCEPTION_FRAME
        pushq_cfi $-1           /* ORIG_RAX: no syscall to restart */
 #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
 .macro paranoidzeroentry_ist sym do_sym ist
 ENTRY(\sym)
+       ASM_CLAC
        INTR_FRAME
        PARAVIRT_ADJUST_EXCEPTION_FRAME
        pushq_cfi $-1           /* ORIG_RAX: no syscall to restart */
 
 .macro errorentry sym do_sym
 ENTRY(\sym)
+       ASM_CLAC
        XCPT_FRAME
        PARAVIRT_ADJUST_EXCEPTION_FRAME
        subq $ORIG_RAX-R15, %rsp
        /* error code is on the stack already */
 .macro paranoiderrorentry sym do_sym
 ENTRY(\sym)
+       ASM_CLAC
        XCPT_FRAME
        PARAVIRT_ADJUST_EXCEPTION_FRAME
        subq $ORIG_RAX-R15, %rsp
 
 #include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
+#include <asm/smap.h>
 
 /*
  * By placing feature2 after feature1 in altinstructions section, we logically
  */
 ENTRY(copy_user_generic_unrolled)
        CFI_STARTPROC
+       ASM_STAC
        cmpl $8,%edx
        jb 20f          /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        decl %ecx
        jnz 21b
 23:    xor %eax,%eax
+       ASM_CLAC
        ret
 
        .section .fixup,"ax"
  */
 ENTRY(copy_user_generic_string)
        CFI_STARTPROC
+       ASM_STAC
        andl %edx,%edx
        jz 4f
        cmpl $8,%edx
 3:     rep
        movsb
 4:     xorl %eax,%eax
+       ASM_CLAC
        ret
 
        .section .fixup,"ax"
  */
 ENTRY(copy_user_enhanced_fast_string)
        CFI_STARTPROC
+       ASM_STAC
        andl %edx,%edx
        jz 2f
        movl %edx,%ecx
 1:     rep
        movsb
 2:     xorl %eax,%eax
+       ASM_CLAC
        ret
 
        .section .fixup,"ax"
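
With ASM_STAC at function entry and ASM_CLAC before the successful return inside these assembly routines, C callers of copy_to_user()/copy_from_user() are untouched and keep the usual convention of returning the number of bytes that could not be copied. A hypothetical caller, for illustration only:

        static ssize_t push_to_user(char __user *buf, const void *kbuf, size_t len)
        {
                if (copy_to_user(buf, kbuf, len))       /* non-zero => partial copy */
                        return -EFAULT;
                return len;
        }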
 
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/asm.h>
+#include <asm/smap.h>
 
        .macro ALIGN_DESTINATION
 #ifdef FIX_ALIGNMENT
  */
 ENTRY(__copy_user_nocache)
        CFI_STARTPROC
+       ASM_STAC
        cmpl $8,%edx
        jb 20f          /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        decl %ecx
        jnz 21b
 23:    xorl %eax,%eax
+       ASM_CLAC
        sfence
        ret
 
 
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/asm.h>
+#include <asm/smap.h>
 
        .text
 ENTRY(__get_user_1)
        GET_THREAD_INFO(%_ASM_DX)
        cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
        jae bad_get_user
+       ASM_STAC
 1:     movzb (%_ASM_AX),%edx
        xor %eax,%eax
+       ASM_CLAC
        ret
        CFI_ENDPROC
 ENDPROC(__get_user_1)
        GET_THREAD_INFO(%_ASM_DX)
        cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
        jae bad_get_user
+       ASM_STAC
 2:     movzwl -1(%_ASM_AX),%edx
        xor %eax,%eax
+       ASM_CLAC
        ret
        CFI_ENDPROC
 ENDPROC(__get_user_2)
        GET_THREAD_INFO(%_ASM_DX)
        cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
        jae bad_get_user
+       ASM_STAC
 3:     mov -3(%_ASM_AX),%edx
        xor %eax,%eax
+       ASM_CLAC
        ret
        CFI_ENDPROC
 ENDPROC(__get_user_4)
        GET_THREAD_INFO(%_ASM_DX)
        cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
        jae     bad_get_user
+       ASM_STAC
 4:     movq -7(%_ASM_AX),%_ASM_DX
        xor %eax,%eax
+       ASM_CLAC
        ret
        CFI_ENDPROC
 ENDPROC(__get_user_8)
        CFI_STARTPROC
        xor %edx,%edx
        mov $(-EFAULT),%_ASM_AX
+       ASM_CLAC
        ret
        CFI_ENDPROC
 END(bad_get_user)
 
 #include <asm/thread_info.h>
 #include <asm/errno.h>
 #include <asm/asm.h>
+#include <asm/smap.h>
 
 
 /*
 
 #define ENTER  CFI_STARTPROC ; \
                GET_THREAD_INFO(%_ASM_BX)
-#define EXIT   ret ; \
+#define EXIT   ASM_CLAC ;      \
+               ret ;           \
                CFI_ENDPROC
 
 .text
        ENTER
        cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
        jae bad_put_user
+       ASM_STAC
 1:     movb %al,(%_ASM_CX)
        xor %eax,%eax
        EXIT
        sub $1,%_ASM_BX
        cmp %_ASM_BX,%_ASM_CX
        jae bad_put_user
+       ASM_STAC
 2:     movw %ax,(%_ASM_CX)
        xor %eax,%eax
        EXIT
        sub $3,%_ASM_BX
        cmp %_ASM_BX,%_ASM_CX
        jae bad_put_user
+       ASM_STAC
 3:     movl %eax,(%_ASM_CX)
        xor %eax,%eax
        EXIT
        sub $7,%_ASM_BX
        cmp %_ASM_BX,%_ASM_CX
        jae bad_put_user
+       ASM_STAC
 4:     mov %_ASM_AX,(%_ASM_CX)
 #ifdef CONFIG_X86_32
 5:     movl %edx,4(%_ASM_CX)
 
        int __d0;                                                       \
        might_fault();                                                  \
        __asm__ __volatile__(                                           \
+               ASM_STAC "\n"                                           \
                "0:     rep; stosl\n"                                   \
                "       movl %2,%0\n"                                   \
                "1:     rep; stosb\n"                                   \
-               "2:\n"                                                  \
+               "2: " ASM_CLAC "\n"                                     \
                ".section .fixup,\"ax\"\n"                              \
                "3:     lea 0(%2,%0,4),%0\n"                            \
                "       jmp 2b\n"                                       \
                return n;
        }
 #endif
+       stac();
        if (movsl_is_ok(to, from, n))
                __copy_user(to, from, n);
        else
                n = __copy_user_intel(to, from, n);
+       clac();
        return n;
 }
 EXPORT_SYMBOL(__copy_to_user_ll);
 unsigned long __copy_from_user_ll(void *to, const void __user *from,
                                        unsigned long n)
 {
+       stac();
        if (movsl_is_ok(to, from, n))
                __copy_user_zeroing(to, from, n);
        else
                n = __copy_user_zeroing_intel(to, from, n);
+       clac();
        return n;
 }
 EXPORT_SYMBOL(__copy_from_user_ll);
 unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from,
                                         unsigned long n)
 {
+       stac();
        if (movsl_is_ok(to, from, n))
                __copy_user(to, from, n);
        else
                n = __copy_user_intel((void __user *)to,
                                      (const void *)from, n);
+       clac();
        return n;
 }
 EXPORT_SYMBOL(__copy_from_user_ll_nozero);
 unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
                                        unsigned long n)
 {
+       stac();
 #ifdef CONFIG_X86_INTEL_USERCOPY
        if (n > 64 && cpu_has_xmm2)
                n = __copy_user_zeroing_intel_nocache(to, from, n);
 #else
        __copy_user_zeroing(to, from, n);
 #endif
+       clac();
        return n;
 }
 EXPORT_SYMBOL(__copy_from_user_ll_nocache);
 unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
                                        unsigned long n)
 {
+       stac();
 #ifdef CONFIG_X86_INTEL_USERCOPY
        if (n > 64 && cpu_has_xmm2)
                n = __copy_user_intel_nocache(to, from, n);
 #else
        __copy_user(to, from, n);
 #endif
+       clac();
        return n;
 }
 EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
 
        might_fault();
        /* no memory constraint because it doesn't change any memory gcc knows
           about */
+       stac();
        asm volatile(
                "       testq  %[size8],%[size8]\n"
                "       jz     4f\n"
                : [size8] "=&c"(size), [dst] "=&D" (__d0)
                : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
                  [zero] "r" (0UL), [eight] "r" (8UL));
+       clac();
        return size;
 }
 EXPORT_SYMBOL(__clear_user);
        for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
                if (__put_user_nocheck(c, to++, sizeof(char)))
                        break;
+       clac();
        return len;
 }
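
The same bracketing covers the remaining C helpers: __clear_user() wraps its inline-asm store loop in stac()/clac(), and copy_user_handle_tail() now ends with clac() so EFLAGS.AC is dropped before returning to its caller. A hypothetical caller, to show that the external semantics are unchanged (clear_user() returns the number of bytes it could not zero, 0 on success):

        static int wipe_user_buffer(void __user *ubuf, size_t len)
        {
                return clear_user(ubuf, len) ? -EFAULT : 0;
        }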