#include <linux/cpumask.h>
 #include <asm/frame.h>
 
-static inline void load_sp0(struct tss_struct *tss,
-                            struct thread_struct *thread)
+static inline void load_sp0(unsigned long sp0)
 {
-       PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
+       PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
 }
 
 /* The paravirtualized CPUID instruction. */
 
        void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
        void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
 
-       void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
+       void (*load_sp0)(unsigned long sp0);
 
        void (*set_iopl_mask)(unsigned mask);
 
 
 }
 
 static inline void
-native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
+native_load_sp0(unsigned long sp0)
 {
-       tss->x86_tss.sp0 = thread->sp0;
+       this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
 }
 
 static inline void native_swapgs(void)
 #else
 #define __cpuid                        native_cpuid
 
-static inline void load_sp0(struct tss_struct *tss,
-                           struct thread_struct *thread)
+static inline void load_sp0(unsigned long sp0)
 {
-       native_load_sp0(tss, thread);
+       native_load_sp0(sp0);
 }
 
 #define set_iopl_mask native_set_iopl_mask
 
        initialize_tlbstate_and_flush();
        enter_lazy_tlb(&init_mm, me);
 
-       load_sp0(t, &current->thread);
+       load_sp0(current->thread.sp0);
        set_tss_desc(cpu, t);
        load_TR_desc();
        load_mm_ldt(&init_mm);
        initialize_tlbstate_and_flush();
        enter_lazy_tlb(&init_mm, curr);
 
-       load_sp0(t, thread);
+       load_sp0(thread->sp0);
        set_tss_desc(cpu, t);
        load_TR_desc();
        load_mm_ldt(&init_mm);
 
         * current_thread_info().  Refresh the SYSENTER configuration in
         * case prev or next is vm86.
         */
-       load_sp0(tss, next);
+       load_sp0(next->sp0);
        refresh_sysenter_cs(next);
        this_cpu_write(cpu_current_top_of_stack,
                       (unsigned long)task_stack_page(next_p) +
 
        this_cpu_write(current_task, next_p);
 
        /* Reload sp0. */
-       load_sp0(tss, next);
+       load_sp0(next->sp0);
 
        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
 
 
 void save_v86_state(struct kernel_vm86_regs *regs, int retval)
 {
-       struct tss_struct *tss;
        struct task_struct *tsk = current;
        struct vm86plus_struct __user *user;
        struct vm86 *vm86 = current->thread.vm86;
                do_exit(SIGSEGV);
        }
 
-       tss = &per_cpu(cpu_tss, get_cpu());
+       preempt_disable();
        tsk->thread.sp0 = vm86->saved_sp0;
        tsk->thread.sysenter_cs = __KERNEL_CS;
-       load_sp0(tss, &tsk->thread);
+       load_sp0(tsk->thread.sp0);
        refresh_sysenter_cs(&tsk->thread);
        vm86->saved_sp0 = 0;
-       put_cpu();
+       preempt_enable();
 
        memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
 
 
 static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
 {
-       struct tss_struct *tss;
        struct task_struct *tsk = current;
        struct vm86 *vm86 = tsk->thread.vm86;
        struct kernel_vm86_regs vm86regs;
        vm86->saved_sp0 = tsk->thread.sp0;
        lazy_save_gs(vm86->regs32.gs);
 
-       tss = &per_cpu(cpu_tss, get_cpu());
        /* make room for real-mode segments */
+       preempt_disable();
        tsk->thread.sp0 += 16;
 
        if (static_cpu_has(X86_FEATURE_SEP)) {
                refresh_sysenter_cs(&tsk->thread);
        }
 
-       load_sp0(tss, &tsk->thread);
-       put_cpu();
+       load_sp0(tsk->thread.sp0);
+       preempt_enable();
 
        if (vm86->flags & VM86_SCREEN_BITMAP)
                mark_screen_rdonly(tsk->mm);
 
        }
 }
 
-static void xen_load_sp0(struct tss_struct *tss,
-                        struct thread_struct *thread)
+static void xen_load_sp0(unsigned long sp0)
 {
        struct multicall_space mcs;
 
        mcs = xen_mc_entry(0);
-       MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
+       MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
        xen_mc_issue(PARAVIRT_LAZY_CPU);
-       tss->x86_tss.sp0 = thread->sp0;
+       this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
 }
 
 void xen_set_iopl_mask(unsigned mask)