void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
        unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
        void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+       void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
 
        void (*tlb_flush)(struct kvm_vcpu *vcpu);
 
 
        if (npt_enabled)
                goto set;
 
-       if (kvm_read_cr0_bits(vcpu, X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
-               svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
-               vcpu->fpu_active = 1;
-       }
-
        vcpu->arch.cr0 = cr0;
        cr0 |= X86_CR0_PG | X86_CR0_WP;
-       if (!vcpu->fpu_active) {
-               svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
+
+       if (!vcpu->fpu_active)
                cr0 |= X86_CR0_TS;
-       }
 set:
        /*
         * re-enable caching here because the QEMU bios
        svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
        if (!kvm_read_cr0_bits(&svm->vcpu, X86_CR0_TS))
                svm->vmcb->save.cr0 &= ~X86_CR0_TS;
+       else
+               svm->vmcb->save.cr0 |= X86_CR0_TS;
        svm->vcpu.fpu_active = 1;
 
        return 1;
 
 static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
 {
+       if (npt_enabled)
+               vcpu->fpu_active = 1;
 }
 
 static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
 
        svm->vmcb->save.cr3 = root;
        force_new_asid(vcpu);
-
-       if (vcpu->fpu_active) {
-               svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
-               svm->vmcb->save.cr0 |= X86_CR0_TS;
-               vcpu->fpu_active = 0;
-       }
 }
 
 static int is_disabled(void)
        return false;
 }
 
+static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       if (npt_enabled) {
+               /* hack: npt requires active fpu at this time */
+               vcpu->fpu_active = 1;
+               return;
+       }
+
+       svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR;
+       svm->vmcb->save.cr0 |= X86_CR0_TS;
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
        .cpu_has_kvm_support = has_svm,
        .disabled_by_bios = is_disabled,
        .cache_reg = svm_cache_reg,
        .get_rflags = svm_get_rflags,
        .set_rflags = svm_set_rflags,
+       .fpu_deactivate = svm_fpu_deactivate,
 
        .tlb_flush = svm_flush_tlb,
 
 
 #define KVM_GUEST_CR0_MASK                                             \
        (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST                                \
-       (X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP)
+       (X86_CR0_WP | X86_CR0_NE | X86_CR0_MP)
 #define KVM_VM_CR0_ALWAYS_ON                                           \
        (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS                                     \
 {
        u32 eb;
 
-       eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR);
-       if (!vcpu->fpu_active)
-               eb |= 1u << NM_VECTOR;
+       eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR)
+               | (1u << NM_VECTOR);
        /*
         * Unconditionally intercept #DB so we can maintain dr6 without
         * reading it every exit.
                eb = ~0;
        if (enable_ept)
                eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
+       if (vcpu->fpu_active)
+               eb &= ~(1u << NM_VECTOR);
        vmcs_write32(EXCEPTION_BITMAP, eb);
 }
 
 
 static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
 {
-       if (!vcpu->fpu_active)
-               return;
-       vcpu->fpu_active = 0;
        vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
        update_exception_bitmap(vcpu);
 }
        else
                hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
 
-       vmx_fpu_deactivate(vcpu);
-
        if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
                enter_pmode(vcpu);
 
        if (enable_ept)
                ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
 
+       if (!vcpu->fpu_active)
+               hw_cr0 |= X86_CR0_TS;
+
        vmcs_writel(CR0_READ_SHADOW, cr0);
        vmcs_writel(GUEST_CR0, hw_cr0);
        vcpu->arch.cr0 = cr0;
-
-       if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
-               vmx_fpu_activate(vcpu);
 }
 
 static u64 construct_eptp(unsigned long root_hpa)
 
        vmx_flush_tlb(vcpu);
        vmcs_writel(GUEST_CR3, guest_cr3);
-       if (kvm_read_cr0_bits(vcpu, X86_CR0_PE))
-               vmx_fpu_deactivate(vcpu);
 }
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                };
                break;
        case 2: /* clts */
-               vmx_fpu_deactivate(vcpu);
                vcpu->arch.cr0 &= ~X86_CR0_TS;
                vmcs_writel(CR0_READ_SHADOW, kvm_read_cr0(vcpu));
                trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
-               vmx_fpu_activate(vcpu);
                skip_emulated_instruction(vcpu);
                return 1;
        case 1: /*mov from cr*/
        .cache_reg = vmx_cache_reg,
        .get_rflags = vmx_get_rflags,
        .set_rflags = vmx_set_rflags,
+       .fpu_deactivate = vmx_fpu_deactivate,
 
        .tlb_flush = vmx_flush_tlb,
 
 
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
-       kvm_x86_ops->vcpu_put(vcpu);
        kvm_put_guest_fpu(vcpu);
+       kvm_x86_ops->vcpu_put(vcpu);
 }
 
 static int is_efer_nx(void)
                        r = 0;
                        goto out;
                }
+               if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
+                       vcpu->fpu_active = 0;
+                       kvm_x86_ops->fpu_deactivate(vcpu);
+               }
        }
 
        preempt_disable();
        kvm_fx_save(&vcpu->arch.guest_fx_image);
        kvm_fx_restore(&vcpu->arch.host_fx_image);
        ++vcpu->stat.fpu_reload;
+       set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
 }
 EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
 
 
 #define KVM_REQ_MMU_SYNC           7
 #define KVM_REQ_KVMCLOCK_UPDATE    8
 #define KVM_REQ_KICK               9
+#define KVM_REQ_DEACTIVATE_FPU    10
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID    0