config KVM
        tristate "Kernel-based Virtual Machine (KVM) support"
        depends on X86 && EXPERIMENTAL
+       select PREEMPT_NOTIFIERS
        select ANON_INODES
        ---help---
          Support hosting fully virtualized guest machines using hardware
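
For reference, the interface enabled by CONFIG_PREEMPT_NOTIFIERS looks roughly
like this (paraphrased from <linux/preempt.h>; the header is authoritative):

struct preempt_notifier;

struct preempt_ops {
	/* called just after the registering task is scheduled in on @cpu */
	void (*sched_in)(struct preempt_notifier *notifier, int cpu);
	/* called just before the registering task is scheduled out,
	 * with @next about to take over the cpu */
	void (*sched_out)(struct preempt_notifier *notifier,
			  struct task_struct *next);
};

struct preempt_notifier {
	struct hlist_node link;
	struct preempt_ops *ops;
};

void preempt_notifier_register(struct preempt_notifier *notifier);
void preempt_notifier_unregister(struct preempt_notifier *notifier);

static inline void preempt_notifier_init(struct preempt_notifier *notifier,
					 struct preempt_ops *ops)
{
	INIT_HLIST_NODE(&notifier->link);
	notifier->ops = ops;
}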
 
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/preempt.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
 
 struct kvm_vcpu {
        struct kvm *kvm;
+       struct preempt_notifier preempt_notifier;
        int vcpu_id;
        struct mutex mutex;
        int   cpu;
        struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
        void (*vcpu_free)(struct kvm_vcpu *vcpu);
 
-       void (*vcpu_load)(struct kvm_vcpu *vcpu);
+       void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
        void (*vcpu_put)(struct kvm_vcpu *vcpu);
        void (*vcpu_decache)(struct kvm_vcpu *vcpu);
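
The vcpu_load hook now takes the target cpu from its caller instead of calling
get_cpu() itself: preemption is already disabled at every call site, either by
vcpu_load() via get_cpu() or by the sched-in notifier, which runs from the
scheduler. This is why the get_cpu()/put_cpu() pairs disappear from
svm_vcpu_load() and vmx_vcpu_load() further down.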
 
 
 
 struct kvm_arch_ops *kvm_arch_ops;
 
+static __read_mostly struct preempt_ops kvm_preempt_ops;
+
 #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
 
 static struct kvm_stats_debugfs_item {
  */
 static void vcpu_load(struct kvm_vcpu *vcpu)
 {
+       int cpu;
+
        mutex_lock(&vcpu->mutex);
-       kvm_arch_ops->vcpu_load(vcpu);
+       cpu = get_cpu();
+       preempt_notifier_register(&vcpu->preempt_notifier);
+       kvm_arch_ops->vcpu_load(vcpu, cpu);
+       put_cpu();
 }
 
 static void vcpu_put(struct kvm_vcpu *vcpu)
 {
+       preempt_disable();
        kvm_arch_ops->vcpu_put(vcpu);
+       preempt_notifier_unregister(&vcpu->preempt_notifier);
+       preempt_enable();
        mutex_unlock(&vcpu->mutex);
 }
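
The protocol here is: take the vcpu mutex, disable preemption, register the
notifier, and only then hand the current cpu to the arch load hook; on the way
out, save the arch state and unregister, again with preemption disabled.
Registering while preemption is off is what guarantees that any later
preemption or migration between vcpu_load() and vcpu_put() is seen by the
notifier, so sched_out/sched_in can save and reload the per-cpu guest state
transparently.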
 
 {
        if (!need_resched())
                return;
-       vcpu_put(vcpu);
        cond_resched();
-       vcpu_load(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_resched);
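
With the notifiers in place the explicit unload/reload around cond_resched()
is no longer needed: if cond_resched() actually schedules, sched_out saves the
guest state and sched_in reloads it on whichever cpu the task resumes on. The
function's prototype is not shown in this excerpt, but the result is roughly:

void kvm_resched(struct kvm_vcpu *vcpu)
{
	if (!need_resched())
		return;
	cond_resched();
}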
 
        unsigned bytes;
        int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
 
-       kvm_arch_ops->vcpu_put(vcpu);
        q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
                 PAGE_KERNEL);
        if (!q) {
-               kvm_arch_ops->vcpu_load(vcpu);
                free_pio_guest_pages(vcpu);
                return -ENOMEM;
        }
                memcpy(p, q, bytes);
        q -= vcpu->pio.guest_page_offset;
        vunmap(q);
-       kvm_arch_ops->vcpu_load(vcpu);
        free_pio_guest_pages(vcpu);
        return 0;
 }
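
Same idea in the PIO completion path: vmap() may sleep, but a reschedule is
now handled by the notifier, so the guest state no longer has to be unloaded
and reloaded by hand around it. The __mmu_topup_memory_caches() hunk further
down drops its vcpu_put/vcpu_load pair for the same reason.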
        if (IS_ERR(vcpu))
                return PTR_ERR(vcpu);
 
+       preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
+
        vcpu_load(vcpu);
        r = kvm_mmu_setup(vcpu);
        vcpu_put(vcpu);
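
preempt_notifier_init() only records the ops pointer; the notifier is actually
registered with the scheduler by vcpu_load(), so the init has to happen before
the first vcpu_load() on the new vcpu, as it does here.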
 
 hpa_t bad_page_address;
 
+static inline
+struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
+{
+       return container_of(pn, struct kvm_vcpu, preempt_notifier);
+}
+
+static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
+{
+       struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+
+       kvm_arch_ops->vcpu_load(vcpu, cpu);
+}
+
+static void kvm_sched_out(struct preempt_notifier *pn,
+                         struct task_struct *next)
+{
+       struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+
+       kvm_arch_ops->vcpu_put(vcpu);
+}
+
 int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
 {
        int r;
                goto out_free;
        }
 
+       kvm_preempt_ops.sched_in = kvm_sched_in;
+       kvm_preempt_ops.sched_out = kvm_sched_out;
+
        return r;
 
 out_free:
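
kvm_sched_in()/kvm_sched_out() simply recover the vcpu from the embedded
notifier with container_of() and forward to the same arch hooks that
vcpu_load()/vcpu_put() use. The pattern generalizes; the sketch below is a
hypothetical user of the same API (none of the my_* names exist in the tree),
with my_enter()/my_leave() standing in for vcpu_load()/vcpu_put():

/* Sketch only; assumes CONFIG_PREEMPT_NOTIFIERS, which the Kconfig change
 * above selects. */
#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/smp.h>

/* Hypothetical context holding per-cpu state that must follow the task. */
struct my_ctx {
	struct preempt_notifier notifier;
	int cpu;		/* cpu the state is currently loaded on */
};

static void my_load(struct my_ctx *ctx, int cpu)
{
	/* (re)load per-cpu state; runs with preemption disabled */
	ctx->cpu = cpu;
}

static void my_save(struct my_ctx *ctx)
{
	/* save per-cpu state; runs with preemption disabled */
}

static void my_sched_in(struct preempt_notifier *pn, int cpu)
{
	my_load(container_of(pn, struct my_ctx, notifier), cpu);
}

static void my_sched_out(struct preempt_notifier *pn, struct task_struct *next)
{
	my_save(container_of(pn, struct my_ctx, notifier));
}

static struct preempt_ops my_preempt_ops = {
	.sched_in  = my_sched_in,
	.sched_out = my_sched_out,
};

/* Mirrors vcpu_load(): pin the cpu, arm the notifier, load the state. */
static void my_enter(struct my_ctx *ctx)
{
	int cpu;

	preempt_notifier_init(&ctx->notifier, &my_preempt_ops);
	cpu = get_cpu();
	preempt_notifier_register(&ctx->notifier);
	my_load(ctx, cpu);
	put_cpu();
	/* from here on, preemption and migration are fine: the scheduler
	 * replays my_save()/my_load() around every context switch */
}

/* Mirrors vcpu_put(): save and disarm atomically w.r.t. preemption. */
static void my_leave(struct my_ctx *ctx)
{
	preempt_disable();
	my_save(ctx);
	preempt_notifier_unregister(&ctx->notifier);
	preempt_enable();
}

In the patch itself preempt_notifier_init() runs once at vcpu creation rather
than on every load; doing it in my_enter() just keeps the sketch
self-contained. The patch also fills in kvm_preempt_ops at runtime in
kvm_init_arch() instead of using a static initializer; either works, since no
vcpu can exist before kvm_init_arch() completes.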
 
        kvm_mmu_free_some_pages(vcpu);
        if (r < 0) {
                spin_unlock(&vcpu->kvm->lock);
-               kvm_arch_ops->vcpu_put(vcpu);
                r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL);
-               kvm_arch_ops->vcpu_load(vcpu);
                spin_lock(&vcpu->kvm->lock);
                kvm_mmu_free_some_pages(vcpu);
        }
 
        kfree(svm);
 }
 
-static void svm_vcpu_load(struct kvm_vcpu *vcpu)
+static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-       int cpu, i;
+       int i;
 
-       cpu = get_cpu();
        if (unlikely(cpu != vcpu->cpu)) {
                u64 tsc_this, delta;
 
                wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
 
        rdtscll(vcpu->host_tsc);
-       put_cpu();
 }
 
 static void svm_vcpu_decache(struct kvm_vcpu *vcpu)
 
 static void vmx_load_host_state(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       unsigned long flags;
 
        if (!vmx->host_state.loaded)
                return;
                 * If we have to reload gs, we must take care to
                 * preserve our gs base.
                 */
-               local_irq_disable();
+               local_irq_save(flags);
                load_gs(vmx->host_state.gs_sel);
 #ifdef CONFIG_X86_64
                wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
 #endif
-               local_irq_enable();
+               local_irq_restore(flags);
 
                reload_tss();
        }
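
The switch from local_irq_disable()/local_irq_enable() to
local_irq_save()/local_irq_restore() is needed because vmx_load_host_state()
can now be reached from the sched-out notifier, i.e. from inside the scheduler
with interrupts already disabled; unconditionally re-enabling interrupts there
would be wrong, so the caller's interrupt state is preserved instead.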
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
  */
-static void vmx_vcpu_load(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u64 phys_addr = __pa(vmx->vmcs);
-       int cpu;
        u64 tsc_this, delta;
 
-       cpu = get_cpu();
-
        if (vcpu->cpu != cpu)
                vcpu_clear(vcpu);
 
 {
        vmx_load_host_state(vcpu);
        kvm_put_guest_fpu(vcpu);
-       put_cpu();
 }
 
 static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
        if (unlikely(r))
                goto out;
 
+       preempt_disable();
+
        if (!vcpu->mmio_read_completed)
                do_interrupt_requests(vcpu, kvm_run);
 
        vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
 
        asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
+       vmx->launched = 1;
+
+       preempt_enable();
 
        if (unlikely(fail)) {
                kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
        if (unlikely(prof_on == KVM_PROFILING))
                profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP));
 
-       vmx->launched = 1;
        r = kvm_handle_exit(kvm_run, vcpu);
        if (r > 0) {
                /* Give scheduler a chance to reschedule. */
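
The guest entry sequence itself still must not be preempted, hence the
explicit preempt_disable()/preempt_enable() bracket now that vcpu_load() no
longer keeps preemption off for the whole ioctl. Moving vmx->launched inside
that bracket appears intentional: if the task were preempted and migrated
after the entry but before the flag was set, vcpu_clear() would wipe the
per-cpu VMCS, and setting the flag afterwards would wrongly tell the next
entry to VMRESUME a cleared VMCS.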
 {
        int err;
        struct vcpu_vmx *vmx = kzalloc(sizeof(*vmx), GFP_KERNEL);
+       int cpu;
 
        if (!vmx)
                return ERR_PTR(-ENOMEM);
 
        vmcs_clear(vmx->vmcs);
 
-       vmx_vcpu_load(&vmx->vcpu);
+       cpu = get_cpu();
+       vmx_vcpu_load(&vmx->vcpu, cpu);
        err = vmx_vcpu_setup(&vmx->vcpu);
        vmx_vcpu_put(&vmx->vcpu);
+       put_cpu();
        if (err)
                goto free_vmcs;
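
vmx_create_vcpu() calls vmx_vcpu_load() directly rather than through
vcpu_load(), and at this point no preempt notifier has been registered for the
new vcpu, so it has to pin the cpu itself with get_cpu()/put_cpu() around the
initial VMCS setup.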