*/
 static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
 
+#define PI_LOCK_SCHED_OUT SINGLE_DEPTH_NESTING
+
 static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
 {
        return &(to_vmx(vcpu)->pi_desc);
         * current pCPU if the task was migrated.
         */
        if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
-               raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+               raw_spinlock_t *spinlock = &per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu);
+
+               /*
+                * In addition to taking the wakeup lock for the regular/IRQ
+                * context, tell lockdep it is being taken for the "sched out"
+                * context as well.  vCPU loads happen in task context, and
+                * this is taking the lock of the *previous* CPU, i.e. can race
+                * with both the scheduler and the wakeup handler.
+                */
+               raw_spin_lock(spinlock);
+               spin_acquire(&spinlock->dep_map, PI_LOCK_SCHED_OUT, 0, _RET_IP_);
                list_del(&vmx->pi_wakeup_list);
-               raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+               spin_release(&spinlock->dep_map, _RET_IP_);
+               raw_spin_unlock(spinlock);
        }
 
        dest = cpu_physical_id(cpu);
 
        lockdep_assert_irqs_disabled();
 
-       raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+       /*
+        * Acquire the wakeup lock using the "sched out" context to work around
+        * a lockdep false positive.  When this is called, schedule() holds
+        * various per-CPU scheduler locks.  When the wakeup handler runs, it
+        * holds this CPU's wakeup lock while calling try_to_wake_up(), which
+        * can eventually take the aforementioned scheduler locks, which causes
+        * lockdep to assume a deadlock is possible.
+        *
+        * Deadlock can't actually occur because IRQs are disabled for the
+        * entirety of the sched_out critical section, i.e. the wakeup handler
+        * can't run while the scheduler locks are held.
+        */
+       raw_spin_lock_nested(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu),
+                            PI_LOCK_SCHED_OUT);
        list_add_tail(&vmx->pi_wakeup_list,
                      &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
        raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
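
For reference, the other half of the ordering that trips lockdep is the IRQ-context
wakeup handler, which takes the same per-CPU lock with the default subclass and can
then enter the scheduler via try_to_wake_up().  Below is a minimal sketch of that
side, modeled on the existing pi_wakeup_handler(); the body is simplified and
illustrative, not part of this patch:

        /* Runs from the POSTED_INTR_WAKEUP_VECTOR IRQ, i.e. with IRQs disabled. */
        void pi_wakeup_handler(void)
        {
                int cpu = smp_processor_id();
                struct list_head *wakeup_list = &per_cpu(wakeup_vcpus_on_cpu, cpu);
                raw_spinlock_t *spinlock = &per_cpu(wakeup_vcpus_on_cpu_lock, cpu);
                struct vcpu_vmx *vmx;

                /* Default subclass 0, i.e. the "regular/IRQ" context noted above. */
                raw_spin_lock(spinlock);
                list_for_each_entry(vmx, wakeup_list, pi_wakeup_list) {
                        /*
                         * Waking the vCPU may reach try_to_wake_up() and thus
                         * the scheduler's per-CPU locks.
                         */
                        if (pi_test_on(&vmx->pi_desc))
                                kvm_vcpu_wake_up(&vmx->vcpu);
                }
                raw_spin_unlock(spinlock);
        }

Because the sched-out path above acquires the lock with PI_LOCK_SCHED_OUT
(SINGLE_DEPTH_NESTING), lockdep tracks the two acquisitions as separate subclasses
and stops reporting the wakeup-lock vs. scheduler-lock inversion, which can't
actually occur since the wakeup handler never runs while the sched-out critical
section holds the scheduler locks with IRQs disabled.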