From 97459ee3e5fbcaa8a512ef1a60e33cb2ff94868b Mon Sep 17 00:00:00 2001
From: David Woodhouse
Date: Wed, 16 Jul 2025 10:29:32 +0100
Subject: [PATCH] KVM: x86/xen: Fix SCHEDOP_poll fidelity with Xen

There are two (probably unimportant, given that we've run a billion or
two Xen guests on this now without noticing except by code inspection)
ways in which the KVM implementation of SCHEDOP_poll differs from Xen:

 1. Xen allows a vCPU to poll a port which is targeted at another vCPU.

 2. Xen explicitly wakes a polling vCPU even when the event is unmasked.

I can't imagine why the first would ever be used, but the second could
theoretically be important. KVM currently wouldn't actually deliver an
interrupt to the target vCPU if another port was already pending, but
if the poll was being used to handle spinlock contention inside another
interrupt handler, the wakeup might never happen and lead to a deadlock.

Signed-off-by: David Woodhouse
---
 arch/x86/kvm/xen.c | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 9b029bb29a16f..f9bcc598dfa17 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1769,15 +1769,25 @@ handle_in_userspace:
 	return 0;
 }
 
-static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port)
+static bool kvm_xen_check_pollers(struct kvm *kvm, struct kvm_vcpu *deliver_vcpu, int port)
 {
-	int poll_evtchn = vcpu->arch.xen.poll_evtchn;
-
-	if ((poll_evtchn == port || poll_evtchn == -1) &&
-	    test_and_clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask)) {
-		kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
-		kvm_vcpu_kick(vcpu);
+	int vcpu_idx, nr_vcpus = atomic_read(&kvm->online_vcpus);
+	bool ret = false;
+
+	for_each_set_bit(vcpu_idx, kvm->arch.xen.poll_mask, nr_vcpus) {
+		struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+		int poll_evtchn = vcpu->arch.xen.poll_evtchn;
+
+		if ((poll_evtchn == port || poll_evtchn == -1) &&
+		    test_and_clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask)) {
+			kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
+			kvm_vcpu_kick(vcpu);
+			if (vcpu == deliver_vcpu)
+				ret = true;
+		}
 	}
+
+	return ret;
 }
 
 /*
@@ -1797,7 +1807,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 	unsigned long *pending_bits, *mask_bits;
 	unsigned long flags;
 	int port_word_bit;
-	bool kick_vcpu = false;
+	bool kick_vcpu = false, check_pollers = false;
 	int vcpu_idx, idx, rc;
 
 	vcpu_idx = READ_ONCE(xe->vcpu_idx);
@@ -1843,9 +1853,10 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 	if (test_and_set_bit(xe->port, pending_bits)) {
 		rc = 0; /* It was already raised */
 	} else if (test_bit(xe->port, mask_bits)) {
+		check_pollers = true;
 		rc = -ENOTCONN; /* Masked */
-		kvm_xen_check_poller(vcpu, xe->port);
 	} else {
+		check_pollers = true;
 		rc = 1; /* Delivered to the bitmap in shared_info. */
 		/* Now switch to the vCPU's vcpu_info to set the index and pending_sel */
 		read_unlock_irqrestore(&gpc->lock, flags);
@@ -1888,6 +1899,10 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 	read_unlock_irqrestore(&gpc->lock, flags);
 	srcu_read_unlock(&kvm->srcu, idx);
 
+	/* No need to kick the target vCPU if it was polling */
+	if (check_pollers && kvm_xen_check_pollers(kvm, vcpu, xe->port))
+		kick_vcpu = false;
+
 	if (kick_vcpu) {
 		kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
 		kvm_vcpu_kick(vcpu);
-- 
2.50.1