KVM: x86/xen: Add event channel interrupt vector upcall
author    David Woodhouse <dwmw@amazon.co.uk>
          Wed, 9 Dec 2020 20:08:30 +0000 (20:08 +0000)
committer David Woodhouse <dwmw@amazon.co.uk>
          Mon, 4 Jan 2021 16:18:29 +0000 (16:18 +0000)
It turns out that we can't handle event channels *entirely* in userspace
by delivering them as ExtINT, because KVM is a bit picky about when it
accepts ExtINT interrupts from a legacy PIC. The in-kernel local APIC
has to have LVT0 configured in APIC_MODE_EXTINT and unmasked, which
isn't necessarily the case for Xen guests, especially on secondary CPUs.
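
For reference, the gating condition is roughly the following (a
paraphrased sketch of KVM's existing kvm_apic_accept_pic_intr(), shown
for illustration only and not part of this patch; it assumes the LVT0
macros from <asm/apicdef.h>):

    /* A PIC-sourced ExtINT is accepted only if the in-kernel LAPIC is
     * hardware-disabled, or if LVT0 is unmasked and programmed for
     * ExtINT delivery. Xen guests often leave LVT0 masked, especially
     * on secondary CPUs, so ExtINT delivery is refused there.
     */
    static int accepts_pic_extint(u32 lvt0, bool lapic_hw_enabled)
    {
            if (!lapic_hw_enabled)
                    return 1;

            if (!(lvt0 & APIC_LVT_MASKED) &&
                GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
                    return 1;

            return 0;
    }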

To cope with this, add kvm_xen_has_interrupt(), which checks the
evtchn_upcall_pending field in the Xen vcpu_info, and delivers the Xen
upcall vector (configured by KVM_XEN_ATTR_TYPE_UPCALL_VECTOR) if it's
set, regardless of the LAPIC LVT0 configuration. This gives us the
minimum support we need for a completely userspace-based implementation
of event channels.
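
As a usage sketch from the VMM side (hypothetical userspace code,
assuming the KVM_XEN_HVM_SET_ATTR ioctl that reaches
kvm_xen_hvm_set_attr() below; names and error handling are
illustrative):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Hypothetical helper: program the vector that KVM will inject
     * whenever a vCPU's vcpu_info->evtchn_upcall_pending flag is set.
     * Vectors below 0x10 are rejected by the patch below.
     */
    static int set_xen_upcall_vector(int vm_fd, __u8 vector)
    {
            struct kvm_xen_hvm_attr attr = {
                    .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
                    .u.vector = vector,
            };

            return ioctl(vm_fd, KVM_XEN_HVM_SET_ATTR, &attr);
    }

A VMM would typically invoke something like this when it sees the guest
register a vector-type callback via HVM_PARAM_CALLBACK_IRQ.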

This does mean that vcpu_enter_guest() needs to check for the
evtchn_upcall_pending flag being set, because it can't rely on someone
having set KVM_REQ_EVENT unless we were to add some way for userspace to
do so manually.

But actually, I don't quite see how that works reliably for interrupts
injected with KVM_INTERRUPT either. In kvm_vcpu_ioctl_interrupt() the
KVM_REQ_EVENT request is set once, but that'll get cleared the first time
through vcpu_enter_guest(). So if the first exit is for something *else*
without interrupts being enabled yet, won't the KVM_REQ_EVENT request
have been consumed already and just be lost?

I wonder if my addition of '|| kvm_xen_has_interrupt(vcpu)' should
actually be '|| kvm_cpu_has_injectable_intr(vcpu)' to fix that
pre-existing bug?

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/irq.c
arch/x86/kvm/x86.c
arch/x86/kvm/xen.c
arch/x86/kvm/xen.h
include/uapi/linux/kvm.h

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3fb236d99e776209a0357ea091b6d5beef97d6ad..51ac9e15839679a48ff06cf1c84d35d38e58e58c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -907,6 +907,7 @@ struct msr_bitmap_range {
 struct kvm_xen {
        bool long_mode;
        bool shinfo_set;
+       u8 upcall_vector;
        struct gfn_to_hva_cache shinfo_cache;
 };
 
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 814698e5b1526180da42043045e87a521870698d..24668b51b5c894c6b34d55c48225a5e784467f86 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -14,6 +14,7 @@
 #include "irq.h"
 #include "i8254.h"
 #include "x86.h"
+#include "xen.h"
 
 /*
  * check if there are pending timer events
@@ -56,6 +57,9 @@ int kvm_cpu_has_extint(struct kvm_vcpu *v)
        if (!lapic_in_kernel(v))
                return v->arch.interrupt.injected;
 
+       if (kvm_xen_has_interrupt(v))
+               return 1;
+
        if (!kvm_apic_accept_pic_intr(v))
                return 0;
 
@@ -110,6 +114,9 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v)
        if (!lapic_in_kernel(v))
                return v->arch.interrupt.nr;
 
+       if (kvm_xen_has_interrupt(v))
+               return v->kvm->arch.xen.upcall_vector;
+
        if (irqchip_split(v->kvm)) {
                int vector = v->arch.pending_external_vector;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e7a639e998d54f2a2f1958151cef580836e5c49f..cf1c1db090d24a9ee2da98a54a67d30ec06151c8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8935,7 +8935,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                        kvm_x86_ops.msr_filter_changed(vcpu);
        }
 
-       if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
+       if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
+           kvm_xen_has_interrupt(vcpu)) {
                ++vcpu->stat.req_event;
                kvm_apic_accept_events(vcpu);
                if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 17cbb4462b7e05fa4e10256a0c9cd469cb567e49..4bc9da9fcfb80d8ba4c8ecc95ba2327cc7c5ddd4 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -176,6 +176,45 @@ void kvm_xen_setup_runstate_page(struct kvm_vcpu *v)
        kvm_xen_update_runstate(v, RUNSTATE_running, steal_time);
 }
 
+int kvm_xen_has_interrupt(struct kvm_vcpu *v)
+{
+       u8 rc = 0;
+
+       /*
+        * If the global upcall vector (HVMIRQ_callback_vector) is set and
+        * the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending.
+        */
+       if (v->arch.xen.vcpu_info_set && v->kvm->arch.xen.upcall_vector) {
+               struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache;
+               struct kvm_memslots *slots = kvm_memslots(v->kvm);
+               unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending);
+
+               /* No need for compat handling here */
+               BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
+                            offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
+               BUILD_BUG_ON(sizeof(rc) !=
+                            sizeof(((struct vcpu_info *)0)->evtchn_upcall_pending));
+               BUILD_BUG_ON(sizeof(rc) !=
+                            sizeof(((struct compat_vcpu_info *)0)->evtchn_upcall_pending));
+
+               /*
+                * For efficiency, this mirrors the checks for using the valid
+                * cache in kvm_read_guest_offset_cached(), but just uses
+                * __get_user() instead. And falls back to the slow path.
+                */
+               if (likely(slots->generation == ghc->generation &&
+                          !kvm_is_error_hva(ghc->hva) && ghc->memslot)) {
+                       /* Fast path */
+                       __get_user(rc, (u8 __user *)ghc->hva + offset);
+               } else {
+                       /* Slow path */
+                       kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
+                                                     sizeof(rc));
+               }
+       }
+       return rc;
+}
+
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 {
        struct kvm_vcpu *v;
@@ -245,6 +284,14 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
                v->arch.xen.last_state_ns = ktime_get_ns();
                break;
 
+       case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
+               if (data->u.vector < 0x10)
+                       return -EINVAL;
+
+               kvm->arch.xen.upcall_vector = data->u.vector;
+               r = 0;
+               break;
+
        default:
                break;
        }
@@ -303,6 +350,11 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
                }
                break;
 
+       case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
+               data->u.vector = kvm->arch.xen.upcall_vector;
+               r = 0;
+               break;
+
        default:
                break;
        }
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index 407e717476d66e8d50baf7d35d4e44224d146c2d..d64916ac4a1288ace108827db9c2c134fbb6e98f 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -11,6 +11,7 @@
 
 void kvm_xen_setup_runstate_page(struct kvm_vcpu *vcpu);
 void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu);
+int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a075d7297da629fd62582274ff1f0107a7b46396..0926d469b2f70f371241356688ee39a66ac88513 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1593,6 +1593,7 @@ struct kvm_xen_hvm_attr {
 
        union {
                __u8 long_mode;
+               __u8 vector;
                struct {
                        __u64 gfn;
                } shared_info;
@@ -1610,6 +1611,7 @@ struct kvm_xen_hvm_attr {
 #define KVM_XEN_ATTR_TYPE_VCPU_INFO            0x2
 #define KVM_XEN_ATTR_TYPE_VCPU_TIME_INFO       0x3
 #define KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE                0x4
+#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR                0x5
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {