From: David Woodhouse <dwmw@amazon.co.uk>
Date: Wed, 9 Dec 2020 20:08:30 +0000 (+0000)
Subject: KVM: x86/xen: Add event channel interrupt vector upcall
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=refs%2Fheads%2Fxenpv-brim-merge-20201212;p=users%2Fdwmw2%2Flinux.git

KVM: x86/xen: Add event channel interrupt vector upcall

It turns out that we can't handle event channels *entirely* in userspace
by delivering them as ExtINT, because KVM is a bit picky about when it
accepts ExtINT interrupts from a legacy PIC. The in-kernel local APIC
has to have LVT0 configured in APIC_MODE_EXTINT and unmasked, which
isn't necessarily the case for Xen guests especially on secondary CPUs.

To cope with this, add kvm_xen_get_interrupt() which checks the
evtchn_pending_upcall field in the Xen vcpu_info, and delivers the Xen
upcall vector (configured by KVM_XEN_ATTR_TYPE_UPCALL_VECTOR) if it's
set regardless of LAPIC LVT0 configuration. This gives us the minimum
support we need for completely userspace-based implementation of event
channels.

This does mean that vcpu_enter_guest() needs to check for the
evtchn_pending_upcall flag being set, because it can't rely on someone
having set KVM_REQ_EVENT unless we were to add some way for userspace to
do so manually.

But actually, I don't quite see how that works reliably for interrupts
injected with KVM_INTERRUPT either. In kvm_vcpu_ioctl_interrupt() the
KVM_REQ_EVENT request is set once, but that'll get cleared the first time
through vcpu_enter_guest(). So if the first exit is for something *else*
without interrupts being enabled yet, won't the KVM_REQ_EVENT request
have been consumed already and just be lost?

I wonder if my addition of '|| kvm_xen_has_interrupt(vcpu)' should
actually be '|| kvm_has_injectable_intr(vcpu)' to fix that pre-existing
bug?

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 62f8dc78807bf..36f2a2756bcc3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -902,6 +902,7 @@ struct msr_bitmap_range {
 /* Xen emulation context */
 struct kvm_xen {
 	bool long_mode;
+	u8 upcall_vector;
 	struct kvm_host_map shinfo_map;
 	void *shinfo;
 };
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 814698e5b1526..24668b51b5c89 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -14,6 +14,7 @@
 #include "irq.h"
 #include "i8254.h"
 #include "x86.h"
+#include "xen.h"
 
 /*
  * check if there are pending timer events
@@ -56,6 +57,9 @@ int kvm_cpu_has_extint(struct kvm_vcpu *v)
 	if (!lapic_in_kernel(v))
 		return v->arch.interrupt.injected;
 
+	if (kvm_xen_has_interrupt(v))
+		return 1;
+
 	if (!kvm_apic_accept_pic_intr(v))
 		return 0;
 
@@ -110,6 +114,9 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v)
 	if (!lapic_in_kernel(v))
 		return v->arch.interrupt.nr;
 
+	if (kvm_xen_has_interrupt(v))
+		return v->kvm->arch.xen.upcall_vector;
+
 	if (irqchip_split(v->kvm)) {
 		int vector = v->arch.pending_external_vector;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ad9eea8f4f268..1711072b36165 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8891,7 +8891,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_x86_ops.msr_filter_changed(vcpu);
 	}
 
-	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
+	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
+	    kvm_xen_has_interrupt(vcpu)) {
 		++vcpu->stat.req_event;
 		kvm_apic_accept_events(vcpu);
 		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 8a807b610d2bd..fd7164e641bdf 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -257,6 +257,22 @@ void kvm_xen_setup_pvclock_page(struct kvm_vcpu *v)
 	srcu_read_unlock(&v->kvm->srcu, idx);
 }
 
+int kvm_xen_has_interrupt(struct kvm_vcpu *v)
+{
+       int rc = 0;
+
+       if (v->kvm->arch.xen.upcall_vector) {
+               int idx = srcu_read_lock(&v->kvm->srcu);
+               struct vcpu_info *vcpu_info = xen_vcpu_info(v);
+
+               if (vcpu_info && READ_ONCE(vcpu_info->evtchn_upcall_pending))
+                       rc = 1;
+
+               srcu_read_unlock(&v->kvm->srcu, idx);
+       }
+       return rc;
+}
+
 static int vcpu_attr_loc(struct kvm_vcpu *vcpu, u16 type,
 			 struct kvm_host_map **map, void ***hva, size_t *sz)
 {
@@ -335,6 +351,14 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 		break;
 	}
 
+	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
+		if (data->u.vector < 0x10)
+			return -EINVAL;
+
+		kvm->arch.xen.upcall_vector = data->u.vector;
+		r = 0;
+		break;
+
 	default:
 		break;
 	}
@@ -382,6 +406,11 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 		break;
 	}
 
+	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
+		data->u.vector = kvm->arch.xen.upcall_vector;
+		r = 0;
+		break;
+
 	default:
 		break;
 	}
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index ccd6002f55bc0..1f599342f02cf 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -25,6 +25,7 @@ static inline struct kvm_vcpu *xen_vcpu_to_vcpu(struct kvm_vcpu_xen *xen_vcpu)
 void kvm_xen_setup_pvclock_page(struct kvm_vcpu *vcpu);
 void kvm_xen_setup_runstate_page(struct kvm_vcpu *vcpu);
 void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu);
+int kvm_xen_has_interrupt (struct kvm_vcpu *vcpu);
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 1047364d1adff..113279fa9e1e3 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1587,6 +1587,7 @@ struct kvm_xen_hvm_attr {
 
 	union {
 		__u8 long_mode;
+		__u8 vector;
 		struct {
 			__u64 gfn;
 		} shared_info;
@@ -1604,6 +1605,7 @@ struct kvm_xen_hvm_attr {
 #define KVM_XEN_ATTR_TYPE_VCPU_INFO		0x2
 #define KVM_XEN_ATTR_TYPE_VCPU_TIME_INFO	0x3
 #define KVM_XEN_ATTR_TYPE_VCPU_RUNSTATE		0x4
+#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR		0x5
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {