www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
KVM: nVMX: Get to-be-acknowledged IRQ for nested VM-Exit at injection site
author Sean Christopherson <seanjc@google.com>
Fri, 6 Sep 2024 04:34:08 +0000 (21:34 -0700)
committer Sean Christopherson <seanjc@google.com>
Tue, 10 Sep 2024 03:14:58 +0000 (20:14 -0700)
Move the logic to get the to-be-acknowledged IRQ for a nested VM-Exit from
nested_vmx_vmexit() to vmx_check_nested_events(), which is subtly the one
and only path where KVM invokes nested_vmx_vmexit() with
EXIT_REASON_EXTERNAL_INTERRUPT.  A future fix will perform a last-minute
check on L2's nested posted interrupt notification vector, just before
injecting a nested VM-Exit.  To handle that scenario correctly, KVM needs
to get the interrupt _before_ injecting VM-Exit, as simply querying the
highest priority interrupt, via kvm_cpu_has_interrupt(), would result in a
TOCTOU bug, as a new, higher priority interrupt could arrive between
kvm_cpu_has_interrupt() and kvm_cpu_get_interrupt().

Unfortunately, simply moving the call to kvm_cpu_get_interrupt() doesn't
suffice, as a VMWRITE to GUEST_INTERRUPT_STATUS.SVI is hiding in
kvm_get_apic_interrupt(), and acknowledging the interrupt before nested
VM-Exit would cause the VMWRITE to hit vmcs02 instead of vmcs01.

Open code a rough equivalent to kvm_cpu_get_interrupt() so that the IRQ
is acknowledged after emulating VM-Exit, taking care to avoid the TOCTOU
issue described above.

Opportunistically convert the WARN_ON() to a WARN_ON_ONCE().  If KVM has
a bug that results in a false positive from kvm_cpu_has_interrupt(),
spamming dmesg won't help the situation.

Note, nested_vmx_reflect_vmexit() can never reflect external interrupts as
they are always "wanted" by L0.

Link: https://lore.kernel.org/r/20240906043413.1049633-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/irq.c
arch/x86/kvm/vmx/nested.c

index 4a93ac1b9be9144dbc19051ebf49f248f4b61ee5..aa31c4b94977469af40351a94252f1030befcfaa 100644 (file)
@@ -2256,6 +2256,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_cpu_has_extint(struct kvm_vcpu *v);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
+int kvm_cpu_get_extint(struct kvm_vcpu *v);
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
 
index 3d7eb11d0e456a145a27cbf02e2e623ae0699a64..810da99ff7edc0a0530b5d4fa1be6179ff9b92a1 100644 (file)
@@ -108,7 +108,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
  * Read pending interrupt(from non-APIC source)
  * vector and intack.
  */
-static int kvm_cpu_get_extint(struct kvm_vcpu *v)
+int kvm_cpu_get_extint(struct kvm_vcpu *v)
 {
        if (!kvm_cpu_has_extint(v)) {
                WARN_ON(!lapic_in_kernel(v));
@@ -131,6 +131,7 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v)
        } else
                return kvm_pic_read_irq(v->kvm); /* PIC */
 }
+EXPORT_SYMBOL_GPL(kvm_cpu_get_extint);
 
 /*
  * Read pending interrupt vector and intack.
index 867de342df33dbaa3df2c9333c3f8a9738a86258..1a6dc85cde180ceeff87de9ecace03926a772d2f 100644 (file)
@@ -4285,11 +4285,37 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
        }
 
        if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) {
+               int irq;
+
                if (block_nested_events)
                        return -EBUSY;
                if (!nested_exit_on_intr(vcpu))
                        goto no_vmexit;
-               nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
+
+               if (!nested_exit_intr_ack_set(vcpu)) {
+                       nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
+                       return 0;
+               }
+
+               irq = kvm_cpu_get_extint(vcpu);
+               if (irq != -1) {
+                       nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
+                                         INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0);
+                       return 0;
+               }
+
+               irq = kvm_apic_has_interrupt(vcpu);
+               WARN_ON_ONCE(irq < 0);
+
+               nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
+                                 INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0);
+
+               /*
+                * ACK the interrupt _after_ emulating VM-Exit, as the IRQ must
+                * be marked as in-service in vmcs01.GUEST_INTERRUPT_STATUS.SVI
+                * if APICv is active.
+                */
+               kvm_apic_ack_interrupt(vcpu, irq);
                return 0;
        }
 
@@ -4970,14 +4996,6 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
        vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
        if (likely(!vmx->fail)) {
-               if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
-                   nested_exit_intr_ack_set(vcpu)) {
-                       int irq = kvm_cpu_get_interrupt(vcpu);
-                       WARN_ON(irq < 0);
-                       vmcs12->vm_exit_intr_info = irq |
-                               INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
-               }
-
                if (vm_exit_reason != -1)
                        trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
                                                       vmcs12->exit_qualification,