From 329c6c66f3bd136eb07b0379b0f1c3c82a5b6b34 Mon Sep 17 00:00:00 2001
From: David Woodhouse
Date: Fri, 15 Sep 2023 12:34:24 +0200
Subject: [PATCH] KVM: x86: Allow exit to userspace on HLT even when emulated in-kernel

The VMM may have work to do on behalf of the guest. For HLT it is at
least possible to schedule other I/O threads, since the kernel emulation
of HLT will schedule away and let those threads run. But MWAIT is more
painful because the best we can do there is yield(). Our emulation of
MWAIT is just to treat it as a no-op, causing the guest to busy-wait in
its idle loop.

There *may* be ways to emulate MWAIT a bit more faithfully, as explored
in https://www.contrib.andrew.cmu.edu/~somlo/OSXKVM/mwait.html

A simpler approach is to have MWAIT return to userspace each time around
the guest's loop, allowing userspace to preempt it and do some work
before returning to the vCPU. And if we're going to do that for
MWAIT-idle guests directly from the vCPU thread, it's neater to do it
for HLT-idle guests that way too.

Add KVM_CAP_X86_USERSPACE_EXITS for this. Only the HLT exit is wired up
here: KVM_X86_USERSPACE_EXIT_MWAIT is defined but is not yet part of
KVM_X86_USERSPACE_VALID_EXITS, so attempting to enable it fails with
-EINVAL.

Signed-off-by: David Woodhouse --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/x86.c | 20 ++++++++++++++++---- include/uapi/linux/kvm.h | 5 +++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 28bd38303d704..4d89d47e98025 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1286,6 +1286,8 @@ struct kvm_arch { bool pause_in_guest; bool cstate_in_guest; + unsigned long userspace_exits; + unsigned long irq_sources_bitmap; s64 kvmclock_offset; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index abc951f7bb957..3bc2354670b07 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4540,6 +4540,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r |= KVM_X86_DISABLE_EXITS_MWAIT; } break; + case KVM_CAP_X86_USERSPACE_EXITS: + r = KVM_X86_USERSPACE_VALID_EXITS; + break; case KVM_CAP_X86_SMM: if (!IS_ENABLED(CONFIG_KVM_SMM)) break; @@ -6344,6 +6347,14 @@ split_irqchip_unlock: kvm->arch.cstate_in_guest = true; } + r = 0; + break; + case KVM_CAP_X86_USERSPACE_EXITS: + r = -EINVAL; + if (cap->args[0] & ~KVM_X86_USERSPACE_VALID_EXITS) + break; + + kvm->arch.userspace_exits = cap->args[0]; r = 0; break; case KVM_CAP_MSR_PLATFORM_INFO: @@ -9633,11 +9644,12 @@ static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason) ++vcpu->stat.halt_exits; if (lapic_in_kernel(vcpu)) { vcpu->arch.mp_state = state; - return 1; - } else { - vcpu->run->exit_reason = reason; - return 0; + if (reason != KVM_EXIT_HLT || + !(vcpu->kvm->arch.userspace_exits & KVM_X86_USERSPACE_EXIT_HLT)) + return 1; } + vcpu->run->exit_reason = reason; + return 0; } int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f089ab2909784..9624987bd6c35 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -841,6 +841,10 @@ struct kvm_ioeventfd { KVM_X86_DISABLE_EXITS_PAUSE 
| \ KVM_X86_DISABLE_EXITS_CSTATE) +#define KVM_X86_USERSPACE_EXIT_MWAIT (1 << 0) +#define KVM_X86_USERSPACE_EXIT_HLT (1 << 1) +#define KVM_X86_USERSPACE_VALID_EXITS (KVM_X86_USERSPACE_EXIT_HLT) + /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { /* in */ @@ -1192,6 +1196,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_COUNTER_OFFSET 227 #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 +#define KVM_CAP_X86_USERSPACE_EXITS 230 #ifdef KVM_CAP_IRQ_ROUTING -- 2.50.1