From 08833719e77041e331f6193878f1b944744b9068 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Wed, 27 Nov 2024 17:33:34 -0800
Subject: [PATCH 01/16] KVM: selftests: Assert that vcpu->cpuid is non-NULL when getting CPUID entries

Add a sanity check in __vcpu_get_cpuid_entry() to provide a friendlier
error than a segfault when a test developer tries to use a vCPU CPUID
helper on a barebones vCPU.

Link: https://lore.kernel.org/r/20241128013424.4096668-8-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 tools/testing/selftests/kvm/include/x86/processor.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 9ec984cf8674..71aa290c583f 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -1014,6 +1014,8 @@ static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *v
 							       uint32_t function,
 							       uint32_t index)
 {
+	TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)");
+
 	return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
 							  function, index);
 }
-- 
2.51.0

From a2a791e8208623b1575f21c7ec559df095c0a96e Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Wed, 27 Nov 2024 17:33:35 -0800
Subject: [PATCH 02/16] KVM: selftests: Refresh vCPU CPUID cache in __vcpu_get_cpuid_entry()

Refresh selftests' CPUID cache in the vCPU structure when querying a
CPUID entry so that tests don't consume stale data when KVM modifies
CPUID as a side effect of a completely unrelated change. E.g. KVM
adjusts OSXSAVE in response to CR4.OSXSAVE changes.

Unnecessarily invoking KVM_GET_CPUID2 is suboptimal, but vcpu->cpuid
exists to simplify selftests development, not for performance reasons.
And, unfortunately, trying to handle the side effects in tests or other
flows is unpleasant, e.g. selftests could manually refresh if
KVM_SET_SREGS is successful, but that would still leave a gap with
respect to guest CR4 changes.

Reviewed-by: Maxim Levitsky
Link: https://lore.kernel.org/r/20241128013424.4096668-9-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 .../selftests/kvm/include/x86/processor.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 71aa290c583f..b6753e27dfb6 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -1010,12 +1010,19 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
 
 void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
 
+static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
+{
+	vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+}
+
 static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
 							       uint32_t function,
 							       uint32_t index)
 {
 	TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)");
 
+	vcpu_get_cpuid(vcpu);
+
 	return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
 							  function, index);
 }
@@ -1036,7 +1043,7 @@ static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
 		return r;
 
 	/* On success, refresh the cache to pick up adjustments made by KVM. */
-	vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+	vcpu_get_cpuid(vcpu);
 
 	return 0;
 }
@@ -1046,12 +1053,7 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
 	vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
 
 	/* Refresh the cache to pick up adjustments made by KVM. */
-	vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
-}
-
-static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
-{
-	vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+	vcpu_get_cpuid(vcpu);
 }
 
 void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
-- 
2.51.0

From 01bcd829c63fdde92d9d6c32b2ed3ba34ead0930 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Wed, 27 Nov 2024 17:33:36 -0800
Subject: [PATCH 03/16] KVM: selftests: Verify KVM stuffs runtime CPUID OS bits on CR4 writes

Extend x86's set sregs test to verify that KVM sets/clears OSXSAVE and
OSPKE according to CR4.OSXSAVE and CR4.PKE respectively. For performance
reasons, KVM is responsible for emulating the architectural behavior of
the OS CPUID bits tracking CR4.

Reviewed-by: Maxim Levitsky
Reviewed-by: Binbin Wu
Reviewed-by: Xiaoyao Li
Link: https://lore.kernel.org/r/20241128013424.4096668-10-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 tools/testing/selftests/kvm/x86/set_sregs_test.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tools/testing/selftests/kvm/x86/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c
index 96fd690d479a..f4095a3d1278 100644
--- a/tools/testing/selftests/kvm/x86/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c
@@ -85,6 +85,16 @@ static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4)
 	rc = _vcpu_sregs_set(vcpu, &sregs);
 	TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
 
+	TEST_ASSERT(!!(sregs.cr4 & X86_CR4_OSXSAVE) ==
+		    (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSXSAVE)),
+		    "KVM didn't %s OSXSAVE in CPUID as expected",
+		    (sregs.cr4 & X86_CR4_OSXSAVE) ? "set" : "clear");
+
+	TEST_ASSERT(!!(sregs.cr4 & X86_CR4_PKE) ==
+		    (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSPKE)),
+		    "KVM didn't %s OSPKE in CPUID as expected",
+		    (sregs.cr4 & X86_CR4_PKE) ? "set" : "clear");
+
 	vcpu_sregs_get(vcpu, &sregs);
 	TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
 		    sregs.cr4, cr4);
-- 
2.51.0

From b0c3d6871778ea28b22761134fb399926782a1a6 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Wed, 27 Nov 2024 17:33:37 -0800
Subject: [PATCH 04/16] KVM: x86: Move __kvm_is_valid_cr4() definition to x86.h

Let vendor code inline __kvm_is_valid_cr4() now that x86.c's
cr4_reserved_bits no longer exists, as keeping cr4_reserved_bits local
to x86.c was the only reason for "hiding" the definition of
__kvm_is_valid_cr4().

No functional change intended.
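For illustration, a minimal sketch of the kind of vendor-side usage this
enables (hypothetical caller; the actual SVM/VMX call sites are not
touched by this patch):

	/* In vendor code, with x86.h in scope: */
	static bool example_check_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
	{
		/* Inlines to a single reserved-bits test, no CALL into x86.c. */
		return __kvm_is_valid_cr4(vcpu, cr4);
	}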
Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20241128013424.4096668-11-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 9 --------- arch/x86/kvm/x86.h | 6 +++++- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 10abba616056..a82f3db4da1b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1281,15 +1281,6 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv); -bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) -{ - if (cr4 & vcpu->arch.cr4_guest_rsvd_bits) - return false; - - return true; -} -EXPORT_SYMBOL_GPL(__kvm_is_valid_cr4); - static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { return __kvm_is_valid_cr4(vcpu, cr4) && diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index ec623d23d13d..7a87c5fc57f1 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -550,7 +550,6 @@ static inline void kvm_machine_check(void) void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); int kvm_spec_ctrl_test_value(u64 value); -bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, struct x86_exception *e); int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva); @@ -577,6 +576,11 @@ enum kvm_msr_access { #define KVM_MSR_RET_UNSUPPORTED 2 #define KVM_MSR_RET_FILTERED 3 +static inline bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + return !(cr4 & vcpu->arch.cr4_guest_rsvd_bits); +} + #define __cr4_reserved_bits(__cpu_has, __c) \ ({ \ u64 __reserved_bits = CR4_RESERVED_BITS; \ -- 2.51.0 From ac32cbd4dfc642b0bb8a10dd998246c86f19e326 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:38 -0800 Subject: [PATCH 05/16] KVM: x86/pmu: Drop now-redundant refresh() during init() Drop the manual kvm_pmu_refresh() from kvm_pmu_init() now that kvm_arch_vcpu_create() performs the refresh via kvm_vcpu_after_set_cpuid(). Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20241128013424.4096668-12-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/pmu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 47a46283c866..75e9cfc689f8 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -797,7 +797,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu) memset(pmu, 0, sizeof(*pmu)); kvm_pmu_call(init)(vcpu); - kvm_pmu_refresh(vcpu); } /* Release perf_events for vPMCs that have been unused for a full time slice. */ -- 2.51.0 From 21d7f06d1a8364a57432ccc1e6167c73c043b913 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:39 -0800 Subject: [PATCH 06/16] KVM: x86: Drop now-redundant MAXPHYADDR and GPA rsvd bits from vCPU creation Drop the manual initialization of maxphyaddr and reserved_gpa_bits during vCPU creation now that kvm_arch_vcpu_create() unconditionally invokes kvm_vcpu_after_set_cpuid(), which handles all such CPUID caching. None of the helpers between the existing code in kvm_arch_vcpu_create() and the call to kvm_vcpu_after_set_cpuid() consume maxphyaddr or reserved_gpa_bits (though auditing vmx_vcpu_create() and svm_vcpu_create() isn't exactly easy). 
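For reference, a rough sketch of the creation-time ordering that makes
the manual caching redundant (call chain abbreviated; the exact helpers
and vendor hooks are assumptions, not part of this patch):

	/*
	 * kvm_arch_vcpu_create()
	 *   -> kvm_x86_call(vcpu_create)(vcpu)  // vmx/svm_vcpu_create()
	 *   -> kvm_vcpu_after_set_cpuid(vcpu)   // re-caches maxphyaddr and
	 *                                       // reserved_gpa_bits anyway
	 */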
Link: https://lore.kernel.org/r/20241128013424.4096668-13-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a82f3db4da1b..503bfcbc1735 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12258,9 +12258,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) goto free_emulate_ctxt; } - vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); - vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu); - kvm_async_pf_hash_reset(vcpu); if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) { -- 2.51.0 From 04cd8f8628d88da7839b1643db0bef8522c39254 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:40 -0800 Subject: [PATCH 07/16] KVM: x86: Disallow KVM_CAP_X86_DISABLE_EXITS after vCPU creation Reject KVM_CAP_X86_DISABLE_EXITS if vCPUs have been created, as disabling PAUSE/MWAIT/HLT exits after vCPUs have been created is broken and useless, e.g. except for PAUSE on SVM, the relevant intercepts aren't updated after vCPU creation. vCPUs may also end up with an inconsistent configuration if exits are disabled between creation of multiple vCPUs. Cc: Hou Wenlong Link: https://lore.kernel.org/all/9227068821b275ac547eb2ede09ec65d2281fe07.1680179693.git.houwenlong.hwl@antgroup.com Link: https://lore.kernel.org/all/20230121020738.2973-2-kechenl@nvidia.com Reviewed-by: Maxim Levitsky Reviewed-by: Xiaoyao Li Reviewed-by: Binbin Wu Link: https://lore.kernel.org/r/20241128013424.4096668-14-seanjc@google.com Signed-off-by: Sean Christopherson --- Documentation/virt/kvm/api.rst | 1 + arch/x86/kvm/x86.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 454c2aaa155e..bbe445e6c113 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7670,6 +7670,7 @@ branch to guests' 0x200 interrupt vector. :Architectures: x86 :Parameters: args[0] defines which exits are disabled :Returns: 0 on success, -EINVAL when args[0] contains invalid exits + or if any vCPUs have already been created Valid bits in args[0] are:: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 503bfcbc1735..e3c3ceb54966 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6531,6 +6531,10 @@ split_irqchip_unlock: if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) break; + mutex_lock(&kvm->lock); + if (kvm->created_vcpus) + goto disable_exits_unlock; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) kvm->arch.pause_in_guest = true; @@ -6552,6 +6556,8 @@ split_irqchip_unlock: } r = 0; +disable_exits_unlock: + mutex_unlock(&kvm->lock); break; case KVM_CAP_MSR_PLATFORM_INFO: kvm->arch.guest_can_read_msr_platform_info = cap->args[0]; -- 2.51.0 From c829ccd4d9dc4a892e7c9ae032b87ac90ace2252 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:41 -0800 Subject: [PATCH 08/16] KVM: x86: Reject disabling of MWAIT/HLT interception when not allowed Reject KVM_CAP_X86_DISABLE_EXITS if userspace attempts to disable MWAIT or HLT exits and KVM previously reported (via KVM_CHECK_EXTENSION) that disabling the exit(s) is not allowed. E.g. because MWAIT isn't supported or the CPU doesn't have an always-running APIC timer, or because KVM is configured to mitigate cross-thread vulnerabilities. 
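A sketch of the userspace pattern this enforces, i.e. consult
KVM_CHECK_EXTENSION before enabling the capability (error handling
elided; vm_fd is an open VM file descriptor):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int allowed = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_X86_DISABLE_EXITS);

	if (allowed & KVM_X86_DISABLE_EXITS_MWAIT) {
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_X86_DISABLE_EXITS,
			.args[0] = KVM_X86_DISABLE_EXITS_MWAIT,
		};

		/* Must also be done before any vCPUs are created. */
		ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}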
Cc: Kechen Lu Fixes: 4d5422cea3b6 ("KVM: X86: Provide a capability to disable MWAIT intercepts") Fixes: 6f0f2d5ef895 ("KVM: x86: Mitigate the cross-thread return address predictions bug") Reviewed-by: Maxim Levitsky Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-15-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 54 ++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e3c3ceb54966..c627df92ce64 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4531,6 +4531,20 @@ static inline bool kvm_can_mwait_in_guest(void) boot_cpu_has(X86_FEATURE_ARAT); } +static u64 kvm_get_allowed_disable_exits(void) +{ + u64 r = KVM_X86_DISABLE_EXITS_PAUSE; + + if (!mitigate_smt_rsb) { + r |= KVM_X86_DISABLE_EXITS_HLT | + KVM_X86_DISABLE_EXITS_CSTATE; + + if (kvm_can_mwait_in_guest()) + r |= KVM_X86_DISABLE_EXITS_MWAIT; + } + return r; +} + #ifdef CONFIG_KVM_HYPERV static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 __user *cpuid_arg) @@ -4673,15 +4687,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_CLOCK_VALID_FLAGS; break; case KVM_CAP_X86_DISABLE_EXITS: - r = KVM_X86_DISABLE_EXITS_PAUSE; - - if (!mitigate_smt_rsb) { - r |= KVM_X86_DISABLE_EXITS_HLT | - KVM_X86_DISABLE_EXITS_CSTATE; - - if (kvm_can_mwait_in_guest()) - r |= KVM_X86_DISABLE_EXITS_MWAIT; - } + r = kvm_get_allowed_disable_exits(); break; case KVM_CAP_X86_SMM: if (!IS_ENABLED(CONFIG_KVM_SMM)) @@ -6528,33 +6534,29 @@ split_irqchip_unlock: break; case KVM_CAP_X86_DISABLE_EXITS: r = -EINVAL; - if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) + if (cap->args[0] & ~kvm_get_allowed_disable_exits()) break; mutex_lock(&kvm->lock); if (kvm->created_vcpus) goto disable_exits_unlock; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) - kvm->arch.pause_in_guest = true; - #define SMT_RSB_MSG "This processor is affected by the Cross-Thread Return Predictions vulnerability. " \ "KVM_CAP_X86_DISABLE_EXITS should only be used with SMT disabled or trusted guests." - if (!mitigate_smt_rsb) { - if (boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible() && - (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE)) - pr_warn_once(SMT_RSB_MSG); - - if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && - kvm_can_mwait_in_guest()) - kvm->arch.mwait_in_guest = true; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) - kvm->arch.hlt_in_guest = true; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) - kvm->arch.cstate_in_guest = true; - } + if (!mitigate_smt_rsb && boot_cpu_has_bug(X86_BUG_SMT_RSB) && + cpu_smt_possible() && + (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE)) + pr_warn_once(SMT_RSB_MSG); + if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) + kvm->arch.pause_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) + kvm->arch.mwait_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) + kvm->arch.hlt_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) + kvm->arch.cstate_in_guest = true; r = 0; disable_exits_unlock: mutex_unlock(&kvm->lock); -- 2.51.0 From af5366bea2cb9dfb5da2880e1dff544f87505300 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:42 -0800 Subject: [PATCH 09/16] KVM: x86: Drop the now unused KVM_X86_DISABLE_VALID_EXITS Drop the KVM_X86_DISABLE_VALID_EXITS definition, as it is misleading, and unused in KVM *because* it is misleading. 
The set of exits that can be disabled is dynamic, i.e. userspace (and
KVM) must check KVM's actual capabilities.

Suggested-by: Xiaoyao Li
Link: https://lore.kernel.org/r/20241128013424.4096668-16-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 include/uapi/linux/kvm.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 343de0a51797..45e6d8fca9b9 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -617,10 +617,6 @@ struct kvm_ioeventfd {
 #define KVM_X86_DISABLE_EXITS_HLT            (1 << 1)
 #define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
 #define KVM_X86_DISABLE_EXITS_CSTATE         (1 << 3)
-#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT | \
-                                              KVM_X86_DISABLE_EXITS_HLT | \
-                                              KVM_X86_DISABLE_EXITS_PAUSE | \
-                                              KVM_X86_DISABLE_EXITS_CSTATE)
 
 /* for KVM_ENABLE_CAP */
 struct kvm_enable_cap {
-- 
2.51.0

From 7b2658cb33c744ca41358ada2421a86774914764 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Wed, 27 Nov 2024 17:33:43 -0800
Subject: [PATCH 10/16] KVM: selftests: Fix a bad TEST_REQUIRE() in x86's KVM PV test

Actually check for KVM support for disabling HLT-exiting instead of
effectively checking that KVM_CAP_X86_DISABLE_EXITS is #defined to a
non-zero value, and convert the TEST_REQUIRE() to a simple return so
that only the sub-test is skipped if HLT-exiting is mandatory.

The goof has likely gone unnoticed because all x86 CPUs support
disabling HLT-exiting; only systems with the opt-in mitigate_smt_rsb
KVM module param disallow HLT-exiting.

Reviewed-by: Maxim Levitsky
Reviewed-by: Binbin Wu
Reviewed-by: Xiaoyao Li
Link: https://lore.kernel.org/r/20241128013424.4096668-17-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 tools/testing/selftests/kvm/x86/kvm_pv_test.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
index 78878b3a2725..2aee93108a54 100644
--- a/tools/testing/selftests/kvm/x86/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
@@ -140,9 +140,10 @@ static void test_pv_unhalt(void)
 	struct kvm_cpuid_entry2 *ent;
 	u32 kvm_sig_old;
 
-	pr_info("testing KVM_FEATURE_PV_UNHALT\n");
+	if (!(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) & KVM_X86_DISABLE_EXITS_HLT))
+		return;
 
-	TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS);
+	pr_info("testing KVM_FEATURE_PV_UNHALT\n");
 
 	/* KVM_PV_UNHALT test */
 	vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-- 
2.51.0

From 59cb3acdb316130c7247a3d3a20d7d6e75e2896a Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Wed, 27 Nov 2024 17:33:44 -0800
Subject: [PATCH 11/16] KVM: selftests: Update x86's KVM PV test to match KVM's disabling exits behavior

Rework x86's KVM PV features test to align with KVM's new, fixed
behavior of not allowing userspace to disable HLT-exiting after vCPUs
have been created. Rework the core testcase to disable HLT-exiting
before creating a vCPU, and opportunistically keep the paired VM+vCPU
creation to verify that KVM rejects KVM_CAP_X86_DISABLE_EXITS after
vCPU creation, as expected.
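As a point of reference for readers, the selftests helper convention
relied on below: vm_enable_cap() asserts that the underlying ioctl()
succeeds, while the double-underscore __vm_enable_cap() variant returns
the raw result so a test can assert on an expected failure, roughly:

	/* Expected to fail once a vCPU exists (errno is preserved). */
	r = __vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS,
			    KVM_X86_DISABLE_EXITS_HLT);
	TEST_ASSERT(r && errno == EINVAL, "...");

	/* Asserts success internally; use before creating vCPUs. */
	vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);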
Reviewed-by: Maxim Levitsky
Reviewed-by: Binbin Wu
Reviewed-by: Xiaoyao Li
Link: https://lore.kernel.org/r/20241128013424.4096668-18-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 tools/testing/selftests/kvm/x86/kvm_pv_test.c | 33 +++++++++++++++++--
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
index 2aee93108a54..1b805cbdb47b 100644
--- a/tools/testing/selftests/kvm/x86/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
@@ -139,6 +139,7 @@ static void test_pv_unhalt(void)
 	struct kvm_vm *vm;
 	struct kvm_cpuid_entry2 *ent;
 	u32 kvm_sig_old;
+	int r;
 
 	if (!(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) & KVM_X86_DISABLE_EXITS_HLT))
 		return;
@@ -152,19 +153,45 @@ static void test_pv_unhalt(void)
 	TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
 		    "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
 
-	/* Make sure KVM clears vcpu->arch.kvm_cpuid */
+	/* Verify KVM disallows disabling exits after vCPU creation. */
+	r = __vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+	TEST_ASSERT(r && errno == EINVAL,
+		    "Disabling exits after vCPU creation didn't fail as expected");
+
+	kvm_vm_free(vm);
+
+	/* Verify that KVM clears PV_UNHALT from guest CPUID. */
+	vm = vm_create(1);
+	vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+
+	vcpu = vm_vcpu_add(vm, 0, NULL);
+	TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+		    "vCPU created with PV_UNHALT set by default");
+
+	vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+	TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+		    "PV_UNHALT set in guest CPUID when HLT-exiting is disabled");
+
+	/*
+	 * Clobber the KVM PV signature and verify KVM does NOT clear PV_UNHALT
+	 * when KVM PV is not present, and DOES clear PV_UNHALT when switching
+	 * back to the correct signature.
+	 */
 	ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
 	kvm_sig_old = ent->ebx;
 	ent->ebx = 0xdeadbeef;
 	vcpu_set_cpuid(vcpu);
 
-	vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+	vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+	TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+		    "PV_UNHALT cleared when using bogus KVM PV signature");
+
 	ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
 	ent->ebx = kvm_sig_old;
 	vcpu_set_cpuid(vcpu);
 
 	TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
-		    "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
+		    "PV_UNHALT set in guest CPUID when HLT-exiting is disabled");
 
 	/* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
-- 
2.51.0

From 01d1059d635a101a21f145284e8023b0ffa5f7ed Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Wed, 27 Nov 2024 17:33:45 -0800
Subject: [PATCH 12/16] KVM: x86: Zero out PV features cache when the CPUID leaf is not present

Clear KVM's PV feature cache when processing a new guest CPUID so that
KVM doesn't keep a stale cache entry if userspace does KVM_SET_CPUID2
multiple times, once with a PV features entry, and a second time
without.
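Selftest-style pseudocode of the sequence that previously left a stale
cache (the cpuid_with/without_pv_leaf arrays are hypothetical; 0x40000001
is the KVM PV features leaf):

	/* First call: guest CPUID includes the KVM PV features leaf. */
	vcpu_ioctl(vcpu, KVM_SET_CPUID2, cpuid_with_pv_leaf);

	/*
	 * Second call: the PV leaf is gone, but prior to this fix
	 * vcpu->arch.pv_cpuid.features kept the value cached above.
	 */
	vcpu_ioctl(vcpu, KVM_SET_CPUID2, cpuid_without_pv_leaf);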
Fixes: 66570e966dd9 ("kvm: x86: only provide PV features if enabled in guest's CPUID")
Cc: Oliver Upton
Reviewed-by: Maxim Levitsky
Reviewed-by: Binbin Wu
Reviewed-by: Xiaoyao Li
Link: https://lore.kernel.org/r/20241128013424.4096668-19-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/cpuid.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 626a243febc9..1157357f499a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -272,6 +272,8 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best = kvm_find_kvm_cpuid_features(vcpu);
 
+	vcpu->arch.pv_cpuid.features = 0;
+
 	/*
 	 * save the feature bitmap to avoid cpuid lookup for every PV
 	 * operation
-- 
2.51.0

From f21958e328a9e5813562bfb822e674833a3ca36b Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Wed, 27 Nov 2024 17:33:46 -0800
Subject: [PATCH 13/16] KVM: x86: Don't update PV features caches when enabling enforcement capability

Revert the chunk of commit 01b4f510b9f4 ("kvm: x86: ensure
pv_cpuid.features is initialized when enabling cap") that forced a PV
features cache refresh during KVM_CAP_ENFORCE_PV_FEATURE_CPUID, as
whatever ioctl() ordering issue it alleged to have fixed never existed
upstream, and likely never existed in any kernel.

At the time of the commit, there was a tangentially related ioctl()
ordering issue, as toggling KVM_X86_DISABLE_EXITS_HLT after
KVM_SET_CPUID2 would have resulted in KVM potentially leaving
KVM_FEATURE_PV_UNHALT set. But (a) that bug affected the entire guest
CPUID, not just the cache, (b) commit 01b4f510b9f4 didn't address that
bug, it only refreshed the cache (with the bad CPUID), and (c) setting
KVM_X86_DISABLE_EXITS_HLT after vCPU creation is completely broken as
KVM configures HLT-exiting only during vCPU creation, which is why
KVM_CAP_X86_DISABLE_EXITS is now disallowed if vCPUs have been created.

Another tangentially related bug was KVM's failure to clear the cache
when handling KVM_SET_CPUID2, but again commit 01b4f510b9f4 did nothing
to fix that bug.

The most plausible explanation for what commit 01b4f510b9f4 was trying
to fix is a bug that existed in Google's internal kernel that was the
source of commit 01b4f510b9f4. At the time, Google's internal kernel had
not yet picked up commit 0d3b2ba16ba68 ("KVM: X86: Go on updating other
CPUID leaves when leaf 1 is absent"), i.e. KVM would not initialize the
PV features cache if KVM_SET_CPUID2 was called without a CPUID.0x1
entry.

Of course, no sane real world VMM would omit CPUID.0x1, including the
KVM selftest added by commit ac4a4d6de22e ("selftests: kvm: test
enforcement of paravirtual cpuid features"). And the test didn't
actually try to verify multiple orderings, nor did the selftest enter
the guest without doing KVM_SET_CPUID2, so who knows what motivated the
change.

Regardless of why commit 01b4f510b9f4 ("kvm: x86: ensure
pv_cpuid.features is initialized when enabling cap") was added,
refreshing the cache during KVM_CAP_ENFORCE_PV_FEATURE_CPUID isn't
necessary.
Cc: Oliver Upton Reviewed-by: Maxim Levitsky Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-20-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/cpuid.h | 1 - arch/x86/kvm/x86.c | 3 --- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 1157357f499a..c1a8d5744690 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -268,7 +268,7 @@ static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcp vcpu->arch.cpuid_nent, base); } -void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) +static void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best = kvm_find_kvm_cpuid_features(vcpu); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index da47dbf5d59b..e76a1f008faf 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -12,7 +12,6 @@ void kvm_set_cpu_caps(void); void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu); void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu); -void kvm_update_pv_runtime(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu, u32 function, u32 index); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c627df92ce64..adcaa033c5d3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5814,9 +5814,6 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, case KVM_CAP_ENFORCE_PV_FEATURE_CPUID: vcpu->arch.pv_cpuid.enforce = cap->args[0]; - if (vcpu->arch.pv_cpuid.enforce) - kvm_update_pv_runtime(vcpu); - return 0; default: return -EINVAL; -- 2.51.0 From 6416b0fb1660eb8bb73dc35dd5beb844646cb603 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:47 -0800 Subject: [PATCH 14/16] KVM: x86: Do reverse CPUID sanity checks in __feature_leaf() Do the compile-time sanity checks on reverse_cpuid in __feature_leaf() so that higher level APIs don't need to "manually" perform the sanity checks. No functional change intended. 
Reviewed-by: Maxim Levitsky
Reviewed-by: Binbin Wu
Reviewed-by: Xiaoyao Li
Link: https://lore.kernel.org/r/20241128013424.4096668-21-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/cpuid.h | 3 ---
 arch/x86/kvm/reverse_cpuid.h | 6 ++++--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index e76a1f008faf..99d4f6245610 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -180,7 +180,6 @@ static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature)
 {
 	unsigned int x86_leaf = __feature_leaf(x86_feature);
 
-	reverse_cpuid_check(x86_leaf);
 	kvm_cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
 }
 
@@ -188,7 +187,6 @@ static __always_inline void kvm_cpu_cap_set(unsigned int x86_feature)
 {
 	unsigned int x86_leaf = __feature_leaf(x86_feature);
 
-	reverse_cpuid_check(x86_leaf);
 	kvm_cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
 }
 
@@ -196,7 +194,6 @@ static __always_inline u32 kvm_cpu_cap_get(unsigned int x86_feature)
 {
 	unsigned int x86_leaf = __feature_leaf(x86_feature);
 
-	reverse_cpuid_check(x86_leaf);
 	return kvm_cpu_caps[x86_leaf] & __feature_bit(x86_feature);
 }
 
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index e46220ece83c..1d2db9d529ff 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -145,7 +145,10 @@ static __always_inline u32 __feature_translate(int x86_feature)
 
 static __always_inline u32 __feature_leaf(int x86_feature)
 {
-	return __feature_translate(x86_feature) / 32;
+	u32 x86_leaf = __feature_translate(x86_feature) / 32;
+
+	reverse_cpuid_check(x86_leaf);
+	return x86_leaf;
 }
 
 /*
@@ -168,7 +171,6 @@ static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned int x86_featu
 {
 	unsigned int x86_leaf = __feature_leaf(x86_feature);
 
-	reverse_cpuid_check(x86_leaf);
 	return reverse_cpuid[x86_leaf];
 }
 
-- 
2.51.0

From 96cbc766baf05daf5dbcfd17c605d821f10170be Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Wed, 27 Nov 2024 17:33:48 -0800
Subject: [PATCH 15/16] KVM: x86: Account for max supported CPUID leaf when getting raw host CPUID

Explicitly zero out the feature word in kvm_cpu_caps if the word's
associated CPUID function is greater than the max leaf supported by the
CPU. For such unsupported functions, Intel CPUs return the output from
the last supported leaf, not all zeros.

Practically speaking, this is likely a benign bug, as KVM uses the raw
host CPUID to mask the kernel's computed capabilities, and the kernel
does perform max leaf checks when populating boot_cpu_data. The only way
KVM's goof could be problematic is if the kernel force-set a feature in
a leaf that is completely unsupported, _and_ the max supported leaf
happened to return a value with '1' in the same bit position. Which is
theoretically possible, but extremely unlikely. And even if that did
happen, it's entirely possible that KVM would still provide the correct
functionality; the kernel did set the capability after all.
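For illustration, a user-space sketch of the Intel behavior being
guarded against (uses GCC's <cpuid.h>; per the SDM, out-of-range basic
leaves echo the highest supported basic leaf, whereas AMD CPUs return
zeros):

	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* 0x7fffffff is far above any supported basic leaf. */
		__cpuid_count(0x7fffffff, 0, eax, ebx, ecx, edx);

		/*
		 * On Intel, this prints the output of the highest supported
		 * basic leaf, not all zeros.
		 */
		printf("%08x %08x %08x %08x\n", eax, ebx, ecx, edx);
		return 0;
	}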
Reviewed-by: Maxim Levitsky Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-22-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c1a8d5744690..8fc07af8f72d 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -600,18 +600,37 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, return 0; } -/* Mask kvm_cpu_caps for @leaf with the raw CPUID capabilities of this CPU. */ -static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf) +static __always_inline u32 raw_cpuid_get(struct cpuid_reg cpuid) { - const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32); struct kvm_cpuid_entry2 entry; + u32 base; - reverse_cpuid_check(leaf); + /* + * KVM only supports features defined by Intel (0x0), AMD (0x80000000), + * and Centaur (0xc0000000). WARN if a feature for new vendor base is + * defined, as this and other code would need to be updated. + */ + base = cpuid.function & 0xffff0000; + if (WARN_ON_ONCE(base && base != 0x80000000 && base != 0xc0000000)) + return 0; + + if (cpuid_eax(base) < cpuid.function) + return 0; cpuid_count(cpuid.function, cpuid.index, &entry.eax, &entry.ebx, &entry.ecx, &entry.edx); - kvm_cpu_caps[leaf] &= *__cpuid_entry_get_reg(&entry, cpuid.reg); + return *__cpuid_entry_get_reg(&entry, cpuid.reg); +} + +/* Mask kvm_cpu_caps for @leaf with the raw CPUID capabilities of this CPU. */ +static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf) +{ + const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32); + + reverse_cpuid_check(leaf); + + kvm_cpu_caps[leaf] &= raw_cpuid_get(cpuid); } static __always_inline -- 2.51.0 From ccf93de484a33f8fe943734f3eed0f05004a48e9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:49 -0800 Subject: [PATCH 16/16] KVM: x86: Unpack F() CPUID feature flag macros to one flag per line of code Refactor kvm_set_cpu_caps() to express each supported (or not) feature flag on a separate line, modulo a handful of cases where KVM does not, and likely will not, support a sequence of flags. This will allow adding fancier macros with longer, more descriptive names without resulting in absurd line lengths and/or weird code. Isolating each flag also makes it far easier to review changes, reduces code conflicts, and generally makes it easier to resolve conflicts. Lastly, it allows co-locating comments for notable flags, e.g. MONITOR, precisely with the relevant flag. No functional change intended. Suggested-by: Maxim Levitsky Link: https://lore.kernel.org/r/20241128013424.4096668-23-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 295 +++++++++++++++++++++++++++++++++---------- 1 file changed, 231 insertions(+), 64 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 8fc07af8f72d..18c29ca5aa4f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -683,48 +683,121 @@ void kvm_set_cpu_caps(void) sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps))); kvm_cpu_cap_mask(CPUID_1_ECX, + F(XMM3) | + F(PCLMULQDQ) | + 0 /* DTES64 */ | /* * NOTE: MONITOR (and MWAIT) are emulated as NOP, but *not* * advertised to guests via CPUID! 
*/ - F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | + 0 /* MONITOR */ | 0 /* DS-CPL, VMX, SMX, EST */ | - 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | - F(FMA) | F(CX16) | 0 /* xTPR Update */ | F(PDCM) | - F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) | - F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | - 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | - F(F16C) | F(RDRAND) + 0 /* TM2 */ | + F(SSSE3) | + 0 /* CNXT-ID */ | + 0 /* Reserved */ | + F(FMA) | + F(CX16) | + 0 /* xTPR Update */ | + F(PDCM) | + F(PCID) | + 0 /* Reserved, DCA */ | + F(XMM4_1) | + F(XMM4_2) | + F(X2APIC) | + F(MOVBE) | + F(POPCNT) | + 0 /* Reserved*/ | + F(AES) | + F(XSAVE) | + 0 /* OSXSAVE */ | + F(AVX) | + F(F16C) | + F(RDRAND) ); /* KVM emulates x2apic in software irrespective of host support. */ kvm_cpu_cap_set(X86_FEATURE_X2APIC); kvm_cpu_cap_mask(CPUID_1_EDX, - F(FPU) | F(VME) | F(DE) | F(PSE) | - F(TSC) | F(MSR) | F(PAE) | F(MCE) | - F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | - F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | - F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) | - 0 /* Reserved, DS, ACPI */ | F(MMX) | - F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | + F(FPU) | + F(VME) | + F(DE) | + F(PSE) | + F(TSC) | + F(MSR) | + F(PAE) | + F(MCE) | + F(CX8) | + F(APIC) | + 0 /* Reserved */ | + F(SEP) | + F(MTRR) | + F(PGE) | + F(MCA) | + F(CMOV) | + F(PAT) | + F(PSE36) | + 0 /* PSN */ | + F(CLFLUSH) | + 0 /* Reserved, DS, ACPI */ | + F(MMX) | + F(FXSR) | + F(XMM) | + F(XMM2) | + F(SELFSNOOP) | 0 /* HTT, TM, Reserved, PBE */ ); kvm_cpu_cap_mask(CPUID_7_0_EBX, - F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) | - F(FDP_EXCPTN_ONLY) | F(SMEP) | F(BMI2) | F(ERMS) | F(INVPCID) | - F(RTM) | F(ZERO_FCS_FDS) | 0 /*MPX*/ | F(AVX512F) | - F(AVX512DQ) | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512IFMA) | - F(CLFLUSHOPT) | F(CLWB) | 0 /*INTEL_PT*/ | F(AVX512PF) | - F(AVX512ER) | F(AVX512CD) | F(SHA_NI) | F(AVX512BW) | + F(FSGSBASE) | + F(SGX) | + F(BMI1) | + F(HLE) | + F(AVX2) | + F(FDP_EXCPTN_ONLY) | + F(SMEP) | + F(BMI2) | + F(ERMS) | + F(INVPCID) | + F(RTM) | + F(ZERO_FCS_FDS) | + 0 /*MPX*/ | + F(AVX512F) | + F(AVX512DQ) | + F(RDSEED) | + F(ADX) | + F(SMAP) | + F(AVX512IFMA) | + F(CLFLUSHOPT) | + F(CLWB) | + 0 /*INTEL_PT*/ | + F(AVX512PF) | + F(AVX512ER) | + F(AVX512CD) | + F(SHA_NI) | + F(AVX512BW) | F(AVX512VL)); kvm_cpu_cap_mask(CPUID_7_ECX, - F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) | - F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | - F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ | - F(SGX_LC) | F(BUS_LOCK_DETECT) + F(AVX512VBMI) | + F(LA57) | + F(PKU) | + 0 /*OSPKE*/ | + F(RDPID) | + F(AVX512_VPOPCNTDQ) | + F(UMIP) | + F(AVX512_VBMI2) | + F(GFNI) | + F(VAES) | + F(VPCLMULQDQ) | + F(AVX512_VNNI) | + F(AVX512_BITALG) | + F(CLDEMOTE) | + F(MOVDIRI) | + F(MOVDIR64B) | + 0 /*WAITPKG*/ | + F(SGX_LC) | + F(BUS_LOCK_DETECT) ); /* Set LA57 based on hardware capability. 
*/ if (cpuid_ecx(7) & feature_bit(LA57)) @@ -738,11 +811,22 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_clear(X86_FEATURE_PKU); kvm_cpu_cap_mask(CPUID_7_EDX, - F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | - F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | - F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | - F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) | - F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D) + F(AVX512_4VNNIW) | + F(AVX512_4FMAPS) | + F(SPEC_CTRL) | + F(SPEC_CTRL_SSBD) | + F(ARCH_CAPABILITIES) | + F(INTEL_STIBP) | + F(MD_CLEAR) | + F(AVX512_VP2INTERSECT) | + F(FSRM) | + F(SERIALIZE) | + F(TSXLDTRK) | + F(AVX512_FP16) | + F(AMX_TILE) | + F(AMX_INT8) | + F(AMX_BF16) | + F(FLUSH_L1D) ); /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */ @@ -759,50 +843,110 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD); kvm_cpu_cap_mask(CPUID_7_1_EAX, - F(SHA512) | F(SM3) | F(SM4) | F(AVX_VNNI) | F(AVX512_BF16) | - F(CMPCCXADD) | F(FZRM) | F(FSRS) | F(FSRC) | F(AMX_FP16) | - F(AVX_IFMA) | F(LAM) + F(SHA512) | + F(SM3) | + F(SM4) | + F(AVX_VNNI) | + F(AVX512_BF16) | + F(CMPCCXADD) | + F(FZRM) | + F(FSRS) | + F(FSRC) | + F(AMX_FP16) | + F(AVX_IFMA) | + F(LAM) ); kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, - F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(AMX_COMPLEX) | - F(AVX_VNNI_INT16) | F(PREFETCHITI) | F(AVX10) + F(AVX_VNNI_INT8) | + F(AVX_NE_CONVERT) | + F(AMX_COMPLEX) | + F(AVX_VNNI_INT16) | + F(PREFETCHITI) | + F(AVX10) ); kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, - F(INTEL_PSFD) | F(IPRED_CTRL) | F(RRSBA_CTRL) | F(DDPD_U) | - F(BHI_CTRL) | F(MCDT_NO) + F(INTEL_PSFD) | + F(IPRED_CTRL) | + F(RRSBA_CTRL) | + F(DDPD_U) | + F(BHI_CTRL) | + F(MCDT_NO) ); kvm_cpu_cap_mask(CPUID_D_1_EAX, - F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd + F(XSAVEOPT) | + F(XSAVEC) | + F(XGETBV1) | + F(XSAVES) | + f_xfd ); kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX, - SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA) + SF(SGX1) | + SF(SGX2) | + SF(SGX_EDECCSSA) ); kvm_cpu_cap_init_kvm_defined(CPUID_24_0_EBX, - F(AVX10_128) | F(AVX10_256) | F(AVX10_512) + F(AVX10_128) | + F(AVX10_256) | + F(AVX10_512) ); kvm_cpu_cap_mask(CPUID_8000_0001_ECX, - F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | - F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | - F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | - 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) | - F(TOPOEXT) | 0 /* PERFCTR_CORE */ + F(LAHF_LM) | + F(CMP_LEGACY) | + 0 /*SVM*/ | + 0 /* ExtApicSpace */ | + F(CR8_LEGACY) | + F(ABM) | + F(SSE4A) | + F(MISALIGNSSE) | + F(3DNOWPREFETCH) | + F(OSVW) | + 0 /* IBS */ | + F(XOP) | + 0 /* SKINIT, WDT, LWP */ | + F(FMA4) | + F(TBM) | + F(TOPOEXT) | + 0 /* PERFCTR_CORE */ ); kvm_cpu_cap_mask(CPUID_8000_0001_EDX, - F(FPU) | F(VME) | F(DE) | F(PSE) | - F(TSC) | F(MSR) | F(PAE) | F(MCE) | - F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | - F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | - F(PAT) | F(PSE36) | 0 /* Reserved */ | - F(NX) | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | - F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) | - 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW) + F(FPU) | + F(VME) | + F(DE) | + F(PSE) | + F(TSC) | + F(MSR) | + F(PAE) | + F(MCE) | + F(CX8) | + F(APIC) | + 0 /* Reserved */ | + F(SYSCALL) | + F(MTRR) | + F(PGE) | + F(MCA) | + F(CMOV) | + F(PAT) | + F(PSE36) | + 0 /* Reserved */ | + F(NX) | + 0 /* Reserved */ | + F(MMXEXT) | + F(MMX) | + F(FXSR) | + F(FXSR_OPT) | + f_gbpages | + F(RDTSCP) | + 0 /* Reserved */ | + f_lm | + F(3DNOWEXT) | + 
F(3DNOW) ); if (!tdp_enabled && IS_ENABLED(CONFIG_X86_64)) @@ -813,10 +957,18 @@ void kvm_set_cpu_caps(void) ); kvm_cpu_cap_mask(CPUID_8000_0008_EBX, - F(CLZERO) | F(XSAVEERPTR) | - F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | - F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) | - F(AMD_PSFD) | F(AMD_IBPB_RET) + F(CLZERO) | + F(XSAVEERPTR) | + F(WBNOINVD) | + F(AMD_IBPB) | + F(AMD_IBRS) | + F(AMD_SSBD) | + F(VIRT_SSBD) | + F(AMD_SSB_NO) | + F(AMD_STIBP) | + F(AMD_STIBP_ALWAYS_ON) | + F(AMD_PSFD) | + F(AMD_IBPB_RET) ); /* @@ -853,12 +1005,20 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_mask(CPUID_8000_000A_EDX, 0); kvm_cpu_cap_mask(CPUID_8000_001F_EAX, - 0 /* SME */ | 0 /* SEV */ | 0 /* VM_PAGE_FLUSH */ | 0 /* SEV_ES */ | - F(SME_COHERENT)); + 0 /* SME */ | + 0 /* SEV */ | + 0 /* VM_PAGE_FLUSH */ | + 0 /* SEV_ES */ | + F(SME_COHERENT) + ); kvm_cpu_cap_mask(CPUID_8000_0021_EAX, - F(NO_NESTED_DATA_BP) | F(LFENCE_RDTSC) | 0 /* SmmPgCfgLock */ | - F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */ | + F(NO_NESTED_DATA_BP) | + F(LFENCE_RDTSC) | + 0 /* SmmPgCfgLock */ | + F(NULL_SEL_CLR_BASE) | + F(AUTOIBRS) | + 0 /* PrefetchCtlMsr */ | F(WRMSR_XX_BASE_NS) ); @@ -887,9 +1047,16 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_NO_SMM_CTL_MSR); kvm_cpu_cap_mask(CPUID_C000_0001_EDX, - F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | - F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | - F(PMM) | F(PMM_EN) + F(XSTORE) | + F(XSTORE_EN) | + F(XCRYPT) | + F(XCRYPT_EN) | + F(ACE2) | + F(ACE2_EN) | + F(PHE) | + F(PHE_EN) | + F(PMM) | + F(PMM_EN) ); /* -- 2.51.0