From 04cd8f8628d88da7839b1643db0bef8522c39254 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:40 -0800 Subject: [PATCH 01/16] KVM: x86: Disallow KVM_CAP_X86_DISABLE_EXITS after vCPU creation Reject KVM_CAP_X86_DISABLE_EXITS if vCPUs have been created, as disabling PAUSE/MWAIT/HLT exits after vCPUs have been created is broken and useless, e.g. except for PAUSE on SVM, the relevant intercepts aren't updated after vCPU creation. vCPUs may also end up with an inconsistent configuration if exits are disabled between creation of multiple vCPUs. Cc: Hou Wenlong Link: https://lore.kernel.org/all/9227068821b275ac547eb2ede09ec65d2281fe07.1680179693.git.houwenlong.hwl@antgroup.com Link: https://lore.kernel.org/all/20230121020738.2973-2-kechenl@nvidia.com Reviewed-by: Maxim Levitsky Reviewed-by: Xiaoyao Li Reviewed-by: Binbin Wu Link: https://lore.kernel.org/r/20241128013424.4096668-14-seanjc@google.com Signed-off-by: Sean Christopherson --- Documentation/virt/kvm/api.rst | 1 + arch/x86/kvm/x86.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 454c2aaa155e..bbe445e6c113 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7670,6 +7670,7 @@ branch to guests' 0x200 interrupt vector. :Architectures: x86 :Parameters: args[0] defines which exits are disabled :Returns: 0 on success, -EINVAL when args[0] contains invalid exits + or if any vCPUs have already been created Valid bits in args[0] are:: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 503bfcbc1735..e3c3ceb54966 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6531,6 +6531,10 @@ split_irqchip_unlock: if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) break; + mutex_lock(&kvm->lock); + if (kvm->created_vcpus) + goto disable_exits_unlock; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) kvm->arch.pause_in_guest = true; @@ -6552,6 +6556,8 @@ split_irqchip_unlock: } r = 0; +disable_exits_unlock: + mutex_unlock(&kvm->lock); break; case KVM_CAP_MSR_PLATFORM_INFO: kvm->arch.guest_can_read_msr_platform_info = cap->args[0]; -- 2.51.0 From c829ccd4d9dc4a892e7c9ae032b87ac90ace2252 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:41 -0800 Subject: [PATCH 02/16] KVM: x86: Reject disabling of MWAIT/HLT interception when not allowed Reject KVM_CAP_X86_DISABLE_EXITS if userspace attempts to disable MWAIT or HLT exits and KVM previously reported (via KVM_CHECK_EXTENSION) that disabling the exit(s) is not allowed. E.g. because MWAIT isn't supported or the CPU doesn't have an always-running APIC timer, or because KVM is configured to mitigate cross-thread vulnerabilities. Cc: Kechen Lu Fixes: 4d5422cea3b6 ("KVM: X86: Provide a capability to disable MWAIT intercepts") Fixes: 6f0f2d5ef895 ("KVM: x86: Mitigate the cross-thread return address predictions bug") Reviewed-by: Maxim Levitsky Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-15-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 54 ++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e3c3ceb54966..c627df92ce64 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4531,6 +4531,20 @@ static inline bool kvm_can_mwait_in_guest(void) boot_cpu_has(X86_FEATURE_ARAT); } +static u64 kvm_get_allowed_disable_exits(void) +{ + u64 r = KVM_X86_DISABLE_EXITS_PAUSE; + + if (!mitigate_smt_rsb) { + r |= KVM_X86_DISABLE_EXITS_HLT | + KVM_X86_DISABLE_EXITS_CSTATE; + + if (kvm_can_mwait_in_guest()) + r |= KVM_X86_DISABLE_EXITS_MWAIT; + } + return r; +} + #ifdef CONFIG_KVM_HYPERV static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 __user *cpuid_arg) @@ -4673,15 +4687,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_CLOCK_VALID_FLAGS; break; case KVM_CAP_X86_DISABLE_EXITS: - r = KVM_X86_DISABLE_EXITS_PAUSE; - - if (!mitigate_smt_rsb) { - r |= KVM_X86_DISABLE_EXITS_HLT | - KVM_X86_DISABLE_EXITS_CSTATE; - - if (kvm_can_mwait_in_guest()) - r |= KVM_X86_DISABLE_EXITS_MWAIT; - } + r = kvm_get_allowed_disable_exits(); break; case KVM_CAP_X86_SMM: if (!IS_ENABLED(CONFIG_KVM_SMM)) @@ -6528,33 +6534,29 @@ split_irqchip_unlock: break; case KVM_CAP_X86_DISABLE_EXITS: r = -EINVAL; - if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) + if (cap->args[0] & ~kvm_get_allowed_disable_exits()) break; mutex_lock(&kvm->lock); if (kvm->created_vcpus) goto disable_exits_unlock; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) - kvm->arch.pause_in_guest = true; - #define SMT_RSB_MSG "This processor is affected by the Cross-Thread Return Predictions vulnerability. " \ "KVM_CAP_X86_DISABLE_EXITS should only be used with SMT disabled or trusted guests." - if (!mitigate_smt_rsb) { - if (boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible() && - (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE)) - pr_warn_once(SMT_RSB_MSG); - - if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && - kvm_can_mwait_in_guest()) - kvm->arch.mwait_in_guest = true; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) - kvm->arch.hlt_in_guest = true; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) - kvm->arch.cstate_in_guest = true; - } + if (!mitigate_smt_rsb && boot_cpu_has_bug(X86_BUG_SMT_RSB) && + cpu_smt_possible() && + (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE)) + pr_warn_once(SMT_RSB_MSG); + if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) + kvm->arch.pause_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) + kvm->arch.mwait_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) + kvm->arch.hlt_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) + kvm->arch.cstate_in_guest = true; r = 0; disable_exits_unlock: mutex_unlock(&kvm->lock); -- 2.51.0 From af5366bea2cb9dfb5da2880e1dff544f87505300 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:42 -0800 Subject: [PATCH 03/16] KVM: x86: Drop the now unused KVM_X86_DISABLE_VALID_EXITS Drop the KVM_X86_DISABLE_VALID_EXITS definition, as it is misleading, and unused in KVM *because* it is misleading. The set of exits that can be disabled is dynamic, i.e. userspace (and KVM) must check KVM's actual capabilities. Suggested-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-16-seanjc@google.com Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 343de0a51797..45e6d8fca9b9 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -617,10 +617,6 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) -#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE | \ - KVM_X86_DISABLE_EXITS_CSTATE) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { -- 2.51.0 From 7b2658cb33c744ca41358ada2421a86774914764 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:43 -0800 Subject: [PATCH 04/16] KVM: selftests: Fix a bad TEST_REQUIRE() in x86's KVM PV test Actually check for KVM support for disabling HLT-exiting instead of effectively checking that KVM_CAP_X86_DISABLE_EXITS is #defined to a non-zero value, and convert the TEST_REQUIRE() to a simple return so that only the sub-test is skipped if HLT-exiting is mandatory. The goof has likely gone unnoticed because all x86 CPUs support disabling HLT-exiting, only systems with the opt-in mitigate_smt_rsb KVM module param disallow HLT-exiting. Reviewed-by: Maxim Levitsky Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-17-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/kvm_pv_test.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c index 78878b3a2725..2aee93108a54 100644 --- a/tools/testing/selftests/kvm/x86/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c @@ -140,9 +140,10 @@ static void test_pv_unhalt(void) struct kvm_cpuid_entry2 *ent; u32 kvm_sig_old; - pr_info("testing KVM_FEATURE_PV_UNHALT\n"); + if (!(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) & KVM_X86_DISABLE_EXITS_HLT)) + return; - TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS); + pr_info("testing KVM_FEATURE_PV_UNHALT\n"); /* KVM_PV_UNHALT test */ vm = vm_create_with_one_vcpu(&vcpu, guest_main); -- 2.51.0 From 59cb3acdb316130c7247a3d3a20d7d6e75e2896a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:44 -0800 Subject: [PATCH 05/16] KVM: selftests: Update x86's KVM PV test to match KVM's disabling exits behavior Rework x86's KVM PV features test to align with KVM's new, fixed behavior of not allowing userspace to disable HLT-exiting after vCPUs have been created. Rework the core testcase to disable HLT-exiting before creating a vCPU, and opportunistically modify keep the paired VM+vCPU creation to verify that KVM rejects KVM_CAP_X86_DISABLE_EXITS as expected. Reviewed-by: Maxim Levitsky Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-18-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86/kvm_pv_test.c | 33 +++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c index 2aee93108a54..1b805cbdb47b 100644 --- a/tools/testing/selftests/kvm/x86/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c @@ -139,6 +139,7 @@ static void test_pv_unhalt(void) struct kvm_vm *vm; struct kvm_cpuid_entry2 *ent; u32 kvm_sig_old; + int r; if (!(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) & KVM_X86_DISABLE_EXITS_HLT)) return; @@ -152,19 +153,45 @@ static void test_pv_unhalt(void) TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect"); - /* Make sure KVM clears vcpu->arch.kvm_cpuid */ + /* Verify KVM disallows disabling exits after vCPU creation. */ + r = __vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + TEST_ASSERT(r && errno == EINVAL, + "Disabling exits after vCPU creation didn't fail as expected"); + + kvm_vm_free(vm); + + /* Verify that KVM clear PV_UNHALT from guest CPUID. */ + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + + vcpu = vm_vcpu_add(vm, 0, NULL); + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "vCPU created with PV_UNHALT set by default"); + + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "PV_UNHALT set in guest CPUID when HLT-exiting is disabled"); + + /* + * Clobber the KVM PV signature and verify KVM does NOT clear PV_UNHALT + * when KVM PV is not present, and DOES clear PV_UNHALT when switching + * back to the correct signature.. + */ ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); kvm_sig_old = ent->ebx; ent->ebx = 0xdeadbeef; vcpu_set_cpuid(vcpu); - vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT); + TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), + "PV_UNHALT cleared when using bogus KVM PV signature"); + ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE); ent->ebx = kvm_sig_old; vcpu_set_cpuid(vcpu); TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT), - "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS"); + "PV_UNHALT set in guest CPUID when HLT-exiting is disabled"); /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */ -- 2.51.0 From 01d1059d635a101a21f145284e8023b0ffa5f7ed Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:45 -0800 Subject: [PATCH 06/16] KVM: x86: Zero out PV features cache when the CPUID leaf is not present Clear KVM's PV feature cache prior when processing a new guest CPUID so that KVM doesn't keep a stale cache entry if userspace does KVM_SET_CPUID2 multiple times, once with a PV features entry, and a second time without. Fixes: 66570e966dd9 ("kvm: x86: only provide PV features if enabled in guest's CPUID") Cc: Oliver Upton Reviewed-by: Maxim Levitsky Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-19-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 626a243febc9..1157357f499a 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -272,6 +272,8 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best = kvm_find_kvm_cpuid_features(vcpu); + vcpu->arch.pv_cpuid.features = 0; + /* * save the feature bitmap to avoid cpuid lookup for every PV * operation -- 2.51.0 From f21958e328a9e5813562bfb822e674833a3ca36b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:46 -0800 Subject: [PATCH 07/16] KVM: x86: Don't update PV features caches when enabling enforcement capability Revert the chunk of commit 01b4f510b9f4 ("kvm: x86: ensure pv_cpuid.features is initialized when enabling cap") that forced a PV features cache refresh during KVM_CAP_ENFORCE_PV_FEATURE_CPUID, as whatever ioctl() ordering issue it alleged to have fixed never existed upstream, and likely never existed in any kernel. At the time of the commit, there was a tangentially related ioctl() ordering issue, as toggling KVM_X86_DISABLE_EXITS_HLT after KVM_SET_CPUID2 would have resulted in KVM potentially leaving KVM_FEATURE_PV_UNHALT set. But (a) that bug affected the entire guest CPUID, not just the cache, (b) commit 01b4f510b9f4 didn't address that bug, it only refreshed the cache (with the bad CPUID), and (c) setting KVM_X86_DISABLE_EXITS_HLT after vCPU creation is completely broken as KVM configures HLT-exiting only during vCPU creation, which is why KVM_CAP_X86_DISABLE_EXITS is now disallowed if vCPUs have been created. Another tangentially related bug was KVM's failure to clear the cache when handling KVM_SET_CPUID2, but again commit 01b4f510b9f4 did nothing to fix that bug. The most plausible explanation for the what commit 01b4f510b9f4 was trying to fix is a bug that existed in Google's internal kernel that was the source of commit 01b4f510b9f4. At the time, Google's internal kernel had not yet picked up commit 0d3b2ba16ba68 ("KVM: X86: Go on updating other CPUID leaves when leaf 1 is absent"), i.e. KVM would not initialize the PV features cache if KVM_SET_CPUID2 was called without a CPUID.0x1 entry. Of course, no sane real world VMM would omit CPUID.0x1, including the KVM selftest added by commit ac4a4d6de22e ("selftests: kvm: test enforcement of paravirtual cpuid features"). And the test didn't actually try to verify multiple orderings, nor did the selftest enter the guest without doing KVM_SET_CPUID2, so who knows what motivated the change. Regardless of why commit 01b4f510b9f4 ("kvm: x86: ensure pv_cpuid.features is initialized when enabling cap") was added, refreshing the cache during KVM_CAP_ENFORCE_PV_FEATURE_CPUID isn't necessary. Cc: Oliver Upton Reviewed-by: Maxim Levitsky Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-20-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/cpuid.h | 1 - arch/x86/kvm/x86.c | 3 --- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 1157357f499a..c1a8d5744690 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -268,7 +268,7 @@ static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcp vcpu->arch.cpuid_nent, base); } -void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) +static void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best = kvm_find_kvm_cpuid_features(vcpu); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index da47dbf5d59b..e76a1f008faf 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -12,7 +12,6 @@ void kvm_set_cpu_caps(void); void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu); void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu); -void kvm_update_pv_runtime(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu, u32 function, u32 index); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c627df92ce64..adcaa033c5d3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5814,9 +5814,6 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, case KVM_CAP_ENFORCE_PV_FEATURE_CPUID: vcpu->arch.pv_cpuid.enforce = cap->args[0]; - if (vcpu->arch.pv_cpuid.enforce) - kvm_update_pv_runtime(vcpu); - return 0; default: return -EINVAL; -- 2.51.0 From 6416b0fb1660eb8bb73dc35dd5beb844646cb603 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:47 -0800 Subject: [PATCH 08/16] KVM: x86: Do reverse CPUID sanity checks in __feature_leaf() Do the compile-time sanity checks on reverse_cpuid in __feature_leaf() so that higher level APIs don't need to "manually" perform the sanity checks. No functional change intended. Reviewed-by: Maxim Levitsky Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-21-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.h | 3 --- arch/x86/kvm/reverse_cpuid.h | 6 ++++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index e76a1f008faf..99d4f6245610 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -180,7 +180,6 @@ static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature) { unsigned int x86_leaf = __feature_leaf(x86_feature); - reverse_cpuid_check(x86_leaf); kvm_cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature); } @@ -188,7 +187,6 @@ static __always_inline void kvm_cpu_cap_set(unsigned int x86_feature) { unsigned int x86_leaf = __feature_leaf(x86_feature); - reverse_cpuid_check(x86_leaf); kvm_cpu_caps[x86_leaf] |= __feature_bit(x86_feature); } @@ -196,7 +194,6 @@ static __always_inline u32 kvm_cpu_cap_get(unsigned int x86_feature) { unsigned int x86_leaf = __feature_leaf(x86_feature); - reverse_cpuid_check(x86_leaf); return kvm_cpu_caps[x86_leaf] & __feature_bit(x86_feature); } diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index e46220ece83c..1d2db9d529ff 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -145,7 +145,10 @@ static __always_inline u32 __feature_translate(int x86_feature) static __always_inline u32 __feature_leaf(int x86_feature) { - return __feature_translate(x86_feature) / 32; + u32 x86_leaf = __feature_translate(x86_feature) / 32; + + reverse_cpuid_check(x86_leaf); + return x86_leaf; } /* @@ -168,7 +171,6 @@ static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned int x86_featu { unsigned int x86_leaf = __feature_leaf(x86_feature); - reverse_cpuid_check(x86_leaf); return reverse_cpuid[x86_leaf]; } -- 2.51.0 From 96cbc766baf05daf5dbcfd17c605d821f10170be Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:48 -0800 Subject: [PATCH 09/16] KVM: x86: Account for max supported CPUID leaf when getting raw host CPUID Explicitly zero out the feature word in kvm_cpu_caps if the word's associated CPUID function is greater than the max leaf supported by the CPU. For such unsupported functions, Intel CPUs return the output from the last supported leaf, not all zeros. Practically speaking, this is likely a benign bug, as KVM uses the raw host CPUID to mask the kernel's computed capabilities, and the kernel does perform max leaf checks when populating boot_cpu_data. The only way KVM's goof could be problematic is if the kernel force-set a feature in a leaf that is completely unsupported, _and_ the max supported leaf happened to return a value with '1' the same bit position. Which is theoretically possible, but extremely unlikely. And even if that did happen, it's entirely possible that KVM would still provide the correct functionality; the kernel did set the capability after all. Reviewed-by: Maxim Levitsky Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-22-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c1a8d5744690..8fc07af8f72d 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -600,18 +600,37 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, return 0; } -/* Mask kvm_cpu_caps for @leaf with the raw CPUID capabilities of this CPU. */ -static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf) +static __always_inline u32 raw_cpuid_get(struct cpuid_reg cpuid) { - const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32); struct kvm_cpuid_entry2 entry; + u32 base; - reverse_cpuid_check(leaf); + /* + * KVM only supports features defined by Intel (0x0), AMD (0x80000000), + * and Centaur (0xc0000000). WARN if a feature for new vendor base is + * defined, as this and other code would need to be updated. + */ + base = cpuid.function & 0xffff0000; + if (WARN_ON_ONCE(base && base != 0x80000000 && base != 0xc0000000)) + return 0; + + if (cpuid_eax(base) < cpuid.function) + return 0; cpuid_count(cpuid.function, cpuid.index, &entry.eax, &entry.ebx, &entry.ecx, &entry.edx); - kvm_cpu_caps[leaf] &= *__cpuid_entry_get_reg(&entry, cpuid.reg); + return *__cpuid_entry_get_reg(&entry, cpuid.reg); +} + +/* Mask kvm_cpu_caps for @leaf with the raw CPUID capabilities of this CPU. */ +static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf) +{ + const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32); + + reverse_cpuid_check(leaf); + + kvm_cpu_caps[leaf] &= raw_cpuid_get(cpuid); } static __always_inline -- 2.51.0 From ccf93de484a33f8fe943734f3eed0f05004a48e9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:49 -0800 Subject: [PATCH 10/16] KVM: x86: Unpack F() CPUID feature flag macros to one flag per line of code Refactor kvm_set_cpu_caps() to express each supported (or not) feature flag on a separate line, modulo a handful of cases where KVM does not, and likely will not, support a sequence of flags. This will allow adding fancier macros with longer, more descriptive names without resulting in absurd line lengths and/or weird code. Isolating each flag also makes it far easier to review changes, reduces code conflicts, and generally makes it easier to resolve conflicts. Lastly, it allows co-locating comments for notable flags, e.g. MONITOR, precisely with the relevant flag. No functional change intended. Suggested-by: Maxim Levitsky Link: https://lore.kernel.org/r/20241128013424.4096668-23-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 295 +++++++++++++++++++++++++++++++++---------- 1 file changed, 231 insertions(+), 64 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 8fc07af8f72d..18c29ca5aa4f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -683,48 +683,121 @@ void kvm_set_cpu_caps(void) sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps))); kvm_cpu_cap_mask(CPUID_1_ECX, + F(XMM3) | + F(PCLMULQDQ) | + 0 /* DTES64 */ | /* * NOTE: MONITOR (and MWAIT) are emulated as NOP, but *not* * advertised to guests via CPUID! */ - F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | + 0 /* MONITOR */ | 0 /* DS-CPL, VMX, SMX, EST */ | - 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | - F(FMA) | F(CX16) | 0 /* xTPR Update */ | F(PDCM) | - F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) | - F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | - 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | - F(F16C) | F(RDRAND) + 0 /* TM2 */ | + F(SSSE3) | + 0 /* CNXT-ID */ | + 0 /* Reserved */ | + F(FMA) | + F(CX16) | + 0 /* xTPR Update */ | + F(PDCM) | + F(PCID) | + 0 /* Reserved, DCA */ | + F(XMM4_1) | + F(XMM4_2) | + F(X2APIC) | + F(MOVBE) | + F(POPCNT) | + 0 /* Reserved*/ | + F(AES) | + F(XSAVE) | + 0 /* OSXSAVE */ | + F(AVX) | + F(F16C) | + F(RDRAND) ); /* KVM emulates x2apic in software irrespective of host support. */ kvm_cpu_cap_set(X86_FEATURE_X2APIC); kvm_cpu_cap_mask(CPUID_1_EDX, - F(FPU) | F(VME) | F(DE) | F(PSE) | - F(TSC) | F(MSR) | F(PAE) | F(MCE) | - F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | - F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | - F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) | - 0 /* Reserved, DS, ACPI */ | F(MMX) | - F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | + F(FPU) | + F(VME) | + F(DE) | + F(PSE) | + F(TSC) | + F(MSR) | + F(PAE) | + F(MCE) | + F(CX8) | + F(APIC) | + 0 /* Reserved */ | + F(SEP) | + F(MTRR) | + F(PGE) | + F(MCA) | + F(CMOV) | + F(PAT) | + F(PSE36) | + 0 /* PSN */ | + F(CLFLUSH) | + 0 /* Reserved, DS, ACPI */ | + F(MMX) | + F(FXSR) | + F(XMM) | + F(XMM2) | + F(SELFSNOOP) | 0 /* HTT, TM, Reserved, PBE */ ); kvm_cpu_cap_mask(CPUID_7_0_EBX, - F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) | - F(FDP_EXCPTN_ONLY) | F(SMEP) | F(BMI2) | F(ERMS) | F(INVPCID) | - F(RTM) | F(ZERO_FCS_FDS) | 0 /*MPX*/ | F(AVX512F) | - F(AVX512DQ) | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512IFMA) | - F(CLFLUSHOPT) | F(CLWB) | 0 /*INTEL_PT*/ | F(AVX512PF) | - F(AVX512ER) | F(AVX512CD) | F(SHA_NI) | F(AVX512BW) | + F(FSGSBASE) | + F(SGX) | + F(BMI1) | + F(HLE) | + F(AVX2) | + F(FDP_EXCPTN_ONLY) | + F(SMEP) | + F(BMI2) | + F(ERMS) | + F(INVPCID) | + F(RTM) | + F(ZERO_FCS_FDS) | + 0 /*MPX*/ | + F(AVX512F) | + F(AVX512DQ) | + F(RDSEED) | + F(ADX) | + F(SMAP) | + F(AVX512IFMA) | + F(CLFLUSHOPT) | + F(CLWB) | + 0 /*INTEL_PT*/ | + F(AVX512PF) | + F(AVX512ER) | + F(AVX512CD) | + F(SHA_NI) | + F(AVX512BW) | F(AVX512VL)); kvm_cpu_cap_mask(CPUID_7_ECX, - F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) | - F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | - F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ | - F(SGX_LC) | F(BUS_LOCK_DETECT) + F(AVX512VBMI) | + F(LA57) | + F(PKU) | + 0 /*OSPKE*/ | + F(RDPID) | + F(AVX512_VPOPCNTDQ) | + F(UMIP) | + F(AVX512_VBMI2) | + F(GFNI) | + F(VAES) | + F(VPCLMULQDQ) | + F(AVX512_VNNI) | + F(AVX512_BITALG) | + F(CLDEMOTE) | + F(MOVDIRI) | + F(MOVDIR64B) | + 0 /*WAITPKG*/ | + F(SGX_LC) | + F(BUS_LOCK_DETECT) ); /* Set LA57 based on hardware capability. */ if (cpuid_ecx(7) & feature_bit(LA57)) @@ -738,11 +811,22 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_clear(X86_FEATURE_PKU); kvm_cpu_cap_mask(CPUID_7_EDX, - F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | - F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | - F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | - F(SERIALIZE) | F(TSXLDTRK) | F(AVX512_FP16) | - F(AMX_TILE) | F(AMX_INT8) | F(AMX_BF16) | F(FLUSH_L1D) + F(AVX512_4VNNIW) | + F(AVX512_4FMAPS) | + F(SPEC_CTRL) | + F(SPEC_CTRL_SSBD) | + F(ARCH_CAPABILITIES) | + F(INTEL_STIBP) | + F(MD_CLEAR) | + F(AVX512_VP2INTERSECT) | + F(FSRM) | + F(SERIALIZE) | + F(TSXLDTRK) | + F(AVX512_FP16) | + F(AMX_TILE) | + F(AMX_INT8) | + F(AMX_BF16) | + F(FLUSH_L1D) ); /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */ @@ -759,50 +843,110 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD); kvm_cpu_cap_mask(CPUID_7_1_EAX, - F(SHA512) | F(SM3) | F(SM4) | F(AVX_VNNI) | F(AVX512_BF16) | - F(CMPCCXADD) | F(FZRM) | F(FSRS) | F(FSRC) | F(AMX_FP16) | - F(AVX_IFMA) | F(LAM) + F(SHA512) | + F(SM3) | + F(SM4) | + F(AVX_VNNI) | + F(AVX512_BF16) | + F(CMPCCXADD) | + F(FZRM) | + F(FSRS) | + F(FSRC) | + F(AMX_FP16) | + F(AVX_IFMA) | + F(LAM) ); kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, - F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(AMX_COMPLEX) | - F(AVX_VNNI_INT16) | F(PREFETCHITI) | F(AVX10) + F(AVX_VNNI_INT8) | + F(AVX_NE_CONVERT) | + F(AMX_COMPLEX) | + F(AVX_VNNI_INT16) | + F(PREFETCHITI) | + F(AVX10) ); kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, - F(INTEL_PSFD) | F(IPRED_CTRL) | F(RRSBA_CTRL) | F(DDPD_U) | - F(BHI_CTRL) | F(MCDT_NO) + F(INTEL_PSFD) | + F(IPRED_CTRL) | + F(RRSBA_CTRL) | + F(DDPD_U) | + F(BHI_CTRL) | + F(MCDT_NO) ); kvm_cpu_cap_mask(CPUID_D_1_EAX, - F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd + F(XSAVEOPT) | + F(XSAVEC) | + F(XGETBV1) | + F(XSAVES) | + f_xfd ); kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX, - SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA) + SF(SGX1) | + SF(SGX2) | + SF(SGX_EDECCSSA) ); kvm_cpu_cap_init_kvm_defined(CPUID_24_0_EBX, - F(AVX10_128) | F(AVX10_256) | F(AVX10_512) + F(AVX10_128) | + F(AVX10_256) | + F(AVX10_512) ); kvm_cpu_cap_mask(CPUID_8000_0001_ECX, - F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | - F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | - F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | - 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) | - F(TOPOEXT) | 0 /* PERFCTR_CORE */ + F(LAHF_LM) | + F(CMP_LEGACY) | + 0 /*SVM*/ | + 0 /* ExtApicSpace */ | + F(CR8_LEGACY) | + F(ABM) | + F(SSE4A) | + F(MISALIGNSSE) | + F(3DNOWPREFETCH) | + F(OSVW) | + 0 /* IBS */ | + F(XOP) | + 0 /* SKINIT, WDT, LWP */ | + F(FMA4) | + F(TBM) | + F(TOPOEXT) | + 0 /* PERFCTR_CORE */ ); kvm_cpu_cap_mask(CPUID_8000_0001_EDX, - F(FPU) | F(VME) | F(DE) | F(PSE) | - F(TSC) | F(MSR) | F(PAE) | F(MCE) | - F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | - F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | - F(PAT) | F(PSE36) | 0 /* Reserved */ | - F(NX) | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | - F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) | - 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW) + F(FPU) | + F(VME) | + F(DE) | + F(PSE) | + F(TSC) | + F(MSR) | + F(PAE) | + F(MCE) | + F(CX8) | + F(APIC) | + 0 /* Reserved */ | + F(SYSCALL) | + F(MTRR) | + F(PGE) | + F(MCA) | + F(CMOV) | + F(PAT) | + F(PSE36) | + 0 /* Reserved */ | + F(NX) | + 0 /* Reserved */ | + F(MMXEXT) | + F(MMX) | + F(FXSR) | + F(FXSR_OPT) | + f_gbpages | + F(RDTSCP) | + 0 /* Reserved */ | + f_lm | + F(3DNOWEXT) | + F(3DNOW) ); if (!tdp_enabled && IS_ENABLED(CONFIG_X86_64)) @@ -813,10 +957,18 @@ void kvm_set_cpu_caps(void) ); kvm_cpu_cap_mask(CPUID_8000_0008_EBX, - F(CLZERO) | F(XSAVEERPTR) | - F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | - F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) | - F(AMD_PSFD) | F(AMD_IBPB_RET) + F(CLZERO) | + F(XSAVEERPTR) | + F(WBNOINVD) | + F(AMD_IBPB) | + F(AMD_IBRS) | + F(AMD_SSBD) | + F(VIRT_SSBD) | + F(AMD_SSB_NO) | + F(AMD_STIBP) | + F(AMD_STIBP_ALWAYS_ON) | + F(AMD_PSFD) | + F(AMD_IBPB_RET) ); /* @@ -853,12 +1005,20 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_mask(CPUID_8000_000A_EDX, 0); kvm_cpu_cap_mask(CPUID_8000_001F_EAX, - 0 /* SME */ | 0 /* SEV */ | 0 /* VM_PAGE_FLUSH */ | 0 /* SEV_ES */ | - F(SME_COHERENT)); + 0 /* SME */ | + 0 /* SEV */ | + 0 /* VM_PAGE_FLUSH */ | + 0 /* SEV_ES */ | + F(SME_COHERENT) + ); kvm_cpu_cap_mask(CPUID_8000_0021_EAX, - F(NO_NESTED_DATA_BP) | F(LFENCE_RDTSC) | 0 /* SmmPgCfgLock */ | - F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */ | + F(NO_NESTED_DATA_BP) | + F(LFENCE_RDTSC) | + 0 /* SmmPgCfgLock */ | + F(NULL_SEL_CLR_BASE) | + F(AUTOIBRS) | + 0 /* PrefetchCtlMsr */ | F(WRMSR_XX_BASE_NS) ); @@ -887,9 +1047,16 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_NO_SMM_CTL_MSR); kvm_cpu_cap_mask(CPUID_C000_0001_EDX, - F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | - F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | - F(PMM) | F(PMM_EN) + F(XSTORE) | + F(XSTORE_EN) | + F(XCRYPT) | + F(XCRYPT_EN) | + F(ACE2) | + F(ACE2_EN) | + F(PHE) | + F(PHE_EN) | + F(PMM) | + F(PMM_EN) ); /* -- 2.51.0 From 3cc359ca29adadb94f4551b0cf40bb2352c28361 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:50 -0800 Subject: [PATCH 11/16] KVM: x86: Rename kvm_cpu_cap_mask() to kvm_cpu_cap_init() Rename kvm_cpu_cap_mask() to kvm_cpu_cap_init() in anticipation of merging it with kvm_cpu_cap_init_kvm_defined(), and in anticipation of _setting_ bits in the helper (a future commit will play macro games to set emulated feature flags via kvm_cpu_cap_init()). No functional change intended. Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20241128013424.4096668-24-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 18c29ca5aa4f..5546ec572392 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -636,7 +636,7 @@ static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf) static __always_inline void kvm_cpu_cap_init_kvm_defined(enum kvm_only_cpuid_leafs leaf, u32 mask) { - /* Use kvm_cpu_cap_mask for leafs that aren't KVM-only. */ + /* Use kvm_cpu_cap_init for leafs that aren't KVM-only. */ BUILD_BUG_ON(leaf < NCAPINTS); kvm_cpu_caps[leaf] = mask; @@ -644,7 +644,7 @@ void kvm_cpu_cap_init_kvm_defined(enum kvm_only_cpuid_leafs leaf, u32 mask) __kvm_cpu_cap_mask(leaf); } -static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask) +static __always_inline void kvm_cpu_cap_init(enum cpuid_leafs leaf, u32 mask) { /* Use kvm_cpu_cap_init_kvm_defined for KVM-only leafs. */ BUILD_BUG_ON(leaf >= NCAPINTS); @@ -682,7 +682,7 @@ void kvm_set_cpu_caps(void) memcpy(&kvm_cpu_caps, &boot_cpu_data.x86_capability, sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps))); - kvm_cpu_cap_mask(CPUID_1_ECX, + kvm_cpu_cap_init(CPUID_1_ECX, F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64 */ | @@ -718,7 +718,7 @@ void kvm_set_cpu_caps(void) /* KVM emulates x2apic in software irrespective of host support. */ kvm_cpu_cap_set(X86_FEATURE_X2APIC); - kvm_cpu_cap_mask(CPUID_1_EDX, + kvm_cpu_cap_init(CPUID_1_EDX, F(FPU) | F(VME) | F(DE) | @@ -748,7 +748,7 @@ void kvm_set_cpu_caps(void) 0 /* HTT, TM, Reserved, PBE */ ); - kvm_cpu_cap_mask(CPUID_7_0_EBX, + kvm_cpu_cap_init(CPUID_7_0_EBX, F(FSGSBASE) | F(SGX) | F(BMI1) | @@ -778,7 +778,7 @@ void kvm_set_cpu_caps(void) F(AVX512BW) | F(AVX512VL)); - kvm_cpu_cap_mask(CPUID_7_ECX, + kvm_cpu_cap_init(CPUID_7_ECX, F(AVX512VBMI) | F(LA57) | F(PKU) | @@ -810,7 +810,7 @@ void kvm_set_cpu_caps(void) if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) kvm_cpu_cap_clear(X86_FEATURE_PKU); - kvm_cpu_cap_mask(CPUID_7_EDX, + kvm_cpu_cap_init(CPUID_7_EDX, F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | @@ -842,7 +842,7 @@ void kvm_set_cpu_caps(void) if (boot_cpu_has(X86_FEATURE_AMD_SSBD)) kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD); - kvm_cpu_cap_mask(CPUID_7_1_EAX, + kvm_cpu_cap_init(CPUID_7_1_EAX, F(SHA512) | F(SM3) | F(SM4) | @@ -875,7 +875,7 @@ void kvm_set_cpu_caps(void) F(MCDT_NO) ); - kvm_cpu_cap_mask(CPUID_D_1_EAX, + kvm_cpu_cap_init(CPUID_D_1_EAX, F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | @@ -895,7 +895,7 @@ void kvm_set_cpu_caps(void) F(AVX10_512) ); - kvm_cpu_cap_mask(CPUID_8000_0001_ECX, + kvm_cpu_cap_init(CPUID_8000_0001_ECX, F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | @@ -915,7 +915,7 @@ void kvm_set_cpu_caps(void) 0 /* PERFCTR_CORE */ ); - kvm_cpu_cap_mask(CPUID_8000_0001_EDX, + kvm_cpu_cap_init(CPUID_8000_0001_EDX, F(FPU) | F(VME) | F(DE) | @@ -956,7 +956,7 @@ void kvm_set_cpu_caps(void) SF(CONSTANT_TSC) ); - kvm_cpu_cap_mask(CPUID_8000_0008_EBX, + kvm_cpu_cap_init(CPUID_8000_0008_EBX, F(CLZERO) | F(XSAVEERPTR) | F(WBNOINVD) | @@ -1002,9 +1002,9 @@ void kvm_set_cpu_caps(void) * Hide all SVM features by default, SVM will set the cap bits for * features it emulates and/or exposes for L1. */ - kvm_cpu_cap_mask(CPUID_8000_000A_EDX, 0); + kvm_cpu_cap_init(CPUID_8000_000A_EDX, 0); - kvm_cpu_cap_mask(CPUID_8000_001F_EAX, + kvm_cpu_cap_init(CPUID_8000_001F_EAX, 0 /* SME */ | 0 /* SEV */ | 0 /* VM_PAGE_FLUSH */ | @@ -1012,7 +1012,7 @@ void kvm_set_cpu_caps(void) F(SME_COHERENT) ); - kvm_cpu_cap_mask(CPUID_8000_0021_EAX, + kvm_cpu_cap_init(CPUID_8000_0021_EAX, F(NO_NESTED_DATA_BP) | F(LFENCE_RDTSC) | 0 /* SmmPgCfgLock */ | @@ -1036,7 +1036,7 @@ void kvm_set_cpu_caps(void) * kernel. LFENCE_RDTSC was a Linux-defined synthetic feature long * before AMD joined the bandwagon, e.g. LFENCE is serializing on most * CPUs that support SSE2. On CPUs that don't support AMD's leaf, - * kvm_cpu_cap_mask() will unfortunately drop the flag due to ANDing + * kvm_cpu_cap_init() will unfortunately drop the flag due to ANDing * the mask with the raw host CPUID, and reporting support in AMD's * leaf can make it easier for userspace to detect the feature. */ @@ -1046,7 +1046,7 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_NULL_SEL_CLR_BASE); kvm_cpu_cap_set(X86_FEATURE_NO_SMM_CTL_MSR); - kvm_cpu_cap_mask(CPUID_C000_0001_EDX, + kvm_cpu_cap_init(CPUID_C000_0001_EDX, F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | -- 2.51.0 From 6eac4d99a9677a35947aa115bbc60266def40c3e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:51 -0800 Subject: [PATCH 12/16] KVM: x86: Add a macro to init CPUID features that are 64-bit only Add a macro to mask-in feature flags that are supported only on 64-bit kernels/KVM. In addition to reducing overall #ifdeffery, using a macro will allow hardening the kvm_cpu_cap initialization sequences to assert that the features being advertised are indeed included in the word being initialized. And arguably using *F() macros through is more readable. No functional change intended. Reviewed-by: Maxim Levitsky Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20241128013424.4096668-25-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 5546ec572392..bd1d54e25414 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -663,17 +663,14 @@ static __always_inline void kvm_cpu_cap_init(enum cpuid_leafs leaf, u32 mask) (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0); \ }) +/* Features that KVM supports only on 64-bit kernels. */ +#define X86_64_F(name) \ +({ \ + (IS_ENABLED(CONFIG_X86_64) ? F(name) : 0); \ +}) + void kvm_set_cpu_caps(void) { -#ifdef CONFIG_X86_64 - unsigned int f_gbpages = F(GBPAGES); - unsigned int f_lm = F(LM); - unsigned int f_xfd = F(XFD); -#else - unsigned int f_gbpages = 0; - unsigned int f_lm = 0; - unsigned int f_xfd = 0; -#endif memset(kvm_cpu_caps, 0, sizeof(kvm_cpu_caps)); BUILD_BUG_ON(sizeof(kvm_cpu_caps) - (NKVMCAPINTS * sizeof(*kvm_cpu_caps)) > @@ -880,7 +877,7 @@ void kvm_set_cpu_caps(void) F(XSAVEC) | F(XGETBV1) | F(XSAVES) | - f_xfd + X86_64_F(XFD) ); kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX, @@ -941,10 +938,10 @@ void kvm_set_cpu_caps(void) F(MMX) | F(FXSR) | F(FXSR_OPT) | - f_gbpages | + X86_64_F(GBPAGES) | F(RDTSCP) | 0 /* Reserved */ | - f_lm | + X86_64_F(LM) | F(3DNOWEXT) | F(3DNOW) ); @@ -1078,6 +1075,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cpu_caps); #undef F #undef SF +#undef X86_64_F struct kvm_cpuid_array { struct kvm_cpuid_entry2 *entries; -- 2.51.0 From 264969b48a295e3fd89f01d3584a9f3cf6b47eb1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:52 -0800 Subject: [PATCH 13/16] KVM: x86: Add a macro to precisely handle aliased 0x1.EDX CPUID features Add a macro to precisely handle CPUID features that AMD duplicated from CPUID.0x1.EDX into CPUID.0x8000_0001.EDX. This will allow adding an assert that all features passed to kvm_cpu_cap_init() match the word being processed, e.g. to prevent passing a feature from CPUID 0x7 to CPUID 0x1. Because the kernel simply reuses the X86_FEATURE_* definitions from CPUID.0x1.EDX, KVM's use of the aliased features would result in false positives from such an assert. No functional change intended. Link: https://lore.kernel.org/r/20241128013424.4096668-26-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 47 +++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index bd1d54e25414..11cd176a26f8 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -669,6 +669,16 @@ static __always_inline void kvm_cpu_cap_init(enum cpuid_leafs leaf, u32 mask) (IS_ENABLED(CONFIG_X86_64) ? F(name) : 0); \ }) +/* + * Aliased Features - For features in 0x8000_0001.EDX that are duplicates of + * identical 0x1.EDX features, and thus are aliased from 0x1 to 0x8000_0001. + */ +#define ALIASED_1_EDX_F(name) \ +({ \ + BUILD_BUG_ON(__feature_leaf(X86_FEATURE_##name) != CPUID_1_EDX); \ + feature_bit(name); \ +}) + void kvm_set_cpu_caps(void) { memset(kvm_cpu_caps, 0, sizeof(kvm_cpu_caps)); @@ -913,30 +923,30 @@ void kvm_set_cpu_caps(void) ); kvm_cpu_cap_init(CPUID_8000_0001_EDX, - F(FPU) | - F(VME) | - F(DE) | - F(PSE) | - F(TSC) | - F(MSR) | - F(PAE) | - F(MCE) | - F(CX8) | - F(APIC) | + ALIASED_1_EDX_F(FPU) | + ALIASED_1_EDX_F(VME) | + ALIASED_1_EDX_F(DE) | + ALIASED_1_EDX_F(PSE) | + ALIASED_1_EDX_F(TSC) | + ALIASED_1_EDX_F(MSR) | + ALIASED_1_EDX_F(PAE) | + ALIASED_1_EDX_F(MCE) | + ALIASED_1_EDX_F(CX8) | + ALIASED_1_EDX_F(APIC) | 0 /* Reserved */ | F(SYSCALL) | - F(MTRR) | - F(PGE) | - F(MCA) | - F(CMOV) | - F(PAT) | - F(PSE36) | + ALIASED_1_EDX_F(MTRR) | + ALIASED_1_EDX_F(PGE) | + ALIASED_1_EDX_F(MCA) | + ALIASED_1_EDX_F(CMOV) | + ALIASED_1_EDX_F(PAT) | + ALIASED_1_EDX_F(PSE36) | 0 /* Reserved */ | F(NX) | 0 /* Reserved */ | F(MMXEXT) | - F(MMX) | - F(FXSR) | + ALIASED_1_EDX_F(MMX) | + ALIASED_1_EDX_F(FXSR) | F(FXSR_OPT) | X86_64_F(GBPAGES) | F(RDTSCP) | @@ -1076,6 +1086,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cpu_caps); #undef F #undef SF #undef X86_64_F +#undef ALIASED_1_EDX_F struct kvm_cpuid_array { struct kvm_cpuid_entry2 *entries; -- 2.51.0 From 46505c0f69f99cc8cf0b50842a35a49200db5144 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:53 -0800 Subject: [PATCH 14/16] KVM: x86: Handle kernel- and KVM-defined CPUID words in a single helper Merge kvm_cpu_cap_init() and kvm_cpu_cap_init_kvm_defined() into a single helper. The only advantage of separating the two was to make it somewhat obvious that KVM directly initializes the KVM-defined words, whereas using a common helper will allow for hardening both kernel- and KVM-defined CPUID words without needing copy+paste. No functional change intended. Link: https://lore.kernel.org/r/20241128013424.4096668-27-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 46 +++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 11cd176a26f8..2cd06155684e 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -623,37 +623,23 @@ static __always_inline u32 raw_cpuid_get(struct cpuid_reg cpuid) return *__cpuid_entry_get_reg(&entry, cpuid.reg); } -/* Mask kvm_cpu_caps for @leaf with the raw CPUID capabilities of this CPU. */ -static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf) +static __always_inline void kvm_cpu_cap_init(u32 leaf, u32 mask) { const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32); - reverse_cpuid_check(leaf); + /* + * For kernel-defined leafs, mask the boot CPU's pre-populated value. + * For KVM-defined leafs, explicitly set the leaf, as KVM is the one + * and only authority. + */ + if (leaf < NCAPINTS) + kvm_cpu_caps[leaf] &= mask; + else + kvm_cpu_caps[leaf] = mask; kvm_cpu_caps[leaf] &= raw_cpuid_get(cpuid); } -static __always_inline -void kvm_cpu_cap_init_kvm_defined(enum kvm_only_cpuid_leafs leaf, u32 mask) -{ - /* Use kvm_cpu_cap_init for leafs that aren't KVM-only. */ - BUILD_BUG_ON(leaf < NCAPINTS); - - kvm_cpu_caps[leaf] = mask; - - __kvm_cpu_cap_mask(leaf); -} - -static __always_inline void kvm_cpu_cap_init(enum cpuid_leafs leaf, u32 mask) -{ - /* Use kvm_cpu_cap_init_kvm_defined for KVM-only leafs. */ - BUILD_BUG_ON(leaf >= NCAPINTS); - - kvm_cpu_caps[leaf] &= mask; - - __kvm_cpu_cap_mask(leaf); -} - #define F feature_bit /* Scattered Flag - For features that are scattered by cpufeatures.h. */ @@ -864,7 +850,7 @@ void kvm_set_cpu_caps(void) F(LAM) ); - kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, + kvm_cpu_cap_init(CPUID_7_1_EDX, F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(AMX_COMPLEX) | @@ -873,7 +859,7 @@ void kvm_set_cpu_caps(void) F(AVX10) ); - kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, + kvm_cpu_cap_init(CPUID_7_2_EDX, F(INTEL_PSFD) | F(IPRED_CTRL) | F(RRSBA_CTRL) | @@ -890,13 +876,13 @@ void kvm_set_cpu_caps(void) X86_64_F(XFD) ); - kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX, + kvm_cpu_cap_init(CPUID_12_EAX, SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA) ); - kvm_cpu_cap_init_kvm_defined(CPUID_24_0_EBX, + kvm_cpu_cap_init(CPUID_24_0_EBX, F(AVX10_128) | F(AVX10_256) | F(AVX10_512) @@ -959,7 +945,7 @@ void kvm_set_cpu_caps(void) if (!tdp_enabled && IS_ENABLED(CONFIG_X86_64)) kvm_cpu_cap_set(X86_FEATURE_GBPAGES); - kvm_cpu_cap_init_kvm_defined(CPUID_8000_0007_EDX, + kvm_cpu_cap_init(CPUID_8000_0007_EDX, SF(CONSTANT_TSC) ); @@ -1033,7 +1019,7 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_check_and_set(X86_FEATURE_IBPB_BRTYPE); kvm_cpu_cap_check_and_set(X86_FEATURE_SRSO_NO); - kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX, + kvm_cpu_cap_init(CPUID_8000_0022_EAX, F(PERFMON_V2) ); -- 2.51.0 From 8d862c270bf14cb3e63ca84a9a51be77c9fa4e2a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:54 -0800 Subject: [PATCH 15/16] KVM: x86: #undef SPEC_CTRL_SSBD in cpuid.c to avoid macro collisions Undefine SPEC_CTRL_SSBD, which is #defined by msr-index.h to represent the enable flag in MSR_IA32_SPEC_CTRL, to avoid issues with the macro being unpacked into its raw value when passed to KVM's F() macro. This will allow using multiple layers of macros in F() and friends, e.g. to harden against incorrect usage of F(). No functional change intended (cpuid.c doesn't consume SPEC_CTRL_SSBD). Link: https://lore.kernel.org/r/20241128013424.4096668-28-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 2cd06155684e..cebb7314b9be 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -665,6 +665,12 @@ static __always_inline void kvm_cpu_cap_init(u32 leaf, u32 mask) feature_bit(name); \ }) +/* + * Undefine the MSR bit macro to avoid token concatenation issues when + * processing X86_FEATURE_SPEC_CTRL_SSBD. + */ +#undef SPEC_CTRL_SSBD + void kvm_set_cpu_caps(void) { memset(kvm_cpu_caps, 0, sizeof(kvm_cpu_caps)); -- 2.51.0 From 3d142340d717f5e246f65769bc1d211b62d03677 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 17:33:55 -0800 Subject: [PATCH 16/16] KVM: x86: Harden CPU capabilities processing against out-of-scope features Add compile-time assertions to verify that usage of F() and friends in kvm_set_cpu_caps() is scoped to the correct CPUID word, e.g. to detect bugs where KVM passes a feature bit from word X into word y. Add a one-off assertion in the aliased feature macro to ensure that only word 0x8000_0001.EDX aliased the features defined for 0x1.EDX. To do so, convert kvm_cpu_cap_init() to a macro and have it define a local variable to track which CPUID word is being initialized that is then used to validate usage of F() (all of the inputs are compile-time constants and thus can be fed into BUILD_BUG_ON()). Redefine KVM_VALIDATE_CPU_CAP_USAGE after kvm_set_cpu_caps() to be a nop so that F() can be used in other flows that aren't as easily hardened, e.g. __do_cpuid_func_emulated() and __do_cpuid_func(). Invoke KVM_VALIDATE_CPU_CAP_USAGE() in SF() and X86_64_F() to ensure the validation occurs, e.g. if the usage of F() is completely compiled out (which shouldn't happen for boot_cpu_has(), but could happen in the future, e.g. if KVM were to use cpu_feature_enabled()). Link: https://lore.kernel.org/r/20241128013424.4096668-29-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/cpuid.c | 53 ++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index cebb7314b9be..88ab264b4280 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -623,35 +623,53 @@ static __always_inline u32 raw_cpuid_get(struct cpuid_reg cpuid) return *__cpuid_entry_get_reg(&entry, cpuid.reg); } -static __always_inline void kvm_cpu_cap_init(u32 leaf, u32 mask) -{ - const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32); - - /* - * For kernel-defined leafs, mask the boot CPU's pre-populated value. - * For KVM-defined leafs, explicitly set the leaf, as KVM is the one - * and only authority. - */ - if (leaf < NCAPINTS) - kvm_cpu_caps[leaf] &= mask; - else - kvm_cpu_caps[leaf] = mask; - - kvm_cpu_caps[leaf] &= raw_cpuid_get(cpuid); -} +/* + * For kernel-defined leafs, mask the boot CPU's pre-populated value. For KVM- + * defined leafs, explicitly set the leaf, as KVM is the one and only authority. + */ +#define kvm_cpu_cap_init(leaf, mask) \ +do { \ + const struct cpuid_reg cpuid = x86_feature_cpuid(leaf * 32); \ + const u32 __maybe_unused kvm_cpu_cap_init_in_progress = leaf; \ + \ + if (leaf < NCAPINTS) \ + kvm_cpu_caps[leaf] &= (mask); \ + else \ + kvm_cpu_caps[leaf] = (mask); \ + \ + kvm_cpu_caps[leaf] &= raw_cpuid_get(cpuid); \ +} while (0) -#define F feature_bit +/* + * Assert that the feature bit being declared, e.g. via F(), is in the CPUID + * word that's being initialized. Exempt 0x8000_0001.EDX usage of 0x1.EDX + * features, as AMD duplicated many 0x1.EDX features into 0x8000_0001.EDX. + */ +#define KVM_VALIDATE_CPU_CAP_USAGE(name) \ +do { \ + u32 __leaf = __feature_leaf(X86_FEATURE_##name); \ + \ + BUILD_BUG_ON(__leaf != kvm_cpu_cap_init_in_progress); \ +} while (0) + +#define F(name) \ +({ \ + KVM_VALIDATE_CPU_CAP_USAGE(name); \ + feature_bit(name); \ +}) /* Scattered Flag - For features that are scattered by cpufeatures.h. */ #define SF(name) \ ({ \ BUILD_BUG_ON(X86_FEATURE_##name >= MAX_CPU_FEATURES); \ + KVM_VALIDATE_CPU_CAP_USAGE(name); \ (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0); \ }) /* Features that KVM supports only on 64-bit kernels. */ #define X86_64_F(name) \ ({ \ + KVM_VALIDATE_CPU_CAP_USAGE(name); \ (IS_ENABLED(CONFIG_X86_64) ? F(name) : 0); \ }) @@ -662,6 +680,7 @@ static __always_inline void kvm_cpu_cap_init(u32 leaf, u32 mask) #define ALIASED_1_EDX_F(name) \ ({ \ BUILD_BUG_ON(__feature_leaf(X86_FEATURE_##name) != CPUID_1_EDX); \ + BUILD_BUG_ON(kvm_cpu_cap_init_in_progress != CPUID_8000_0001_EDX); \ feature_bit(name); \ }) -- 2.51.0