From d88ed5fb7c88f404e57fe2b2a6d19fefc35b4dc7 Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Fri, 28 Feb 2025 15:08:04 -0800
Subject: [PATCH 01/16] KVM: selftests: Ensure all vCPUs hit -EFAULT during
 initial RO stage

During the initial mprotect(RO) stage of mmu_stress_test, keep vCPUs
spinning until all vCPUs have hit -EFAULT, i.e. until all vCPUs have tried
to write to a read-only page. If a vCPU manages to complete an entire
iteration of the loop without hitting a read-only page, *and* the vCPU
observes mprotect_ro_done before starting a second iteration, then the
vCPU will prematurely fall through to GUEST_SYNC(3) (on x86 and arm64) and
get out of sequence.

Replace the "do-while (!r)" loop around the associated _vcpu_run() with a
single invocation, as barring a KVM bug, the vCPU is guaranteed to hit
-EFAULT, and retrying on success is super confusing, hides KVM bugs, and
complicates this fix. The do-while loop was semi-unintentionally added
specifically to fudge around a KVM x86 bug, and said bug is unhittable
without modifying the test to force x86 down the !(x86||arm64) path.

On x86, if forced emulation is enabled, vcpu_arch_put_guest() may trigger
emulation of the store to memory. Due to a (very, very) longstanding bug
in KVM x86's emulator, emulated writes to guest memory that fail during
__kvm_write_guest_page() unconditionally return KVM_EXIT_MMIO. While that
is desirable in the !memslot case, it's wrong in this case as the failure
happens due to __copy_to_user() hitting a read-only page, not an emulated
MMIO region.

But as above, x86 only uses vcpu_arch_put_guest() if the __x86_64__ guards
are clobbered to force x86 down the common path, and of course the
unexpected MMIO is a KVM bug, i.e. *should* cause a test failure.

Fixes: b6c304aec648 ("KVM: selftests: Verify KVM correctly handles mprotect(PROT_READ)")
Reported-by: Yan Zhao
Closes: https://lore.kernel.org/all/20250208105318.16861-1-yan.y.zhao@intel.com
Debugged-by: Yan Zhao
Reviewed-by: Yan Zhao
Tested-by: Yan Zhao
Link: https://lore.kernel.org/r/20250228230804.3845860-1-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 tools/testing/selftests/kvm/mmu_stress_test.c | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c
index d9c76b4c0d88..6a437d2be9fa 100644
--- a/tools/testing/selftests/kvm/mmu_stress_test.c
+++ b/tools/testing/selftests/kvm/mmu_stress_test.c
@@ -18,6 +18,7 @@
 #include "ucall_common.h"
 
 static bool mprotect_ro_done;
+static bool all_vcpus_hit_ro_fault;
 
 static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
 {
@@ -36,9 +37,9 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
 
 	/*
 	 * Write to the region while mprotect(PROT_READ) is underway. Keep
-	 * looping until the memory is guaranteed to be read-only, otherwise
-	 * vCPUs may complete their writes and advance to the next stage
-	 * prematurely.
+	 * looping until the memory is guaranteed to be read-only and a fault
+	 * has occurred, otherwise vCPUs may complete their writes and advance
+	 * to the next stage prematurely.
	 *
	 * For architectures that support skipping the faulting instruction,
	 * generate the store via inline assembly to ensure the exact length
@@ -56,7 +57,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
 #else
 		vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
 #endif
-	} while (!READ_ONCE(mprotect_ro_done));
+	} while (!READ_ONCE(mprotect_ro_done) || !READ_ONCE(all_vcpus_hit_ro_fault));
 
 	/*
 	 * Only architectures that write the entire range can explicitly sync,
@@ -81,6 +82,7 @@ struct vcpu_info {
 
 static int nr_vcpus;
 static atomic_t rendezvous;
+static atomic_t nr_ro_faults;
 
 static void rendezvous_with_boss(void)
 {
@@ -148,12 +150,16 @@ static void *vcpu_worker(void *data)
	 * be stuck on the faulting instruction for other architectures. Go to
	 * stage 3 without a rendezvous.
	 */
-	do {
-		r = _vcpu_run(vcpu);
-	} while (!r);
+	r = _vcpu_run(vcpu);
 	TEST_ASSERT(r == -1 && errno == EFAULT,
 		    "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno);
 
+	atomic_inc(&nr_ro_faults);
+	if (atomic_read(&nr_ro_faults) == nr_vcpus) {
+		WRITE_ONCE(all_vcpus_hit_ro_fault, true);
+		sync_global_to_guest(vm, all_vcpus_hit_ro_fault);
+	}
+
 #if defined(__x86_64__) || defined(__aarch64__)
 	/*
 	 * Verify *all* writes from the guest hit EFAULT due to the VMA now
@@ -378,7 +384,6 @@ int main(int argc, char *argv[])
 	rendezvous_with_vcpus(&time_run2, "run 2");
 
 	mprotect(mem, slot_size, PROT_READ);
-	usleep(10);
 	mprotect_ro_done = true;
 	sync_global_to_guest(vm, mprotect_ro_done);
-- 
2.51.0

From 3b2d3db368013729fd2167a0d91fec821dba807c Mon Sep 17 00:00:00 2001
From: Sean Christopherson
Date: Fri, 28 Feb 2025 15:38:52 -0800
Subject: [PATCH 02/16] KVM: selftests: Fix printf() format goof in SEV smoke
 test

Print out the index of mismatching XSAVE bytes using unsigned decimal
format. Some versions of clang complain about trying to print an integer
as an unsigned char.

  x86/sev_smoke_test.c:55:51: error: format specifies type 'unsigned
  char' but the argument has type 'int' [-Werror,-Wformat]

Fixes: 8c53183dbaa2 ("selftests: kvm: add test for transferring FPU state into VMSA")
Link: https://lore.kernel.org/r/20250228233852.3855676-1-seanjc@google.com
Signed-off-by: Sean Christopherson
---
 tools/testing/selftests/kvm/x86/sev_smoke_test.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
index a1a688e75266..d97816dc476a 100644
--- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -52,7 +52,8 @@ static void compare_xsave(u8 *from_host, u8 *from_guest)
 	bool bad = false;
 	for (i = 0; i < 4095; i++) {
 		if (from_host[i] != from_guest[i]) {
-			printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]);
+			printf("mismatch at %u | %02hhx %02hhx\n",
+			       i, from_host[i], from_guest[i]);
 			bad = true;
 		}
 	}
-- 
2.51.0

From f9dc8fb3afc968042bdaf4b6e445a9272071c9f3 Mon Sep 17 00:00:00 2001
From: Xiaoyao Li
Date: Tue, 4 Mar 2025 03:23:14 -0500
Subject: [PATCH 03/16] KVM: x86: Explicitly zero EAX and EBX when PERFMON_V2
 isn't supported by KVM

Fix a goof where KVM sets CPUID.0x80000022.EAX to CPUID.0x80000022.EBX
instead of zeroing both when PERFMON_V2 isn't supported by KVM. In
practice, barring a buggy CPU (or vCPU model when running nested), only
the !enable_pmu case is affected, as KVM always supports PERFMON_V2 if
it's available in hardware, i.e. CPUID.0x80000022.EBX will be '0' if
PERFMON_V2 is unsupported.
For the !enable_pmu case, the bug is relatively benign as KVM will refuse
to enable PMU capabilities, but a VMM that reflects KVM's supported CPUID
into the guest could inadvertently induce #GPs in the guest due to
advertising support for MSRs that KVM refuses to emulate.

Fixes: 94cdeebd8211 ("KVM: x86/cpuid: Add AMD CPUID ExtPerfMonAndDbg leaf 0x80000022")
Signed-off-by: Xiaoyao Li
Link: https://lore.kernel.org/r/20250304082314.472202-3-xiaoyao.li@intel.com
[sean: massage shortlog and changelog, tag for stable]
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson
---
 arch/x86/kvm/cpuid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 8eb3a88707f2..121edf1f2a79 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1763,7 +1763,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		entry->ecx = entry->edx = 0;
 
 		if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) {
-			entry->eax = entry->ebx;
+			entry->eax = entry->ebx = 0;
 			break;
 		}
-- 
2.51.0

From 46c49372e10ea161beaa78936a64d2b49531dffb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Tue, 25 Feb 2025 13:01:41 -0500
Subject: [PATCH 04/16] KVM: x86: move vm_destroy callback at end of
 kvm_arch_destroy_vm

TDX needs to free the TDR control structures last, after all paging
structures have been torn down; move the vm_destroy callback to a
suitable place. The new place is also okay for AMD; the main difference
is that the MMU has been torn down and, if anything, that is better done
before the SNP ASID is released.

Extracted from a patch by Yan Zhao.

Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4b64ab350bcd..bd2b71d4e64f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12880,7 +12880,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	}
 	kvm_unload_vcpu_mmus(kvm);
 	kvm_destroy_vcpus(kvm);
-	kvm_x86_call(vm_destroy)(kvm);
 	kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
 	kvm_pic_destroy(kvm);
 	kvm_ioapic_destroy(kvm);
@@ -12890,6 +12889,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_page_track_cleanup(kvm);
 	kvm_xen_destroy_vm(kvm);
 	kvm_hv_destroy_vm(kvm);
+	kvm_x86_call(vm_destroy)(kvm);
 }
 
 static void memslot_rmap_free(struct kvm_memory_slot *slot)
-- 
2.51.0

From 5f3b30b2b0d92e5a5050959a0994b2f3c4e1e6d0 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata
Date: Sat, 12 Oct 2024 00:55:55 -0700
Subject: [PATCH 05/16] KVM: x86: Push down setting vcpu.arch.user_set_tsc

Push down setting vcpu.arch.user_set_tsc to true from
kvm_synchronize_tsc() to __kvm_synchronize_tsc() so that the two callers
don't have to modify user_set_tsc directly. This is preparation: a later
change will prohibit changing TSC synchronization for TDX guests by
modifying __kvm_synchronize_tsc(), and the caller sites shouldn't need to
be touched just to leave user_set_tsc unchanged.

Signed-off-by: Isaku Yamahata
Message-ID: <62b1a7a35d6961844786b6e47e8ecb774af7a228.1728719037.git.isaku.yamahata@intel.com>
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/x86.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bd2b71d4e64f..2da75bbf7f94 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2626,12 +2626,15 @@ static inline bool kvm_check_tsc_unstable(void)
  * participates in.
 */
 static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc,
-				  u64 ns, bool matched)
+				  u64 ns, bool matched, bool user_set_tsc)
 {
 	struct kvm *kvm = vcpu->kvm;
 
 	lockdep_assert_held(&kvm->arch.tsc_write_lock);
 
+	if (user_set_tsc)
+		vcpu->kvm->arch.user_set_tsc = true;
+
 	/*
 	 * We also track the most recent recorded KHZ, write and time to
 	 * allow the matching interval to be extended at each write.
@@ -2717,8 +2720,6 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 *user_value)
 		}
 	}
 
-	if (user_value)
-		kvm->arch.user_set_tsc = true;
 
 	/*
 	 * For a reliable TSC, we can match TSC offsets, and for an unstable
@@ -2738,7 +2739,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 *user_value)
 		matched = true;
 	}
 
-	__kvm_synchronize_tsc(vcpu, offset, data, ns, matched);
+	__kvm_synchronize_tsc(vcpu, offset, data, ns, matched, !!user_value);
 
 	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
 }
@@ -5725,8 +5726,7 @@ static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu,
 		tsc = kvm_scale_tsc(rdtsc(), vcpu->arch.l1_tsc_scaling_ratio) + offset;
 		ns = get_kvmclock_base_ns();
 
-		kvm->arch.user_set_tsc = true;
-		__kvm_synchronize_tsc(vcpu, offset, tsc, ns, matched);
+		__kvm_synchronize_tsc(vcpu, offset, tsc, ns, matched, true);
 		raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
 
 		r = 0;
-- 
2.51.0

From adafea1106004dba26ea12d3193f4f132b1f0065 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata
Date: Sat, 12 Oct 2024 00:55:56 -0700
Subject: [PATCH 06/16] KVM: x86: Add infrastructure for secure TSC

Add a guest_tsc_protected member to struct kvm_vcpu_arch and prohibit
changing the TSC offset/multiplier when guest_tsc_protected is true. X86
confidential computing technology defines protected guest TSC so that the
VMM can't change the TSC offset/multiplier once the vCPU is initialized.
SEV-SNP defines Secure TSC as optional, whereas TDX mandates it.

KVM has common logic on x86 that tries to guess or adjust the TSC
offset/multiplier for better guest TSC and TSC interrupt latency at KVM
vCPU creation (kvm_arch_vcpu_postcreate()), vCPU migration over pCPU
(kvm_arch_vcpu_load()), vCPU TSC device attributes
(kvm_arch_tsc_set_attr()) and guest/host writing to TSC or TSC adjust MSR
(kvm_set_msr_common()).

The current x86 KVM implementation conflicts with protected TSC because
the VMM can't change the TSC offset/multiplier. Because KVM emulates the
TSC timer or the TSC deadline timer with the TSC offset/multiplier, the
TSC timer interrupt is injected to the guest at the wrong time if the KVM
TSC offset is different from what the TDX module determined.

Originally this issue was found by the cyclic test of rt-tests [1], as
the latency in the TDX case is worse than the VMX value + TDX SEAMCALL
overhead. It turned out that the KVM TSC offset is different from what
the TDX module determines.

Disable or ignore the KVM logic that changes/adjusts the TSC
offset/multiplier in those paths, thus keeping the KVM TSC
offset/multiplier the same as the value of the TDX module. Writes to
MSR_IA32_TSC are also blocked as they amount to a change in the TSC
offset.
[1] https://git.kernel.org/pub/scm/utils/rt-tests/rt-tests.git Reported-by: Marcelo Tosatti Signed-off-by: Isaku Yamahata Message-ID: <3a7444aec08042fe205666864b6858910e86aa98.1728719037.git.isaku.yamahata@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 32ae3aa50c7e..ee55d1f753e8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1053,6 +1053,7 @@ struct kvm_vcpu_arch { /* Protected Guests */ bool guest_state_protected; + bool guest_tsc_protected; /* * Set when PDPTS were loaded directly by the userspace without diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2da75bbf7f94..053547fc6f89 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2569,6 +2569,9 @@ EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier); static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset) { + if (vcpu->arch.guest_tsc_protected) + return; + trace_kvm_write_tsc_offset(vcpu->vcpu_id, vcpu->arch.l1_tsc_offset, l1_offset); @@ -2632,6 +2635,9 @@ static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc, lockdep_assert_held(&kvm->arch.tsc_write_lock); + if (vcpu->arch.guest_tsc_protected) + return; + if (user_set_tsc) vcpu->kvm->arch.user_set_tsc = true; @@ -3907,7 +3913,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: if (msr_info->host_initiated) { kvm_synchronize_tsc(vcpu, &data); - } else { + } else if (!vcpu->arch.guest_tsc_protected) { u64 adj = kvm_compute_l1_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset; adjust_tsc_offset_guest(vcpu, adj); vcpu->arch.ia32_tsc_adjust_msr += adj; @@ -4987,7 +4993,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) u64 offset = kvm_compute_l1_tsc_offset(vcpu, vcpu->arch.last_guest_tsc); kvm_vcpu_write_tsc_offset(vcpu, offset); - vcpu->arch.tsc_catchup = 1; + if (!vcpu->arch.guest_tsc_protected) + vcpu->arch.tsc_catchup = 1; } if (kvm_lapic_hv_timer_in_use(vcpu)) -- 2.51.0 From 74c1807f6c4feddb3c3cb1056c54531d4adbaea6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 6 Mar 2025 21:29:22 +0100 Subject: [PATCH 07/16] KVM: x86: block KVM_CAP_SYNC_REGS if guest state is protected KVM_CAP_SYNC_REGS does not make sense for VMs with protected guest state, since the register values cannot actually be written. Return 0 when using the VM-level KVM_CHECK_EXTENSION ioctl, and accordingly return -EINVAL from KVM_RUN if the valid/dirty fields are nonzero. However, on exit from KVM_RUN userspace could have placed a nonzero value into kvm_run->kvm_valid_regs, so check guest_state_protected again and skip store_regs() in that case. 
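For illustration only (not part of this patch; vm_fd and run are assumed
to be the usual VM file descriptor and mapped kvm_run structure), a VMM
can probe the VM-scoped capability and request register syncing only when
the returned mask is nonzero:

  /*
   * Hypothetical userspace sketch: with protected guest state the
   * VM-level KVM_CHECK_EXTENSION returns 0, so no sync fields are ever
   * requested and KVM_RUN cannot fail with -EINVAL because of them.
   */
  int valid = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_REGS);

  if (valid & KVM_SYNC_X86_REGS)
  	run->kvm_valid_regs |= KVM_SYNC_X86_REGS;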
Cc: stable@vger.kernel.org
Fixes: 517987e3fb19 ("KVM: x86: add fields to struct kvm_arch for CoCo features")
Signed-off-by: Paolo Bonzini
Message-ID: <20250306202923.646075-1-pbonzini@redhat.com>
Reviewed-by: Pankaj Gupta
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/x86.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 053547fc6f89..eceb97734646 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4580,6 +4580,11 @@ static bool kvm_is_vm_type_supported(unsigned long type)
 	return type < 32 && (kvm_caps.supported_vm_types & BIT(type));
 }
 
+static inline u32 kvm_sync_valid_fields(struct kvm *kvm)
+{
+	return kvm && kvm->arch.has_protected_state ? 0 : KVM_SYNC_X86_VALID_FIELDS;
+}
+
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r = 0;
@@ -4688,7 +4693,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		break;
 #endif
 	case KVM_CAP_SYNC_REGS:
-		r = KVM_SYNC_X86_VALID_FIELDS;
+		r = kvm_sync_valid_fields(kvm);
 		break;
 	case KVM_CAP_ADJUST_CLOCK:
 		r = KVM_CLOCK_VALID_FLAGS;
@@ -11481,6 +11486,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 {
 	struct kvm_queued_exception *ex = &vcpu->arch.exception;
 	struct kvm_run *kvm_run = vcpu->run;
+	u32 sync_valid_fields;
 	int r;
 
 	r = kvm_mmu_post_init_vm(vcpu->kvm);
@@ -11526,8 +11532,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
-	if ((kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) ||
-	    (kvm_run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)) {
+	sync_valid_fields = kvm_sync_valid_fields(vcpu->kvm);
+	if ((kvm_run->kvm_valid_regs & ~sync_valid_fields) ||
+	    (kvm_run->kvm_dirty_regs & ~sync_valid_fields)) {
 		r = -EINVAL;
 		goto out;
 	}
@@ -11585,7 +11592,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
 out:
 	kvm_put_guest_fpu(vcpu);
-	if (kvm_run->kvm_valid_regs)
+	if (kvm_run->kvm_valid_regs && likely(!vcpu->arch.guest_state_protected))
 		store_regs(vcpu);
 	post_kvm_run_save(vcpu);
 	kvm_vcpu_srcu_read_unlock(vcpu);
-- 
2.51.0

From d19a42d69692f4f1ef6dd0984caf280edd4a403a Mon Sep 17 00:00:00 2001
From: Rick Edgecombe
Date: Mon, 2 Dec 2024 17:03:11 -0800
Subject: [PATCH 08/16] x86/virt/tdx: Add SEAMCALL wrappers for TDX KeyID
 management

Intel TDX protects guest VMs from malicious host and certain physical
attacks. Pre-TDX Intel hardware has support for a memory encryption
architecture called MK-TME, which repurposes several high bits of
physical address as "KeyID". TDX ends up reserving a sub-range of MK-TME
KeyIDs as "TDX private KeyIDs".

Like MK-TME, these KeyIDs can be associated with an ephemeral key. For
TDX this association is done by the TDX module. It also has its own
tracking for which KeyIDs are in use. To do this ephemeral key setup and
manipulate the TDX module's internal tracking, KVM will use the following
SEAMCALLs:

TDH.MNG.KEY.CONFIG: Mark the KeyID as in use, and initialize its
ephemeral key.
TDH.MNG.KEY.FREEID: Mark the KeyID as not in use.

These SEAMCALLs both operate on TDR structures, which are set up using
the TDH.MNG.CREATE SEAMCALL (added in the next patch). KVM's use of these
operations will go like:

 - tdx_guest_keyid_alloc()
 - Initialize TD and TDR page with TDH.MNG.CREATE (not yet added),
   passing KeyID
 - TDH.MNG.KEY.CONFIG to initialize the key
 - TD runs, teardown is started
 - TDH.MNG.KEY.FREEID
 - tdx_guest_keyid_free()

Don't try to combine the tdx_guest_keyid_alloc() and TDH.MNG.KEY.CONFIG
operations because TDH.MNG.CREATE and some locking need to be done in the
middle.
Don't combine TDH.MNG.KEY.FREEID and tdx_guest_keyid_free() either, so
that the teardown path stays symmetrical with the creation path. So
implement tdh_mng_key_config() and tdh_mng_key_freeid() as separate
functions from tdx_guest_keyid_alloc() and tdx_guest_keyid_free().

The TDX module provides SEAMCALLs to hand pages to the TDX module for
storing TDX controlled state. SEAMCALLs that operate on this state are
directed to the appropriate TD VM using references to the pages
originally provided for managing the TD's state. So the host kernel needs
to track these pages, both as an ID for specifying which TD to operate
on, and to allow them to be eventually reclaimed.

The TD VM associated pages are called TDR (Trust Domain Root) and TDCS
(Trust Domain Control Structure). Introduce "struct tdx_td" for holding
references to pages provided to the TDX module for this TD VM associated
state. Don't plan for any TD associated state that is controlled by KVM
to live in this struct. Only expect it to hold data for concepts specific
to the TDX architecture, for which there can't already be preexisting
storage in KVM.

Add both the TDR page and an array of TDCS pages, even though the
SEAMCALL wrappers will only need to know about the TDR page for directing
the SEAMCALLs to the right TD. Adding the TDCS pages to this struct will
let all of the TD VM associated pages handed to the TDX module be tracked
in one location.

For a type to specify physical pages, use KVM's hpa_t type. Do this for
KVM's benefit: it is the common type used to hold physical addresses in
KVM, so it will make interoperability easier.

Co-developed-by: Sean Christopherson
Signed-off-by: Sean Christopherson
Signed-off-by: Isaku Yamahata
Signed-off-by: Kai Huang
Signed-off-by: Rick Edgecombe
Reviewed-by: Binbin Wu
Reviewed-by: Yuan Yao
Message-ID: <20241203010317.827803-2-rick.p.edgecombe@intel.com>
Acked-by: Dave Hansen
Signed-off-by: Paolo Bonzini
---
 arch/x86/include/asm/tdx.h  | 12 ++++++++++++
 arch/x86/virt/vmx/tdx/tdx.c | 25 +++++++++++++++++++++++++
 arch/x86/virt/vmx/tdx/tdx.h | 16 +++++++++-------
 3 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index b4b16dafd55e..5950e0a092ca 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -119,6 +119,18 @@ static inline u64 sc_retry(sc_func_t func, u64 fn,
 int tdx_cpu_enable(void);
 int tdx_enable(void);
 const char *tdx_dump_mce_info(struct mce *m);
+
+struct tdx_td {
+	/* TD root structure: */
+	struct page *tdr_page;
+
+	int tdcs_nr_pages;
+	/* TD control structure: */
+	struct page **tdcs_pages;
+};
+
+u64 tdh_mng_key_config(struct tdx_td *td);
+u64 tdh_mng_key_freeid(struct tdx_td *td);
 #else
 static inline void tdx_init(void) { }
 static inline int tdx_cpu_enable(void) { return -ENODEV; }

diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 7fdb37387886..1ffbdb840004 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -1456,3 +1456,28 @@ void __init tdx_init(void)
 
 	check_tdx_erratum();
 }
+
+static inline u64 tdx_tdr_pa(struct tdx_td *td)
+{
+	return page_to_phys(td->tdr_page);
+}
+
+u64 tdh_mng_key_config(struct tdx_td *td)
+{
+	struct tdx_module_args args = {
+		.rcx = tdx_tdr_pa(td),
+	};
+
+	return seamcall(TDH_MNG_KEY_CONFIG, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_mng_key_config);
+
+u64 tdh_mng_key_freeid(struct tdx_td *td)
+{
+	struct tdx_module_args args = {
+		.rcx = tdx_tdr_pa(td),
+	};
+
+	return seamcall(TDH_MNG_KEY_FREEID, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_mng_key_freeid);

diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
index 4e3d533cdd61..5579317f67ab 100644
--- a/arch/x86/virt/vmx/tdx/tdx.h
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -15,13 +15,15 @@
 /*
  * TDX module SEAMCALL leaf functions
  */
-#define TDH_PHYMEM_PAGE_RDMD	24
-#define TDH_SYS_KEY_CONFIG	31
-#define TDH_SYS_INIT		33
-#define TDH_SYS_RD		34
-#define TDH_SYS_LP_INIT		35
-#define TDH_SYS_TDMR_INIT	36
-#define TDH_SYS_CONFIG		45
+#define TDH_MNG_KEY_CONFIG	8
+#define TDH_MNG_KEY_FREEID	20
+#define TDH_PHYMEM_PAGE_RDMD	24
+#define TDH_SYS_KEY_CONFIG	31
+#define TDH_SYS_INIT		33
+#define TDH_SYS_RD		34
+#define TDH_SYS_LP_INIT		35
+#define TDH_SYS_TDMR_INIT	36
+#define TDH_SYS_CONFIG		45
 
 /* TDX page types */
 #define PT_NDA		0x0
-- 
2.51.0

From b8a4e7de84a2587a250e130a85e4b049dff06448 Mon Sep 17 00:00:00 2001
From: Rick Edgecombe
Date: Mon, 2 Dec 2024 17:03:12 -0800
Subject: [PATCH 09/16] x86/virt/tdx: Add SEAMCALL wrappers for TDX TD
 creation

Intel TDX protects guest VMs from malicious hosts and certain physical
attacks. It defines various control structures that hold state for things
like TDs or vCPUs. These control structures are stored in pages given to
the TDX module and encrypted with either the global KeyID or the guest
KeyIDs.

To manipulate these control structures the TDX module defines a few
SEAMCALLs. KVM will use these during the process of creating a TD as
follows:

1) Allocate a unique TDX KeyID for a new guest.

2) Call TDH.MNG.CREATE to create a "TD Root" (TDR) page, together with
   the newly allocated KeyID. Unlike the rest of the TDX guest, the TDR
   page is crypto-protected by the 'global KeyID'.

3) Call the previously added TDH.MNG.KEY.CONFIG on each package to
   configure the KeyID for the guest. After this step, the KeyID to
   protect the guest is ready and the rest of the guest will be
   protected by this KeyID.

4) Call TDH.MNG.ADDCX to add TD Control Structure (TDCS) pages.

5) Call TDH.MNG.INIT to initialize the TDCS.

To reclaim these pages for use by the kernel other SEAMCALLs are needed,
which will be added in future patches.

Add tdh_mng_addcx(), tdh_mng_create() and tdh_mng_init() to export these
SEAMCALLs so that KVM can use them to create TDs.

For SEAMCALLs that give a page to the TDX module to be encrypted, CLFLUSH
the page mapped with KeyID 0, such that any dirty cache lines don't write
back later and clobber TD memory or control structures. Don't worry about
the other MK-TME KeyIDs because the kernel doesn't use them. The TDX docs
specify that this flush is not needed unless the TDX module exposes the
CLFLUSH_BEFORE_ALLOC feature bit. Be conservative and always flush. Add a
helper function to facilitate this.
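To make the ordering concrete, a KVM-side creation sequence could be
shaped roughly like the sketch below. This is illustrative only: kvm_tdx,
hkid, td_params and ext_err are hypothetical names, and error handling,
locking and per-package dispatch are omitted:

  /* Hypothetical caller, not the actual KVM implementation. */
  err = tdh_mng_create(&kvm_tdx->td, hkid);	/* TDR page + guest KeyID */

  /* TDH.MNG.KEY.CONFIG must be invoked on each package. */
  err = tdh_mng_key_config(&kvm_tdx->td);

  for (i = 0; i < kvm_tdx->td.tdcs_nr_pages; i++)
  	err = tdh_mng_addcx(&kvm_tdx->td, kvm_tdx->td.tdcs_pages[i]);

  err = tdh_mng_init(&kvm_tdx->td, __pa(td_params), &ext_err);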
Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Isaku Yamahata Signed-off-by: Kai Huang Signed-off-by: Rick Edgecombe Reviewed-by: Binbin Wu Reviewed-by: Yuan Yao Message-ID: <20241203010317.827803-3-rick.p.edgecombe@intel.com> Acked-by: Dave Hansen Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/tdx.h | 3 +++ arch/x86/virt/vmx/tdx/tdx.c | 51 +++++++++++++++++++++++++++++++++++++ arch/x86/virt/vmx/tdx/tdx.h | 3 +++ 3 files changed, 57 insertions(+) diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 5950e0a092ca..6ba3b806e880 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -129,8 +129,11 @@ struct tdx_td { struct page **tdcs_pages; }; +u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page); u64 tdh_mng_key_config(struct tdx_td *td); +u64 tdh_mng_create(struct tdx_td *td, u16 hkid); u64 tdh_mng_key_freeid(struct tdx_td *td); +u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err); #else static inline void tdx_init(void) { } static inline int tdx_cpu_enable(void) { return -ENODEV; } diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index 1ffbdb840004..ce4b1e96c5b0 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -1462,6 +1462,29 @@ static inline u64 tdx_tdr_pa(struct tdx_td *td) return page_to_phys(td->tdr_page); } +/* + * The TDX module exposes a CLFLUSH_BEFORE_ALLOC bit to specify whether + * a CLFLUSH of pages is required before handing them to the TDX module. + * Be conservative and make the code simpler by doing the CLFLUSH + * unconditionally. + */ +static void tdx_clflush_page(struct page *page) +{ + clflush_cache_range(page_to_virt(page), PAGE_SIZE); +} + +u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page) +{ + struct tdx_module_args args = { + .rcx = page_to_phys(tdcs_page), + .rdx = tdx_tdr_pa(td), + }; + + tdx_clflush_page(tdcs_page); + return seamcall(TDH_MNG_ADDCX, &args); +} +EXPORT_SYMBOL_GPL(tdh_mng_addcx); + u64 tdh_mng_key_config(struct tdx_td *td) { struct tdx_module_args args = { @@ -1472,6 +1495,18 @@ u64 tdh_mng_key_config(struct tdx_td *td) } EXPORT_SYMBOL_GPL(tdh_mng_key_config); +u64 tdh_mng_create(struct tdx_td *td, u16 hkid) +{ + struct tdx_module_args args = { + .rcx = tdx_tdr_pa(td), + .rdx = hkid, + }; + + tdx_clflush_page(td->tdr_page); + return seamcall(TDH_MNG_CREATE, &args); +} +EXPORT_SYMBOL_GPL(tdh_mng_create); + u64 tdh_mng_key_freeid(struct tdx_td *td) { struct tdx_module_args args = { @@ -1481,3 +1516,19 @@ u64 tdh_mng_key_freeid(struct tdx_td *td) return seamcall(TDH_MNG_KEY_FREEID, &args); } EXPORT_SYMBOL_GPL(tdh_mng_key_freeid); + +u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err) +{ + struct tdx_module_args args = { + .rcx = tdx_tdr_pa(td), + .rdx = td_params, + }; + u64 ret; + + ret = seamcall_ret(TDH_MNG_INIT, &args); + + *extended_err = args.rcx; + + return ret; +} +EXPORT_SYMBOL_GPL(tdh_mng_init); diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h index 5579317f67ab..0861c3f09576 100644 --- a/arch/x86/virt/vmx/tdx/tdx.h +++ b/arch/x86/virt/vmx/tdx/tdx.h @@ -15,8 +15,11 @@ /* * TDX module SEAMCALL leaf functions */ +#define TDH_MNG_ADDCX 1 #define TDH_MNG_KEY_CONFIG 8 +#define TDH_MNG_CREATE 9 #define TDH_MNG_KEY_FREEID 20 +#define TDH_MNG_INIT 21 #define TDH_PHYMEM_PAGE_RDMD 24 #define TDH_SYS_KEY_CONFIG 31 #define TDH_SYS_INIT 33 -- 2.51.0 From 0d65dff2b9057107cd002864a27de6c2b49a5755 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe 
Date: Mon, 2 Dec 2024 17:03:13 -0800
Subject: [PATCH 10/16] x86/virt/tdx: Add SEAMCALL wrappers for TDX vCPU
 creation

Intel TDX protects guest VMs from malicious host and certain physical
attacks. It defines various control structures that hold state for
virtualized components of the TD (i.e. VMs or vCPUs). These control
structures are stored in pages given to the TDX module and encrypted with
either the global KeyID or the guest KeyIDs.

To manipulate these control structures the TDX module defines a few
SEAMCALLs. KVM will use these during the process of creating a vCPU as
follows:

1) Call TDH.VP.CREATE to create a TD vCPU Root (TDVPR) page for each
   vCPU.

2) Call TDH.VP.ADDCX to add per-vCPU control pages (TDCX) for each vCPU.

3) Call TDH.VP.INIT to initialize the TDCX for each vCPU.

To reclaim these pages for use by the kernel other SEAMCALLs are needed,
which will be added in future patches.

Export functions to allow KVM to make these SEAMCALLs. Export two
variants for TDH.VP.CREATE, in order to support the planned logic of KVM
to support TDX modules with and without the ENUM_TOPOLOGY feature. If KVM
can drop support for the !ENUM_TOPOLOGY case, this could go down to a
single version. Leave that for later discussion.

The TDX module provides SEAMCALLs to hand pages to the TDX module for
storing TDX controlled state. SEAMCALLs that operate on this state are
directed to the appropriate TD vCPU using references to the pages
originally provided for managing the vCPU's state. So the host kernel
needs to track these pages, both as an ID for specifying which vCPU to
operate on, and to allow them to be eventually reclaimed.

The vCPU associated pages are called TDVPR (Trust Domain Virtual
Processor Root) and TDCX (Trust Domain Control Extension). Introduce
"struct tdx_vp" for holding references to pages provided to the TDX
module for the TD vCPU associated state. Don't plan for any vCPU
associated state that is controlled by KVM to live in this struct. Only
expect it to hold data for concepts specific to the TDX architecture, for
which there can't already be preexisting storage in KVM.

Add both the TDVPR page and an array of TDCX pages, even though the
SEAMCALL wrappers will only need to know about the TDVPR page for
directing the SEAMCALLs to the right vCPU. Adding the TDCX pages to this
struct will let all of the vCPU associated pages handed to the TDX module
be tracked in one location.

For a type to specify physical pages, use KVM's hpa_t type. Do this for
KVM's benefit: it is the common type used to hold physical addresses in
KVM, so it will make interoperability easier.
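Analogously to the TD creation sketch earlier in the series, a
hypothetical vCPU creation sequence with these wrappers might look like
this (tdx_vcpu, vcpu_rcx and x2apic_id are illustrative names, error
handling omitted):

  err = tdh_vp_create(&kvm_tdx->td, &tdx_vcpu->vp);

  for (i = 0; i < kvm_tdx->td.tdcx_nr_pages; i++)
  	err = tdh_vp_addcx(&tdx_vcpu->vp, tdx_vcpu->vp.tdcx_pages[i]);

  /* x2apic_id is the vCPU's x2APIC ID, assumed to be known here. */
  err = tdh_vp_init(&tdx_vcpu->vp, vcpu_rcx, x2apic_id);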
Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Isaku Yamahata Signed-off-by: Kai Huang Signed-off-by: Rick Edgecombe Reviewed-by: Binbin Wu Reviewed-by: Yuan Yao Message-ID: <20241203010317.827803-4-rick.p.edgecombe@intel.com> Acked-by: Dave Hansen Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/tdx.h | 14 ++++++++++++ arch/x86/virt/vmx/tdx/tdx.c | 43 +++++++++++++++++++++++++++++++++++++ arch/x86/virt/vmx/tdx/tdx.h | 11 ++++++++++ 3 files changed, 68 insertions(+) diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 6ba3b806e880..f783d5f1a0e1 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -127,13 +127,27 @@ struct tdx_td { int tdcs_nr_pages; /* TD control structure: */ struct page **tdcs_pages; + + /* Size of `tdcx_pages` in struct tdx_vp */ + int tdcx_nr_pages; +}; + +struct tdx_vp { + /* TDVP root page */ + struct page *tdvpr_page; + + /* TD vCPU control structure: */ + struct page **tdcx_pages; }; u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page); +u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page); u64 tdh_mng_key_config(struct tdx_td *td); u64 tdh_mng_create(struct tdx_td *td, u16 hkid); +u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp); u64 tdh_mng_key_freeid(struct tdx_td *td); u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err); +u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid); #else static inline void tdx_init(void) { } static inline int tdx_cpu_enable(void) { return -ENODEV; } diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index ce4b1e96c5b0..e55570575f22 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -5,6 +5,7 @@ * Intel Trusted Domain Extensions (TDX) support */ +#include "asm/page_types.h" #define pr_fmt(fmt) "virt/tdx: " fmt #include @@ -1462,6 +1463,11 @@ static inline u64 tdx_tdr_pa(struct tdx_td *td) return page_to_phys(td->tdr_page); } +static inline u64 tdx_tdvpr_pa(struct tdx_vp *td) +{ + return page_to_phys(td->tdvpr_page); +} + /* * The TDX module exposes a CLFLUSH_BEFORE_ALLOC bit to specify whether * a CLFLUSH of pages is required before handing them to the TDX module. @@ -1485,6 +1491,18 @@ u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page) } EXPORT_SYMBOL_GPL(tdh_mng_addcx); +u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page) +{ + struct tdx_module_args args = { + .rcx = page_to_phys(tdcx_page), + .rdx = tdx_tdvpr_pa(vp), + }; + + tdx_clflush_page(tdcx_page); + return seamcall(TDH_VP_ADDCX, &args); +} +EXPORT_SYMBOL_GPL(tdh_vp_addcx); + u64 tdh_mng_key_config(struct tdx_td *td) { struct tdx_module_args args = { @@ -1507,6 +1525,18 @@ u64 tdh_mng_create(struct tdx_td *td, u16 hkid) } EXPORT_SYMBOL_GPL(tdh_mng_create); +u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp) +{ + struct tdx_module_args args = { + .rcx = tdx_tdvpr_pa(vp), + .rdx = tdx_tdr_pa(td), + }; + + tdx_clflush_page(vp->tdvpr_page); + return seamcall(TDH_VP_CREATE, &args); +} +EXPORT_SYMBOL_GPL(tdh_vp_create); + u64 tdh_mng_key_freeid(struct tdx_td *td) { struct tdx_module_args args = { @@ -1532,3 +1562,16 @@ u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err) return ret; } EXPORT_SYMBOL_GPL(tdh_mng_init); + +u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid) +{ + struct tdx_module_args args = { + .rcx = tdx_tdvpr_pa(vp), + .rdx = initial_rcx, + .r8 = x2apicid, + }; + + /* apicid requires version == 1. 
 */
+	return seamcall(TDH_VP_INIT | (1ULL << TDX_VERSION_SHIFT), &args);
+}
+EXPORT_SYMBOL_GPL(tdh_vp_init);

diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
index 0861c3f09576..f0464f7d9780 100644
--- a/arch/x86/virt/vmx/tdx/tdx.h
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -16,10 +16,13 @@
  * TDX module SEAMCALL leaf functions
  */
 #define TDH_MNG_ADDCX		1
+#define TDH_VP_ADDCX		4
 #define TDH_MNG_KEY_CONFIG	8
 #define TDH_MNG_CREATE		9
+#define TDH_VP_CREATE		10
 #define TDH_MNG_KEY_FREEID	20
 #define TDH_MNG_INIT		21
+#define TDH_VP_INIT		22
 #define TDH_PHYMEM_PAGE_RDMD	24
 #define TDH_SYS_KEY_CONFIG	31
 #define TDH_SYS_INIT		33
@@ -28,6 +31,14 @@
 #define TDH_SYS_TDMR_INIT	36
 #define TDH_SYS_CONFIG		45
 
+/*
+ * SEAMCALL leaf:
+ *
+ * Bit 15:0	Leaf number
+ * Bit 23:16	Version number
+ */
+#define TDX_VERSION_SHIFT	16
+
 /* TDX page types */
 #define PT_NDA		0x0
 #define PT_RSVD		0x1
-- 
2.51.0

From 541b3e9e0d907b5d94e5d6e4a2e57962a7d1d134 Mon Sep 17 00:00:00 2001
From: Rick Edgecombe
Date: Mon, 2 Dec 2024 17:03:14 -0800
Subject: [PATCH 11/16] x86/virt/tdx: Add SEAMCALL wrappers for TDX page
 cache management

Intel TDX protects guest VMs from malicious host and certain physical
attacks. The TDX module uses pages provided by the host for both control
structures and for TD guest pages. These pages are encrypted using the
MK-TME encryption engine, with its special requirements around cache
invalidation. For its own security, the TDX module ensures pages are
flushed properly and tracks which usage they are currently assigned.

For creating and tearing down TD VMs and vCPUs KVM will need to use the
TDH.PHYMEM.PAGE.RECLAIM, TDH.PHYMEM.CACHE.WB, and TDH.PHYMEM.PAGE.WBINVD
SEAMCALLs.

Add tdh_phymem_page_reclaim() to enable KVM to call
TDH.PHYMEM.PAGE.RECLAIM to reclaim the page for use by the host kernel.
This effectively resets its state in the TDX module's page tracking
(PAMT), if the page is available to be reclaimed. This will be used by
KVM to reclaim the various types of pages owned by the TDX module. It
will have a small wrapper in KVM that retries in the case of a relevant
error code. Don't implement this wrapper in arch/x86 because KVM's
solution around retrying SEAMCALLs will be better located in a single
place.

Add tdh_phymem_cache_wb() to enable KVM to call TDH.PHYMEM.CACHE.WB to do
a cache write back in a way that the TDX module can verify, before it
allows a KeyID to be freed. The KVM code will use this to have a small
wrapper that handles retries. Since the TDH.PHYMEM.CACHE.WB operation is
interruptible, have tdh_phymem_cache_wb() take a resume argument to pass
this info to the TDX module for restarts. It is worth noting that this
SEAMCALL uses a SEAM-specific MSR to do the write back in sections. In
this way it does export some new functionality that affects CPU state.

Add tdh_phymem_page_wbinvd_tdr() to enable KVM to call
TDH.PHYMEM.PAGE.WBINVD to do a cache write back and invalidate of a TDR,
using the global KeyID. The underlying TDH.PHYMEM.PAGE.WBINVD SEAMCALL
requires the related KeyID to be encoded into the SEAMCALL args. Since
the global KeyID is not exposed to KVM, a dedicated wrapper is needed for
TDR-focused TDH.PHYMEM.PAGE.WBINVD operations.
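The KVM-side retry wrapper mentioned above could be shaped roughly as in
the sketch below; TDX_INTERRUPTED_RESUMABLE is assumed here to be the
status code the TDX module returns for an interrupted, resumable
TDH.PHYMEM.CACHE.WB:

  /* Hypothetical retry loop: resume an interrupted write back. */
  bool resume = false;
  u64 err;

  do {
  	err = tdh_phymem_cache_wb(resume);
  	resume = true;		/* any restart is a resumption */
  } while (err == TDX_INTERRUPTED_RESUMABLE);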
Co-developed-by: Sean Christopherson
Signed-off-by: Sean Christopherson
Signed-off-by: Isaku Yamahata
Signed-off-by: Kai Huang
Signed-off-by: Rick Edgecombe
Reviewed-by: Binbin Wu
Reviewed-by: Yuan Yao
Message-ID: <20241203010317.827803-5-rick.p.edgecombe@intel.com>
Acked-by: Dave Hansen
Signed-off-by: Paolo Bonzini
---
 arch/x86/include/asm/tdx.h  | 18 ++++++++++++++++
 arch/x86/virt/vmx/tdx/tdx.c | 42 +++++++++++++++++++++++++++++++++++++
 arch/x86/virt/vmx/tdx/tdx.h |  3 +++
 3 files changed, 63 insertions(+)

diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index f783d5f1a0e1..41a72ce961f3 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -5,6 +5,7 @@
 
 #include
 #include
+#include
 #include
 #include
@@ -33,6 +34,7 @@
 #ifndef __ASSEMBLY__
 
 #include
+#include
 
 /*
  * Used by the #VE exception handler to gather the #VE exception
@@ -140,6 +142,19 @@ struct tdx_vp {
 	struct page **tdcx_pages;
 };
+
+static inline u64 mk_keyed_paddr(u16 hkid, struct page *page)
+{
+	u64 ret;
+
+	ret = page_to_phys(page);
+	/* KeyID bits are just above the physical address bits: */
+	ret |= (u64)hkid << boot_cpu_data.x86_phys_bits;
+
+	return ret;
+
+}
+
 u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page);
 u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page);
 u64 tdh_mng_key_config(struct tdx_td *td);
@@ -148,6 +163,9 @@ u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp);
 u64 tdh_mng_key_freeid(struct tdx_td *td);
 u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err);
 u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid);
+u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size);
+u64 tdh_phymem_cache_wb(bool resume);
+u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td);
 #else
 static inline void tdx_init(void) { }
 static inline int tdx_cpu_enable(void) { return -ENODEV; }

diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index e55570575f22..2350d25b4ca3 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -1575,3 +1575,45 @@ u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid)
 	return seamcall(TDH_VP_INIT | (1ULL << TDX_VERSION_SHIFT), &args);
 }
 EXPORT_SYMBOL_GPL(tdh_vp_init);
+
+/*
+ * TDX ABI defines output operands as PT, OWNER and SIZE. These are TDX defined formats.
+ * So despite the names, they must be interpreted specially as described by the spec. Return
+ * them only for error reporting purposes.
+ */
+u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size)
+{
+	struct tdx_module_args args = {
+		.rcx = page_to_phys(page),
+	};
+	u64 ret;
+
+	ret = seamcall_ret(TDH_PHYMEM_PAGE_RECLAIM, &args);
+
+	*tdx_pt = args.rcx;
+	*tdx_owner = args.rdx;
+	*tdx_size = args.r8;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tdh_phymem_page_reclaim);
+
+u64 tdh_phymem_cache_wb(bool resume)
+{
+	struct tdx_module_args args = {
+		.rcx = resume ? 1 : 0,
+	};
+
+	return seamcall(TDH_PHYMEM_CACHE_WB, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_phymem_cache_wb);
+
+u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td)
+{
+	struct tdx_module_args args = {};
+
+	args.rcx = mk_keyed_paddr(tdx_global_keyid, td->tdr_page);
+
+	return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
+}
+EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_tdr);

diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
index f0464f7d9780..7a15c9afcdfa 100644
--- a/arch/x86/virt/vmx/tdx/tdx.h
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -24,11 +24,14 @@
 #define TDH_MNG_INIT		21
 #define TDH_VP_INIT		22
 #define TDH_PHYMEM_PAGE_RDMD	24
+#define TDH_PHYMEM_PAGE_RECLAIM	28
 #define TDH_SYS_KEY_CONFIG	31
 #define TDH_SYS_INIT		33
 #define TDH_SYS_RD		34
 #define TDH_SYS_LP_INIT		35
 #define TDH_SYS_TDMR_INIT	36
+#define TDH_PHYMEM_CACHE_WB	40
+#define TDH_PHYMEM_PAGE_WBINVD	41
 #define TDH_SYS_CONFIG		45
 
 /*
-- 
2.51.0

From 5e5151c5562afa28555b45e70f4386d62f62e640 Mon Sep 17 00:00:00 2001
From: Rick Edgecombe
Date: Mon, 2 Dec 2024 17:03:15 -0800
Subject: [PATCH 12/16] x86/virt/tdx: Add SEAMCALL wrappers for TDX VM/vCPU
 field access

Intel TDX protects guest VMs from malicious host and certain physical
attacks. The TDX module has TD scoped and vCPU scoped "metadata fields".
These fields are a bit like VMCS fields, and stored in data structures
maintained by the TDX module. Export 3 SEAMCALLs for use in reading and
writing these fields:

Make tdh_mng_rd() use TDH.MNG.RD to read the TD scoped metadata.

Make tdh_vp_rd()/tdh_vp_wr() use TDH.VP.RD/WR to read/write the vCPU
scoped metadata.

KVM will use these by creating inline helpers that target various
metadata sizes. Export the raw SEAMCALL leaf, to avoid exporting the
large number of various sized helpers.

Co-developed-by: Sean Christopherson
Signed-off-by: Sean Christopherson
Signed-off-by: Isaku Yamahata
Signed-off-by: Kai Huang
Signed-off-by: Rick Edgecombe
Reviewed-by: Binbin Wu
Reviewed-by: Yuan Yao
Message-ID: <20241203010317.827803-6-rick.p.edgecombe@intel.com>
Acked-by: Dave Hansen
Signed-off-by: Paolo Bonzini
---
 arch/x86/include/asm/tdx.h  |  3 +++
 arch/x86/virt/vmx/tdx/tdx.c | 47 +++++++++++++++++++++++++++++++++++++
 arch/x86/virt/vmx/tdx/tdx.h |  3 +++
 3 files changed, 53 insertions(+)

diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 41a72ce961f3..22ffb8dfe994 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -160,9 +160,12 @@ u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page);
 u64 tdh_mng_key_config(struct tdx_td *td);
 u64 tdh_mng_create(struct tdx_td *td, u16 hkid);
 u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp);
+u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data);
 u64 tdh_mng_key_freeid(struct tdx_td *td);
 u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err);
 u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid);
+u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data);
+u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask);
 u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size);
 u64 tdh_phymem_cache_wb(bool resume);
 u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td);

diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 2350d25b4ca3..93150f3c1112 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -1537,6 +1537,23 @@ u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp)
 }
 EXPORT_SYMBOL_GPL(tdh_vp_create);
 
+u64 tdh_mng_rd(struct
tdx_td *td, u64 field, u64 *data) +{ + struct tdx_module_args args = { + .rcx = tdx_tdr_pa(td), + .rdx = field, + }; + u64 ret; + + ret = seamcall_ret(TDH_MNG_RD, &args); + + /* R8: Content of the field, or 0 in case of error. */ + *data = args.r8; + + return ret; +} +EXPORT_SYMBOL_GPL(tdh_mng_rd); + u64 tdh_mng_key_freeid(struct tdx_td *td) { struct tdx_module_args args = { @@ -1563,6 +1580,36 @@ u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err) } EXPORT_SYMBOL_GPL(tdh_mng_init); +u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data) +{ + struct tdx_module_args args = { + .rcx = tdx_tdvpr_pa(vp), + .rdx = field, + }; + u64 ret; + + ret = seamcall_ret(TDH_VP_RD, &args); + + /* R8: Content of the field, or 0 in case of error. */ + *data = args.r8; + + return ret; +} +EXPORT_SYMBOL_GPL(tdh_vp_rd); + +u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask) +{ + struct tdx_module_args args = { + .rcx = tdx_tdvpr_pa(vp), + .rdx = field, + .r8 = data, + .r9 = mask, + }; + + return seamcall(TDH_VP_WR, &args); +} +EXPORT_SYMBOL_GPL(tdh_vp_wr); + u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid) { struct tdx_module_args args = { diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h index 7a15c9afcdfa..aacd38b12989 100644 --- a/arch/x86/virt/vmx/tdx/tdx.h +++ b/arch/x86/virt/vmx/tdx/tdx.h @@ -19,11 +19,13 @@ #define TDH_VP_ADDCX 4 #define TDH_MNG_KEY_CONFIG 8 #define TDH_MNG_CREATE 9 +#define TDH_MNG_RD 11 #define TDH_VP_CREATE 10 #define TDH_MNG_KEY_FREEID 20 #define TDH_MNG_INIT 21 #define TDH_VP_INIT 22 #define TDH_PHYMEM_PAGE_RDMD 24 +#define TDH_VP_RD 26 #define TDH_PHYMEM_PAGE_RECLAIM 28 #define TDH_SYS_KEY_CONFIG 31 #define TDH_SYS_INIT 33 @@ -32,6 +34,7 @@ #define TDH_SYS_TDMR_INIT 36 #define TDH_PHYMEM_CACHE_WB 40 #define TDH_PHYMEM_PAGE_WBINVD 41 +#define TDH_VP_WR 43 #define TDH_SYS_CONFIG 45 /* -- 2.51.0 From e465cc63db1956f41370099c8ba0a62cf4f65990 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Mon, 2 Dec 2024 17:03:16 -0800 Subject: [PATCH 13/16] x86/virt/tdx: Add SEAMCALL wrappers for TDX flush operations Intel TDX protects guest VMs from malicious host and certain physical attacks. The TDX module has the concept of flushing vCPUs. These flushes include both a flush of the translation caches and also any other state internal to the TDX module. Before freeing a KeyID, this flush operation needs to be done. KVM will need to perform the flush on each pCPU associated with the TD, and also perform a TD scoped operation that checks if the flush has been done on all vCPU's associated with the TD. Add a tdh_vp_flush() function to be used to call TDH.VP.FLUSH on each pCPU associated with the TD during TD teardown. It will also be called when disabling TDX and during vCPU migration between pCPUs. Add tdh_mng_vpflushdone() to be used by KVM to call TDH.MNG.VPFLUSHDONE. KVM will use this during TD teardown to verify that TDH.VP.FLUSH has been called sufficiently, and advance the state machine that will allow for reclaiming the TD's KeyID. 
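A plausible teardown ordering built on these wrappers, per the
description above (kvm_tdx and tdx_vcpu are hypothetical KVM-side
structures, and the per-pCPU dispatch is elided):

  /* On each pCPU the vCPU has run on, flush its TDX module state. */
  err = tdh_vp_flush(&tdx_vcpu->vp);

  /* TD-scoped check that all flushes happened, then free the KeyID. */
  err = tdh_mng_vpflushdone(&kvm_tdx->td);
  if (!err)
  	err = tdh_mng_key_freeid(&kvm_tdx->td);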
Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Isaku Yamahata Signed-off-by: Kai Huang Signed-off-by: Rick Edgecombe Reviewed-by: Binbin Wu Reviewed-by: Yuan Yao Message-ID: <20241203010317.827803-7-rick.p.edgecombe@intel.com> Acked-by: Dave Hansen Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/tdx.h | 2 ++ arch/x86/virt/vmx/tdx/tdx.c | 20 ++++++++++++++++++++ arch/x86/virt/vmx/tdx/tdx.h | 2 ++ 3 files changed, 24 insertions(+) diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 22ffb8dfe994..1392f20c3f1d 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -161,6 +161,8 @@ u64 tdh_mng_key_config(struct tdx_td *td); u64 tdh_mng_create(struct tdx_td *td, u16 hkid); u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp); u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data); +u64 tdh_vp_flush(struct tdx_vp *vp); +u64 tdh_mng_vpflushdone(struct tdx_td *td); u64 tdh_mng_key_freeid(struct tdx_td *td); u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err); u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid); diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index 93150f3c1112..369b264f4d9b 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -1554,6 +1554,26 @@ u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data) } EXPORT_SYMBOL_GPL(tdh_mng_rd); +u64 tdh_vp_flush(struct tdx_vp *vp) +{ + struct tdx_module_args args = { + .rcx = tdx_tdvpr_pa(vp), + }; + + return seamcall(TDH_VP_FLUSH, &args); +} +EXPORT_SYMBOL_GPL(tdh_vp_flush); + +u64 tdh_mng_vpflushdone(struct tdx_td *td) +{ + struct tdx_module_args args = { + .rcx = tdx_tdr_pa(td), + }; + + return seamcall(TDH_MNG_VPFLUSHDONE, &args); +} +EXPORT_SYMBOL_GPL(tdh_mng_vpflushdone); + u64 tdh_mng_key_freeid(struct tdx_td *td) { struct tdx_module_args args = { diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h index aacd38b12989..62cb7832c42d 100644 --- a/arch/x86/virt/vmx/tdx/tdx.h +++ b/arch/x86/virt/vmx/tdx/tdx.h @@ -20,6 +20,8 @@ #define TDH_MNG_KEY_CONFIG 8 #define TDH_MNG_CREATE 9 #define TDH_MNG_RD 11 +#define TDH_VP_FLUSH 18 +#define TDH_MNG_VPFLUSHDONE 19 #define TDH_VP_CREATE 10 #define TDH_MNG_KEY_FREEID 20 #define TDH_MNG_INIT 21 -- 2.51.0 From 7ba2fd80ee43e34f5e5c449e5e26538c44bbf52e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 22 Jan 2025 06:00:28 -0500 Subject: [PATCH 14/16] x86/virt/tdx: allocate tdx_sys_info in static memory Adding all the information that KVM needs increases the size of struct tdx_sys_info, to the point that you can get warnings about the stack size of init_tdx_module(). Since KVM also needs to read the TDX metadata after init_tdx_module() returns, make the variable a global. Reviewed-by: Rick Edgecombe Reviewed-by: Kai Huang Signed-off-by: Paolo Bonzini --- arch/x86/virt/vmx/tdx/tdx.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index 369b264f4d9b..578ad468634b 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -53,6 +53,8 @@ static DEFINE_MUTEX(tdx_module_lock); /* All TDX-usable memory regions. Protected by mem_hotplug_lock. 
 */
 static LIST_HEAD(tdx_memlist);
 
+static struct tdx_sys_info tdx_sysinfo;
+
 typedef void (*sc_err_func_t)(u64 fn, u64 err, struct tdx_module_args *args);
 
 static inline void seamcall_err(u64 fn, u64 err, struct tdx_module_args *args)
@@ -1061,15 +1063,14 @@ static int init_tdmrs(struct tdmr_info_list *tdmr_list)
 
 static int init_tdx_module(void)
 {
-	struct tdx_sys_info sysinfo;
 	int ret;
 
-	ret = get_tdx_sys_info(&sysinfo);
+	ret = get_tdx_sys_info(&tdx_sysinfo);
 	if (ret)
 		return ret;
 
 	/* Check whether the kernel can support this module */
-	ret = check_features(&sysinfo);
+	ret = check_features(&tdx_sysinfo);
 	if (ret)
 		return ret;
 
@@ -1090,12 +1091,12 @@ static int init_tdx_module(void)
 		goto out_put_tdxmem;
 
 	/* Allocate enough space for constructing TDMRs */
-	ret = alloc_tdmr_list(&tdx_tdmr_list, &sysinfo.tdmr);
+	ret = alloc_tdmr_list(&tdx_tdmr_list, &tdx_sysinfo.tdmr);
 	if (ret)
 		goto err_free_tdxmem;
 
 	/* Cover all TDX-usable memory regions in TDMRs */
-	ret = construct_tdmrs(&tdx_memlist, &tdx_tdmr_list, &sysinfo.tdmr);
+	ret = construct_tdmrs(&tdx_memlist, &tdx_tdmr_list, &tdx_sysinfo.tdmr);
 	if (ret)
 		goto err_free_tdmrs;
 
-- 
2.51.0

From 4caf32daf0b4f83681d63acc559635a2a8ddf71c Mon Sep 17 00:00:00 2001
From: Kai Huang
Date: Tue, 24 Dec 2024 09:07:40 -0500
Subject: [PATCH 15/16] x86/virt/tdx: Read essential global metadata for KVM

KVM needs two classes of global metadata to create and run TDX guests:

- "TD Control Structures"
- "TD Configurability"

The first class contains the sizes of TDX guest per-VM and per-vCPU
control structures. KVM will need to use them to allocate enough space
for those control structures.

The second class contains info which reports things like which features
are configurable to TDX guest etc. KVM will need to use them to properly
configure TDX guests.

Read them for KVM TDX to use.

The code change is auto-generated by re-running the script in [1] after
uncommenting the "td_conf" and "td_ctrl" parts, to regenerate
tdx_global_metadata.{h,c} and update the existing copies in the kernel:

  #python tdx.py global_metadata.json tdx_global_metadata.h \
  	tdx_global_metadata.c

The 'global_metadata.json' can be fetched from [2].

Note that as of this writing, the JSON file only allows a maximum of 32
CPUID entries. While this is enough for current contents of the CPUID
leaves, there were plans to change the JSON per TDX module release which
would change the ABI and potentially prevent future versions of the TDX
module from working with older kernels.

While discussions are ongoing with the TDX module team on what exactly
constitutes an ABI breakage, in the meantime the TDX module team has
agreed to not increase the number of CPUID entries beyond 128 without an
opt-in. Therefore the file was tweaked by hand to change the maximum
number of CPUID_CONFIGs.
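As one example of how KVM might consume the new "TD Configurability"
fields, an ATTRIBUTES validity check could look like the sketch below.
This assumes the usual fixed-bit convention (a bit that is clear in
fixed0 must be clear, a bit that is set in fixed1 must be set);
tdx_attrs_valid() is a hypothetical helper and tdx_sysinfo would need to
be exposed to the consumer:

  static bool tdx_attrs_valid(u64 attrs)
  {
  	const struct tdx_sys_info_td_conf *c = &tdx_sysinfo.td_conf;

  	if (attrs & ~c->attributes_fixed0)	/* disallowed bit set */
  		return false;

  	return (attrs & c->attributes_fixed1) == c->attributes_fixed1;
  }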
Link: https://lore.kernel.org/kvm/0853b155ec9aac09c594caa60914ed6ea4dc0a71.camel@intel.com/ [1] Link: https://cdrdv2.intel.com/v1/dl/getContent/795381 [2] Signed-off-by: Kai Huang Signed-off-by: Rick Edgecombe Message-ID: <20241030190039.77971-4-rick.p.edgecombe@intel.com> Acked-by: Dave Hansen Signed-off-by: Paolo Bonzini --- arch/x86/virt/vmx/tdx/tdx_global_metadata.c | 50 +++++++++++++++++++++ arch/x86/virt/vmx/tdx/tdx_global_metadata.h | 19 ++++++++ 2 files changed, 69 insertions(+) diff --git a/arch/x86/virt/vmx/tdx/tdx_global_metadata.c b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c index 8027a24d1c6e..13ad2663488b 100644 --- a/arch/x86/virt/vmx/tdx/tdx_global_metadata.c +++ b/arch/x86/virt/vmx/tdx/tdx_global_metadata.c @@ -37,12 +37,62 @@ static int get_tdx_sys_info_tdmr(struct tdx_sys_info_tdmr *sysinfo_tdmr) return ret; } +static int get_tdx_sys_info_td_ctrl(struct tdx_sys_info_td_ctrl *sysinfo_td_ctrl) +{ + int ret = 0; + u64 val; + + if (!ret && !(ret = read_sys_metadata_field(0x9800000100000000, &val))) + sysinfo_td_ctrl->tdr_base_size = val; + if (!ret && !(ret = read_sys_metadata_field(0x9800000100000100, &val))) + sysinfo_td_ctrl->tdcs_base_size = val; + if (!ret && !(ret = read_sys_metadata_field(0x9800000100000200, &val))) + sysinfo_td_ctrl->tdvps_base_size = val; + + return ret; +} + +static int get_tdx_sys_info_td_conf(struct tdx_sys_info_td_conf *sysinfo_td_conf) +{ + int ret = 0; + u64 val; + int i, j; + + if (!ret && !(ret = read_sys_metadata_field(0x1900000300000000, &val))) + sysinfo_td_conf->attributes_fixed0 = val; + if (!ret && !(ret = read_sys_metadata_field(0x1900000300000001, &val))) + sysinfo_td_conf->attributes_fixed1 = val; + if (!ret && !(ret = read_sys_metadata_field(0x1900000300000002, &val))) + sysinfo_td_conf->xfam_fixed0 = val; + if (!ret && !(ret = read_sys_metadata_field(0x1900000300000003, &val))) + sysinfo_td_conf->xfam_fixed1 = val; + if (!ret && !(ret = read_sys_metadata_field(0x9900000100000004, &val))) + sysinfo_td_conf->num_cpuid_config = val; + if (!ret && !(ret = read_sys_metadata_field(0x9900000100000008, &val))) + sysinfo_td_conf->max_vcpus_per_td = val; + if (sysinfo_td_conf->num_cpuid_config > ARRAY_SIZE(sysinfo_td_conf->cpuid_config_leaves)) + return -EINVAL; + for (i = 0; i < sysinfo_td_conf->num_cpuid_config; i++) + if (!ret && !(ret = read_sys_metadata_field(0x9900000300000400 + i, &val))) + sysinfo_td_conf->cpuid_config_leaves[i] = val; + if (sysinfo_td_conf->num_cpuid_config > ARRAY_SIZE(sysinfo_td_conf->cpuid_config_values)) + return -EINVAL; + for (i = 0; i < sysinfo_td_conf->num_cpuid_config; i++) + for (j = 0; j < 2; j++) + if (!ret && !(ret = read_sys_metadata_field(0x9900000300000500 + i * 2 + j, &val))) + sysinfo_td_conf->cpuid_config_values[i][j] = val; + + return ret; +} + static int get_tdx_sys_info(struct tdx_sys_info *sysinfo) { int ret = 0; ret = ret ?: get_tdx_sys_info_features(&sysinfo->features); ret = ret ?: get_tdx_sys_info_tdmr(&sysinfo->tdmr); + ret = ret ?: get_tdx_sys_info_td_ctrl(&sysinfo->td_ctrl); + ret = ret ?: get_tdx_sys_info_td_conf(&sysinfo->td_conf); return ret; } diff --git a/arch/x86/virt/vmx/tdx/tdx_global_metadata.h b/arch/x86/virt/vmx/tdx/tdx_global_metadata.h index 6dd3c9695f59..060a2ad744bf 100644 --- a/arch/x86/virt/vmx/tdx/tdx_global_metadata.h +++ b/arch/x86/virt/vmx/tdx/tdx_global_metadata.h @@ -17,9 +17,28 @@ struct tdx_sys_info_tdmr { u16 pamt_1g_entry_size; }; +struct tdx_sys_info_td_ctrl { + u16 tdr_base_size; + u16 tdcs_base_size; + u16 tdvps_base_size; +}; + +struct 
tdx_sys_info_td_conf {
+	u64 attributes_fixed0;
+	u64 attributes_fixed1;
+	u64 xfam_fixed0;
+	u64 xfam_fixed1;
+	u16 num_cpuid_config;
+	u16 max_vcpus_per_td;
+	u64 cpuid_config_leaves[128];
+	u64 cpuid_config_values[128][2];
+};
+
 struct tdx_sys_info {
 	struct tdx_sys_info_features features;
 	struct tdx_sys_info_tdmr tdmr;
+	struct tdx_sys_info_td_ctrl td_ctrl;
+	struct tdx_sys_info_td_conf td_conf;
 };
 
 #endif
-- 
2.51.0

From aed4dde24c8e10e579ad40d04333a13140d011fc Mon Sep 17 00:00:00 2001
From: Isaku Yamahata
Date: Wed, 30 Oct 2024 12:00:17 -0700
Subject: [PATCH 16/16] x86/virt/tdx: Add tdx_guest_keyid_alloc/free() to
 alloc and free TDX guest KeyID

Intel TDX protects guest VMs from malicious host and certain physical
attacks. Pre-TDX Intel hardware has support for a memory encryption
architecture called MK-TME, which repurposes several high bits of
physical address as "KeyID". The BIOS reserves a sub-range of MK-TME
KeyIDs as "TDX private KeyIDs".

Each TDX guest must be assigned a unique TDX KeyID when it is created.
The kernel reserves the first TDX private KeyID for crypto-protection of
specific TDX module data whose lifecycle exceeds that of the KeyIDs
reserved for TDs' use. The rest of the KeyIDs are left for TDX guests to
use.

Create a small KeyID allocator. Export
tdx_guest_keyid_alloc()/tdx_guest_keyid_free() to allocate and free TDX
guest KeyID for KVM to use.

Don't provide the stub functions when CONFIG_INTEL_TDX_HOST=n since they
are not supposed to be called in this case.

Signed-off-by: Isaku Yamahata
Signed-off-by: Rick Edgecombe
Message-ID: <20241030190039.77971-5-rick.p.edgecombe@intel.com>
Acked-by: Dave Hansen
Signed-off-by: Paolo Bonzini
---
 arch/x86/include/asm/tdx.h  |  3 +++
 arch/x86/virt/vmx/tdx/tdx.c | 17 +++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 1392f20c3f1d..75a91869453d 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -122,6 +122,9 @@ int tdx_cpu_enable(void);
 int tdx_enable(void);
 const char *tdx_dump_mce_info(struct mce *m);
 
+int tdx_guest_keyid_alloc(void);
+void tdx_guest_keyid_free(unsigned int keyid);
+
 struct tdx_td {
 	/* TD root structure: */
 	struct page *tdr_page;

diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 578ad468634b..0122051af6b3 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -43,6 +44,8 @@ static u32 tdx_global_keyid __ro_after_init;
 static u32 tdx_guest_keyid_start __ro_after_init;
 static u32 tdx_nr_guest_keyids __ro_after_init;
 
+static DEFINE_IDA(tdx_guest_keyid_pool);
+
 static DEFINE_PER_CPU(bool, tdx_lp_initialized);
 
 static struct tdmr_info_list tdx_tdmr_list;
@@ -1459,6 +1462,20 @@ void __init tdx_init(void)
 	check_tdx_erratum();
 }
 
+int tdx_guest_keyid_alloc(void)
+{
+	return ida_alloc_range(&tdx_guest_keyid_pool, tdx_guest_keyid_start,
+			       tdx_guest_keyid_start + tdx_nr_guest_keyids - 1,
+			       GFP_KERNEL);
+}
+EXPORT_SYMBOL_GPL(tdx_guest_keyid_alloc);
+
+void tdx_guest_keyid_free(unsigned int keyid)
+{
+	ida_free(&tdx_guest_keyid_pool, keyid);
+}
+EXPORT_SYMBOL_GPL(tdx_guest_keyid_free);
+
 static inline u64 tdx_tdr_pa(struct tdx_td *td)
 {
 	return page_to_phys(td->tdr_page);
-- 
2.51.0
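Note: a sketch of how the allocator is expected to pair with the
TD-creation SEAMCALL wrappers from earlier in the series (hypothetical
caller; kvm_tdx and the chosen error value are illustrative):

  int hkid = tdx_guest_keyid_alloc();

  if (hkid < 0)
  	return hkid;

  /* Bind the KeyID to a new TD, and give it back on failure. */
  err = tdh_mng_create(&kvm_tdx->td, hkid);
  if (err) {
  	tdx_guest_keyid_free(hkid);
  	return -EIO;
  }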