From: David Woodhouse Date: Thu, 18 Jan 2024 12:57:30 +0000 (+0000) Subject: KVM: x86: Fix systemic drift in __get_kvmclock() with TSC scaling X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=refs%2Fheads%2Fkvmclock2;p=users%2Fdwmw2%2Flinux.git KVM: x86: Fix systemic drift in __get_kvmclock() with TSC scaling Converting a host TSC delta directly to nanoseconds results in a systemic drift compared to the way the guest calculates its kvmclock. Provide a __kvmclock_from_tsc() function which calculates the kvmclock just as the guest would, by scaling to the guest TSC first and from that to nanoseconds. Signed-off-by: David Woodhouse --- diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7bc1daf687417..bf9a36ec64407 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1347,6 +1347,7 @@ struct kvm_arch { u64 last_tsc_nsec; u64 last_tsc_write; u32 last_tsc_khz; + u64 last_tsc_scaling_ratio; u64 last_tsc_offset; u64 cur_tsc_nsec; u64 cur_tsc_write; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 368c3bc400eb1..8e97c47bf1466 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2682,6 +2682,7 @@ static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc, kvm->arch.last_tsc_nsec = ns; kvm->arch.last_tsc_write = tsc; kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; + kvm->arch.last_tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio; kvm->arch.last_tsc_offset = offset; vcpu->arch.last_guest_tsc = tsc; @@ -3104,11 +3105,53 @@ static unsigned long get_cpu_tsc_khz(void) return __this_cpu_read(cpu_tsc_khz); } +static uint64_t __kvmclock_from_tsc(struct kvm *kvm, uint64_t host_tsc) +{ + struct pvclock_vcpu_time_info hv_clock; + struct kvm_arch *ka = &kvm->arch; + uint64_t guest_tsc; + uint32_t tsc_khz; + + /* + * Scale the TSC delta (at host TSC frequency) to the guest + * TSC frequency, before scaling that to nanoseconds. 
Performing + * this two-step calculation in precisely the same way the guest + * does avoids a systemic drift due to loss of precision in the + * conversions. + * + * Use the frequency of the last-synchronized TSC by preference, + * since the scaling ratio is already calculated for that. Fall + * back to using the default TSC frequency for the KVM (for which + * the ratio must be calculated, which takes a little more time), + * and finally just use the host TSC frequency if neither of the + * former were set. + */ + if ((tsc_khz = ka->last_tsc_khz) && ka->last_tsc_scaling_ratio) { + guest_tsc = kvm_scale_tsc(host_tsc - ka->master_cycle_now, + ka->last_tsc_scaling_ratio); + } else if ((tsc_khz = READ_ONCE(ka->default_tsc_khz))) { + uint64_t ratio = mul_u64_u32_div(1ULL << kvm_caps.tsc_scaling_ratio_frac_bits, + tsc_khz, get_cpu_tsc_khz()); + + guest_tsc = kvm_scale_tsc(host_tsc - ka->master_cycle_now, + ratio); + } else { + guest_tsc = host_tsc - ka->master_cycle_now; + tsc_khz = get_cpu_tsc_khz(); + } + + hv_clock.tsc_timestamp = 0; + hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; + kvm_get_time_scale(NSEC_PER_SEC, tsc_khz * 1000LL, + &hv_clock.tsc_shift, + &hv_clock.tsc_to_system_mul); + return __pvclock_read_cycles(&hv_clock, guest_tsc); + +} /* Called within read_seqcount_begin/retry for kvm->pvclock_sc. 
*/ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data) { struct kvm_arch *ka = &kvm->arch; - struct pvclock_vcpu_time_info hv_clock; /* both __this_cpu_read() and rdtsc() should be on the same cpu */ get_cpu(); @@ -3127,12 +3170,7 @@ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data) data->host_tsc = rdtsc(); data->flags |= KVM_CLOCK_TSC_STABLE; - hv_clock.tsc_timestamp = ka->master_cycle_now; - hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; - kvm_get_time_scale(NSEC_PER_SEC, get_cpu_tsc_khz() * 1000LL, - &hv_clock.tsc_shift, - &hv_clock.tsc_to_system_mul); - data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc); + data->clock = __kvmclock_from_tsc(kvm, data->host_tsc); } else { data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset; }