]> www.infradead.org Git - users/dwmw2/linux.git/commitdiff
KVM: x86: Fix systemic drift in __get_kvmclock() with TSC scaling kvmclock2
author David Woodhouse <dwmw@amazon.co.uk>
Thu, 18 Jan 2024 12:57:30 +0000 (12:57 +0000)
committer David Woodhouse <dwmw@amazon.co.uk>
Wed, 31 Jan 2024 00:18:01 +0000 (00:18 +0000)
Converting a host TSC delta directly to nanoseconds results in a systemic
drift compared to the way the guest calculates its kvmclock.

Provide a __kvmclock_from_tsc() function which calculates the kvmclock
just as the guest would, by scaling to the guest TSC first and from that
to nanoseconds.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/x86.c

index 7bc1daf6874174d5102d91eee51e4f860d1c0c1a..bf9a36ec64407abdb69380362862ba97b112a2da 100644 (file)
@@ -1347,6 +1347,7 @@ struct kvm_arch {
        u64 last_tsc_nsec;
        u64 last_tsc_write;
        u32 last_tsc_khz;
+       u64 last_tsc_scaling_ratio;
        u64 last_tsc_offset;
        u64 cur_tsc_nsec;
        u64 cur_tsc_write;
index 368c3bc400eb1bd39fc6054e17ea942baf4cd48c..8e97c47bf1466a029191cea21f4badd31918ee9a 100644 (file)
@@ -2682,6 +2682,7 @@ static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc,
        kvm->arch.last_tsc_nsec = ns;
        kvm->arch.last_tsc_write = tsc;
        kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
+       kvm->arch.last_tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
        kvm->arch.last_tsc_offset = offset;
 
        vcpu->arch.last_guest_tsc = tsc;
@@ -3104,11 +3105,53 @@ static unsigned long get_cpu_tsc_khz(void)
                return __this_cpu_read(cpu_tsc_khz);
 }
 
+static uint64_t __kvmclock_from_tsc(struct kvm *kvm, uint64_t host_tsc)
+{
+       struct pvclock_vcpu_time_info hv_clock;
+       struct kvm_arch *ka = &kvm->arch;
+       uint64_t guest_tsc;
+       uint32_t tsc_khz;
+
+       /*
+        * Scale the TSC delta (at host TSC frequency) to the guest TSC
+        * frequency, before scaling that to nanoseconds. Performing
+        * this two-step calculation in precisely the same way the guest
+        * does, avoids a systemic drift due to loss of precision in the
+        * conversions.
+        *
+        * Use the frequency of the last-synchronized TSC by preference,
+        * since the scaling ratio is already calculated for that. Fall
+        * back to using the default TSC frequency for this VM (for which
+        * the ratio must be calculated which takes a little more time),
+        * and finally just use the host TSC frequency if neither of the
+        * former was set.
+        */
+       if ((tsc_khz = ka->last_tsc_khz) && ka->last_tsc_scaling_ratio) {
+               guest_tsc = kvm_scale_tsc(host_tsc - ka->master_cycle_now,
+                                         ka->last_tsc_scaling_ratio);
+       } else if ((tsc_khz = READ_ONCE(ka->default_tsc_khz))) {
+               uint64_t ratio = mul_u64_u32_div(1ULL << kvm_caps.tsc_scaling_ratio_frac_bits,
+                                                tsc_khz, get_cpu_tsc_khz());
+
+               guest_tsc = kvm_scale_tsc(host_tsc - ka->master_cycle_now,
+                                         ratio);
+       } else {
+               guest_tsc = host_tsc - ka->master_cycle_now;
+               tsc_khz = get_cpu_tsc_khz();
+       }
+
+       hv_clock.tsc_timestamp = 0;
+       hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
+       kvm_get_time_scale(NSEC_PER_SEC, tsc_khz * 1000LL,
+                          &hv_clock.tsc_shift,
+                          &hv_clock.tsc_to_system_mul);
+       return __pvclock_read_cycles(&hv_clock, guest_tsc);
+
+}
 /* Called within read_seqcount_begin/retry for kvm->pvclock_sc.  */
 static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
 {
        struct kvm_arch *ka = &kvm->arch;
-       struct pvclock_vcpu_time_info hv_clock;
 
        /* both __this_cpu_read() and rdtsc() should be on the same cpu */
        get_cpu();
@@ -3127,12 +3170,7 @@ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
                data->host_tsc = rdtsc();
 
                data->flags |= KVM_CLOCK_TSC_STABLE;
-               hv_clock.tsc_timestamp = ka->master_cycle_now;
-               hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
-               kvm_get_time_scale(NSEC_PER_SEC, get_cpu_tsc_khz() * 1000LL,
-                                  &hv_clock.tsc_shift,
-                                  &hv_clock.tsc_to_system_mul);
-               data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc);
+               data->clock = __kvmclock_from_tsc(kvm, data->host_tsc);
        } else {
                data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset;
        }