diff mbox series

[2/2] hw/i386/kvm/clock.c: read kvmclock from guest memory if !correct_tsc_shift

Message ID 20230120011412.558538345@redhat.com (mailing list archive)
State New, archived
Headers show
Series read kvmclock from guest memory if !correct_tsc_shift | expand

Commit Message

Marcelo Tosatti Jan. 20, 2023, 1:11 a.m. UTC
Before kernel commit 78db6a5037965429c04d708281f35a6e5562d31b,
kvm_guest_time_update() would use vcpu->virtual_tsc_khz to calculate the
tsc_shift value in the vCPU's pvclock structure written to guest memory.

For those kernels, if vcpu->virtual_tsc_khz != tsc_khz (which can be the
case when guest state is restored via migration, or if the tsc-khz option
is passed to QEMU), and TSC scaling is not enabled (which happens if the
difference between the frequency requested via KVM_SET_TSC_KHZ and the
host TSC KHZ is smaller than 250ppm), then there can be a difference
between what KVM_GET_CLOCK would return and what the guest reads as the
kvmclock value.

The effect is that the guest sees a jump in the kvmclock value
(either forwards or backwards) in such a case.

To fix incoming migration from pre-78db6a5037965 hosts, read the
kvmclock value from guest memory, unless the
KVM_CLOCK_CORRECT_TSC_SHIFT bit indicates that the value retrieved
by KVM_GET_CLOCK on the source is safe to use.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
diff mbox series

Patch

Index: qemu/hw/i386/kvm/clock.c
===================================================================
--- qemu.orig/hw/i386/kvm/clock.c
+++ qemu/hw/i386/kvm/clock.c
@@ -50,6 +50,16 @@  struct KVMClockState {
     /* whether the 'clock' value was obtained in a host with
      * reliable KVM_GET_CLOCK */
     bool clock_is_reliable;
+
+    /* whether machine type supports correct_tsc_shift */
+    bool mach_use_correct_tsc_shift;
+
+    /*
+     * whether the 'clock' value was obtained in a host
+     * that computes correct tsc_shift field (the one
+     * written to guest memory)
+     */
+    bool clock_correct_tsc_shift;
 };
 
 struct pvclock_vcpu_time_info {
@@ -150,6 +160,8 @@  static void kvm_update_clock(KVMClockSta
      *               read from memory
      */
     s->clock_is_reliable = kvm_has_adjust_clock_stable();
+
+    s->clock_correct_tsc_shift = kvm_has_correct_tsc_shift();
 }
 
 static void do_kvmclock_ctrl(CPUState *cpu, run_on_cpu_data data)
@@ -176,7 +188,7 @@  static void kvmclock_vm_state_change(voi
          * If the host where s->clock was read did not support reliable
          * KVM_GET_CLOCK, read kvmclock value from memory.
          */
-        if (!s->clock_is_reliable) {
+        if (!s->clock_is_reliable || !s->clock_correct_tsc_shift) {
             uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
             /* We can't rely on the saved clock value, just discard it */
             if (pvclock_via_mem) {
@@ -252,14 +264,40 @@  static const VMStateDescription kvmclock
 };
 
 /*
+ * Sending clock_correct_tsc_shift=true means that the destination
+ * can use VMSTATE_UINT64(clock, KVMClockState) value,
+ * instead of reading from guest memory.
+ */
+static bool kvmclock_clock_correct_tsc_shift_needed(void *opaque)
+{
+    KVMClockState *s = opaque;
+
+    return s->mach_use_correct_tsc_shift;
+}
+
+static const VMStateDescription kvmclock_correct_tsc_shift = {
+    .name = "kvmclock/clock_correct_tsc_shift",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = kvmclock_clock_correct_tsc_shift_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(clock_correct_tsc_shift, KVMClockState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
  * When migrating, assume the source has an unreliable
- * KVM_GET_CLOCK unless told otherwise.
+ * KVM_GET_CLOCK (and computes tsc shift
+ * in guest memory using vcpu->virtual_tsc_khz),
+ * unless told otherwise.
  */
 static int kvmclock_pre_load(void *opaque)
 {
     KVMClockState *s = opaque;
 
     s->clock_is_reliable = false;
+    s->clock_correct_tsc_shift = false;
 
     return 0;
 }
@@ -301,6 +339,7 @@  static const VMStateDescription kvmclock
     },
     .subsections = (const VMStateDescription * []) {
         &kvmclock_reliable_get_clock,
+        &kvmclock_correct_tsc_shift,
         NULL
     }
 };
@@ -308,6 +347,8 @@  static const VMStateDescription kvmclock
 static Property kvmclock_properties[] = {
     DEFINE_PROP_BOOL("x-mach-use-reliable-get-clock", KVMClockState,
                       mach_use_reliable_get_clock, true),
+    DEFINE_PROP_BOOL("x-mach-use-correct-tsc-shift", KVMClockState,
+                      mach_use_correct_tsc_shift, true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
Index: qemu/target/i386/kvm/kvm.c
===================================================================
--- qemu.orig/target/i386/kvm/kvm.c
+++ qemu/target/i386/kvm/kvm.c
@@ -164,6 +164,13 @@  bool kvm_has_adjust_clock_stable(void)
     return (ret & KVM_CLOCK_TSC_STABLE);
 }
 
+bool kvm_has_correct_tsc_shift(void)
+{
+    int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK);
+
+    return ret & KVM_CLOCK_CORRECT_TSC_SHIFT;
+}
+
 bool kvm_has_adjust_clock(void)
 {
     return kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK);
Index: qemu/target/i386/kvm/kvm_i386.h
===================================================================
--- qemu.orig/target/i386/kvm/kvm_i386.h
+++ qemu/target/i386/kvm/kvm_i386.h
@@ -35,6 +35,7 @@ 
 bool kvm_has_smm(void);
 bool kvm_has_adjust_clock(void);
 bool kvm_has_adjust_clock_stable(void);
+bool kvm_has_correct_tsc_shift(void);
 bool kvm_has_exception_payload(void);
 void kvm_synchronize_all_tsc(void);
 void kvm_arch_reset_vcpu(X86CPU *cs);