diff mbox

[RFC] migration: kvmclock: save and load the PVCLOCK_TSC_UNSTABLE_BIT flag during migration

Message ID 1497274251-26448-1-git-send-email-jianjay.zhou@huawei.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zhoujian (jay) June 12, 2017, 1:30 p.m. UTC
A guest using kvmclock occasionally hangs when it is migrated from
a host with an unstable tsc to a host with a stable tsc.
Sometimes, the tsc timestamp saved on the source side is
backwards when the guest is stopped, and this value is transferred
to the destination side. The guest on the destination side believes
kvmclock is stable, so the protection mechanism against time
going backwards is not used.
The first time vcpu0 enters the guest on the destination
side to update the wall clock, the result of
pvclock_clocksource_read occasionally goes backwards,
which results in wall clock drift.

Signed-off-by: Jay Zhou <jianjay.zhou@huawei.com>
---
 hw/i386/kvm/clock.c         | 12 ++++++++++--
 linux-headers/asm-x86/kvm.h |  2 ++
 2 files changed, 12 insertions(+), 2 deletions(-)

Comments

Radim Krčmář June 12, 2017, 8:03 p.m. UTC | #1
2017-06-12 21:30+0800, Jay Zhou:
> Guest using kvmclock will be hanged when migrating from unstable
> tsc host to stable tsc host occasionally.
> Sometimes, the tsc timestamp saved at the source side will be
> backward when the guest stopped, and this value is transferred
> to the destination side. The guest at the destination side thought
> kvmclock is stable, so the protection mechanism against time
> going backwards is not used.
> When the first time vcpu0 enters the guest at the destination
> side to update the wall clock, the result of
> pvclock_clocksource_read will be backward occasionally,
> which results in the wall clock drift.
> 
> Signed-off-by: Jay Zhou <jianjay.zhou@huawei.com>
> ---
> diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
>  
>      if (running) {
>          struct kvm_clock_data data = {};
> +        uint8_t flags_at_migration;
>  
>          /*
>           * If the host where s->clock was read did not support reliable
>           * KVM_GET_CLOCK, read kvmclock value from memory.
>           */
>          if (!s->clock_is_reliable) {

'clock_is_reliable = true' on all newer KVMs (v4.9+), so I don't see a
reason to add a feature that can't be used.

> -            uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
> +            uint64_t pvclock_via_mem = kvmclock_current_nsec(s,
> +                                                    &flags_at_migration);

kvmclock_current_nsec() was introduced to work around the problem with
backward time, so we should understand why it returns a time that is
backwards if we want to do something for old KVMs ...

Is pvclock_via_mem < s->clock?

Thanks.

>              /* We can't rely on the saved clock value, just discard it */
>              if (pvclock_via_mem) {
>                  s->clock = pvclock_via_mem;
> +                /* whether src kvmclock has PVCLOCK_TSC_STABLE_BIT flag */
> +                if (!(flags_at_migration & PVCLOCK_TSC_STABLE_BIT)) {
> +                    data.flags |= MIGRATION_PVCLOCK_TSC_UNSTABLE_BIT;
> +                }
>              }
>          }
Zhoujian (jay) June 13, 2017, 1:41 p.m. UTC | #2
Hi Radim,

On 2017/6/13 4:03, Radim Krčmář wrote:
> 2017-06-12 21:30+0800, Jay Zhou:
>> Guest using kvmclock will be hanged when migrating from unstable
>> tsc host to stable tsc host occasionally.
>> Sometimes, the tsc timestamp saved at the source side will be
>> backward when the guest stopped, and this value is transferred
>> to the destination side. The guest at the destination side thought
>> kvmclock is stable, so the protection mechanism against time
>> going backwards is not used.
>> When the first time vcpu0 enters the guest at the destination
>> side to update the wall clock, the result of
>> pvclock_clocksource_read will be backward occasionally,
>> which results in the wall clock drift.
>>
>> Signed-off-by: Jay Zhou <jianjay.zhou@huawei.com>
>> ---
>> diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
>>
>>       if (running) {
>>           struct kvm_clock_data data = {};
>> +        uint8_t flags_at_migration;
>>
>>           /*
>>            * If the host where s->clock was read did not support reliable
>>            * KVM_GET_CLOCK, read kvmclock value from memory.
>>            */
>>           if (!s->clock_is_reliable) {
>
> 'clock_is_reliable = true' on all newer KVMs (v4.9+), so I don't see a
> reason to add a feature that can't be used.

After rereading the code, yes, I agree. Sorry for the noise.

>
>> -            uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
>> +            uint64_t pvclock_via_mem = kvmclock_current_nsec(s,
>> +                                                    &flags_at_migration);
>
> kvmclock_current_nsec() was introduced to work around the problem with
> backward time, so we should understand why it returns a time that is
> backwards if we want to do something for old KVMs ...
>
> Is pvclock_via_mem < s->clock?

Please see the other thread, where I replied to you about when the bug occurred.


Regards,
Jay Zhou
diff mbox

Patch

diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index 13eca37..fcc4f62 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -29,6 +29,7 @@ 
 
 #define TYPE_KVM_CLOCK "kvmclock"
 #define KVM_CLOCK(obj) OBJECT_CHECK(KVMClockState, (obj), TYPE_KVM_CLOCK)
+#define PVCLOCK_TSC_STABLE_BIT (1 << 0)
 
 typedef struct KVMClockState {
     /*< private >*/
@@ -57,7 +58,7 @@  struct pvclock_vcpu_time_info {
     uint8_t    pad[2];
 } __attribute__((__packed__)); /* 32 bytes */
 
-static uint64_t kvmclock_current_nsec(KVMClockState *s)
+static uint64_t kvmclock_current_nsec(KVMClockState *s, uint8_t *flags)
 {
     CPUState *cpu = first_cpu;
     CPUX86State *env = cpu->env_ptr;
@@ -77,6 +78,7 @@  static uint64_t kvmclock_current_nsec(KVMClockState *s)
     cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time));
 
     assert(time.tsc_timestamp <= migration_tsc);
+    *flags = time.flags;
     delta = migration_tsc - time.tsc_timestamp;
     if (time.tsc_shift < 0) {
         delta >>= -time.tsc_shift;
@@ -153,16 +155,22 @@  static void kvmclock_vm_state_change(void *opaque, int running,
 
     if (running) {
         struct kvm_clock_data data = {};
+        uint8_t flags_at_migration;
 
         /*
          * If the host where s->clock was read did not support reliable
          * KVM_GET_CLOCK, read kvmclock value from memory.
          */
         if (!s->clock_is_reliable) {
-            uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
+            uint64_t pvclock_via_mem = kvmclock_current_nsec(s,
+                                                    &flags_at_migration);
             /* We can't rely on the saved clock value, just discard it */
             if (pvclock_via_mem) {
                 s->clock = pvclock_via_mem;
+                /* whether src kvmclock has PVCLOCK_TSC_STABLE_BIT flag */
+                if (!(flags_at_migration & PVCLOCK_TSC_STABLE_BIT)) {
+                    data.flags |= MIGRATION_PVCLOCK_TSC_UNSTABLE_BIT;
+                }
             }
         }
 
diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index c2824d0..9faed3e 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -360,4 +360,6 @@  struct kvm_sync_regs {
 #define KVM_X86_QUIRK_LINT0_REENABLED	(1 << 0)
 #define KVM_X86_QUIRK_CD_NW_CLEARED	(1 << 1)
 
+#define MIGRATION_PVCLOCK_TSC_UNSTABLE_BIT (1 << 0)
+
 #endif /* _ASM_X86_KVM_H */