diff mbox

[v2] kvmclock: Ensure time in migration never goes backward

Message ID 1400253321-9239-1-git-send-email-agraf@suse.de (mailing list archive)
State New, archived
Headers show

Commit Message

Alexander Graf May 16, 2014, 3:15 p.m. UTC
When we migrate we ask the kernel about its current belief on what the guest
time would be. However, I've seen cases where the kvmclock guest structure
indicates a time more recent than the kvm returned time.

To make sure we never go backwards, calculate what the guest would have seen
as time at the point of migration and use that value instead of the kernel
returned one when it's more recent.

While the underlying bug is supposedly fixed on newer KVM versions, it doesn't
hurt to base the view of the kvmclock after migration on the same foundation
in host as well as guest.

Signed-off-by: Alexander Graf <agraf@suse.de>

---

v1 -> v2:

  - always use guest structure when available
---
 hw/i386/kvm/clock.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

Comments

Marcelo Tosatti May 18, 2014, 1:20 p.m. UTC | #1
On Fri, May 16, 2014 at 05:15:21PM +0200, Alexander Graf wrote:
> When we migrate we ask the kernel about its current belief on what the guest
> time would be. However, I've seen cases where the kvmclock guest structure
> indicates a time more recent than the kvm returned time.
> 
> To make sure we never go backwards, calculate what the guest would have seen
> as time at the point of migration and use that value instead of the kernel
> returned one when it's more recent.
> 
> While the underlying bug is supposedly fixed on newer KVM versions, it doesn't
> hurt to base the view of the kvmclock after migration on the same foundation
> in host as well as guest.

Remove this last phrase from the changelog please, the underlying bug is
not fixed on newer KVM versions.

Otherwise

Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini May 19, 2014, 11:31 a.m. UTC | #2
Il 18/05/2014 15:20, Marcelo Tosatti ha scritto:
> Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>

Thanks Marcelo, applying to uq/master.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf May 21, 2014, 10:03 a.m. UTC | #3
On 19.05.14 13:31, Paolo Bonzini wrote:
> Il 18/05/2014 15:20, Marcelo Tosatti ha scritto:
>> Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
>
> Thanks Marcelo, applying to uq/master.

Same here, please also CC to stable :).


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini July 15, 2014, 7:44 p.m. UTC | #4
Il 16/05/2014 17:15, Alexander Graf ha scritto:
> When we migrate we ask the kernel about its current belief on what the guest
> time would be. However, I've seen cases where the kvmclock guest structure
> indicates a time more recent than the kvm returned time.
>
> To make sure we never go backwards, calculate what the guest would have seen
> as time at the point of migration and use that value instead of the kernel
> returned one when it's more recent.
>
> While the underlying bug is supposedly fixed on newer KVM versions, it doesn't
> hurt to base the view of the kvmclock after migration on the same foundation
> in host as well as guest.
>
> Signed-off-by: Alexander Graf <agraf@suse.de>
>
> ---
>
> v1 -> v2:
>
>   - always use guest structure when available
> ---
>  hw/i386/kvm/clock.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 48 insertions(+)
>
> diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
> index 892aa02..6f4ed28a 100644
> --- a/hw/i386/kvm/clock.c
> +++ b/hw/i386/kvm/clock.c
> @@ -14,6 +14,7 @@
>   */
>
>  #include "qemu-common.h"
> +#include "qemu/host-utils.h"
>  #include "sysemu/sysemu.h"
>  #include "sysemu/kvm.h"
>  #include "hw/sysbus.h"
> @@ -34,6 +35,47 @@ typedef struct KVMClockState {
>      bool clock_valid;
>  } KVMClockState;
>
> +struct pvclock_vcpu_time_info {
> +    uint32_t   version;
> +    uint32_t   pad0;
> +    uint64_t   tsc_timestamp;
> +    uint64_t   system_time;
> +    uint32_t   tsc_to_system_mul;
> +    int8_t     tsc_shift;
> +    uint8_t    flags;
> +    uint8_t    pad[2];
> +} __attribute__((__packed__)); /* 32 bytes */
> +
> +static uint64_t kvmclock_current_nsec(KVMClockState *s)
> +{
> +    CPUState *cpu = first_cpu;
> +    CPUX86State *env = cpu->env_ptr;
> +    hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL;
> +    uint64_t migration_tsc = env->tsc;
> +    struct pvclock_vcpu_time_info time;
> +    uint64_t delta;
> +    uint64_t nsec_lo;
> +    uint64_t nsec_hi;
> +    uint64_t nsec;
> +
> +    if (!(env->system_time_msr & 1ULL)) {
> +        /* KVM clock not active */
> +        return 0;
> +    }
> +
> +    cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time));
> +
> +    delta = migration_tsc - time.tsc_timestamp;
> +    if (time.tsc_shift < 0) {
> +        delta >>= -time.tsc_shift;
> +    } else {
> +        delta <<= time.tsc_shift;
> +    }
> +
> +    mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul);
> +    nsec = (nsec_lo >> 32) | (nsec_hi << 32);
> +    return nsec + time.system_time;
> +}
>
>  static void kvmclock_vm_state_change(void *opaque, int running,
>                                       RunState state)
> @@ -45,9 +87,15 @@ static void kvmclock_vm_state_change(void *opaque, int running,
>
>      if (running) {
>          struct kvm_clock_data data;
> +        uint64_t time_at_migration = kvmclock_current_nsec(s);
>
>          s->clock_valid = false;
>
> +	/* We can't rely on the migrated clock value, just discard it */
> +	if (time_at_migration) {
> +	        s->clock = time_at_migration;
> +	}
> +
>          data.clock = s->clock;
>          data.flags = 0;
>          ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
>

I'm going to revert this patch for 2.1-rc3, since the dependent patch 
"kvmclock: Ensure proper env->tsc value for kvmclock_current_nsec 
calculation" causes a hang during migration.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index 892aa02..6f4ed28a 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -14,6 +14,7 @@ 
  */
 
 #include "qemu-common.h"
+#include "qemu/host-utils.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
 #include "hw/sysbus.h"
@@ -34,6 +35,47 @@  typedef struct KVMClockState {
     bool clock_valid;
 } KVMClockState;
 
+struct pvclock_vcpu_time_info { /* record read from guest memory below */
+    uint32_t   version;
+    uint32_t   pad0;
+    uint64_t   tsc_timestamp; /* TSC value the delta is measured from */
+    uint64_t   system_time; /* base time added to the scaled TSC delta */
+    uint32_t   tsc_to_system_mul; /* multiplier; product is taken >> 32 */
+    int8_t     tsc_shift; /* < 0: shift delta right, else shift left */
+    uint8_t    flags;
+    uint8_t    pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+/* kvmclock value (nsec) derived from first_cpu's TSC and guest structure. */
+static uint64_t kvmclock_current_nsec(KVMClockState *s)
+{
+    CPUState *cpu = first_cpu;
+    CPUX86State *env = cpu->env_ptr;
+    hwaddr kvmclock_struct_pa = env->system_time_msr & ~1ULL;
+    uint64_t migration_tsc = env->tsc;
+    struct pvclock_vcpu_time_info time;
+    uint64_t delta;
+    uint64_t nsec_lo;
+    uint64_t nsec_hi;
+    uint64_t nsec;
+
+    if (!(env->system_time_msr & 1ULL)) {
+        /* KVM clock not active (MSR bit 0 clear); caller skips a 0 result */
+        return 0;
+    }
+    /* Address is the MSR minus bit 0. NOTE(review): version is not checked */
+    cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time));
+    /* NOTE(review): unsigned; wraps if tsc_timestamp > migration_tsc */
+    delta = migration_tsc - time.tsc_timestamp;
+    if (time.tsc_shift < 0) {
+        delta >>= -time.tsc_shift;
+    } else {
+        delta <<= time.tsc_shift;
+    }
+    /* Scale the delta: take the nsec_hi:nsec_lo product shifted right by 32 */
+    mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul);
+    nsec = (nsec_lo >> 32) | (nsec_hi << 32);
+    return nsec + time.system_time;
+}
 
 static void kvmclock_vm_state_change(void *opaque, int running,
                                      RunState state)
@@ -45,9 +87,15 @@  static void kvmclock_vm_state_change(void *opaque, int running,
 
     if (running) {
         struct kvm_clock_data data;
+        uint64_t time_at_migration = kvmclock_current_nsec(s);
 
         s->clock_valid = false;
 
+	/* We can't rely on the migrated clock value, just discard it */
+	if (time_at_migration) {
+	        s->clock = time_at_migration;
+	}
+
         data.clock = s->clock;
         data.flags = 0;
         ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);