[Clock,Synchronization,2/4] Keep TSC synchronized across host suspend

Message ID 1293601100-32109-3-git-send-email-zamsden@redhat.com (mailing list archive)
State New, archived

Commit Message

Zachary Amsden Dec. 29, 2010, 5:38 a.m. UTC

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9e6fe39..63a82b0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -386,6 +386,7 @@  struct kvm_vcpu_arch {
 	u64 last_kernel_ns;
 	u64 last_tsc_nsec;
 	u64 last_tsc_write;
+	u64 tsc_offset_adjustment;
 	bool tsc_catchup;
 
 	bool nmi_pending;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b9118f4..b509c01 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2042,12 +2042,20 @@  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	}
 
 	kvm_x86_ops->vcpu_load(vcpu, cpu);
+
+	/* Apply any externally detected TSC adjustments (due to suspend) */
+	if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
+		kvm_x86_ops->adjust_tsc_offset(vcpu,
+			vcpu->arch.tsc_offset_adjustment);
+		vcpu->arch.tsc_offset_adjustment = 0;
+		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+	}
 	if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
 		/* Make sure TSC doesn't go backwards */
 		s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
 				native_read_tsc() - vcpu->arch.last_host_tsc;
 		if (tsc_delta < 0)
-			mark_tsc_unstable("KVM discovered backwards TSC");
+			WARN_ON(!check_tsc_unstable());
 		if (check_tsc_unstable()) {
 			kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
 			vcpu->arch.tsc_catchup = 1;
@@ -5778,14 +5786,60 @@  int kvm_arch_hardware_enable(void *garbage)
 {
 	struct kvm *kvm;
 	struct kvm_vcpu *vcpu;
-	int i;
+	int i, ret;
+	u64 local_tsc;
+	u64 max_tsc = 0;
+	bool stable, backwards_tsc = false;
 
 	kvm_shared_msr_cpu_online();
-	list_for_each_entry(kvm, &vm_list, vm_list)
-		kvm_for_each_vcpu(i, vcpu, kvm)
-			if (vcpu->cpu == smp_processor_id())
+	local_tsc = native_read_tsc();
+	stable = !check_tsc_unstable();
+	ret = kvm_x86_ops->hardware_enable(garbage);
+	if (ret)
+		return ret;
+
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			if (!stable && vcpu->cpu == smp_processor_id())
 				kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
-	return kvm_x86_ops->hardware_enable(garbage);
+			if (stable && vcpu->arch.last_host_tsc > local_tsc) {
+				backwards_tsc = true;
+				if (vcpu->arch.last_host_tsc > max_tsc)
+					max_tsc = vcpu->arch.last_host_tsc;
+			}
+		}
+	}
+
+	/*
+	 * Sometimes, reliable TSCs go backwards.  This happens
+	 * on platforms that reset TSC during suspend or hibernate
+	 * actions, but maintain synchronization.  We must compensate.
+	 * Unfortunately, we can't bring the TSCs fully up to date
+	 * with real time.  The reason is that we aren't yet far
+	 * enough into CPU bringup that we know how much real time
+	 * has actually elapsed; our helper function, get_kernel_ns()
+	 * will be using boot variables that haven't been updated yet.
+	 * We simply find the maximum observed TSC above, then record
+	 * the adjustment to TSC in each VCPU.  When the VCPU later
+	 * gets loaded, the adjustment will be applied.  Note that we
+	 * accumulate adjustments, in case multiple suspend cycles
+	 * happen before the VCPU gets a chance to run again.
+	 *
+	 * Note that unreliable TSCs will be compensated already by
+	 * the logic in vcpu_load, which sets the TSC to catchup mode.
+	 * This will catch up all VCPUs to real time, but cannot
+	 * guarantee synchronization.
+	 */
+	if (backwards_tsc) {
+		u64 delta_cyc = max_tsc - local_tsc;
+		list_for_each_entry(kvm, &vm_list, vm_list)
+			kvm_for_each_vcpu(i, vcpu, kvm) {
+				vcpu->arch.tsc_offset_adjustment += delta_cyc;
+				vcpu->arch.last_host_tsc = 0;
+			}
+	}
+
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
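
As a reading aid, here is a minimal standalone sketch of the compensation
scheme the comment block in kvm_arch_hardware_enable() describes. It is not
part of the patch: struct toy_vcpu, compensate_backwards_tsc() and
apply_pending_adjustment() are hypothetical stand-ins for the real per-VCPU
state and for the logic added to kvm_arch_hardware_enable() and
kvm_arch_vcpu_load() above.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the per-VCPU fields touched by the patch. */
struct toy_vcpu {
        uint64_t last_host_tsc;         /* host TSC seen when the VCPU was last put */
        uint64_t tsc_offset_adjustment; /* correction pending until the next load */
        int64_t  tsc_offset;            /* stand-in for the guest TSC offset */
};

/*
 * Models the new hardware-enable logic: if the TSC is considered stable
 * but the freshly read host TSC is below a value some VCPU recorded
 * before suspend, the TSC was reset across suspend.  Record the largest
 * observed gap as a pending adjustment for every VCPU.
 */
static void compensate_backwards_tsc(struct toy_vcpu *vcpus, int nr_vcpus,
                                     uint64_t local_tsc, bool tsc_stable)
{
        uint64_t max_tsc = 0;
        bool backwards = false;
        int i;

        if (!tsc_stable)
                return; /* unstable TSCs are handled by catchup mode instead */

        for (i = 0; i < nr_vcpus; i++) {
                if (vcpus[i].last_host_tsc > local_tsc) {
                        backwards = true;
                        if (vcpus[i].last_host_tsc > max_tsc)
                                max_tsc = vcpus[i].last_host_tsc;
                }
        }

        if (!backwards)
                return;

        for (i = 0; i < nr_vcpus; i++) {
                /* Accumulate, in case of several suspends before the VCPU runs. */
                vcpus[i].tsc_offset_adjustment += max_tsc - local_tsc;
                vcpus[i].last_host_tsc = 0;
        }
}

/*
 * Models the new vcpu_load hunk: fold any pending adjustment into the
 * guest TSC offset, then clear it.  (The real code does this through
 * kvm_x86_ops->adjust_tsc_offset() and also raises KVM_REQ_CLOCK_UPDATE.)
 */
static void apply_pending_adjustment(struct toy_vcpu *vcpu)
{
        if (vcpu->tsc_offset_adjustment) {
                vcpu->tsc_offset += (int64_t)vcpu->tsc_offset_adjustment;
                vcpu->tsc_offset_adjustment = 0;
        }
}

int main(void)
{
        /* Two VCPUs that last ran when the host TSC was around 10^9 cycles. */
        struct toy_vcpu vcpus[2] = {
                { .last_host_tsc = 1000000000ull },
                { .last_host_tsc =  999500000ull },
        };

        /* After resume the host TSC restarts from a much smaller value. */
        compensate_backwards_tsc(vcpus, 2, 5000000ull, true);
        apply_pending_adjustment(&vcpus[0]);

        printf("vcpu0 offset correction: %lld cycles\n",
               (long long)vcpus[0].tsc_offset);
        return 0;
}

The deferred application mirrors the reasoning in the patch comment: during
early resume the boot-time clock variables are not yet updated, so the delta
cannot be converted to real time on the spot.  Accumulating one common
delta_cyc and applying it at the next vcpu_load shifts all VCPUs by the same
amount, which is what preserves cross-VCPU TSC synchronization.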