[RFC: tsc virtualization 06/20] Make TSC reference stable across frequency changes

Message ID 1260850127-9766-7-git-send-email-zamsden@redhat.com (mailing list archive)
State New, archived

Commit Message

Zachary Amsden Dec. 15, 2009, 4:08 a.m. UTC
None
Patch

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4c4d2e0..144a820 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -741,9 +741,15 @@  static DEFINE_PER_CPU(unsigned long, cpu_tsc_multiplier);
 static DEFINE_PER_CPU(int, cpu_tsc_shift);
 static DEFINE_PER_CPU(s64, cpu_tsc_offset);
 static DEFINE_PER_CPU(u64, cpu_tsc_measure_base);
+static DEFINE_PER_CPU(atomic_t, cpu_tsc_synchronized);
 static int tsc_base_cpu = -1;
 static unsigned long ref_tsc_khz;
 
+static inline int cpu_is_tsc_synchronized(int cpu)
+{
+	return (atomic_read(&per_cpu(cpu_tsc_synchronized, cpu)) != 0);
+}
+
 static inline unsigned long div_precise(unsigned long hi, unsigned long lo,
 	unsigned long divisor, unsigned long *rptr)
 {
@@ -923,6 +929,7 @@  static void kvm_sync_tsc(void *cpup)
 			accumulator -= delta[i+SYNC_TRIES];
 		accumulator = accumulator / (SYNC_TRIES*2-12);
 		per_cpu(cpu_tsc_offset, new_cpu) = accumulator;
+		atomic_set(&per_cpu(cpu_tsc_synchronized, new_cpu), 1);
 		pr_debug("%s: OUT, cpu = %d, cpu_tsc_offset = %lld, cpu_tsc_multiplier=%ld, cpu_tsc_shift=%d\n", __func__, raw_smp_processor_id(), per_cpu(cpu_tsc_offset, new_cpu), per_cpu(cpu_tsc_multiplier, new_cpu), per_cpu(cpu_tsc_shift, new_cpu));
 	}
 	local_irq_restore(flags);
@@ -931,6 +938,11 @@  static void kvm_sync_tsc(void *cpup)
 static void kvm_do_sync_tsc(int cpu)
 {
 	spin_lock(&kvm_tsc_lock);
+
+	/* tsc_base_cpu can change without tsc_lock, so recheck */
+	if (unlikely(cpu == tsc_base_cpu))
+		goto out_unlock;
+
 	if (raw_smp_processor_id() != tsc_base_cpu) {
 		smp_call_function_single(tsc_base_cpu, kvm_sync_tsc,
 					 (void *)&cpu, 0);
@@ -940,6 +952,8 @@  static void kvm_do_sync_tsc(int cpu)
 		smp_call_function_single(tsc_base_cpu, kvm_sync_tsc,
 					 (void *)&cpu, 1);
 	}
+
+out_unlock:
 	spin_unlock(&kvm_tsc_lock);
 }
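
The recheck above closes a small window: the target cpu is chosen before kvm_tsc_lock is taken, and tsc_base_cpu can move in the meantime (presumably when the base cpu goes offline, handled elsewhere in the series). A cpu cannot complete both sides of the kvm_sync_tsc() handshake with itself, so syncing a cpu that has just become the base would wedge. A minimal sketch of the check-lock-recheck pattern, with hypothetical names standing in for tsc_base_cpu and kvm_tsc_lock:

static DEFINE_SPINLOCK(sync_lock);	/* stands in for kvm_tsc_lock */
static int base_cpu = -1;		/* stands in for tsc_base_cpu */

static void sync_one_cpu(int cpu)
{
	/* 'cpu != base_cpu' held when the caller chose 'cpu', but
	 * base_cpu may have moved before we acquired the lock. */
	spin_lock(&sync_lock);
	if (unlikely(cpu == base_cpu)) {
		spin_unlock(&sync_lock);
		return;		/* raced: nothing to measure against */
	}
	/* ... measure 'cpu' against 'base_cpu' here ... */
	spin_unlock(&sync_lock);
}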
 
@@ -1656,7 +1670,6 @@  out:
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	kvm_x86_ops->vcpu_load(vcpu, cpu);
-	BUG_ON(per_cpu(cpu_tsc_khz, cpu) == 0);
 	kvm_request_guest_time_update(vcpu);
 }
 
@@ -3461,11 +3474,46 @@  int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);
 
-static void bounce_off(void *info)
+static void evict(void *info)
 {
 	/* nothing */
 }
 
+static struct execute_work resync_work;
+static int work_scheduled;
+
+static void resync_user(struct work_struct *work)
+{
+	int cpu;
+
+	work_scheduled = 0;
+	for_each_online_cpu(cpu)
+		if (cpu != tsc_base_cpu)
+			kvm_do_sync_tsc(cpu);
+}
+
+static void resync(void *info)
+{
+	int cpu;
+	u64 tsc;
+
+	/* Fixup our own values to stay in sync with the reference */
+	cpu = raw_smp_processor_id();
+	tsc = compute_ref_tsc(cpu);
+	per_cpu(cpu_tsc_measure_base, cpu) = native_read_tsc();
+	per_cpu(cpu_tsc_offset, cpu) = tsc;
+	compute_best_multiplier(ref_tsc_khz, per_cpu(cpu_tsc_khz, cpu),
+			&per_cpu(cpu_tsc_multiplier, cpu),
+			&per_cpu(cpu_tsc_shift, cpu));
+	atomic_set(&per_cpu(cpu_tsc_synchronized, cpu), 1);
+
+	/* Then, get everybody else on board */
+	if (!work_scheduled) {
+		work_scheduled = 1;
+		execute_in_process_context(resync_user, &resync_work);
+	}
+}
+
 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 				     void *data)
 {
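
resync() is invoked through smp_call_function_single(), i.e. from IRQ context, so it can only fix up its own cpu; the fan-out to every other online cpu needs the spinlock and the waiting cross-calls in kvm_do_sync_tsc(), which is why it is punted to process context. execute_in_process_context(), as it existed in kernels of this vintage, runs the callback immediately when the caller is already in process context and otherwise queues it using the caller-supplied struct execute_work, so no allocation happens in the notifier path. A self-contained sketch of the pattern (names hypothetical):

#include <linux/workqueue.h>

static struct execute_work deferred;	/* caller-provided work storage */

static void heavy_work(struct work_struct *work)
{
	/* Process context: may sleep and may issue waiting
	 * smp_call_function_single() calls to other cpus. */
}

static void light_fixup_in_irq(void)
{
	/* Safe from IRQ context: either runs heavy_work() right
	 * away (if we are in process context) or schedules it on
	 * the shared workqueue via 'deferred'. */
	execute_in_process_context(heavy_work, &deferred);
}

The work_scheduled flag above merely collapses back-to-back resync requests into one queued work item; a second notifier call arriving before the work runs does not queue it twice.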
@@ -3474,39 +3522,68 @@  static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 	struct kvm_vcpu *vcpu;
 	int i, send_ipi = 0;
 
-	if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
-		return 0;
-	if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
-		return 0;
-	per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
-
-	spin_lock(&kvm_lock);
-	list_for_each_entry(kvm, &vm_list, vm_list) {
-		kvm_for_each_vcpu(i, vcpu, kvm) {
-			if (vcpu->cpu != freq->cpu)
-				continue;
-			if (!kvm_request_guest_time_update(vcpu))
-				continue;
-			if (vcpu->cpu != smp_processor_id())
-				send_ipi++;
+	/*
+	 * There is no way to know the precise TSC value at which the
+	 * CPU frequency actually changed, and these callbacks may happen at
+	 * different times, thus there will be drift in the reference TSC
+	 * clock across all CPUs.  To avoid this problem, we forcibly evict
+	 * any CPUs which may be running in hardware virtualization.
+	 *
+	 * We do this by setting cpu_tsc_synchronized to zero and polling for
+	 * this value to change when entering hardware virtualization.
+	 */
+	if (val == CPUFREQ_PRECHANGE) {
+		get_online_cpus();
+		atomic_set(&per_cpu(cpu_tsc_synchronized, freq->cpu), 0);
+		spin_lock(&kvm_lock);
+		list_for_each_entry(kvm, &vm_list, vm_list) {
+			kvm_for_each_vcpu(i, vcpu, kvm) {
+				if (vcpu->cpu != freq->cpu)
+					continue;
+				if (vcpu->cpu != smp_processor_id())
+					send_ipi++;
+				kvm_request_guest_time_update(vcpu);
+			}
+		}
+		spin_unlock(&kvm_lock);
+		if (send_ipi) {
+			/*
+			 * In case we update the frequency for another cpu
+			 * (which might be in guest context) send an interrupt
+			 * to kick the cpu out of guest context.  Next time
+			 * guest context is entered kvmclock will be updated,
+			 * so the guest will not see stale values.
+			 */
+			smp_call_function_single(freq->cpu, evict, NULL, 1);
 		}
-	}
-	spin_unlock(&kvm_lock);
 
-	if (freq->old < freq->new && send_ipi) {
 		/*
-		 * We upscale the frequency.  Must make the guest
-		 * doesn't see old kvmclock values while running with
-		 * the new frequency, otherwise we risk the guest sees
-		 * time go backwards.
-		 *
-		 * In case we update the frequency for another cpu
-		 * (which might be in guest context) send an interrupt
-		 * to kick the cpu out of guest context.  Next time
-		 * guest context is entered kvmclock will be updated,
-		 * so the guest will not see stale values.
+		 * The update of the frequency can't happen while a VM
+		 * is running, nor can it happen during init when we can
+		 * race against the init code setting the first known freq.
+	 * Just use the kvm_tsc_lock as a mutex.
 		 */
-		smp_call_function_single(freq->cpu, bounce_off, NULL, 1);
+		spin_lock(&kvm_tsc_lock);
+		per_cpu(cpu_tsc_khz, freq->cpu) = freq->new;
+		spin_unlock(&kvm_tsc_lock);
+
+		return 0;
+	}
+
+	/*
+	 * After the change, we must resynchronize affected CPUs.
+	 * If the master reference changes, that means all CPUs.
+	 * Note that some CPUs may be resynchronized twice, once
+	 * by the master, and once by themselves, depending on the
+	 * order in which this notifier is called; this is harmless.
+	 */
+	if (val == CPUFREQ_POSTCHANGE) {
+		if (freq->cpu == tsc_base_cpu)
+			smp_call_function_single(freq->cpu, resync, NULL, 1);
+		else if (cpu_is_tsc_synchronized(tsc_base_cpu))
+			/* Can't do this if base is not yet updated */
+			kvm_do_sync_tsc(freq->cpu);
+		put_online_cpus();
 	}
 	return 0;
 }
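
Taken together, the two arms of the notifier form a small two-phase protocol: PRECHANGE pins cpu hotplug, marks the affected cpu unsynchronized (so vcpus refuse to enter the guest there), kicks any vcpu currently in guest mode, and records freq->new under kvm_tsc_lock; POSTCHANGE re-derives the reference parameters and drops the hotplug pin. A condensed sketch of that shape, with hypothetical helper names standing in for the bodies above:

static int freq_notifier(struct notifier_block *nb,
			 unsigned long val, void *data)
{
	struct cpufreq_freqs *freq = data;

	if (val == CPUFREQ_PRECHANGE) {
		get_online_cpus();		/* held until POSTCHANGE */
		mark_unsynchronized(freq->cpu);	/* guest entry now bails */
		kick_out_of_guest(freq->cpu);	/* the evict IPI above */
		record_khz(freq->cpu, freq->new);
	} else if (val == CPUFREQ_POSTCHANGE) {
		resync_against_base(freq->cpu);	/* re-marks synchronized */
		put_online_cpus();		/* pairs with PRECHANGE */
	}
	return 0;
}

Holding the hotplug reference across both callbacks keeps the set of online cpus that resync_user() walks stable for the whole transition; this relies on cpufreq delivering PRECHANGE and POSTCHANGE in matched pairs.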
@@ -3536,8 +3613,7 @@  static int kvm_x86_cpu_hotplug(struct notifier_block *notifier,
 
 	case CPU_DYING:
 	case CPU_UP_CANCELED:
-		if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
-			per_cpu(cpu_tsc_khz, cpu) = 0;
+		atomic_set(&per_cpu(cpu_tsc_synchronized, cpu), 0);
 		break;
 
 	case CPU_ONLINE:
@@ -3590,6 +3666,12 @@  static void kvm_timer_init(void)
 		}
 	}
 #endif
+
+	/*
+	 * Register notifiers for both CPU add / remove and CPU frequency
+	 * change.  Must be careful to avoid subtle races, as frequency
+	 * can change at any time.
+	 */
 	register_cpu_notifier(&kvm_x86_cpu_notifier);
 	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
 		cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
@@ -3598,7 +3680,10 @@  static void kvm_timer_init(void)
 			unsigned long khz = cpufreq_get(cpu);
 			if (!khz)
 				khz = tsc_khz;
-			per_cpu(cpu_tsc_khz, cpu) = khz;
+			spin_lock(&kvm_tsc_lock);
+			if (!per_cpu(cpu_tsc_khz, cpu))
+				per_cpu(cpu_tsc_khz, cpu) = khz;
+			spin_unlock(&kvm_tsc_lock);
 		}
 	} else {
 		for_each_possible_cpu(cpu)
@@ -3606,12 +3691,7 @@  static void kvm_timer_init(void)
 	}
 	tsc_base_cpu = get_cpu();
 	ref_tsc_khz = per_cpu(cpu_tsc_khz, tsc_base_cpu);
-	per_cpu(cpu_tsc_multiplier, tsc_base_cpu) = 1;
-	per_cpu(cpu_tsc_shift, tsc_base_cpu) = 0;
-	per_cpu(cpu_tsc_offset, tsc_base_cpu) = 0;
-	for_each_online_cpu(cpu)
-		if (cpu != tsc_base_cpu)
-			kvm_do_sync_tsc(cpu);
+	resync(NULL);
 	put_cpu();
 }
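
The locked, write-once store under kvm_tsc_lock in kvm_timer_init() above exists because the cpufreq notifier is registered first: a frequency transition can complete between cpufreq_get() and the store, and an unconditional store of the stale reading would clobber the fresher value the notifier recorded. The interleaving being closed, annotated as a sketch:

unsigned long khz = cpufreq_get(cpu);	/* may read the pre-change value */
/* <-- CPUFREQ_PRECHANGE can fire here and store freq->new
 *     into cpu_tsc_khz under kvm_tsc_lock */
spin_lock(&kvm_tsc_lock);
if (!per_cpu(cpu_tsc_khz, cpu))		/* notifier won: keep its value */
	per_cpu(cpu_tsc_khz, cpu) = khz;
spin_unlock(&kvm_tsc_lock);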
 
@@ -4097,7 +4177,8 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	clear_bit(KVM_REQ_KICK, &vcpu->requests);
 	smp_mb__after_clear_bit();
 
-	if (vcpu->requests || need_resched() || signal_pending(current)) {
+	if (vcpu->requests || need_resched() || signal_pending(current) ||
+	    !cpu_is_tsc_synchronized(smp_processor_id())) {
 		set_bit(KVM_REQ_KICK, &vcpu->requests);
 		local_irq_enable();
 		preempt_enable();
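
This last hunk is only the bail-out half of the eviction: a cleared cpu_tsc_synchronized flag makes vcpu_enter_guest() back off before entering the guest. The notifier comment speaks of "polling for this value to change when entering hardware virtualization"; a hedged sketch of that consumer-side wait (its actual placement belongs elsewhere in the series, and it must sit somewhere it cannot block the very work item that sets the flag):

/* Sketch only: spin until the resync triggered by POSTCHANGE
 * has re-marked this cpu as synchronized. */
static void wait_until_tsc_synchronized(int cpu)
{
	while (!cpu_is_tsc_synchronized(cpu))
		cpu_relax();	/* set by kvm_sync_tsc() or resync() */
}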