diff mbox

[RFC:,tsc,virtualization,17/20] Periodically measure TSC skew

Message ID 1260850127-9766-18-git-send-email-zamsden@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zachary Amsden Dec. 15, 2009, 4:08 a.m. UTC
None
diff mbox

Patch

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 792c895..3a854ec 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -750,9 +750,10 @@  struct cpu_tsc_vars
 	u64			last_ref;
 };
 static DEFINE_PER_CPU(struct cpu_tsc_vars, cpu_tsc_vars);
-
 static int tsc_base_cpu = -1;
 static unsigned long ref_tsc_khz;
+static u64 tsc_drift;
+static struct timer_list resync_timer;
 
 static inline int cpu_is_tsc_synchronized(int cpu)
 {
@@ -935,6 +936,7 @@  static void sync_tsc_helper(int measure_cpu, s64 *delta, atomic_t *ready)
  * Average and trim the samples of any outliers; we use > 2 x sigma
  */
 static u64 tsc_deviation;
+static u64 tsc_skew;
 static s64 average_samples(s64 *samples, unsigned num_samples)
 {
 	unsigned i, j;
@@ -993,10 +995,24 @@  static void kvm_sync_tsc(void *cpup)
 	s64 *delta1, *delta2;
 	static atomic_t ready ____cacheline_aligned = ATOMIC_INIT(1);
 	struct cpu_tsc_vars *cv = &per_cpu(cpu_tsc_vars, new_cpu);
+	static u64 old_base;
+	static s64 old_offset;
+	static unsigned long old_multiplier;
+	static unsigned int old_shift;
 
 	BUG_ON(tsc_base_cpu == -1);
 	local_irq_save(flags);
+
+	/*
+	 * First, the new CPU may be just coming up to sync or might have
+	 * changed frequency, which means the measurement base must be
+	 * adjusted.  If not, we can use it to compute a skew estimate.
+	 */
 	if (raw_smp_processor_id() == new_cpu) {
+		old_multiplier = cv->tsc_multiplier;
+		old_shift = cv->tsc_shift;
+		old_base = cv->tsc_measure_base;
+		old_offset = cv->tsc_offset;
 		cv->tsc_measure_base = native_read_tsc();
 		cv->tsc_offset = 0;
 		compute_best_multiplier(ref_tsc_khz, cv->tsc_khz,
@@ -1005,10 +1021,12 @@  static void kvm_sync_tsc(void *cpup)
 			 " tsc_base_cpu = %d\n", __func__, new_cpu, cv->tsc_khz,
 			 cv->tsc_measure_base, tsc_base_cpu);
 	}
+
 	delta1 = per_cpu(delta_array, tsc_base_cpu).delta;
 	delta2 = per_cpu(delta_array, new_cpu).delta;
 	sync_tsc_helper(tsc_base_cpu, delta1, &ready);
 	sync_tsc_helper(new_cpu, delta2, &ready);
+
 	if (raw_smp_processor_id() == new_cpu) {
 		s64 accumulator = 0;
 
@@ -1024,8 +1042,40 @@  static void kvm_sync_tsc(void *cpup)
 		accumulator += average_samples(&delta1[2], SYNC_TRIES-3);
 		accumulator -= average_samples(&delta2[2], SYNC_TRIES-3);
 		accumulator /= 2;
-
 		cv->tsc_offset = accumulator;
+
+		/*
+		 * Skew can be computed over a constant multiplier as follows:
+		 *
+		 * ref_new = (tsc_new - base_new) * mult + off_new
+		 * ref_old = (tsc_old - base_old) * mult + off_old
+		 *
+		 * skew = ref_new - (ref_old + delta_ref)
+		 *
+		 * skew = off_new - off_old + mult(tsc_new - tsc_old)
+		 *                - mult(base_new - base_old) - delta_ref
+		 *
+		 * The tsc_old / tsc_new values are not recoverable, but
+		 * observe that mult(tsc_new - tsc_old) == delta_ref, so
+		 *
+		 *    skew = delta(off) - mult(delta base)
+		 *
+		 * To avoid problems with signed computation, we multiply
+		 * unsigned numbers first before switching to signed arithmetic
+		 */
+		if (old_multiplier == cv->tsc_multiplier &&
+		    old_shift == cv->tsc_shift) {
+			u64 sbo = old_base, sbn = cv->tsc_measure_base;
+			s64 skew;
+			sbo = mult_precise(sbo, old_multiplier, old_shift);
+			sbn = mult_precise(sbn, old_multiplier, old_shift);
+			skew = cv->tsc_offset - old_offset + (sbo - sbn);
+			if (skew < 0)
+				skew = -skew;
+			if (skew > tsc_skew)
+				tsc_skew = skew;
+		}
+
 		smp_wmb();
 		++cv->tsc_generation;
 		atomic_set(&cv->tsc_synchronized, 1);
@@ -3611,6 +3661,8 @@  static long resync(void *unused)
 	struct cpu_tsc_vars *cv = &__get_cpu_var(cpu_tsc_vars);
 	u64 tsc = 0;
 	int cpu;
+	static unsigned long jif_old;
+	unsigned long jif_delta;
 
 	/*
 	 * First, make sure we are on the right CPU; between when the work got
@@ -3643,17 +3695,28 @@  static long resync(void *unused)
 	cv->tsc_generation++; // XXX needed? */
 	compute_best_multiplier(ref_tsc_khz, cv->tsc_khz, &cv->tsc_multiplier,
 				&cv->tsc_shift);
+	tsc_skew = 0;
 	atomic_set(&cv->tsc_synchronized, 1);
+	smp_wmb();
 
 	for_each_online_cpu(cpu)
 		kvm_do_sync_tsc(cpu);
 
+	for_each_online_cpu(cpu)
+		while (!cpu_is_tsc_synchronized(cpu))
+			cpu_relax();
+
+	smp_rmb();
+	jif_delta = jiffies - jif_old;
+	pr_debug("max TSC skew now estimated at %llu over %lu jiffies\n",
+		 tsc_skew, jif_delta);
+	jif_old = jiffies;
+	mod_timer(&resync_timer, jiffies + HZ * 50);
 	put_cpu();
 	return 0;
 }
 
 static DEFINE_MUTEX(resync_lock);
-
 static void resync_all(void)
 {
 	mutex_lock(&resync_lock);
@@ -3662,6 +3725,18 @@  static void resync_all(void)
 	mutex_unlock(&resync_lock);
 }
 
+/* Work body: process context, so resync_all() may sleep on resync_lock. */
+static void resync_work_fn(struct work_struct *work)
+{
+	resync_all();
+}
+static DECLARE_WORK(resync_work, resync_work_fn);
+/* Timer callback: queue the once-initialized work; never re-INIT it here. */
+static void resync_callout(unsigned long unused)
+{
+	schedule_work(&resync_work);
+}
+
 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 				     void *data)
 {
@@ -3836,6 +3911,15 @@  static void kvm_timer_init(void)
 		for_each_possible_cpu(cpu)
 			per_cpu(cpu_tsc_vars, cpu).tsc_khz = tsc_khz;
 	}
+
+	/*
+	 * Now, pick a CPU to make the master and synchronize all other
+	 * CPUs to its clock.  Periodically check for drift as well.
+	 * Our initial drift estimate is 1 ppm / sec.
+	 */
+	tsc_drift = ref_tsc_khz / 1000;
+	init_timer(&resync_timer);
+	resync_timer.function = resync_callout;
 	tsc_base_cpu = get_cpu();
 	put_cpu();
 	resync_all();
@@ -3898,6 +3982,9 @@  void kvm_arch_exit(void)
 			pci_write_config_byte(*nb, 0x87, disabled_c1_ramp);
 	}
 #endif
+	mutex_lock(&resync_lock);
+	del_timer(&resync_timer);
+	mutex_unlock(&resync_lock);
 }
 
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)