@@ -951,6 +951,14 @@ static void __update_vcpu_system_time(struct vcpu *v, int force)
_u.tsc_timestamp = tsc_stamp;
_u.system_time = t->stamp.local_stime;
+ /*
+ * Domains are expected to cope with this bit changing, checking it on
+ * every pvclock read to decide whether they can rely solely on this tuple
+ * or need further monotonicity checks against other vcpus.
+ */
+ if ( clocksource_is_tsc() )
+ _u.flags |= XEN_PVCLOCK_TSC_STABLE_BIT;
+
if ( is_hvm_domain(d) )
_u.tsc_timestamp += v->arch.hvm_vcpu.cache_tsc_offset;
@@ -1409,6 +1417,22 @@ static void time_calibration_std_rendezvous(void *_r)
time_calibration_rendezvous_tail(r);
}
+/*
+ * Calibration rendezvous for a TSC clocksource with no CPU hotplug: no
+ * cross-CPU handshake, each CPU just adopts the master's stime/TSC stamp.
+ */
+static void time_calibration_nop_rendezvous(void *rv)
+{
+    struct cpu_time_stamp *stamp = &this_cpu(cpu_calibration);
+    const struct calibration_rendezvous *rdv = rv;
+
+    stamp->master_stime = rdv->master_stime;
+    stamp->local_stime = rdv->master_stime;
+    stamp->local_tsc = rdv->master_tsc_stamp;
+
+    raise_softirq(TIME_CALIBRATE_SOFTIRQ);
+}
+
static void (*time_calibration_rendezvous_fn)(void *) =
time_calibration_std_rendezvous;
@@ -1418,6 +1442,13 @@ static void time_calibration(void *unused)
.semaphore = ATOMIC_INIT(0)
};
+ if ( clocksource_is_tsc() )
+ {
+ local_irq_disable();
+ r.master_stime = read_platform_stime(&r.master_tsc_stamp);
+ local_irq_enable();
+ }
+
cpumask_copy(&r.cpu_calibration_map, &cpu_online_map);
/* @wait=1 because we must wait for all cpus before freeing @r. */
@@ -1587,6 +1618,13 @@ static int __init verify_tsc_reliability(void)
*/
on_selected_cpus(&cpu_online_map, reset_percpu_time, NULL, 1);
+ /*
+ * We won't do CPU Hotplug and TSC clocksource is being used which
+ * means we have a reliable TSC, plus we don't sync with any other
+ * clocksource so no need for rendezvous.
+ */
+ time_calibration_rendezvous_fn = time_calibration_nop_rendezvous;
+
/* Finish platform timer switch. */
try_platform_timer_tail();
This patch proposes relying on host TSC synchronization and passing it
through to the guest when running on a TSC-safe platform. In
time_calibration we retrieve the platform time in ns together with the
counter value read by the clocksource that was used to compute system time.
We introduce a new rendezvous function which doesn't require synchronization
between master and slave CPUs: it just reads the calibration_rendezvous
struct and writes the stime and stamp into the cpu_calibration struct, to be
used later on. We can guarantee that, on a platform with a constant and
reliable TSC, the time read on vcpu B right after vcpu A is bigger,
independently of the vCPU calibration error. Since pvclock time infos are
monotonic as seen by any vCPU, set PVCLOCK_TSC_STABLE_BIT, which then enables
usage of the VDSO on Linux. IIUC, this is similar to how it's implemented on
KVM. Also add a comment regarding this bit changing and that guests are
expected to check this bit on every read.

It should be noted that I've yet to see time going backwards in a long-running
test of 2 weeks (on a dual-socket machine), plus a few other tests I did on
older platforms, including migration.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>

Changes since v3:
 - Do not adjust time_calibration_rendezvous_tail for nop_rendezvous and
   instead set cpu_time_stamp directly in the rendezvous function.
 - Move CPU hotplug checks into patch 2.
 - Add a commit message and code comment about guests having to cope with
   this bit changing on hosts.
 - s/host_tsc_is_clocksource/clocksource_is_tsc

Changes since v2:
 - Add XEN_ prefix to pvclock flags.
 - Adapt time_calibration_rendezvous_tail to handle the case of setting
   master tsc/stime and use it for the nop_rendezvous.
 - Removed hotplug CPU option that was added in v1.
 - Prevent onlining of CPUs when clocksource is tsc.
 - Remove use_tsc_stable_bit, since clocksource is only used to seed
   values. So instead we test if hotplug is possible, and prevent
   clocksource=tsc from being used.
 - Remove 1st paragraph of commit message since the behaviour described
   no longer applies since b64438c.

Changes since v1:
 - Change approach to skip std_rendezvous by introducing a nop_rendezvous.
 - Change commit message reflecting the change above.
 - Use TSC_STABLE_BIT only if cpu hotplug isn't possible.
 - Add command line option to override it if no cpu hotplug is intended.
---
 xen/arch/x86/time.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)