Message ID | 1461259285-28472-1-git-send-email-rkagan@virtuozzo.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Apr 21, 2016 at 08:21:25PM +0300, Roman Kagan wrote: > From: Paolo Bonzini <pbonzini@redhat.com> > > The test checks the relative precision of the reference TSC page > and the time reference counter. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > [Adjust types to pass printf type checks - Roman Kagan] > Signed-off-by: Roman Kagan <rkagan@virtuozzo.com> > --- > x86/Makefile.common | 3 + > x86/Makefile.x86_64 | 1 + > x86/hyperv.h | 9 +++ > x86/hyperv_clock.c | 197 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > x86/unittests.cfg | 5 ++ > 5 files changed, 215 insertions(+) > create mode 100644 x86/hyperv_clock.c > > diff --git a/x86/Makefile.common b/x86/Makefile.common > index ca80367..456e188 100644 > --- a/x86/Makefile.common > +++ b/x86/Makefile.common > @@ -123,6 +123,9 @@ $(TEST_DIR)/hyperv_synic.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \ > $(TEST_DIR)/hyperv_stimer.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \ > $(TEST_DIR)/hyperv_stimer.o > > +$(TEST_DIR)/hyperv_clock.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \ > + $(TEST_DIR)/hyperv_clock.o > + > $(TEST_DIR)/setjmp.elf: $(cstart.o) $(TEST_DIR)/setjmp.o > > arch_clean: > diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64 > index 6b7ccfb..56de0f0 100644 > --- a/x86/Makefile.x86_64 > +++ b/x86/Makefile.x86_64 > @@ -14,5 +14,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \ > tests += $(TEST_DIR)/svm.flat > tests += $(TEST_DIR)/vmx.flat > tests += $(TEST_DIR)/tscdeadline_latency.flat > +tests += $(TEST_DIR)/hyperv_clock.flat > > include $(TEST_DIR)/Makefile.common > diff --git a/x86/hyperv.h b/x86/hyperv.h > index faf931b..974df56 100644 > --- a/x86/hyperv.h > +++ b/x86/hyperv.h > @@ -12,6 +12,7 @@ > #define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3) > > #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 > +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 > > /* Define synthetic interrupt controller model specific registers. 
*/ > #define HV_X64_MSR_SCONTROL 0x40000080 > @@ -180,4 +181,12 @@ void synic_sint_create(int vcpu, int sint, int vec, bool auto_eoi); > void synic_sint_set(int vcpu, int sint); > void synic_sint_destroy(int vcpu, int sint); > > +struct hv_reference_tsc_page { > + uint32_t tsc_sequence; > + uint32_t res1; > + uint64_t tsc_scale; > + int64_t tsc_offset; > +}; > + > + > #endif > diff --git a/x86/hyperv_clock.c b/x86/hyperv_clock.c > new file mode 100644 > index 0000000..a346d99 > --- /dev/null > +++ b/x86/hyperv_clock.c > @@ -0,0 +1,197 @@ > +#include "libcflat.h" > +#include "smp.h" > +#include "atomic.h" > +#include "processor.h" > +#include "hyperv.h" > +#include "vm.h" > + > +#define MAX_CPU 4 > +#define TICKS_PER_SEC (1000000000 / 100) > + > +struct hv_reference_tsc_page *tsc_ref; > + > +/* > + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, > + * yielding a 64-bit result. > + */ > +static inline u64 scale_delta(u64 delta, u64 mul_frac) > +{ > + u64 product, unused; > + > + __asm__ ( > + "mul %3" > + : "=d" (product), "=a" (unused) : "1" (delta), "rm" ((u64)mul_frac) ); > + > + return product; > +} > + > +static u64 hvclock_tsc_to_ticks(struct hv_reference_tsc_page *shadow, u64 tsc) > +{ > + return scale_delta(tsc, shadow->tsc_scale) + shadow->tsc_offset; > +} > + > +/* > + * Reads a consistent set of time-base values from hypervisor, > + * into a shadow data area. 
> + */ > +static void hvclock_get_time_values(struct hv_reference_tsc_page *shadow, > + struct hv_reference_tsc_page *page) > +{ > + int seq; > + do { > + seq = page->tsc_sequence; > + rmb(); /* fetch version before data */ > + *shadow = *page; > + rmb(); /* test version after fetching data */ > + } while (shadow->tsc_sequence != seq); > +} > + > +u64 tsc_ref_read(void) > +{ > + struct hv_reference_tsc_page shadow; > + > + hvclock_get_time_values(&shadow, tsc_ref); > + return hvclock_tsc_to_ticks(&shadow, rdtsc()); > +} > + > +atomic_t cpus_left; > +bool ok[MAX_CPU]; > +u64 loops[MAX_CPU]; > + > +static void tsc_ref_test(void *data) > +{ > + int i = smp_id(); > + unsigned long long t = rdmsr(HV_X64_MSR_TIME_REF_COUNT); > + unsigned long long end = t + 3 * TICKS_PER_SEC; > + > + ok[i] = true; > + do { > + u64 now = tsc_ref_read(); > + if (now < t) { > + printf("warp on CPU %d!\n", smp_id()); > + ok[i] = false; > + break; > + } > + t = now; > + } while(t < end); > + > + barrier(); *** > + if (t >= end) { > + long long ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT); > + if (i == 0) > + printf("Time reference MSR drift: %lld\n\n", ref - end); > + ok[i] &= (ref - end) > -5 && (ref - end) < 5; This is prone to fail: guest can be scheduled at "***" above and test will fail. The [-5,5] comes from the standard? What the standard dictates? -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, May 23, 2016 at 06:55:06PM -0300, Marcelo Tosatti wrote: > On Thu, Apr 21, 2016 at 08:21:25PM +0300, Roman Kagan wrote: > > From: Paolo Bonzini <pbonzini@redhat.com> > > > > The test checks the relative precision of the reference TSC page > > and the time reference counter. > > > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > > [Adjust types to pass printf type checks - Roman Kagan] > > Signed-off-by: Roman Kagan <rkagan@virtuozzo.com> > > --- > > x86/Makefile.common | 3 + > > x86/Makefile.x86_64 | 1 + > > x86/hyperv.h | 9 +++ > > x86/hyperv_clock.c | 197 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > > x86/unittests.cfg | 5 ++ > > 5 files changed, 215 insertions(+) > > create mode 100644 x86/hyperv_clock.c > > > > diff --git a/x86/Makefile.common b/x86/Makefile.common > > index ca80367..456e188 100644 > > --- a/x86/Makefile.common > > +++ b/x86/Makefile.common > > @@ -123,6 +123,9 @@ $(TEST_DIR)/hyperv_synic.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \ > > $(TEST_DIR)/hyperv_stimer.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \ > > $(TEST_DIR)/hyperv_stimer.o > > > > +$(TEST_DIR)/hyperv_clock.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \ > > + $(TEST_DIR)/hyperv_clock.o > > + > > $(TEST_DIR)/setjmp.elf: $(cstart.o) $(TEST_DIR)/setjmp.o > > > > arch_clean: > > diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64 > > index 6b7ccfb..56de0f0 100644 > > --- a/x86/Makefile.x86_64 > > +++ b/x86/Makefile.x86_64 > > @@ -14,5 +14,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \ > > tests += $(TEST_DIR)/svm.flat > > tests += $(TEST_DIR)/vmx.flat > > tests += $(TEST_DIR)/tscdeadline_latency.flat > > +tests += $(TEST_DIR)/hyperv_clock.flat > > > > include $(TEST_DIR)/Makefile.common > > diff --git a/x86/hyperv.h b/x86/hyperv.h > > index faf931b..974df56 100644 > > --- a/x86/hyperv.h > > +++ b/x86/hyperv.h > > @@ -12,6 +12,7 @@ > > #define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3) > > > > #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 > > +#define 
HV_X64_MSR_REFERENCE_TSC 0x40000021 > > > > /* Define synthetic interrupt controller model specific registers. */ > > #define HV_X64_MSR_SCONTROL 0x40000080 > > @@ -180,4 +181,12 @@ void synic_sint_create(int vcpu, int sint, int vec, bool auto_eoi); > > void synic_sint_set(int vcpu, int sint); > > void synic_sint_destroy(int vcpu, int sint); > > > > +struct hv_reference_tsc_page { > > + uint32_t tsc_sequence; > > + uint32_t res1; > > + uint64_t tsc_scale; > > + int64_t tsc_offset; > > +}; > > + > > + > > #endif > > diff --git a/x86/hyperv_clock.c b/x86/hyperv_clock.c > > new file mode 100644 > > index 0000000..a346d99 > > --- /dev/null > > +++ b/x86/hyperv_clock.c > > @@ -0,0 +1,197 @@ > > +#include "libcflat.h" > > +#include "smp.h" > > +#include "atomic.h" > > +#include "processor.h" > > +#include "hyperv.h" > > +#include "vm.h" > > + > > +#define MAX_CPU 4 > > +#define TICKS_PER_SEC (1000000000 / 100) > > + > > +struct hv_reference_tsc_page *tsc_ref; > > + > > +/* > > + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, > > + * yielding a 64-bit result. > > + */ > > +static inline u64 scale_delta(u64 delta, u64 mul_frac) > > +{ > > + u64 product, unused; > > + > > + __asm__ ( > > + "mul %3" > > + : "=d" (product), "=a" (unused) : "1" (delta), "rm" ((u64)mul_frac) ); > > + > > + return product; > > +} > > + > > +static u64 hvclock_tsc_to_ticks(struct hv_reference_tsc_page *shadow, u64 tsc) > > +{ > > + return scale_delta(tsc, shadow->tsc_scale) + shadow->tsc_offset; > > +} > > + > > +/* > > + * Reads a consistent set of time-base values from hypervisor, > > + * into a shadow data area. 
> > + */ > > +static void hvclock_get_time_values(struct hv_reference_tsc_page *shadow, > > + struct hv_reference_tsc_page *page) > > +{ > > + int seq; > > + do { > > + seq = page->tsc_sequence; > > + rmb(); /* fetch version before data */ > > + *shadow = *page; > > + rmb(); /* test version after fetching data */ > > + } while (shadow->tsc_sequence != seq); > > +} > > + > > +u64 tsc_ref_read(void) > > +{ > > + struct hv_reference_tsc_page shadow; > > + > > + hvclock_get_time_values(&shadow, tsc_ref); > > + return hvclock_tsc_to_ticks(&shadow, rdtsc()); > > +} > > + > > +atomic_t cpus_left; > > +bool ok[MAX_CPU]; > > +u64 loops[MAX_CPU]; > > + > > +static void tsc_ref_test(void *data) > > +{ > > + int i = smp_id(); > > + unsigned long long t = rdmsr(HV_X64_MSR_TIME_REF_COUNT); > > + unsigned long long end = t + 3 * TICKS_PER_SEC; > > + > > + ok[i] = true; > > + do { > > + u64 now = tsc_ref_read(); > > + if (now < t) { > > + printf("warp on CPU %d!\n", smp_id()); > > + ok[i] = false; > > + break; > > + } > > + t = now; > > + } while(t < end); > > + > > + barrier(); > > *** > > > > + if (t >= end) { > > + long long ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT); > > + if (i == 0) > > + printf("Time reference MSR drift: %lld\n\n", ref - end); > > + ok[i] &= (ref - end) > -5 && (ref - end) < 5; > > This is prone to fail: guest can be scheduled at "***" above and > test will fail. > > The [-5,5] comes from the standard? What the standard dictates? 
I suppose this would be a better test do { u64 now_refpage; tsc_ref_read(); u64 now_refcount; now_refpage = tsc_ref_read(); now_refcount = rdmsr(HV_X64_MSR_TIME_REF_COUNT); if (now_refpage > now_refcount) { printf("now_refpage %ld > now_refcount %ld\n", now_refpage, now_refcount); } which fails once you run void main(void) { int ret; struct timex tx; char *ptr; memset((void*)&tx, 0, sizeof(tx)); tx.freq = -6553600; //tx.freq = -237507; tx.modes = ADJ_FREQUENCY; ret = adjtimex(&tx); (such failure would crash applications with time backwards). -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 23/05/2016 23:55, Marcelo Tosatti wrote: >> > + >> > + barrier(); > *** > > >> > + if (t >= end) { >> > + long long ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT); >> > + if (i == 0) >> > + printf("Time reference MSR drift: %lld\n\n", ref - end); >> > + ok[i] &= (ref - end) > -5 && (ref - end) < 5; > This is prone to fail: guest can be scheduled at "***" above and > test will fail. Indeed. > The [-5,5] comes from the standard? No, it simply should be very small because the two are counting the same time. Thanks, Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, May 23, 2016 at 06:55:06PM -0300, Marcelo Tosatti wrote: > On Thu, Apr 21, 2016 at 08:21:25PM +0300, Roman Kagan wrote: > > From: Paolo Bonzini <pbonzini@redhat.com> > > > > The test checks the relative precision of the reference TSC page > > and the time reference counter. > > > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > > [Adjust types to pass printf type checks - Roman Kagan] > > Signed-off-by: Roman Kagan <rkagan@virtuozzo.com> > [...] > > + if (t >= end) { > > + long long ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT); > > + if (i == 0) > > + printf("Time reference MSR drift: %lld\n\n", ref - end); > > + ok[i] &= (ref - end) > -5 && (ref - end) < 5; > > This is prone to fail: guest can be scheduled at "***" above and > test will fail. > > The [-5,5] comes from the standard? What the standard dictates? No standard of course, it's arbitrary. I tend to think a better test would be to alternate clock reads from the MSR and the TSC page and check that neither goes ahead of the other. Roman. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, May 23, 2016 at 08:44:03PM -0300, Marcelo Tosatti wrote: > On Mon, May 23, 2016 at 06:55:06PM -0300, Marcelo Tosatti wrote: > > On Thu, Apr 21, 2016 at 08:21:25PM +0300, Roman Kagan wrote: > > > + if (t >= end) { > > > + long long ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT); > > > + if (i == 0) > > > + printf("Time reference MSR drift: %lld\n\n", ref - end); > > > + ok[i] &= (ref - end) > -5 && (ref - end) < 5; > > > > This is prone to fail: guest can be scheduled at "***" above and > > test will fail. > > I suppose this would be a better test > > do { > u64 now_refpage; tsc_ref_read(); > u64 now_refcount; > > now_refpage = tsc_ref_read(); > now_refcount = rdmsr(HV_X64_MSR_TIME_REF_COUNT); > > if (now_refpage > now_refcount) { > printf("now_refpage %ld > now_refcount %ld\n", > now_refpage, now_refcount); Yes, that's similar to what I posted in my other message on this thread. I'd also test that now_refpage >= prev_refcount, too. I'm cooking a patch with these changes, will post soonish. > which fails once you run > > void main(void) > { > int ret; > struct timex tx; > char *ptr; > > memset((void*)&tx, 0, sizeof(tx)); > > tx.freq = -6553600; > //tx.freq = -237507; > tx.modes = ADJ_FREQUENCY; > ret = adjtimex(&tx); Right, which is a problem with kvm-clock too (as I wrote in another thread, pvclock_gtod_data updates don't currently trigger per-VM masterclock updates). I'm still struggling through the multiple lengthy discussions trying to figure out if it's a bug or a feature... Roman. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, May 25, 2016 at 09:30:02PM +0300, Roman Kagan wrote: > On Mon, May 23, 2016 at 08:44:03PM -0300, Marcelo Tosatti wrote: > > On Mon, May 23, 2016 at 06:55:06PM -0300, Marcelo Tosatti wrote: > > > On Thu, Apr 21, 2016 at 08:21:25PM +0300, Roman Kagan wrote: > > > > + if (t >= end) { > > > > + long long ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT); > > > > + if (i == 0) > > > > + printf("Time reference MSR drift: %lld\n\n", ref - end); > > > > + ok[i] &= (ref - end) > -5 && (ref - end) < 5; > > > > > > This is prone to fail: guest can be scheduled at "***" above and > > > test will fail. > > > > I suppose this would be a better test > > > > do { > > u64 now_refpage; tsc_ref_read(); > > u64 now_refcount; > > > > now_refpage = tsc_ref_read(); > > now_refcount = rdmsr(HV_X64_MSR_TIME_REF_COUNT); > > > > if (now_refpage > now_refcount) { > > printf("now_refpage %ld > now_refcount %ld\n", > > now_refpage, now_refcount); > > Yes that's similar to what I posted in my other message on this thread. > I'd also test that now_refpage >= pref_refcount, too. > > I'm cooking a patch with these changes, will post soonish. > > > which fails once you run > > > > void main(void) > > { > > int ret; > > struct timex tx; > > char *ptr; > > > > memset((void*)&tx, 0, sizeof(tx)); > > > > tx.freq = -6553600; > > //tx.freq = -237507; > > tx.modes = ADJ_FREQUENCY; > > ret = adjtimex(&tx); > > Right, which is a problem with kvm-clock too (as I wrote in another > thread, pvclock_gtod_data updates don't currently trigger per-VM > masterclock updates). I'm still struggling through the multiple lengthy > discussions trying to figure out if it's a bug or a feature... > > Roman. It's a bug; I am writing an improvement based on Paolo's change to update the multiplier. But the hyperv tsc reference page patches should not depend on it. 
-- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, May 29, 2016 at 07:29:27PM -0300, Marcelo Tosatti wrote: > On Wed, May 25, 2016 at 09:30:02PM +0300, Roman Kagan wrote: > > On Mon, May 23, 2016 at 08:44:03PM -0300, Marcelo Tosatti wrote: > > > On Mon, May 23, 2016 at 06:55:06PM -0300, Marcelo Tosatti wrote: > > > which fails once you run > > > > > > void main(void) > > > { > > > int ret; > > > struct timex tx; > > > char *ptr; > > > > > > memset((void*)&tx, 0, sizeof(tx)); > > > > > > tx.freq = -6553600; > > > //tx.freq = -237507; > > > tx.modes = ADJ_FREQUENCY; > > > ret = adjtimex(&tx); > > > > Right, which is a problem with kvm-clock too (as I wrote in another > > thread, pvclock_gtod_data updates don't currently trigger per-VM > > masterclock updates). I'm still struggling through the multiple lengthy > > discussions trying to figure out if it's a bug or a feature... > > Its a bug, i am writing an improvement based on Paolo's change to > update the multiplier. > > But the hyperv tsc reference page patches should not depend on it. Unfortunately it does: since hyperv tsc reference page doesn't have seqlock semantics, it's emulated (per Paolo's suggestion) by marking the contents invalid with seqcount == 0, which makes Windows use MSR-based clock. And the two clocks are not expected to diverge. Roman. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/x86/Makefile.common b/x86/Makefile.common index ca80367..456e188 100644 --- a/x86/Makefile.common +++ b/x86/Makefile.common @@ -123,6 +123,9 @@ $(TEST_DIR)/hyperv_synic.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \ $(TEST_DIR)/hyperv_stimer.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \ $(TEST_DIR)/hyperv_stimer.o +$(TEST_DIR)/hyperv_clock.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \ + $(TEST_DIR)/hyperv_clock.o + $(TEST_DIR)/setjmp.elf: $(cstart.o) $(TEST_DIR)/setjmp.o arch_clean: diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64 index 6b7ccfb..56de0f0 100644 --- a/x86/Makefile.x86_64 +++ b/x86/Makefile.x86_64 @@ -14,5 +14,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \ tests += $(TEST_DIR)/svm.flat tests += $(TEST_DIR)/vmx.flat tests += $(TEST_DIR)/tscdeadline_latency.flat +tests += $(TEST_DIR)/hyperv_clock.flat include $(TEST_DIR)/Makefile.common diff --git a/x86/hyperv.h b/x86/hyperv.h index faf931b..974df56 100644 --- a/x86/hyperv.h +++ b/x86/hyperv.h @@ -12,6 +12,7 @@ #define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3) #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 /* Define synthetic interrupt controller model specific registers. 
*/ #define HV_X64_MSR_SCONTROL 0x40000080 @@ -180,4 +181,12 @@ void synic_sint_create(int vcpu, int sint, int vec, bool auto_eoi); void synic_sint_set(int vcpu, int sint); void synic_sint_destroy(int vcpu, int sint); +struct hv_reference_tsc_page { + uint32_t tsc_sequence; + uint32_t res1; + uint64_t tsc_scale; + int64_t tsc_offset; +}; + + #endif diff --git a/x86/hyperv_clock.c b/x86/hyperv_clock.c new file mode 100644 index 0000000..a346d99 --- /dev/null +++ b/x86/hyperv_clock.c @@ -0,0 +1,197 @@ +#include "libcflat.h" +#include "smp.h" +#include "atomic.h" +#include "processor.h" +#include "hyperv.h" +#include "vm.h" + +#define MAX_CPU 4 +#define TICKS_PER_SEC (1000000000 / 100) + +struct hv_reference_tsc_page *tsc_ref; + +/* + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, + * yielding a 64-bit result. + */ +static inline u64 scale_delta(u64 delta, u64 mul_frac) +{ + u64 product, unused; + + __asm__ ( + "mul %3" + : "=d" (product), "=a" (unused) : "1" (delta), "rm" ((u64)mul_frac) ); + + return product; +} + +static u64 hvclock_tsc_to_ticks(struct hv_reference_tsc_page *shadow, u64 tsc) +{ + return scale_delta(tsc, shadow->tsc_scale) + shadow->tsc_offset; +} + +/* + * Reads a consistent set of time-base values from hypervisor, + * into a shadow data area. 
+ */ +static void hvclock_get_time_values(struct hv_reference_tsc_page *shadow, + struct hv_reference_tsc_page *page) +{ + int seq; + do { + seq = page->tsc_sequence; + rmb(); /* fetch version before data */ + *shadow = *page; + rmb(); /* test version after fetching data */ + } while (shadow->tsc_sequence != seq); +} + +u64 tsc_ref_read(void) +{ + struct hv_reference_tsc_page shadow; + + hvclock_get_time_values(&shadow, tsc_ref); + return hvclock_tsc_to_ticks(&shadow, rdtsc()); +} + +atomic_t cpus_left; +bool ok[MAX_CPU]; +u64 loops[MAX_CPU]; + +static void tsc_ref_test(void *data) +{ + int i = smp_id(); + unsigned long long t = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + unsigned long long end = t + 3 * TICKS_PER_SEC; + + ok[i] = true; + do { + u64 now = tsc_ref_read(); + if (now < t) { + printf("warp on CPU %d!\n", smp_id()); + ok[i] = false; + break; + } + t = now; + } while(t < end); + + barrier(); + if (t >= end) { + long long ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + if (i == 0) + printf("Time reference MSR drift: %lld\n\n", ref - end); + ok[i] &= (ref - end) > -5 && (ref - end) < 5; + } + + atomic_dec(&cpus_left); +} + +static void check_test(int ncpus) +{ + int i; + bool pass; + + atomic_set(&cpus_left, ncpus); + for (i = ncpus - 1; i >= 0; i--) + on_cpu_async(i, tsc_ref_test, NULL); + + /* Wait for the end of other vcpu */ + while(atomic_read(&cpus_left)) + ; + + pass = true; + for (i = ncpus - 1; i >= 0; i--) + pass &= ok[i]; + + report("TSC reference precision test", pass); +} + +static void hv_perf_test(void *data) +{ + u64 t = tsc_ref_read(); + u64 end = t + TICKS_PER_SEC; + u64 local_loops = 0; + + do { + t = tsc_ref_read(); + local_loops++; + } while(t < end); + + loops[smp_id()] = local_loops; + atomic_dec(&cpus_left); +} + +static void perf_test(int ncpus) +{ + int i; + unsigned long long total_loops; + + atomic_set(&cpus_left, ncpus); + for (i = ncpus - 1; i >= 0; i--) + on_cpu_async(i, hv_perf_test, NULL); + + /* Wait for the end of other vcpu */ + 
while(atomic_read(&cpus_left)) + ; + + total_loops = 0; + for (i = ncpus - 1; i >= 0; i--) + total_loops += loops[i]; + printf("iterations/sec: %lld\n", total_loops / ncpus); +} + +int main(int ac, char **av) +{ + int nerr = 0; + int ncpus; + struct hv_reference_tsc_page shadow; + unsigned long long tsc1, t1, tsc2, t2; + unsigned long long ref1, ref2; + + setup_vm(); + smp_init(); + + tsc_ref = alloc_page(); + wrmsr(HV_X64_MSR_REFERENCE_TSC, (u64)(uintptr_t)tsc_ref | 1); + report("MSR value after enabling", + rdmsr(HV_X64_MSR_REFERENCE_TSC) == ((u64)(uintptr_t)tsc_ref | 1)); + + hvclock_get_time_values(&shadow, tsc_ref); + if (shadow.tsc_sequence == 0 || shadow.tsc_sequence == 0xFFFFFFFF) { + printf("Reference TSC page not available\n"); + exit(1); + } + + printf("scale: %llx offset: %lld\n", + (unsigned long long) shadow.tsc_scale, + (long long) shadow.tsc_offset); + ref1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + tsc1 = rdtsc(); + t1 = hvclock_tsc_to_ticks(&shadow, tsc1); + printf("refcnt %lld, TSC %llx, TSC reference %lld\n", + ref1, tsc1, t1); + + do + ref2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + while (ref2 < ref1 + 2 * TICKS_PER_SEC); + + tsc2 = rdtsc(); + t2 = hvclock_tsc_to_ticks(&shadow, tsc2); + printf("refcnt %lld (delta %lld), TSC %llx, TSC reference %lld (delta %lld)\n", + ref2, ref2 - ref1, tsc2, t2, t2 - t1); + + /* re-sync scale and offset to make drift measurement more accurate */ + wrmsr(HV_X64_MSR_REFERENCE_TSC, (u64)(uintptr_t)tsc_ref | 1); + + ncpus = cpu_count(); + if (ncpus > MAX_CPU) + ncpus = MAX_CPU; + + check_test(ncpus); + perf_test(ncpus); + + wrmsr(HV_X64_MSR_REFERENCE_TSC, 0LL); + report("MSR value after disabling", rdmsr(HV_X64_MSR_REFERENCE_TSC) == 0); + + return nerr > 0 ? 
1 : 0; +} diff --git a/x86/unittests.cfg b/x86/unittests.cfg index fcee6f9..417d12f 100644 --- a/x86/unittests.cfg +++ b/x86/unittests.cfg @@ -203,3 +203,8 @@ extra_params = -cpu kvm64,hv_synic -device hyperv-testdev file = hyperv_stimer.flat smp = 2 extra_params = -cpu kvm64,hv_time,hv_synic,hv_stimer -device hyperv-testdev + +[hyperv_clock] +file = hyperv_clock.flat +smp = 2 +extra_params = -cpu kvm64,hv_time