diff mbox

[kvm-unit-tests,v2] KVM: x86: add hyperv clock test case

Message ID 1461259285-28472-1-git-send-email-rkagan@virtuozzo.com (mailing list archive)
State New, archived
Headers show

Commit Message

Roman Kagan April 21, 2016, 5:21 p.m. UTC
From: Paolo Bonzini <pbonzini@redhat.com>

The test checks the relative precision of the reference TSC page
and the time reference counter.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
[Adjust types to pass printf type checks - Roman Kagan]
Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
---
 x86/Makefile.common |   3 +
 x86/Makefile.x86_64 |   1 +
 x86/hyperv.h        |   9 +++
 x86/hyperv_clock.c  | 197 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 x86/unittests.cfg   |   5 ++
 5 files changed, 215 insertions(+)
 create mode 100644 x86/hyperv_clock.c

Comments

Marcelo Tosatti May 23, 2016, 9:55 p.m. UTC | #1
On Thu, Apr 21, 2016 at 08:21:25PM +0300, Roman Kagan wrote:
> From: Paolo Bonzini <pbonzini@redhat.com>
> 
> The test checks the relative precision of the reference TSC page
> and the time reference counter.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> [Adjust types to pass printf type checks - Roman Kagan]
> Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
> ---
>  x86/Makefile.common |   3 +
>  x86/Makefile.x86_64 |   1 +
>  x86/hyperv.h        |   9 +++
>  x86/hyperv_clock.c  | 197 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  x86/unittests.cfg   |   5 ++
>  5 files changed, 215 insertions(+)
>  create mode 100644 x86/hyperv_clock.c
> 
> diff --git a/x86/Makefile.common b/x86/Makefile.common
> index ca80367..456e188 100644
> --- a/x86/Makefile.common
> +++ b/x86/Makefile.common
> @@ -123,6 +123,9 @@ $(TEST_DIR)/hyperv_synic.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \
>  $(TEST_DIR)/hyperv_stimer.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \
>                                 $(TEST_DIR)/hyperv_stimer.o
>  
> +$(TEST_DIR)/hyperv_clock.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \
> +                               $(TEST_DIR)/hyperv_clock.o
> +
>  $(TEST_DIR)/setjmp.elf: $(cstart.o) $(TEST_DIR)/setjmp.o
>  
>  arch_clean:
> diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
> index 6b7ccfb..56de0f0 100644
> --- a/x86/Makefile.x86_64
> +++ b/x86/Makefile.x86_64
> @@ -14,5 +14,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
>  tests += $(TEST_DIR)/svm.flat
>  tests += $(TEST_DIR)/vmx.flat
>  tests += $(TEST_DIR)/tscdeadline_latency.flat
> +tests += $(TEST_DIR)/hyperv_clock.flat
>  
>  include $(TEST_DIR)/Makefile.common
> diff --git a/x86/hyperv.h b/x86/hyperv.h
> index faf931b..974df56 100644
> --- a/x86/hyperv.h
> +++ b/x86/hyperv.h
> @@ -12,6 +12,7 @@
>  #define HV_X64_MSR_SYNTIMER_AVAILABLE           (1 << 3)
>  
>  #define HV_X64_MSR_TIME_REF_COUNT               0x40000020
> +#define HV_X64_MSR_REFERENCE_TSC                0x40000021
>  
>  /* Define synthetic interrupt controller model specific registers. */
>  #define HV_X64_MSR_SCONTROL                     0x40000080
> @@ -180,4 +181,12 @@ void synic_sint_create(int vcpu, int sint, int vec, bool auto_eoi);
>  void synic_sint_set(int vcpu, int sint);
>  void synic_sint_destroy(int vcpu, int sint);
>  
> +struct hv_reference_tsc_page {
> +        uint32_t tsc_sequence;
> +        uint32_t res1;
> +        uint64_t tsc_scale;
> +        int64_t tsc_offset;
> +};
> +
> +
>  #endif
> diff --git a/x86/hyperv_clock.c b/x86/hyperv_clock.c
> new file mode 100644
> index 0000000..a346d99
> --- /dev/null
> +++ b/x86/hyperv_clock.c
> @@ -0,0 +1,197 @@
> +#include "libcflat.h"
> +#include "smp.h"
> +#include "atomic.h"
> +#include "processor.h"
> +#include "hyperv.h"
> +#include "vm.h"
> +
> +#define MAX_CPU 4
> +#define TICKS_PER_SEC (1000000000 / 100)
> +
> +struct hv_reference_tsc_page *tsc_ref;
> +
> +/*
> + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
> + * yielding a 64-bit result.
> + */
> +static inline u64 scale_delta(u64 delta, u64 mul_frac)
> +{
> +	u64 product, unused;
> +
> +	__asm__ (
> +		"mul %3"
> +		: "=d" (product), "=a" (unused) : "1" (delta), "rm" ((u64)mul_frac) );
> +
> +	return product;
> +}
> +
> +static u64 hvclock_tsc_to_ticks(struct hv_reference_tsc_page *shadow, u64 tsc)
> +{
> +	return scale_delta(tsc, shadow->tsc_scale) + shadow->tsc_offset;
> +}
> +
> +/*
> + * Reads a consistent set of time-base values from hypervisor,
> + * into a shadow data area.
> + */
> +static void hvclock_get_time_values(struct hv_reference_tsc_page *shadow,
> +				    struct hv_reference_tsc_page *page)
> +{
> +	int seq;
> +	do {
> +		seq = page->tsc_sequence;
> +		rmb();		/* fetch version before data */
> +		*shadow = *page;
> +		rmb();		/* test version after fetching data */
> +	} while (shadow->tsc_sequence != seq);
> +}
> +
> +u64 tsc_ref_read(void)
> +{
> +	struct hv_reference_tsc_page shadow;
> +
> +	hvclock_get_time_values(&shadow, tsc_ref);
> +	return hvclock_tsc_to_ticks(&shadow, rdtsc());
> +}
> +
> +atomic_t cpus_left;
> +bool ok[MAX_CPU];
> +u64 loops[MAX_CPU];
> +
> +static void tsc_ref_test(void *data)
> +{
> +	int i = smp_id();
> +	unsigned long long t = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
> +	unsigned long long end = t + 3 * TICKS_PER_SEC;
> +
> +	ok[i] = true;
> +	do {
> +		u64 now = tsc_ref_read();
> +		if (now < t) {
> +			printf("warp on CPU %d!\n", smp_id());
> +			ok[i] = false;
> +			break;
> +		}
> +		t = now;
> +	} while(t < end);
> +
> +	barrier();

***


> +	if (t >= end) {
> +		long long ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
> +		if (i == 0)
> +			printf("Time reference MSR drift: %lld\n\n", ref - end);
> +		ok[i] &= (ref - end) > -5 && (ref - end) < 5;

This is prone to fail: guest can be scheduled at "***" above and 
test will fail.

The [-5,5] comes from the standard? What the standard dictates?


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti May 23, 2016, 11:44 p.m. UTC | #2
On Mon, May 23, 2016 at 06:55:06PM -0300, Marcelo Tosatti wrote:
> On Thu, Apr 21, 2016 at 08:21:25PM +0300, Roman Kagan wrote:
> > From: Paolo Bonzini <pbonzini@redhat.com>
> > 
> > The test checks the relative precision of the reference TSC page
> > and the time reference counter.
> > 
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > [Adjust types to pass printf type checks - Roman Kagan]
> > Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
> > ---
> >  x86/Makefile.common |   3 +
> >  x86/Makefile.x86_64 |   1 +
> >  x86/hyperv.h        |   9 +++
> >  x86/hyperv_clock.c  | 197 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  x86/unittests.cfg   |   5 ++
> >  5 files changed, 215 insertions(+)
> >  create mode 100644 x86/hyperv_clock.c
> > 
> > diff --git a/x86/Makefile.common b/x86/Makefile.common
> > index ca80367..456e188 100644
> > --- a/x86/Makefile.common
> > +++ b/x86/Makefile.common
> > @@ -123,6 +123,9 @@ $(TEST_DIR)/hyperv_synic.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \
> >  $(TEST_DIR)/hyperv_stimer.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \
> >                                 $(TEST_DIR)/hyperv_stimer.o
> >  
> > +$(TEST_DIR)/hyperv_clock.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \
> > +                               $(TEST_DIR)/hyperv_clock.o
> > +
> >  $(TEST_DIR)/setjmp.elf: $(cstart.o) $(TEST_DIR)/setjmp.o
> >  
> >  arch_clean:
> > diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
> > index 6b7ccfb..56de0f0 100644
> > --- a/x86/Makefile.x86_64
> > +++ b/x86/Makefile.x86_64
> > @@ -14,5 +14,6 @@ tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
> >  tests += $(TEST_DIR)/svm.flat
> >  tests += $(TEST_DIR)/vmx.flat
> >  tests += $(TEST_DIR)/tscdeadline_latency.flat
> > +tests += $(TEST_DIR)/hyperv_clock.flat
> >  
> >  include $(TEST_DIR)/Makefile.common
> > diff --git a/x86/hyperv.h b/x86/hyperv.h
> > index faf931b..974df56 100644
> > --- a/x86/hyperv.h
> > +++ b/x86/hyperv.h
> > @@ -12,6 +12,7 @@
> >  #define HV_X64_MSR_SYNTIMER_AVAILABLE           (1 << 3)
> >  
> >  #define HV_X64_MSR_TIME_REF_COUNT               0x40000020
> > +#define HV_X64_MSR_REFERENCE_TSC                0x40000021
> >  
> >  /* Define synthetic interrupt controller model specific registers. */
> >  #define HV_X64_MSR_SCONTROL                     0x40000080
> > @@ -180,4 +181,12 @@ void synic_sint_create(int vcpu, int sint, int vec, bool auto_eoi);
> >  void synic_sint_set(int vcpu, int sint);
> >  void synic_sint_destroy(int vcpu, int sint);
> >  
> > +struct hv_reference_tsc_page {
> > +        uint32_t tsc_sequence;
> > +        uint32_t res1;
> > +        uint64_t tsc_scale;
> > +        int64_t tsc_offset;
> > +};
> > +
> > +
> >  #endif
> > diff --git a/x86/hyperv_clock.c b/x86/hyperv_clock.c
> > new file mode 100644
> > index 0000000..a346d99
> > --- /dev/null
> > +++ b/x86/hyperv_clock.c
> > @@ -0,0 +1,197 @@
> > +#include "libcflat.h"
> > +#include "smp.h"
> > +#include "atomic.h"
> > +#include "processor.h"
> > +#include "hyperv.h"
> > +#include "vm.h"
> > +
> > +#define MAX_CPU 4
> > +#define TICKS_PER_SEC (1000000000 / 100)
> > +
> > +struct hv_reference_tsc_page *tsc_ref;
> > +
> > +/*
> > + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
> > + * yielding a 64-bit result.
> > + */
> > +static inline u64 scale_delta(u64 delta, u64 mul_frac)
> > +{
> > +	u64 product, unused;
> > +
> > +	__asm__ (
> > +		"mul %3"
> > +		: "=d" (product), "=a" (unused) : "1" (delta), "rm" ((u64)mul_frac) );
> > +
> > +	return product;
> > +}
> > +
> > +static u64 hvclock_tsc_to_ticks(struct hv_reference_tsc_page *shadow, u64 tsc)
> > +{
> > +	return scale_delta(tsc, shadow->tsc_scale) + shadow->tsc_offset;
> > +}
> > +
> > +/*
> > + * Reads a consistent set of time-base values from hypervisor,
> > + * into a shadow data area.
> > + */
> > +static void hvclock_get_time_values(struct hv_reference_tsc_page *shadow,
> > +				    struct hv_reference_tsc_page *page)
> > +{
> > +	int seq;
> > +	do {
> > +		seq = page->tsc_sequence;
> > +		rmb();		/* fetch version before data */
> > +		*shadow = *page;
> > +		rmb();		/* test version after fetching data */
> > +	} while (shadow->tsc_sequence != seq);
> > +}
> > +
> > +u64 tsc_ref_read(void)
> > +{
> > +	struct hv_reference_tsc_page shadow;
> > +
> > +	hvclock_get_time_values(&shadow, tsc_ref);
> > +	return hvclock_tsc_to_ticks(&shadow, rdtsc());
> > +}
> > +
> > +atomic_t cpus_left;
> > +bool ok[MAX_CPU];
> > +u64 loops[MAX_CPU];
> > +
> > +static void tsc_ref_test(void *data)
> > +{
> > +	int i = smp_id();
> > +	unsigned long long t = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
> > +	unsigned long long end = t + 3 * TICKS_PER_SEC;
> > +
> > +	ok[i] = true;
> > +	do {
> > +		u64 now = tsc_ref_read();
> > +		if (now < t) {
> > +			printf("warp on CPU %d!\n", smp_id());
> > +			ok[i] = false;
> > +			break;
> > +		}
> > +		t = now;
> > +	} while(t < end);
> > +
> > +	barrier();
> 
> ***
> 
> 
> > +	if (t >= end) {
> > +		long long ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
> > +		if (i == 0)
> > +			printf("Time reference MSR drift: %lld\n\n", ref - end);
> > +		ok[i] &= (ref - end) > -5 && (ref - end) < 5;
> 
> This is prone to fail: guest can be scheduled at "***" above and 
> test will fail.
> 
> The [-5,5] comes from the standard? What the standard dictates?




I suppose this would be a better test

        do {
                u64 now_refpage; tsc_ref_read();
                u64 now_refcount;

                now_refpage = tsc_ref_read();
                now_refcount = rdmsr(HV_X64_MSR_TIME_REF_COUNT);

                if (now_refpage > now_refcount) {
                        printf("now_refpage %ld > now_refcount %ld\n", 
                                now_refpage, now_refcount);
                }

which fails once you run 

void main(void)
{
        int ret;
        struct timex tx;
        char *ptr;

        memset((void*)&tx, 0, sizeof(tx));

        tx.freq = -6553600;
        //tx.freq = -237507;
        tx.modes = ADJ_FREQUENCY;
        ret = adjtimex(&tx);


(such a failure would crash applications, since they would see time going backwards).

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini May 24, 2016, 10:41 a.m. UTC | #3
On 23/05/2016 23:55, Marcelo Tosatti wrote:
>> > +
>> > +	barrier();
> ***
> 
> 
>> > +	if (t >= end) {
>> > +		long long ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
>> > +		if (i == 0)
>> > +			printf("Time reference MSR drift: %lld\n\n", ref - end);
>> > +		ok[i] &= (ref - end) > -5 && (ref - end) < 5;
> This is prone to fail: guest can be scheduled at "***" above and 
> test will fail.

Indeed.

> The [-5,5] comes from the standard?

No, it simply should be very small because the two are counting the same
time.

Thanks,

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Roman Kagan May 24, 2016, 10:57 a.m. UTC | #4
On Mon, May 23, 2016 at 06:55:06PM -0300, Marcelo Tosatti wrote:
> On Thu, Apr 21, 2016 at 08:21:25PM +0300, Roman Kagan wrote:
> > From: Paolo Bonzini <pbonzini@redhat.com>
> > 
> > The test checks the relative precision of the reference TSC page
> > and the time reference counter.
> > 
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > [Adjust types to pass printf type checks - Roman Kagan]
> > Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
> [...]
> > +	if (t >= end) {
> > +		long long ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
> > +		if (i == 0)
> > +			printf("Time reference MSR drift: %lld\n\n", ref - end);
> > +		ok[i] &= (ref - end) > -5 && (ref - end) < 5;
> 
> This is prone to fail: guest can be scheduled at "***" above and 
> test will fail.
> 
> The [-5,5] comes from the standard? What the standard dictates?

No standard of course, it's arbitrary.

I tend to think a better test would be to alternate clock reads from the
MSR and the TSC page and check that neither goes ahead of the other.

Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Roman Kagan May 25, 2016, 6:30 p.m. UTC | #5
On Mon, May 23, 2016 at 08:44:03PM -0300, Marcelo Tosatti wrote:
> On Mon, May 23, 2016 at 06:55:06PM -0300, Marcelo Tosatti wrote:
> > On Thu, Apr 21, 2016 at 08:21:25PM +0300, Roman Kagan wrote:
> > > +	if (t >= end) {
> > > +		long long ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
> > > +		if (i == 0)
> > > +			printf("Time reference MSR drift: %lld\n\n", ref - end);
> > > +		ok[i] &= (ref - end) > -5 && (ref - end) < 5;
> > 
> > This is prone to fail: guest can be scheduled at "***" above and 
> > test will fail.
> 
> I suppose this would be a better test
> 
>         do {
>                 u64 now_refpage; tsc_ref_read();
>                 u64 now_refcount;
> 
>                 now_refpage = tsc_ref_read();
>                 now_refcount = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
> 
>                 if (now_refpage > now_refcount) {
>                         printf("now_refpage %ld > now_refcount %ld\n", 
>                                 now_refpage, now_refcount);

Yes, that's similar to what I posted in my other message on this thread.
I'd also test that now_refpage >= prev_refcount.

I'm cooking a patch with these changes, will post soonish.

> which fails once you run 
> 
> void main(void)
> {
>         int ret;
>         struct timex tx;
>         char *ptr;
> 
>         memset((void*)&tx, 0, sizeof(tx));
> 
>         tx.freq = -6553600;
>         //tx.freq = -237507;
>         tx.modes = ADJ_FREQUENCY;
>         ret = adjtimex(&tx);

Right, which is a problem with kvm-clock too (as I wrote in another
thread, pvclock_gtod_data updates don't currently trigger per-VM
masterclock updates).  I'm still struggling through the multiple lengthy
discussions trying to figure out if it's a bug or a feature...

Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti May 29, 2016, 10:29 p.m. UTC | #6
On Wed, May 25, 2016 at 09:30:02PM +0300, Roman Kagan wrote:
> On Mon, May 23, 2016 at 08:44:03PM -0300, Marcelo Tosatti wrote:
> > On Mon, May 23, 2016 at 06:55:06PM -0300, Marcelo Tosatti wrote:
> > > On Thu, Apr 21, 2016 at 08:21:25PM +0300, Roman Kagan wrote:
> > > > +	if (t >= end) {
> > > > +		long long ref = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
> > > > +		if (i == 0)
> > > > +			printf("Time reference MSR drift: %lld\n\n", ref - end);
> > > > +		ok[i] &= (ref - end) > -5 && (ref - end) < 5;
> > > 
> > > This is prone to fail: guest can be scheduled at "***" above and 
> > > test will fail.
> > 
> > I suppose this would be a better test
> > 
> >         do {
> >                 u64 now_refpage; tsc_ref_read();
> >                 u64 now_refcount;
> > 
> >                 now_refpage = tsc_ref_read();
> >                 now_refcount = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
> > 
> >                 if (now_refpage > now_refcount) {
> >                         printf("now_refpage %ld > now_refcount %ld\n", 
> >                                 now_refpage, now_refcount);
> 
> Yes that's similar to what I posted in my other message on this thread.
> I'd also test that now_refpage >= pref_refcount, too.
> 
> I'm cooking a patch with these changes, will post soonish.
> 
> > which fails once you run 
> > 
> > void main(void)
> > {
> >         int ret;
> >         struct timex tx;
> >         char *ptr;
> > 
> >         memset((void*)&tx, 0, sizeof(tx));
> > 
> >         tx.freq = -6553600;
> >         //tx.freq = -237507;
> >         tx.modes = ADJ_FREQUENCY;
> >         ret = adjtimex(&tx);
> 
> Right, which is a problem with kvm-clock too (as I wrote in another
> thread, pvclock_gtod_data updates don't currently trigger per-VM
> masterclock updates).  I'm still struggling through the multiple lengthy
> discussions trying to figure out if it's a bug or a feature...
> 
> Roman.

It's a bug; I am writing an improvement based on Paolo's change to
update the multiplier.

But the hyperv tsc reference page patches should not depend on it.


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Roman Kagan May 30, 2016, 6:09 p.m. UTC | #7
On Sun, May 29, 2016 at 07:29:27PM -0300, Marcelo Tosatti wrote:
> On Wed, May 25, 2016 at 09:30:02PM +0300, Roman Kagan wrote:
> > On Mon, May 23, 2016 at 08:44:03PM -0300, Marcelo Tosatti wrote:
> > > On Mon, May 23, 2016 at 06:55:06PM -0300, Marcelo Tosatti wrote:
> > > which fails once you run 
> > > 
> > > void main(void)
> > > {
> > >         int ret;
> > >         struct timex tx;
> > >         char *ptr;
> > > 
> > >         memset((void*)&tx, 0, sizeof(tx));
> > > 
> > >         tx.freq = -6553600;
> > >         //tx.freq = -237507;
> > >         tx.modes = ADJ_FREQUENCY;
> > >         ret = adjtimex(&tx);
> > 
> > Right, which is a problem with kvm-clock too (as I wrote in another
> > thread, pvclock_gtod_data updates don't currently trigger per-VM
> > masterclock updates).  I'm still struggling through the multiple lengthy
> > discussions trying to figure out if it's a bug or a feature...
> 
> Its a bug, i am writing an improvement based on Paolo's change to
> update the multiplier. 
> 
> But the hyperv tsc reference page patches should not depend on it.

Unfortunately it does: since hyperv tsc reference page doesn't have
seqlock semantics, it's emulated (per Paolo's suggestion) by marking the
contents invalid with seqcount == 0, which makes Windows use MSR-based
clock.  And the two clocks are not expected to diverge.

Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/x86/Makefile.common b/x86/Makefile.common
index ca80367..456e188 100644
--- a/x86/Makefile.common
+++ b/x86/Makefile.common
@@ -123,6 +123,9 @@  $(TEST_DIR)/hyperv_synic.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \
 $(TEST_DIR)/hyperv_stimer.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \
                                $(TEST_DIR)/hyperv_stimer.o
 
+$(TEST_DIR)/hyperv_clock.elf: $(cstart.o) $(TEST_DIR)/hyperv.o \
+                               $(TEST_DIR)/hyperv_clock.o
+
 $(TEST_DIR)/setjmp.elf: $(cstart.o) $(TEST_DIR)/setjmp.o
 
 arch_clean:
diff --git a/x86/Makefile.x86_64 b/x86/Makefile.x86_64
index 6b7ccfb..56de0f0 100644
--- a/x86/Makefile.x86_64
+++ b/x86/Makefile.x86_64
@@ -14,5 +14,6 @@  tests = $(TEST_DIR)/access.flat $(TEST_DIR)/apic.flat \
 tests += $(TEST_DIR)/svm.flat
 tests += $(TEST_DIR)/vmx.flat
 tests += $(TEST_DIR)/tscdeadline_latency.flat
+tests += $(TEST_DIR)/hyperv_clock.flat
 
 include $(TEST_DIR)/Makefile.common
diff --git a/x86/hyperv.h b/x86/hyperv.h
index faf931b..974df56 100644
--- a/x86/hyperv.h
+++ b/x86/hyperv.h
@@ -12,6 +12,7 @@ 
 #define HV_X64_MSR_SYNTIMER_AVAILABLE           (1 << 3)
 
 #define HV_X64_MSR_TIME_REF_COUNT               0x40000020
+#define HV_X64_MSR_REFERENCE_TSC                0x40000021
 
 /* Define synthetic interrupt controller model specific registers. */
 #define HV_X64_MSR_SCONTROL                     0x40000080
@@ -180,4 +181,12 @@  void synic_sint_create(int vcpu, int sint, int vec, bool auto_eoi);
 void synic_sint_set(int vcpu, int sint);
 void synic_sint_destroy(int vcpu, int sint);
 
+struct hv_reference_tsc_page {
+        uint32_t tsc_sequence;  /* re-read around each copy; the test treats 0 and 0xFFFFFFFF as "not available" */
+        uint32_t res1;          /* not referenced individually by the test */
+        uint64_t tsc_scale;     /* multiplier: ticks = high 64 bits of (tsc * tsc_scale) + tsc_offset */
+        int64_t tsc_offset;     /* signed value added after scaling */
+};
+
+
 #endif
diff --git a/x86/hyperv_clock.c b/x86/hyperv_clock.c
new file mode 100644
index 0000000..a346d99
--- /dev/null
+++ b/x86/hyperv_clock.c
@@ -0,0 +1,197 @@ 
+#include "libcflat.h"
+#include "smp.h"
+#include "atomic.h"
+#include "processor.h"
+#include "hyperv.h"
+#include "vm.h"
+
+#define MAX_CPU 4	/* size of the per-CPU ok[] and loops[] arrays; main() caps ncpus to it */
+#define TICKS_PER_SEC (1000000000 / 100)	/* 10^7; presumably 1e9 ns / 100 ns per tick -- TODO confirm unit */
+
+struct hv_reference_tsc_page *tsc_ref;	/* page allocated in main() and registered via HV_X64_MSR_REFERENCE_TSC */
+
+/*
+ * Multiply a 64-bit value by a 64-bit fixed-point fraction (mul_frac / 2^64)
+ * and return the high 64 bits of the 128-bit product (RDX after MUL).
+ */
+static inline u64 scale_delta(u64 delta, u64 mul_frac)
+{
+	u64 product, unused;
+
+	__asm__ (
+		"mulq %3"	/* explicit 64-bit size: "rm" may select a memory operand */
+		: "=d" (product), "=a" (unused) : "1" (delta), "rm" ((u64)mul_frac) );
+
+	return product;
+}
+
+static u64 hvclock_tsc_to_ticks(struct hv_reference_tsc_page *shadow, u64 tsc)
+{
+	return shadow->tsc_offset + scale_delta(tsc, shadow->tsc_scale);	/* offset + scaled TSC */
+}
+
+/*
+ * Copy the reference TSC page into *shadow, retrying until tsc_sequence
+ * reads the same before and in the copy (i.e. the copy saw one version).
+ */
+static void hvclock_get_time_values(struct hv_reference_tsc_page *shadow,
+				    struct hv_reference_tsc_page *page)
+{
+	uint32_t seq;		/* same type as tsc_sequence: no signed/unsigned mix */
+	do {
+		seq = page->tsc_sequence;
+		rmb();		/* fetch version before data */
+		*shadow = *page;
+		rmb();		/* test version after fetching data */
+	} while (shadow->tsc_sequence != seq);
+}
+
+u64 tsc_ref_read(void)
+{
+	struct hv_reference_tsc_page snap;	/* private copy of the shared page */
+
+	hvclock_get_time_values(&snap, tsc_ref);
+	return hvclock_tsc_to_ticks(&snap, rdtsc());	/* TSC is sampled after the copy */
+}
+
+atomic_t cpus_left;	/* workers still running; each worker decrements it when done */
+bool ok[MAX_CPU];	/* per-CPU verdict written by tsc_ref_test() */
+u64 loops[MAX_CPU];	/* per-CPU iteration count written by hv_perf_test() */
+
+static void tsc_ref_test(void *data)
+{
+	int i = smp_id();
+	unsigned long long t = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	unsigned long long end = t + 3 * TICKS_PER_SEC;
+	/* Poll the TSC page clock for 3 * TICKS_PER_SEC ticks, then compare to the MSR. */
+	ok[i] = true;
+	do {
+		u64 now = tsc_ref_read();
+		if (now < t) {	/* the clock stepped backwards on this CPU */
+			printf("warp on CPU %d!\n", smp_id());
+			ok[i] = false;
+			break;
+		}
+		t = now;
+	} while(t < end);
+
+	barrier();
+	if (t >= end) {
+		/* Signed on purpose: as unsigned, "> -5 && < 5" could never hold. */
+		long long drift = (long long)(rdmsr(HV_X64_MSR_TIME_REF_COUNT) - end);
+		if (i == 0)
+			printf("Time reference MSR drift: %lld\n\n", drift);
+		ok[i] &= drift > -5 && drift < 5;	/* window is arbitrary, not from a spec */
+	}
+	atomic_dec(&cpus_left);	/* lets check_test() stop waiting */
+}
+
+static void check_test(int ncpus)
+{
+	int i;
+	bool pass;
+
+	atomic_set(&cpus_left, ncpus);
+	for (i = ncpus - 1; i >= 0; i--)	/* descending: CPU 0 is dispatched last */
+		on_cpu_async(i, tsc_ref_test, NULL);
+
+	/* Spin until every worker has decremented cpus_left down to zero. */
+	while(atomic_read(&cpus_left))
+		;
+
+	pass = true;
+	for (i = ncpus - 1; i >= 0; i--)
+		pass &= ok[i];	/* a single failing CPU fails the whole test */
+
+	report("TSC reference precision test", pass);
+}
+
+static void hv_perf_test(void *data)
+{
+	u64 t = tsc_ref_read();
+	u64 end = t + TICKS_PER_SEC;	/* window length measured on the TSC page clock */
+	u64 local_loops = 0;
+
+	do {
+		t = tsc_ref_read();
+		local_loops++;	/* one tsc_ref_read() call per iteration */
+	} while(t < end);
+
+	loops[smp_id()] = local_loops;
+	atomic_dec(&cpus_left);	/* signals completion to perf_test() */
+}
+
+static void perf_test(int ncpus)
+{
+	int i;
+	unsigned long long total_loops;
+
+	atomic_set(&cpus_left, ncpus);
+	for (i = ncpus - 1; i >= 0; i--)	/* descending: CPU 0 is dispatched last */
+		on_cpu_async(i, hv_perf_test, NULL);
+
+	/* Spin until every worker has decremented cpus_left down to zero. */
+	while(atomic_read(&cpus_left))
+		;
+
+	total_loops = 0;
+	for (i = ncpus - 1; i >= 0; i--)
+		total_loops += loops[i];
+	printf("iterations/sec:  %lld\n", total_loops / ncpus);	/* per-CPU average, not the sum */
+}
+
+int main(int ac, char **av)
+{
+	/* Flow: enable the TSC page, sample both clocks, run the SMP tests, disable. */
+	int ncpus;
+	struct hv_reference_tsc_page shadow;
+	unsigned long long tsc1, t1, tsc2, t2;
+	unsigned long long ref1, ref2;
+
+	setup_vm();
+	smp_init();
+
+	tsc_ref = alloc_page();
+	wrmsr(HV_X64_MSR_REFERENCE_TSC, (u64)(uintptr_t)tsc_ref | 1);
+	report("MSR value after enabling",
+	       rdmsr(HV_X64_MSR_REFERENCE_TSC) == ((u64)(uintptr_t)tsc_ref | 1));
+
+	hvclock_get_time_values(&shadow, tsc_ref);
+	if (shadow.tsc_sequence == 0 || shadow.tsc_sequence == 0xFFFFFFFF) {
+		printf("Reference TSC page not available\n");
+		exit(1);	/* nothing to measure without a populated page */
+	}
+
+	printf("scale: %llx offset: %lld\n",
+	       (unsigned long long) shadow.tsc_scale,
+	       (long long) shadow.tsc_offset);
+	ref1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	tsc1 = rdtsc();
+	t1 = hvclock_tsc_to_ticks(&shadow, tsc1);
+	printf("refcnt %lld, TSC %llx, TSC reference %lld\n",
+	       ref1, tsc1, t1);
+
+	do	/* busy-wait until the MSR counter has advanced by 2 * TICKS_PER_SEC */
+		ref2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+	while (ref2 < ref1 + 2 * TICKS_PER_SEC);
+
+	tsc2 = rdtsc();
+	t2 = hvclock_tsc_to_ticks(&shadow, tsc2);
+	printf("refcnt %lld (delta %lld), TSC %llx, TSC reference %lld (delta %lld)\n",
+	       ref2, ref2 - ref1, tsc2, t2, t2 - t1);
+
+	/* re-sync scale and offset to make drift measurement more accurate */
+	wrmsr(HV_X64_MSR_REFERENCE_TSC, (u64)(uintptr_t)tsc_ref | 1);
+
+	ncpus = cpu_count();
+	if (ncpus > MAX_CPU)
+		ncpus = MAX_CPU;	/* ok[] and loops[] only have MAX_CPU slots */
+
+	check_test(ncpus);
+	perf_test(ncpus);
+
+	wrmsr(HV_X64_MSR_REFERENCE_TSC, 0LL);
+	report("MSR value after disabling", rdmsr(HV_X64_MSR_REFERENCE_TSC) == 0);
+	/* Exit code is derived from the report() results, so failures are not lost. */
+	return report_summary();
+}
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index fcee6f9..417d12f 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -203,3 +203,8 @@  extra_params = -cpu kvm64,hv_synic -device hyperv-testdev
 file = hyperv_stimer.flat
 smp = 2
 extra_params = -cpu kvm64,hv_time,hv_synic,hv_stimer -device hyperv-testdev
+
+[hyperv_clock]
+file = hyperv_clock.flat
+smp = 2
+extra_params = -cpu kvm64,hv_time