Message ID | 20191218144233.15372-7-liuwe@microsoft.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Implement Hyper-V reference TSC based clock source | expand |
> -----Original Message----- > From: Wei Liu <wei.liu.xen@gmail.com> On Behalf Of Wei Liu > Sent: 18 December 2019 14:43 > To: Xen Development List <xen-devel@lists.xenproject.org> > Cc: Michael Kelley <mikelley@microsoft.com>; Durrant, Paul > <pdurrant@amazon.com>; Wei Liu <liuwe@microsoft.com>; Jan Beulich > <jbeulich@suse.com>; Andrew Cooper <andrew.cooper3@citrix.com>; Wei Liu > <wl@xen.org>; Roger Pau Monné <roger.pau@citrix.com> > Subject: [PATCH v2 6/6] x86: implement Hyper-V clock source > > Implement a clock source using Hyper-V's reference TSC page. > > Signed-off-by: Wei Liu <liuwe@microsoft.com> > --- > v2: > 1. Address Jan's comments. > > Relevant spec: > > https://github.com/MicrosoftDocs/Virtualization- > Documentation/raw/live/tlfs/Hypervisor%20Top%20Level%20Functional%20Specif > ication%20v5.0C.pdf > > Section 12.6. > --- > xen/arch/x86/time.c | 101 ++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 101 insertions(+) > > diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c > index 216169a025..8b96b2e9a5 100644 > --- a/xen/arch/x86/time.c > +++ b/xen/arch/x86/time.c > @@ -31,6 +31,7 @@ > #include <asm/processor.h> > #include <asm/fixmap.h> > #include <asm/guest.h> > +#include <asm/guest/hyperv-tlfs.h> > #include <asm/mc146818rtc.h> > #include <asm/div64.h> > #include <asm/acpi.h> > @@ -644,6 +645,103 @@ static struct platform_timesource __initdata > plt_xen_timer = > }; > #endif > > +#ifdef CONFIG_HYPERV_GUEST > +/************************************************************ > + * HYPER-V REFERENCE TSC > + */ > + > +static struct ms_hyperv_tsc_page *hyperv_tsc; > +static struct page_info *hyperv_tsc_page; > + > +static int64_t __init init_hyperv_timer(struct platform_timesource *pts) > +{ > + paddr_t maddr; > + uint64_t tsc_msr, freq; > + > + if ( !(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) ) > + return 0; > + > + hyperv_tsc_page = alloc_domheap_page(NULL, 0); > + if ( !hyperv_tsc_page ) > + return 0; > + > + hyperv_tsc = __map_domain_page_global(hyperv_tsc_page); > + if ( !hyperv_tsc ) > + { > + free_domheap_page(hyperv_tsc_page); > + hyperv_tsc_page = NULL; > + return 0; > + } > + > + maddr = page_to_maddr(hyperv_tsc_page); > + > + /* > + * Per Hyper-V TLFS: > + * 1. Read existing MSR value > + * 2. Preserve bits [11:1] > + * 3. Set bits [63:12] to be guest physical address of tsc page > + * 4. Set enabled bit (0) > + * 5. Write back new MSR value > + */ > + rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr); > + tsc_msr &= 0xffeULL; > + tsc_msr |= maddr | 1 /* enabled */; > + wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr); > + You need to check for the HV_X64_ACCESS_FREQUENCY_MSRS feature or you risk a #GP below I think. > + /* Get TSC frequency from Hyper-V */ > + rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq); > + pts->frequency = freq; > + > + return freq; > +} > + > +static inline uint64_t read_hyperv_timer(void) > +{ > + uint64_t scale, offset, ret, tsc; > + uint32_t seq; > + const struct ms_hyperv_tsc_page *tsc_page = hyperv_tsc; > + > + do { > + seq = tsc_page->tsc_sequence; > + > + /* Seq 0 is special. It means the TSC enlightenment is not > + * available at the moment. The reference time can only be > + * obtained from the Reference Counter MSR. > + */ > + if ( seq == 0 ) Older versions of the spec used to use 0xFFFFFFFF I think, although when I look again they seem to have been retro-actively fixed. In any case I think you should treat both 0xFFFFFFFF and 0 as invalid. > + { > + rdmsrl(HV_X64_MSR_TIME_REF_COUNT, ret); > + return ret; > + } > + > + /* rdtsc_ordered already contains a load fence */ > + tsc = rdtsc_ordered(); > + scale = tsc_page->tsc_scale; > + offset = tsc_page->tsc_offset; > + > + smp_rmb(); > + > + } while (tsc_page->tsc_sequence != seq); > + > + /* ret = ((tsc * scale) >> 64) + offset; */ > + asm ( "mul %[scale]; add %[offset], %[ret]" > + : "+a" (tsc), [ret] "=d" (ret) > + : [scale] "rm" (scale), [offset] "rm" (offset) ); > + It would be nice to common this up with scale_tsc() in viridian/time.c. Paul > + return ret; > +} > + > +static struct platform_timesource __initdata plt_hyperv_timer = > +{ > + .id = "hyperv", > + .name = "HYPER-V REFERENCE TSC", > + .read_counter = read_hyperv_timer, > + .init = init_hyperv_timer, > + /* See TSC time source for why counter_bits is set to 63 */ > + .counter_bits = 63, > +}; > +#endif > + > /************************************************************ > * GENERIC PLATFORM TIMER INFRASTRUCTURE > */ > @@ -793,6 +891,9 @@ static u64 __init init_platform_timer(void) > static struct platform_timesource * __initdata plt_timers[] = { > #ifdef CONFIG_XEN_GUEST > &plt_xen_timer, > +#endif > +#ifdef CONFIG_HYPERV_GUEST > + &plt_hyperv_timer, > #endif > &plt_hpet, &plt_pmtimer, &plt_pit > }; > -- > 2.20.1
From: Durrant, Paul <pdurrant@amazon.com> Sent: Wednesday, December 18, 2019 7:24 AM > > From: Wei Liu <wei.liu.xen@gmail.com> On Behalf Of Wei Liu > > Sent: 18 December 2019 14:43 [snip] > > + > > +static inline uint64_t read_hyperv_timer(void) > > +{ > > + uint64_t scale, offset, ret, tsc; > > + uint32_t seq; > > + const struct ms_hyperv_tsc_page *tsc_page = hyperv_tsc; > > + > > + do { > > + seq = tsc_page->tsc_sequence; > > + > > + /* Seq 0 is special. It means the TSC enlightenment is not > > + * available at the moment. The reference time can only be > > + * obtained from the Reference Counter MSR. > > + */ > > + if ( seq == 0 ) > > Older versions of the spec used to use 0xFFFFFFFF I think, although when I look again they > seem to have been retro-actively fixed. In any case I think you should treat both > 0xFFFFFFFF and 0 as invalid. FWIW, the 0xFFFFFFFF was just a bug in the spec. Hyper-V implementations only set the value to 0 to indicate invalid. The equivalent Linux code checks only for 0. Michael
On Wed, 18 Dec 2019 at 20:24, Michael Kelley <mikelley@microsoft.com> wrote: > > From: Durrant, Paul <pdurrant@amazon.com> Sent: Wednesday, December 18, 2019 7:24 AM > > > > From: Wei Liu <wei.liu.xen@gmail.com> On Behalf Of Wei Liu > > > Sent: 18 December 2019 14:43 > > [snip] > > > > + > > > +static inline uint64_t read_hyperv_timer(void) > > > +{ > > > + uint64_t scale, offset, ret, tsc; > > > + uint32_t seq; > > > + const struct ms_hyperv_tsc_page *tsc_page = hyperv_tsc; > > > + > > > + do { > > > + seq = tsc_page->tsc_sequence; > > > + > > > + /* Seq 0 is special. It means the TSC enlightenment is not > > > + * available at the moment. The reference time can only be > > > + * obtained from the Reference Counter MSR. > > > + */ > > > + if ( seq == 0 ) > > > > Older versions of the spec used to use 0xFFFFFFFF I think, although when I look again they > > seem to have been retro-actively fixed. In any case I think you should treat both > > 0xFFFFFFFF and 0 as invalid. > > FWIW, the 0xFFFFFFFF was just a bug in the spec. Hyper-V implementations only > set the value to 0 to indicate invalid. The equivalent Linux code checks only for 0. > Thanks for chiming in, Michael. In that case I will submit a fix to change Xen's viridian code to remove the wrong value there. Wei. > Michael
> -----Original Message----- > From: Wei Liu <wl@xen.org> > Sent: 18 December 2019 22:21 > To: Michael Kelley <mikelley@microsoft.com> > Cc: Durrant, Paul <pdurrant@amazon.com>; Wei Liu <wl@xen.org>; Xen > Development List <xen-devel@lists.xenproject.org>; Wei Liu > <liuwe@microsoft.com>; Jan Beulich <jbeulich@suse.com>; Andrew Cooper > <andrew.cooper3@citrix.com>; Roger Pau Monné <roger.pau@citrix.com> > Subject: Re: [PATCH v2 6/6] x86: implement Hyper-V clock source > > On Wed, 18 Dec 2019 at 20:24, Michael Kelley <mikelley@microsoft.com> > wrote: > > > > From: Durrant, Paul <pdurrant@amazon.com> Sent: Wednesday, December 18, > 2019 7:24 AM > > > > > > From: Wei Liu <wei.liu.xen@gmail.com> On Behalf Of Wei Liu > > > > Sent: 18 December 2019 14:43 > > > > [snip] > > > > > > + > > > > +static inline uint64_t read_hyperv_timer(void) > > > > +{ > > > > + uint64_t scale, offset, ret, tsc; > > > > + uint32_t seq; > > > > + const struct ms_hyperv_tsc_page *tsc_page = hyperv_tsc; > > > > + > > > > + do { > > > > + seq = tsc_page->tsc_sequence; > > > > + > > > > + /* Seq 0 is special. It means the TSC enlightenment is not > > > > + * available at the moment. The reference time can only be > > > > + * obtained from the Reference Counter MSR. > > > > + */ > > > > + if ( seq == 0 ) > > > > > > Older versions of the spec used to use 0xFFFFFFFF I think, although > when I look again they > > > seem to have been retro-actively fixed. In any case I think you should > treat both > > > 0xFFFFFFFF and 0 as invalid. > > > > FWIW, the 0xFFFFFFFF was just a bug in the spec. Hyper-V > implementations only > > set the value to 0 to indicate invalid. The equivalent Linux code > checks only for 0. > > > > Thanks for chiming in, Michael. > > In that case I will submit a fix to change Xen's viridian code to > remove the wrong value there. If no consuming version of Windows is going to be upset seeing all-Fs then that's fine. Thanks for the clarification. Cheers, Paul
On 18.12.2019 15:42, Wei Liu wrote: > --- a/xen/arch/x86/time.c > +++ b/xen/arch/x86/time.c > @@ -31,6 +31,7 @@ > #include <asm/processor.h> > #include <asm/fixmap.h> > #include <asm/guest.h> > +#include <asm/guest/hyperv-tlfs.h> Can this please move ... > @@ -644,6 +645,103 @@ static struct platform_timesource __initdata plt_xen_timer = > }; > #endif > > +#ifdef CONFIG_HYPERV_GUEST ... here, to avoid the dependency on the header when the option is off? > +/************************************************************ > + * HYPER-V REFERENCE TSC > + */ > + > +static struct ms_hyperv_tsc_page *hyperv_tsc; > +static struct page_info *hyperv_tsc_page; > + > +static int64_t __init init_hyperv_timer(struct platform_timesource *pts) > +{ > + paddr_t maddr; > + uint64_t tsc_msr, freq; > + > + if ( !(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) ) > + return 0; > + > + hyperv_tsc_page = alloc_domheap_page(NULL, 0); > + if ( !hyperv_tsc_page ) > + return 0; > + > + hyperv_tsc = __map_domain_page_global(hyperv_tsc_page); > + if ( !hyperv_tsc ) > + { > + free_domheap_page(hyperv_tsc_page); > + hyperv_tsc_page = NULL; > + return 0; > + } > + > + maddr = page_to_maddr(hyperv_tsc_page); > + > + /* > + * Per Hyper-V TLFS: > + * 1. Read existing MSR value > + * 2. Preserve bits [11:1] > + * 3. Set bits [63:12] to be guest physical address of tsc page > + * 4. Set enabled bit (0) > + * 5. Write back new MSR value > + */ > + rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr); > + tsc_msr &= 0xffeULL; There's no real need for the ULL suffix. > + tsc_msr |= maddr | 1 /* enabled */; Stray double blank after |= ? > + wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr); > + > + /* Get TSC frequency from Hyper-V */ > + rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq); > + pts->frequency = freq; > + > + return freq; > +} > + > +static inline uint64_t read_hyperv_timer(void) > +{ > + uint64_t scale, offset, ret, tsc; > + uint32_t seq; > + const struct ms_hyperv_tsc_page *tsc_page = hyperv_tsc; > + > + do { > + seq = tsc_page->tsc_sequence; > + > + /* Seq 0 is special. It means the TSC enlightenment is not > + * available at the moment. The reference time can only be > + * obtained from the Reference Counter MSR. > + */ > + if ( seq == 0 ) > + { > + rdmsrl(HV_X64_MSR_TIME_REF_COUNT, ret); > + return ret; > + } > + > + /* rdtsc_ordered already contains a load fence */ > + tsc = rdtsc_ordered(); > + scale = tsc_page->tsc_scale; > + offset = tsc_page->tsc_offset; > + > + smp_rmb(); > + > + } while (tsc_page->tsc_sequence != seq); > + > + /* ret = ((tsc * scale) >> 64) + offset; */ > + asm ( "mul %[scale]; add %[offset], %[ret]" > + : "+a" (tsc), [ret] "=d" (ret) This needs to be "=&d", or else %rdx may be used to address %[offset] (when in memory). With these taken care of Reviewed-by: Jan Beulich <jbeulich@suse.com> Jan
On 20/12/2019 16:05, Jan Beulich wrote: >> + wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr); >> + >> + /* Get TSC frequency from Hyper-V */ >> + rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq); >> + pts->frequency = freq; >> + >> + return freq; >> +} >> + >> +static inline uint64_t read_hyperv_timer(void) >> +{ >> + uint64_t scale, offset, ret, tsc; >> + uint32_t seq; >> + const struct ms_hyperv_tsc_page *tsc_page = hyperv_tsc; >> + >> + do { >> + seq = tsc_page->tsc_sequence; >> + >> + /* Seq 0 is special. It means the TSC enlightenment is not >> + * available at the moment. The reference time can only be >> + * obtained from the Reference Counter MSR. >> + */ >> + if ( seq == 0 ) >> + { >> + rdmsrl(HV_X64_MSR_TIME_REF_COUNT, ret); >> + return ret; >> + } >> + >> + /* rdtsc_ordered already contains a load fence */ >> + tsc = rdtsc_ordered(); >> + scale = tsc_page->tsc_scale; >> + offset = tsc_page->tsc_offset; >> + >> + smp_rmb(); >> + >> + } while (tsc_page->tsc_sequence != seq); Style. ~Andrew
On Fri, Dec 20, 2019 at 05:05:24PM +0100, Jan Beulich wrote: > On 18.12.2019 15:42, Wei Liu wrote: > > --- a/xen/arch/x86/time.c > > +++ b/xen/arch/x86/time.c > > @@ -31,6 +31,7 @@ > > #include <asm/processor.h> > > #include <asm/fixmap.h> > > #include <asm/guest.h> > > +#include <asm/guest/hyperv-tlfs.h> > > Can this please move ... > > > @@ -644,6 +645,103 @@ static struct platform_timesource __initdata plt_xen_timer = > > }; > > #endif > > > > +#ifdef CONFIG_HYPERV_GUEST > > ... here, to avoid the dependency on the header when the option is > off? > > > +/************************************************************ > > + * HYPER-V REFERENCE TSC > > + */ > > + > > +static struct ms_hyperv_tsc_page *hyperv_tsc; > > +static struct page_info *hyperv_tsc_page; > > + > > +static int64_t __init init_hyperv_timer(struct platform_timesource *pts) > > +{ > > + paddr_t maddr; > > + uint64_t tsc_msr, freq; > > + > > + if ( !(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) ) > > + return 0; > > + > > + hyperv_tsc_page = alloc_domheap_page(NULL, 0); > > + if ( !hyperv_tsc_page ) > > + return 0; > > + > > + hyperv_tsc = __map_domain_page_global(hyperv_tsc_page); > > + if ( !hyperv_tsc ) > > + { > > + free_domheap_page(hyperv_tsc_page); > > + hyperv_tsc_page = NULL; > > + return 0; > > + } > > + > > + maddr = page_to_maddr(hyperv_tsc_page); > > + > > + /* > > + * Per Hyper-V TLFS: > > + * 1. Read existing MSR value > > + * 2. Preserve bits [11:1] > > + * 3. Set bits [63:12] to be guest physical address of tsc page > > + * 4. Set enabled bit (0) > > + * 5. Write back new MSR value > > + */ > > + rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr); > > + tsc_msr &= 0xffeULL; > > There's no real need for the ULL suffix. > > > + tsc_msr |= maddr | 1 /* enabled */; > > Stray double blank after |= ? > > > + wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr); > > + > > + /* Get TSC frequency from Hyper-V */ > > + rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq); > > + pts->frequency = freq; > > + > > + return freq; > > +} > > + > > +static inline uint64_t read_hyperv_timer(void) > > +{ > > + uint64_t scale, offset, ret, tsc; > > + uint32_t seq; > > + const struct ms_hyperv_tsc_page *tsc_page = hyperv_tsc; > > + > > + do { > > + seq = tsc_page->tsc_sequence; > > + > > + /* Seq 0 is special. It means the TSC enlightenment is not > > + * available at the moment. The reference time can only be > > + * obtained from the Reference Counter MSR. > > + */ > > + if ( seq == 0 ) > > + { > > + rdmsrl(HV_X64_MSR_TIME_REF_COUNT, ret); > > + return ret; > > + } > > + > > + /* rdtsc_ordered already contains a load fence */ > > + tsc = rdtsc_ordered(); > > + scale = tsc_page->tsc_scale; > > + offset = tsc_page->tsc_offset; > > + > > + smp_rmb(); > > + > > + } while (tsc_page->tsc_sequence != seq); > > + > > + /* ret = ((tsc * scale) >> 64) + offset; */ > > + asm ( "mul %[scale]; add %[offset], %[ret]" > > + : "+a" (tsc), [ret] "=d" (ret) > > This needs to be "=&d", or else %rdx may be used to address > %[offset] (when in memory). > > With these taken care of > Reviewed-by: Jan Beulich <jbeulich@suse.com> > Thanks. I have addressed yours, Andrew's and Paul's comment (checking HV_X64_ACCESS_FREQUENCY_MSRS) and will push patches that are needed for this clock source to work (patch 1, 5 and 6). Patches for cleaning up viridian code (2-4) will be posted separately with Paul's comments addressed. Wei. > Jan
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c index 216169a025..8b96b2e9a5 100644 --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -31,6 +31,7 @@ #include <asm/processor.h> #include <asm/fixmap.h> #include <asm/guest.h> +#include <asm/guest/hyperv-tlfs.h> #include <asm/mc146818rtc.h> #include <asm/div64.h> #include <asm/acpi.h> @@ -644,6 +645,103 @@ static struct platform_timesource __initdata plt_xen_timer = }; #endif +#ifdef CONFIG_HYPERV_GUEST +/************************************************************ + * HYPER-V REFERENCE TSC + */ + +static struct ms_hyperv_tsc_page *hyperv_tsc; +static struct page_info *hyperv_tsc_page; + +static int64_t __init init_hyperv_timer(struct platform_timesource *pts) +{ + paddr_t maddr; + uint64_t tsc_msr, freq; + + if ( !(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) ) + return 0; + + hyperv_tsc_page = alloc_domheap_page(NULL, 0); + if ( !hyperv_tsc_page ) + return 0; + + hyperv_tsc = __map_domain_page_global(hyperv_tsc_page); + if ( !hyperv_tsc ) + { + free_domheap_page(hyperv_tsc_page); + hyperv_tsc_page = NULL; + return 0; + } + + maddr = page_to_maddr(hyperv_tsc_page); + + /* + * Per Hyper-V TLFS: + * 1. Read existing MSR value + * 2. Preserve bits [11:1] + * 3. Set bits [63:12] to be guest physical address of tsc page + * 4. Set enabled bit (0) + * 5. Write back new MSR value + */ + rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr); + tsc_msr &= 0xffeULL; + tsc_msr |= maddr | 1 /* enabled */; + wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr); + + /* Get TSC frequency from Hyper-V */ + rdmsrl(HV_X64_MSR_TSC_FREQUENCY, freq); + pts->frequency = freq; + + return freq; +} + +static inline uint64_t read_hyperv_timer(void) +{ + uint64_t scale, offset, ret, tsc; + uint32_t seq; + const struct ms_hyperv_tsc_page *tsc_page = hyperv_tsc; + + do { + seq = tsc_page->tsc_sequence; + + /* Seq 0 is special. It means the TSC enlightenment is not + * available at the moment. The reference time can only be + * obtained from the Reference Counter MSR. + */ + if ( seq == 0 ) + { + rdmsrl(HV_X64_MSR_TIME_REF_COUNT, ret); + return ret; + } + + /* rdtsc_ordered already contains a load fence */ + tsc = rdtsc_ordered(); + scale = tsc_page->tsc_scale; + offset = tsc_page->tsc_offset; + + smp_rmb(); + + } while (tsc_page->tsc_sequence != seq); + + /* ret = ((tsc * scale) >> 64) + offset; */ + asm ( "mul %[scale]; add %[offset], %[ret]" + : "+a" (tsc), [ret] "=d" (ret) + : [scale] "rm" (scale), [offset] "rm" (offset) ); + + return ret; +} + +static struct platform_timesource __initdata plt_hyperv_timer = +{ + .id = "hyperv", + .name = "HYPER-V REFERENCE TSC", + .read_counter = read_hyperv_timer, + .init = init_hyperv_timer, + /* See TSC time source for why counter_bits is set to 63 */ + .counter_bits = 63, +}; +#endif + /************************************************************ * GENERIC PLATFORM TIMER INFRASTRUCTURE */ @@ -793,6 +891,9 @@ static u64 __init init_platform_timer(void) static struct platform_timesource * __initdata plt_timers[] = { #ifdef CONFIG_XEN_GUEST &plt_xen_timer, +#endif +#ifdef CONFIG_HYPERV_GUEST + &plt_hyperv_timer, #endif &plt_hpet, &plt_pmtimer, &plt_pit };
Implement a clock source using Hyper-V's reference TSC page. Signed-off-by: Wei Liu <liuwe@microsoft.com> --- v2: 1. Address Jan's comments. Relevant spec: https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/live/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v5.0C.pdf Section 12.6. --- xen/arch/x86/time.c | 101 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+)