diff mbox series

[v3,2/4] x86/APIC: calibrate against platform timer when possible

Message ID cb7dd6bc-79a4-2137-5e82-805b9daf5d58@suse.com (mailing list archive)
State New, archived
Headers show
Series x86: further improve timer freq calibration accuracy | expand

Commit Message

Jan Beulich Feb. 14, 2022, 9:25 a.m. UTC
Use the original calibration against PIT only when the platform timer
is PIT. This implicitly excludes the "xen_guest" case from using the PIT
logic (init_pit() fails there, and as of 5e73b2594c54 ["x86/time: minor
adjustments to init_pit()"] using_pit also isn't being set too early
anymore), so the respective hack there can be dropped at the same time.
This also reduces calibration time from 100ms to 50ms, albeit this step
is being skipped as of 0731a56c7c72 ("x86/APIC: no need for timer
calibration when using TDT") anyway.

While re-indenting the PIT logic in calibrate_APIC_clock(), besides
adjusting style also switch around the 2nd TSC/TMCCT read pair, to match
the order of the 1st one, yielding more consistent deltas.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Open-coding apic_read() in read_tmcct() isn't overly nice, but I wanted
to avoid x2apic_enabled being evaluated twice in close succession. (The
barrier is there merely as a precaution anyway: While this RDMSR isn't
serializing, I'm unaware of any statement whether it can also be
executed speculatively, like RDTSC can.) An option might be to move the
function to apic.c such that it would also be used by
calibrate_APIC_clock().

Unlike the CPU frequencies enumerated in CPUID leaf 0x16 (which aren't
precise), using CPUID[0x15].ECX - if populated - may be an option to
skip calibration altogether. Iirc the value there is precise, but using
the systems I have easy access to I cannot verify this: In the sample
of three I have, none have ECX populated.

I wonder whether the secondary CPU freq measurement (used for display
purposes only) wouldn't better be dropped on this occasion.
---
v2: New.

Comments

Roger Pau Monné March 11, 2022, 1:45 p.m. UTC | #1
On Mon, Feb 14, 2022 at 10:25:11AM +0100, Jan Beulich wrote:
> Use the original calibration against PIT only when the platform timer
> is PIT. This implicitly excludes the "xen_guest" case from using the PIT
> logic (init_pit() fails there, and as of 5e73b2594c54 ["x86/time: minor
> adjustments to init_pit()"] using_pit also isn't being set too early
> anymore), so the respective hack there can be dropped at the same time.
> This also reduces calibration time from 100ms to 50ms, albeit this step
> is being skipped as of 0731a56c7c72 ("x86/APIC: no need for timer
> calibration when using TDT") anyway.
> 
> While re-indenting the PIT logic in calibrate_APIC_clock(), besides
> adjusting style also switch around the 2nd TSC/TMCCT read pair, to match
> the order of the 1st one, yielding more consistent deltas.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> Open-coding apic_read() in read_tmcct() isn't overly nice, but I wanted
> to avoid x2apic_enabled being evaluated twice in close succession. (The
> barrier is there just in case only anyway: While this RDMSR isn't
> serializing, I'm unaware of any statement whether it can also be
> executed speculatively, like RDTSC can.) An option might be to move the
> function to apic.c such that it would also be used by
> calibrate_APIC_clock().

I think that would make sense. Or else it's kind of inconsistent that we
use a barrier in calibrate_apic_timer() but not in calibrate_APIC_clock().
But maybe we can get rid of the open-coded PIT calibration in
calibrate_APIC_clock? (see below)

> --- a/xen/arch/x86/time.c
> +++ b/xen/arch/x86/time.c
> @@ -26,6 +26,7 @@
>  #include <xen/symbols.h>
>  #include <xen/keyhandler.h>
>  #include <xen/guest_access.h>
> +#include <asm/apic.h>
>  #include <asm/io.h>
>  #include <asm/iocap.h>
>  #include <asm/msr.h>
> @@ -1004,6 +1005,78 @@ static u64 __init init_platform_timer(vo
>      return rc;
>  }
>  
> +static uint32_t __init read_tmcct(void)
> +{
> +    if ( x2apic_enabled )
> +    {
> +        alternative("lfence", "mfence", X86_FEATURE_MFENCE_RDTSC);
> +        return apic_rdmsr(APIC_TMCCT);
> +    }
> +
> +    return apic_mem_read(APIC_TMCCT);
> +}
> +
> +static uint64_t __init read_pt_and_tmcct(uint32_t *tmcct)
> +{
> +    uint32_t tmcct_prev = *tmcct = read_tmcct(), tmcct_min = ~0;
> +    uint64_t best = best;
> +    unsigned int i;
> +
> +    for ( i = 0; ; ++i )
> +    {
> +        uint64_t pt = plt_src.read_counter();
> +        uint32_t tmcct_cur = read_tmcct();
> +        uint32_t tmcct_delta = tmcct_prev - tmcct_cur;
> +
> +        if ( tmcct_delta < tmcct_min )
> +        {
> +            tmcct_min = tmcct_delta;
> +            *tmcct = tmcct_cur;
> +            best = pt;
> +        }
> +        else if ( i > 2 )
> +            break;
> +
> +        tmcct_prev = tmcct_cur;
> +    }
> +
> +    return best;
> +}
> +
> +uint64_t __init calibrate_apic_timer(void)
> +{
> +    uint32_t start, end;
> +    uint64_t count = read_pt_and_tmcct(&start), elapsed;
> +    uint64_t target = CALIBRATE_VALUE(plt_src.frequency), actual;
> +    uint64_t mask = (uint64_t)~0 >> (64 - plt_src.counter_bits);
> +
> +    /*
> +     * PIT cannot be used here as it requires the timer interrupt to maintain
> +     * its 32-bit software counter, yet here we run with IRQs disabled.
> +     */

The reasoning in calibrate_APIC_clock to have interrupts disabled
doesn't apply anymore I would think (interrupts are already enabled
when we get there), and hence it seems to me that calibrate_APIC_clock
could be called with interrupts enabled and we could remove the
open-coded usage of the PIT in calibrate_APIC_clock.

Thanks, Roger.
Jan Beulich March 14, 2022, 4:19 p.m. UTC | #2
On 11.03.2022 14:45, Roger Pau Monné wrote:
> On Mon, Feb 14, 2022 at 10:25:11AM +0100, Jan Beulich wrote:
>> Use the original calibration against PIT only when the platform timer
>> is PIT. This implicitly excludes the "xen_guest" case from using the PIT
>> logic (init_pit() fails there, and as of 5e73b2594c54 ["x86/time: minor
>> adjustments to init_pit()"] using_pit also isn't being set too early
>> anymore), so the respective hack there can be dropped at the same time.
>> This also reduces calibration time from 100ms to 50ms, albeit this step
>> is being skipped as of 0731a56c7c72 ("x86/APIC: no need for timer
>> calibration when using TDT") anyway.
>>
>> While re-indenting the PIT logic in calibrate_APIC_clock(), besides
>> adjusting style also switch around the 2nd TSC/TMCCT read pair, to match
>> the order of the 1st one, yielding more consistent deltas.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> Open-coding apic_read() in read_tmcct() isn't overly nice, but I wanted
>> to avoid x2apic_enabled being evaluated twice in close succession. (The
>> barrier is there just in case only anyway: While this RDMSR isn't
>> serializing, I'm unaware of any statement whether it can also be
>> executed speculatively, like RDTSC can.) An option might be to move the
>> function to apic.c such that it would also be used by
>> calibrate_APIC_clock().
> 
> I think that would make sense. Or else it's kind of orthogonal that we
> use a barrier in calibrate_apic_timer but not in calibrate_APIC_clock.

But there is a barrier there, via rdtsc_ordered(). Thinking about
this again, I'm not even sure I'd like to use the helper in
calibrate_APIC_clock(), as there's no need to have two barriers
there.

But I guess I'll move the function in any event, so it at least
feels less like a layering violation. But I still would want to
avoid calling apic_read(), i.e. the function would remain as is
(albeit perhaps renamed as becoming non-static).

> But maybe we can get rid of the open-coded PIT calibration in
> calibrate_APIC_clock? (see below)
> 
>> --- a/xen/arch/x86/time.c
>> +++ b/xen/arch/x86/time.c
>> @@ -26,6 +26,7 @@
>>  #include <xen/symbols.h>
>>  #include <xen/keyhandler.h>
>>  #include <xen/guest_access.h>
>> +#include <asm/apic.h>
>>  #include <asm/io.h>
>>  #include <asm/iocap.h>
>>  #include <asm/msr.h>
>> @@ -1004,6 +1005,78 @@ static u64 __init init_platform_timer(vo
>>      return rc;
>>  }
>>  
>> +static uint32_t __init read_tmcct(void)
>> +{
>> +    if ( x2apic_enabled )
>> +    {
>> +        alternative("lfence", "mfence", X86_FEATURE_MFENCE_RDTSC);
>> +        return apic_rdmsr(APIC_TMCCT);
>> +    }
>> +
>> +    return apic_mem_read(APIC_TMCCT);
>> +}
>> +
>> +static uint64_t __init read_pt_and_tmcct(uint32_t *tmcct)
>> +{
>> +    uint32_t tmcct_prev = *tmcct = read_tmcct(), tmcct_min = ~0;
>> +    uint64_t best = best;
>> +    unsigned int i;
>> +
>> +    for ( i = 0; ; ++i )
>> +    {
>> +        uint64_t pt = plt_src.read_counter();
>> +        uint32_t tmcct_cur = read_tmcct();
>> +        uint32_t tmcct_delta = tmcct_prev - tmcct_cur;
>> +
>> +        if ( tmcct_delta < tmcct_min )
>> +        {
>> +            tmcct_min = tmcct_delta;
>> +            *tmcct = tmcct_cur;
>> +            best = pt;
>> +        }
>> +        else if ( i > 2 )
>> +            break;
>> +
>> +        tmcct_prev = tmcct_cur;
>> +    }
>> +
>> +    return best;
>> +}
>> +
>> +uint64_t __init calibrate_apic_timer(void)
>> +{
>> +    uint32_t start, end;
>> +    uint64_t count = read_pt_and_tmcct(&start), elapsed;
>> +    uint64_t target = CALIBRATE_VALUE(plt_src.frequency), actual;
>> +    uint64_t mask = (uint64_t)~0 >> (64 - plt_src.counter_bits);
>> +
>> +    /*
>> +     * PIT cannot be used here as it requires the timer interrupt to maintain
>> +     * its 32-bit software counter, yet here we run with IRQs disabled.
>> +     */
> 
> The reasoning in calibrate_APIC_clock to have interrupts disabled
> doesn't apply anymore I would think (interrupts are already enabled
> when we get there),

setup_boot_APIC_clock() disables IRQs before calling
calibrate_APIC_clock(). Whether the reasoning still applies is hard
to tell - I at least cannot claim I fully understand the concern.

> and hence it seems to me that calibrate_APIC_clock
> could be called with interrupts enabled and we could remove the
> open-coded usage of the PIT in calibrate_APIC_clock.

I won't exclude this might be possible, but it would mean changing
a path which is hardly ever used nowadays. While on one hand this
means hardly anyone might notice, otoh it also means possible
breakage might not be noticed until far in the future. It anyway
feels too much for a single change to also alter calibration against
PIT right here.

One thing seems quite clear though: Doing any of this with interrupts
enabled increases the chances for the read pairs to not properly
correlate, due to an interrupt happening in the middle. This alone is
a reason for me to want to keep IRQs off here.

Jan
Roger Pau Monné March 15, 2022, 9:12 a.m. UTC | #3
On Mon, Mar 14, 2022 at 05:19:37PM +0100, Jan Beulich wrote:
> On 11.03.2022 14:45, Roger Pau Monné wrote:
> > On Mon, Feb 14, 2022 at 10:25:11AM +0100, Jan Beulich wrote:
> >> Use the original calibration against PIT only when the platform timer
> >> is PIT. This implicitly excludes the "xen_guest" case from using the PIT
> >> logic (init_pit() fails there, and as of 5e73b2594c54 ["x86/time: minor
> >> adjustments to init_pit()"] using_pit also isn't being set too early
> >> anymore), so the respective hack there can be dropped at the same time.
> >> This also reduces calibration time from 100ms to 50ms, albeit this step
> >> is being skipped as of 0731a56c7c72 ("x86/APIC: no need for timer
> >> calibration when using TDT") anyway.
> >>
> >> While re-indenting the PIT logic in calibrate_APIC_clock(), besides
> >> adjusting style also switch around the 2nd TSC/TMCCT read pair, to match
> >> the order of the 1st one, yielding more consistent deltas.
> >>
> >> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> >> ---
> >> Open-coding apic_read() in read_tmcct() isn't overly nice, but I wanted
> >> to avoid x2apic_enabled being evaluated twice in close succession. (The
> >> barrier is there just in case only anyway: While this RDMSR isn't
> >> serializing, I'm unaware of any statement whether it can also be
> >> executed speculatively, like RDTSC can.) An option might be to move the
> >> function to apic.c such that it would also be used by
> >> calibrate_APIC_clock().
> > 
> > I think that would make sense. Or else it's kind of orthogonal that we
> > use a barrier in calibrate_apic_timer but not in calibrate_APIC_clock.
> 
> But there is a barrier there, via rdtsc_ordered(). Thinking about
> this again, I'm not even sure I'd like to use the helper in
> calibrate_APIC_clock(), as there's no need to have two barriers
> there.
> 
> But I guess I'll move the function in any event, so it at least
> feels less like a layering violation. But I still would want to
> avoid calling apic_read(), i.e. the function would remain as is
> (albeit perhaps renamed as becoming non-static).
> 
> > But maybe we can get rid of the open-coded PIT calibration in
> > calibrate_APIC_clock? (see below)
> > 
> >> --- a/xen/arch/x86/time.c
> >> +++ b/xen/arch/x86/time.c
> >> @@ -26,6 +26,7 @@
> >>  #include <xen/symbols.h>
> >>  #include <xen/keyhandler.h>
> >>  #include <xen/guest_access.h>
> >> +#include <asm/apic.h>
> >>  #include <asm/io.h>
> >>  #include <asm/iocap.h>
> >>  #include <asm/msr.h>
> >> @@ -1004,6 +1005,78 @@ static u64 __init init_platform_timer(vo
> >>      return rc;
> >>  }
> >>  
> >> +static uint32_t __init read_tmcct(void)
> >> +{
> >> +    if ( x2apic_enabled )
> >> +    {
> >> +        alternative("lfence", "mfence", X86_FEATURE_MFENCE_RDTSC);
> >> +        return apic_rdmsr(APIC_TMCCT);
> >> +    }
> >> +
> >> +    return apic_mem_read(APIC_TMCCT);
> >> +}
> >> +
> >> +static uint64_t __init read_pt_and_tmcct(uint32_t *tmcct)
> >> +{
> >> +    uint32_t tmcct_prev = *tmcct = read_tmcct(), tmcct_min = ~0;
> >> +    uint64_t best = best;
> >> +    unsigned int i;
> >> +
> >> +    for ( i = 0; ; ++i )
> >> +    {
> >> +        uint64_t pt = plt_src.read_counter();
> >> +        uint32_t tmcct_cur = read_tmcct();
> >> +        uint32_t tmcct_delta = tmcct_prev - tmcct_cur;
> >> +
> >> +        if ( tmcct_delta < tmcct_min )
> >> +        {
> >> +            tmcct_min = tmcct_delta;
> >> +            *tmcct = tmcct_cur;
> >> +            best = pt;
> >> +        }
> >> +        else if ( i > 2 )
> >> +            break;
> >> +
> >> +        tmcct_prev = tmcct_cur;
> >> +    }
> >> +
> >> +    return best;
> >> +}
> >> +
> >> +uint64_t __init calibrate_apic_timer(void)
> >> +{
> >> +    uint32_t start, end;
> >> +    uint64_t count = read_pt_and_tmcct(&start), elapsed;
> >> +    uint64_t target = CALIBRATE_VALUE(plt_src.frequency), actual;
> >> +    uint64_t mask = (uint64_t)~0 >> (64 - plt_src.counter_bits);
> >> +
> >> +    /*
> >> +     * PIT cannot be used here as it requires the timer interrupt to maintain
> >> +     * its 32-bit software counter, yet here we run with IRQs disabled.
> >> +     */
> > 
> > The reasoning in calibrate_APIC_clock to have interrupts disabled
> > doesn't apply anymore I would think (interrupts are already enabled
> > when we get there),
> 
> setup_boot_APIC_clock() disables IRQs before calling
> calibrate_APIC_clock(). Whether the reasoning still applies is hard
> to tell - I at least cannot claim I fully understand the concern.

Me neither, I'm not sure what will explicitly need the first
interrupt, and why further interrupts won't be fine.

Also interrupts are already enabled before calling
calibrate_APIC_clock() (as it's setup_boot_APIC_clock() that disables
them), so this whole thing about getting the first interrupt seems
very bogus and plain wrong.

> > and hence it seems to me that calibrate_APIC_clock
> > could be called with interrupts enabled and we could remove the
> > open-coded usage of the PIT in calibrate_APIC_clock.
> 
> I won't exclude this might be possible, but it would mean changing
> a path which is hardly ever used nowadays. While on one hand this
> means hardly anyone might notice, otoh it also means possible
> breakage might not be noticed until far in the future. It anyway
> feels too much for a single change to also alter calibration against
> PIT right here.

You are already changing this path by using a clocksource different
than PIT to perform the calibration.

> One thing seems quite clear though: Doing any of this with interrupts
> enabled increases the chances for the read pairs to not properly
> correlate, due to an interrupt happening in the middle. This alone is
> a reason for me to want to keep IRQs off here.

Right, TSC calibration is also done with interrupts disabled, so it
does seem correct to do the same here for APIC.

Maybe it would be cleaner to hide the specific PIT logic in
calibrate_apic_timer() so that we could remove get_8254_timer_count()
and wait_8254_wraparound() from apic.c and apic.c doesn't have any PIT
specific code anymore?

I think using channel 2 like it's used for the TSC calibration won't
be possible at this point, since it will skew read_pit_count() users?
In any case if we disable interrupts those will already be skewed
because the timer won't be rearmed until interrupts are enabled.

Thanks, Roger.
Jan Beulich March 15, 2022, 10:39 a.m. UTC | #4
On 15.03.2022 10:12, Roger Pau Monné wrote:
> On Mon, Mar 14, 2022 at 05:19:37PM +0100, Jan Beulich wrote:
>> One thing seems quite clear though: Doing any of this with interrupts
>> enabled increases the chances for the read pairs to not properly
>> correlate, due to an interrupt happening in the middle. This alone is
>> a reason for me to want to keep IRQs off here.
> 
> Right, TSC calibration is also done with interrupts disabled, so it
> does seem correct to do the same here for APIC.
> 
> Maybe it would be cleaner to hide the specific PIT logic in
> calibrate_apic_timer() so that we could remove get_8254_timer_count()
> and wait_8254_wraparound() from apic.c and apic.c doesn't have any PIT
> specific code anymore?

Yes, that's certainly a further cleanup step to take (saying this
without actually having tried, so there may be obstacles).

Jan

> I think using channel 2 like it's used for the TSC calibration won't
> be possible at this point, since it will skew read_pit_count() users?
> In any case if we disable interrupts those will already be skewed
> because the timer won't be rearmed until interrupts are enabled.
> 
> Thanks, Roger.
>
Roger Pau Monné March 15, 2022, 2:57 p.m. UTC | #5
On Tue, Mar 15, 2022 at 11:39:29AM +0100, Jan Beulich wrote:
> On 15.03.2022 10:12, Roger Pau Monné wrote:
> > On Mon, Mar 14, 2022 at 05:19:37PM +0100, Jan Beulich wrote:
> >> One thing seems quite clear though: Doing any of this with interrupts
> >> enabled increases the chances for the read pairs to not properly
> >> correlate, due to an interrupt happening in the middle. This alone is
> >> a reason for me to want to keep IRQs off here.
> > 
> > Right, TSC calibration is also done with interrupts disabled, so it
> > does seem correct to do the same here for APIC.
> > 
> > Maybe it would be cleaner to hide the specific PIT logic in
> > calibrate_apic_timer() so that we could remove get_8254_timer_count()
> > and wait_8254_wraparound() from apic.c and apic.c doesn't have any PIT
> > specific code anymore?
> 
> Yes, that's certainly a further cleanup step to take (saying this
> without actually having tried, so there may be obstacles).

OK, I think you are planning to post a new version of this to avoid
open-coding apic_read() in read_tmcct()?

TBH the PIT calibration done in calibrate_APIC_clock seems fairly
bogus, as it's possible the counter wraps around more than once
between calls when running virtualized. Maybe reprogramming channel 2
would be better, as then at least wrap around would be detected
(albeit it's unclear how much delta we would have between the counter
reaching 0 and Xen realizing).

Thanks, Roger.
diff mbox series

Patch

--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -1182,20 +1182,6 @@  static void __init check_deadline_errata
            "please update microcode to version %#x (or later)\n", rev);
 }
 
-static void __init wait_tick_pvh(void)
-{
-    u64 lapse_ns = 1000000000ULL / HZ;
-    s_time_t start, curr_time;
-
-    start = NOW();
-
-    /* Won't wrap around */
-    do {
-        cpu_relax();
-        curr_time = NOW();
-    } while ( curr_time - start < lapse_ns );
-}
-
 /*
  * In this function we calibrate APIC bus clocks to the external
  * timer. Unfortunately we cannot use jiffies and the timer irq
@@ -1211,9 +1197,6 @@  static void __init wait_tick_pvh(void)
 
 static void __init calibrate_APIC_clock(void)
 {
-    unsigned long long t1, t2;
-    unsigned long tt1, tt2;
-    unsigned int i;
     unsigned long bus_freq; /* KAF: pointer-size avoids compile warns. */
     unsigned int bus_cycle; /* length of one bus cycle in pico-seconds */
 #define LOOPS_FRAC 10U      /* measure for one tenth of a second */
@@ -1226,39 +1209,38 @@  static void __init calibrate_APIC_clock(
      */
     __setup_APIC_LVTT(0xffffffff);
 
-    if ( !xen_guest )
+    bus_freq = calibrate_apic_timer();
+    if ( !bus_freq )
+    {
+        unsigned int i, tt1, tt2;
+        unsigned long t1, t2;
+
+        ASSERT(!xen_guest);
+
         /*
-         * The timer chip counts down to zero. Let's wait
-         * for a wraparound to start exact measurement:
-         * (the current tick might have been already half done)
+         * The timer chip counts down to zero. Let's wait for a wraparound to
+         * start exact measurement (the current tick might have been already
+         * half done):
          */
         wait_8254_wraparound();
-    else
-        wait_tick_pvh();
 
-    /*
-     * We wrapped around just now. Let's start:
-     */
-    t1 = rdtsc_ordered();
-    tt1 = apic_read(APIC_TMCCT);
+        /* We wrapped around just now. Let's start: */
+        t1 = rdtsc_ordered();
+        tt1 = apic_read(APIC_TMCCT);
 
-    /*
-     * Let's wait HZ / LOOPS_FRAC ticks:
-     */
-    for (i = 0; i < HZ / LOOPS_FRAC; i++)
-        if ( !xen_guest )
+        /* Let's wait HZ / LOOPS_FRAC ticks: */
+        for ( i = 0; i < HZ / LOOPS_FRAC; ++i )
             wait_8254_wraparound();
-        else
-            wait_tick_pvh();
 
-    tt2 = apic_read(APIC_TMCCT);
-    t2 = rdtsc_ordered();
+        t2 = rdtsc_ordered();
+        tt2 = apic_read(APIC_TMCCT);
 
-    bus_freq = (tt1 - tt2) * APIC_DIVISOR * LOOPS_FRAC;
+        bus_freq = (tt1 - tt2) * APIC_DIVISOR * LOOPS_FRAC;
 
-    apic_printk(APIC_VERBOSE, "..... CPU clock speed is %lu.%04lu MHz.\n",
-                ((unsigned long)(t2 - t1) * LOOPS_FRAC) / 1000000,
-                (((unsigned long)(t2 - t1) * LOOPS_FRAC) / 100) % 10000);
+        apic_printk(APIC_VERBOSE, "..... CPU clock speed is %lu.%04lu MHz.\n",
+                    ((t2 - t1) * LOOPS_FRAC) / 1000000,
+                    (((t2 - t1) * LOOPS_FRAC) / 100) % 10000);
+    }
 
     apic_printk(APIC_VERBOSE, "..... host bus clock speed is %ld.%04ld MHz.\n",
                 bus_freq / 1000000, (bus_freq / 100) % 10000);
--- a/xen/arch/x86/include/asm/apic.h
+++ b/xen/arch/x86/include/asm/apic.h
@@ -192,6 +192,8 @@  extern void record_boot_APIC_mode(void);
 extern enum apic_mode current_local_apic_mode(void);
 extern void check_for_unexpected_msi(unsigned int vector);
 
+uint64_t calibrate_apic_timer(void);
+
 extern void check_nmi_watchdog(void);
 
 extern unsigned int nmi_watchdog;
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -26,6 +26,7 @@ 
 #include <xen/symbols.h>
 #include <xen/keyhandler.h>
 #include <xen/guest_access.h>
+#include <asm/apic.h>
 #include <asm/io.h>
 #include <asm/iocap.h>
 #include <asm/msr.h>
@@ -1004,6 +1005,78 @@  static u64 __init init_platform_timer(vo
     return rc;
 }
 
+static uint32_t __init read_tmcct(void)
+{
+    if ( x2apic_enabled )
+    {
+        alternative("lfence", "mfence", X86_FEATURE_MFENCE_RDTSC);
+        return apic_rdmsr(APIC_TMCCT);
+    }
+
+    return apic_mem_read(APIC_TMCCT);
+}
+
+static uint64_t __init read_pt_and_tmcct(uint32_t *tmcct)
+{
+    uint32_t tmcct_prev = *tmcct = read_tmcct(), tmcct_min = ~0;
+    uint64_t best = best;
+    unsigned int i;
+
+    for ( i = 0; ; ++i )
+    {
+        uint64_t pt = plt_src.read_counter();
+        uint32_t tmcct_cur = read_tmcct();
+        uint32_t tmcct_delta = tmcct_prev - tmcct_cur;
+
+        if ( tmcct_delta < tmcct_min )
+        {
+            tmcct_min = tmcct_delta;
+            *tmcct = tmcct_cur;
+            best = pt;
+        }
+        else if ( i > 2 )
+            break;
+
+        tmcct_prev = tmcct_cur;
+    }
+
+    return best;
+}
+
+uint64_t __init calibrate_apic_timer(void)
+{
+    uint32_t start, end;
+    uint64_t count = read_pt_and_tmcct(&start), elapsed;
+    uint64_t target = CALIBRATE_VALUE(plt_src.frequency), actual;
+    uint64_t mask = (uint64_t)~0 >> (64 - plt_src.counter_bits);
+
+    /*
+     * PIT cannot be used here as it requires the timer interrupt to maintain
+     * its 32-bit software counter, yet here we run with IRQs disabled.
+     */
+    if ( using_pit )
+        return 0;
+
+    while ( ((plt_src.read_counter() - count) & mask) < target )
+        continue;
+
+    actual = read_pt_and_tmcct(&end) - count;
+    elapsed = start - end;
+
+    if ( likely(actual > target) )
+    {
+        /* See the comment in calibrate_tsc(). */
+        while ( unlikely(actual > (uint32_t)actual) )
+        {
+            actual >>= 1;
+            target >>= 1;
+        }
+        elapsed = muldiv64(elapsed, target, actual);
+    }
+
+    return elapsed * CALIBRATE_FRAC;
+}
+
 u64 stime2tsc(s_time_t stime)
 {
     struct cpu_time *t;