diff mbox series

[v8,10/11] viridian: add implementation of synthetic timers

Message ID 20190318112059.21910-11-paul.durrant@citrix.com (mailing list archive)
State Superseded
Headers show
Series viridian: implement more enlightenments | expand

Commit Message

Paul Durrant March 18, 2019, 11:20 a.m. UTC
This patch introduces an implementation of the STIMER0-15_CONFIG/COUNT MSRs
and hence the first SynIC message source.

The new (and documented) 'stimer' viridian enlightenment group may be
specified to enable this feature.

While in the neighbourhood, this patch adds a missing check for an
attempt to write the time reference count MSR, which should result in an
exception (but not be reported as an unimplemented MSR).

NOTE: It is necessary for correct operation that timer expiration and
      message delivery time-stamping use the same time source as the guest.
      The specification is ambiguous but testing with a Windows 10 1803
      guest has shown that using the partition reference counter as a
      source whilst the guest is using RDTSC and the reference tsc page
      does not work correctly. Therefore the time_now() function is used.
      This implements the algorithm for acquiring partition reference time
      that is documented in the specification.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
---
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: George Dunlap <George.Dunlap@eu.citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Julien Grall <julien.grall@arm.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Tim Deegan <tim@xen.org>
Cc: "Roger Pau Monné" <roger.pau@citrix.com>

v8:
 - Squash in https://lists.xenproject.org/archives/html/xen-devel/2019-03/msg01333.html

v7:
 - Make sure missed count cannot be zero if expiration < now

v6:
 - Stop using the reference tsc page in time_now()
 - Address further comments from Jan

v5:
 - Fix time_now() to read TSC as the guest would see it

v4:
 - Address comments from Jan

v3:
 - Re-worked missed ticks calculation
---
 docs/man/xl.cfg.5.pod.in               |  12 +-
 tools/libxl/libxl.h                    |   6 +
 tools/libxl/libxl_dom.c                |   4 +
 tools/libxl/libxl_types.idl            |   1 +
 xen/arch/x86/hvm/viridian/private.h    |   9 +-
 xen/arch/x86/hvm/viridian/synic.c      |  55 +++-
 xen/arch/x86/hvm/viridian/time.c       | 386 ++++++++++++++++++++++++-
 xen/arch/x86/hvm/viridian/viridian.c   |   5 +
 xen/include/asm-x86/hvm/viridian.h     |  32 +-
 xen/include/public/arch-x86/hvm/save.h |   2 +
 xen/include/public/hvm/params.h        |   7 +-
 11 files changed, 506 insertions(+), 13 deletions(-)

Comments

Jan Beulich March 18, 2019, 2:24 p.m. UTC | #1
>>> On 18.03.19 at 12:20, <paul.durrant@citrix.com> wrote:
> @@ -72,11 +77,14 @@ static void update_reference_tsc(struct domain *d, bool initialize)
>       * ticks per 100ns shifted left by 64.
>       */
>      p->TscScale = ((10000ul << 32) / d->arch.tsc_khz) << 32;
> +    smp_wmb();
> +
> +    seq = p->TscSequence + 1;
> +    if ( seq == 0xFFFFFFFF || seq == 0 ) /* Avoid both 'invalid' values */
> +        seq = 1;
>  
> -    p->TscSequence++;
> -    if ( p->TscSequence == 0xFFFFFFFF ||
> -         p->TscSequence == 0 ) /* Avoid both 'invalid' values */
> -        p->TscSequence = 1;
> +    p->TscSequence = seq;
> +    vd->reference_tsc_valid = true;

Strictly speaking, don't you need another smp_wmb() between
these two lines?

> +static void start_stimer(struct viridian_stimer *vs)
> +{
> +    const struct vcpu *v = vs->v;
> +    struct viridian_vcpu *vv = v->arch.hvm.viridian;
> +    unsigned int stimerx = vs - &vv->stimer[0];
> +    int64_t now = time_now(v->domain);
> +    int64_t expiration;
> +    s_time_t timeout;
> +
> +    if ( !test_and_set_bit(stimerx, &vv->stimer_enabled) )
> +        printk(XENLOG_G_INFO "%pv: VIRIDIAN STIMER%u: enabled\n", v,
> +               stimerx);
> +
> +    if ( vs->config.periodic )
> +    {
> +        /*
> +         * The specification says that if the timer is lazy then we
> +         * skip over any missed expirations so we can treat this case
> +         * as the same as if the timer is currently stopped, i.e. we
> +         * just schedule expiration to be 'count' ticks from now.
> +         */
> +        if ( !vs->started || vs->config.lazy )
> +            expiration = now + vs->count;
> +        else
> +        {
> +            unsigned int missed = 0;
> +
> +            /*
> +             * The timer is already started, so we're re-scheduling.
> +             * Hence advance the timer expiration by one tick.
> +             */
> +            expiration = vs->expiration + vs->count;
> +
> +            /* Now check to see if any expirations have been missed */
> +            if ( expiration - now <= 0 )
> +                missed = ((now - expiration) / vs->count) + 1;
> +
> +            /*
> +             * The specification says that if the timer is not lazy then
> +             * a non-zero missed count should be used to reduce the period
> +             * of the timer until it catches up, unless the count has
> +             * reached a 'significant number', in which case the timer
> +             * should be treated as lazy. Unfortunately the specification
> +             * does not state what that number is so the choice of number
> +             * here is a pure guess.
> +             */
> +            if ( missed > 3 )
> +                expiration = now + vs->count;
> +            else if ( missed )
> +                expiration = now + (vs->count / missed);
> +        }
> +    }
> +    else
> +    {
> +        expiration = vs->count;
> +        if ( expiration - now <= 0 )
> +        {
> +            vs->expiration = expiration;
> +            stimer_expire(vs);

Aren't you introducing a risk for races by calling the timer function
directly from here? start_timer(), after all, gets called from quite a
few places. 

> +            return;
> +        }
> +    }
> +    ASSERT(expiration - now > 0);
> +
> +    vs->expiration = expiration;
> +    timeout = (expiration - now) * 100ull;
> +
> +    vs->started = true;
> +    migrate_timer(&vs->timer, smp_processor_id());

Why is this smp_processor_id() when viridian_time_vcpu_init() uses
v->processor? How relevant is it in the first place to trace the pCPU
the vCPU runs on for the timer?

Jan
Paul Durrant March 18, 2019, 2:37 p.m. UTC | #2
> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 18 March 2019 14:24
> To: Paul Durrant <Paul.Durrant@citrix.com>
> Cc: Julien Grall <julien.grall@arm.com>; Andrew Cooper <Andrew.Cooper3@citrix.com>; Roger Pau Monne
> <roger.pau@citrix.com>; Wei Liu <wei.liu2@citrix.com>; George Dunlap <George.Dunlap@citrix.com>; Ian
> Jackson <Ian.Jackson@citrix.com>; Stefano Stabellini <sstabellini@kernel.org>; xen-devel <xen-
> devel@lists.xenproject.org>; Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>; Tim (Xen.org)
> <tim@xen.org>
> Subject: Re: [Xen-devel] [PATCH v8 10/11] viridian: add implementation of synthetic timers
> 
> >>> On 18.03.19 at 12:20, <paul.durrant@citrix.com> wrote:
> > @@ -72,11 +77,14 @@ static void update_reference_tsc(struct domain *d, bool initialize)
> >       * ticks per 100ns shifted left by 64.
> >       */
> >      p->TscScale = ((10000ul << 32) / d->arch.tsc_khz) << 32;
> > +    smp_wmb();
> > +
> > +    seq = p->TscSequence + 1;
> > +    if ( seq == 0xFFFFFFFF || seq == 0 ) /* Avoid both 'invalid' values */
> > +        seq = 1;
> >
> > -    p->TscSequence++;
> > -    if ( p->TscSequence == 0xFFFFFFFF ||
> > -         p->TscSequence == 0 ) /* Avoid both 'invalid' values */
> > -        p->TscSequence = 1;
> > +    p->TscSequence = seq;
> > +    vd->reference_tsc_valid = true;
> 
> Strictly speaking, don't you need another smp_wmb() between
> these two lines?
> 

Since the data in the page is not used by time_now() I don't think so.

> > +static void start_stimer(struct viridian_stimer *vs)
> > +{
> > +    const struct vcpu *v = vs->v;
> > +    struct viridian_vcpu *vv = v->arch.hvm.viridian;
> > +    unsigned int stimerx = vs - &vv->stimer[0];
> > +    int64_t now = time_now(v->domain);
> > +    int64_t expiration;
> > +    s_time_t timeout;
> > +
> > +    if ( !test_and_set_bit(stimerx, &vv->stimer_enabled) )
> > +        printk(XENLOG_G_INFO "%pv: VIRIDIAN STIMER%u: enabled\n", v,
> > +               stimerx);
> > +
> > +    if ( vs->config.periodic )
> > +    {
> > +        /*
> > +         * The specification says that if the timer is lazy then we
> > +         * skip over any missed expirations so we can treat this case
> > +         * as the same as if the timer is currently stopped, i.e. we
> > +         * just schedule expiration to be 'count' ticks from now.
> > +         */
> > +        if ( !vs->started || vs->config.lazy )
> > +            expiration = now + vs->count;
> > +        else
> > +        {
> > +            unsigned int missed = 0;
> > +
> > +            /*
> > +             * The timer is already started, so we're re-scheduling.
> > +             * Hence advance the timer expiration by one tick.
> > +             */
> > +            expiration = vs->expiration + vs->count;
> > +
> > +            /* Now check to see if any expirations have been missed */
> > +            if ( expiration - now <= 0 )
> > +                missed = ((now - expiration) / vs->count) + 1;
> > +
> > +            /*
> > +             * The specification says that if the timer is not lazy then
> > +             * a non-zero missed count should be used to reduce the period
> > +             * of the timer until it catches up, unless the count has
> > +             * reached a 'significant number', in which case the timer
> > +             * should be treated as lazy. Unfortunately the specification
> > +             * does not state what that number is so the choice of number
> > +             * here is a pure guess.
> > +             */
> > +            if ( missed > 3 )
> > +                expiration = now + vs->count;
> > +            else if ( missed )
> > +                expiration = now + (vs->count / missed);
> > +        }
> > +    }
> > +    else
> > +    {
> > +        expiration = vs->count;
> > +        if ( expiration - now <= 0 )
> > +        {
> > +            vs->expiration = expiration;
> > +            stimer_expire(vs);
> 
> Aren't you introducing a risk for races by calling the timer function
> directly from here? start_timer(), after all, gets called from quite a
> few places.
> 

In practice I don't think there should be any problematic race, but I'll check again.

> > +            return;
> > +        }
> > +    }
> > +    ASSERT(expiration - now > 0);
> > +
> > +    vs->expiration = expiration;
> > +    timeout = (expiration - now) * 100ull;
> > +
> > +    vs->started = true;
> > +    migrate_timer(&vs->timer, smp_processor_id());
> 
> Why is this smp_processor_id() when viridian_time_vcpu_init() uses
> v->processor? How relevant is it in the first place to trace the pCPU
> the vCPU runs on for the timer?
> 

I was just following suit with other timer code. It seems to be the norm to migrate to the current pCPU just prior to starting a timer.

  Paul

> Jan
>
Jan Beulich March 18, 2019, 3:20 p.m. UTC | #3
>>> On 18.03.19 at 15:37, <Paul.Durrant@citrix.com> wrote:
>> From: Jan Beulich [mailto:JBeulich@suse.com]
>> Sent: 18 March 2019 14:24
>> 
>> >>> On 18.03.19 at 12:20, <paul.durrant@citrix.com> wrote:
>> > @@ -72,11 +77,14 @@ static void update_reference_tsc(struct domain *d, bool initialize)
>> >       * ticks per 100ns shifted left by 64.
>> >       */
>> >      p->TscScale = ((10000ul << 32) / d->arch.tsc_khz) << 32;
>> > +    smp_wmb();
>> > +
>> > +    seq = p->TscSequence + 1;
>> > +    if ( seq == 0xFFFFFFFF || seq == 0 ) /* Avoid both 'invalid' values */
>> > +        seq = 1;
>> >
>> > -    p->TscSequence++;
>> > -    if ( p->TscSequence == 0xFFFFFFFF ||
>> > -         p->TscSequence == 0 ) /* Avoid both 'invalid' values */
>> > -        p->TscSequence = 1;
>> > +    p->TscSequence = seq;
>> > +    vd->reference_tsc_valid = true;
>> 
>> Strictly speaking, don't you need another smp_wmb() between
>> these two lines?
>> 
> 
> Since the data in the page is not used by time_now() I don't think so.

Oh, have I been remembering an old version of the patch, where
there was a consumer of p->TscSequence?

>> > +            return;
>> > +        }
>> > +    }
>> > +    ASSERT(expiration - now > 0);
>> > +
>> > +    vs->expiration = expiration;
>> > +    timeout = (expiration - now) * 100ull;
>> > +
>> > +    vs->started = true;
>> > +    migrate_timer(&vs->timer, smp_processor_id());
>> 
>> Why is this smp_processor_id() when viridian_time_vcpu_init() uses
>> v->processor? How relevant is it in the first place to trace the pCPU
>> the vCPU runs on for the timer?
> 
> I was just following suit with other timer code. It seems to be the norm to 
> migrate to the current pCPU just prior to starting a timer.

But wouldn't v->processor then be more visibly correct (besides
likely being cheaper to get at), as to the correlation to the vCPU
in question? I can't actually see why "migrate to the current pCPU"
would be the norm; I could only see this as an implication from
that other code you looked at simply acting on the current vCPU.

Then again I'm having trouble spotting why it would be important
for the timer to run on the same CPU the vCPU runs on. By the
time the timer fires, the vCPU may have gone elsewhere.

Jan
Paul Durrant March 18, 2019, 3:36 p.m. UTC | #4
> -----Original Message-----
> From: Xen-devel [mailto:xen-devel-bounces@lists.xenproject.org] On Behalf Of Jan Beulich
> Sent: 18 March 2019 15:21
> To: Paul Durrant <Paul.Durrant@citrix.com>
> Cc: Stefano Stabellini <sstabellini@kernel.org>; Wei Liu <wei.liu2@citrix.com>; Konrad Rzeszutek Wilk
> <konrad.wilk@oracle.com>; Andrew Cooper <Andrew.Cooper3@citrix.com>; Tim (Xen.org) <tim@xen.org>;
> George Dunlap <George.Dunlap@citrix.com>; Julien Grall <julien.grall@arm.com>; xen-devel <xen-
> devel@lists.xenproject.org>; Ian Jackson <Ian.Jackson@citrix.com>; Roger Pau Monne
> <roger.pau@citrix.com>
> Subject: Re: [Xen-devel] [PATCH v8 10/11] viridian: add implementation of synthetic timers
> 
> >>> On 18.03.19 at 15:37, <Paul.Durrant@citrix.com> wrote:
> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> Sent: 18 March 2019 14:24
> >>
> >> >>> On 18.03.19 at 12:20, <paul.durrant@citrix.com> wrote:
> >> > @@ -72,11 +77,14 @@ static void update_reference_tsc(struct domain *d, bool initialize)
> >> >       * ticks per 100ns shifted left by 64.
> >> >       */
> >> >      p->TscScale = ((10000ul << 32) / d->arch.tsc_khz) << 32;
> >> > +    smp_wmb();
> >> > +
> >> > +    seq = p->TscSequence + 1;
> >> > +    if ( seq == 0xFFFFFFFF || seq == 0 ) /* Avoid both 'invalid' values */
> >> > +        seq = 1;
> >> >
> >> > -    p->TscSequence++;
> >> > -    if ( p->TscSequence == 0xFFFFFFFF ||
> >> > -         p->TscSequence == 0 ) /* Avoid both 'invalid' values */
> >> > -        p->TscSequence = 1;
> >> > +    p->TscSequence = seq;
> >> > +    vd->reference_tsc_valid = true;
> >>
> >> Strictly speaking, don't you need another smp_wmb() between
> >> these two lines?
> >>
> >
> > Since the data in the page is not used by time_now() I don't think so.
> 
> Oh, have I been remembering an old version of the patch, where
> there was a consumer of p->TscSequence?

Yes, it was in a previous version of the patch. The reason reference_tsc_valid was added was so that time_now() no longer needs to check any contents of the guest page.

> 
> >> > +            return;
> >> > +        }
> >> > +    }
> >> > +    ASSERT(expiration - now > 0);
> >> > +
> >> > +    vs->expiration = expiration;
> >> > +    timeout = (expiration - now) * 100ull;
> >> > +
> >> > +    vs->started = true;
> >> > +    migrate_timer(&vs->timer, smp_processor_id());
> >>
> >> Why is this smp_processor_id() when viridian_time_vcpu_init() uses
> >> v->processor? How relevant is it in the first place to trace the pCPU
> >> the vCPU runs on for the timer?
> >
> > I was just following suit with other timer code. It seems to be the norm to
> > migrate to the current pCPU just prior to starting a timer.
> 
> But wouldn't v->processor then be more visibly correct (besides
> likely being cheaper to get at), as to the correlation to the vCPU
> in question? I can't actually see why "migrate to the current pCPU"
> would be the norm; I could only see this as an implication from
> that other code you looked at simply acting on the current vCPU.
> 
> Then again I'm having trouble spotting why it would be important
> for the timer to run on the same CPU the vCPU runs one. By the
> time the timer fires, the vCPU may have gone elsewhere.
> 

I have no problem dropping the migrate call. As I said, I was following prior example e.g. in the VCPUOP_set_singleshot_timer handler and in vcpu_periodic_timer_work(), which do indeed run on current... but then so will start_timer() in the vast majority of invocations (the invocation in viridian_time_vcpu_thaw() being the exception). I'm happy for you to swap it for v->processor on commit unless you want me to send a v9 with the change?

  Paul

> Jan
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xenproject.org
> https://lists.xenproject.org/mailman/listinfo/xen-devel
Paul Durrant March 18, 2019, 3:46 p.m. UTC | #5
> -----Original Message-----
[snip]
> > > +    {
> > > +        expiration = vs->count;
> > > +        if ( expiration - now <= 0 )
> > > +        {
> > > +            vs->expiration = expiration;
> > > +            stimer_expire(vs);
> >
> > Aren't you introducing a risk for races by calling the timer function
> > directly from here? start_timer(), after all, gets called from quite a
> > few places.
> >
> 
> In practice I don't think there should be any problematic race, but I'll check again.
> 

I think the 'periodic' name might be confusing things... The Xen timers are all single-shot, it's just that start_stimer() is re-called after a successful poll if the viridian timer is configured to be periodic. So I don't think there is case where the underlying Xen timer could actually be running when we enter start_stimer().

  Paul
Jan Beulich March 18, 2019, 4:15 p.m. UTC | #6
>>> On 18.03.19 at 16:36, <Paul.Durrant@citrix.com> wrote:
>> From: Xen-devel [mailto:xen-devel-bounces@lists.xenproject.org] On Behalf Of Jan Beulich
>> Sent: 18 March 2019 15:21
>> 
>> >>> On 18.03.19 at 15:37, <Paul.Durrant@citrix.com> wrote:
>> >> From: Jan Beulich [mailto:JBeulich@suse.com]
>> >> Sent: 18 March 2019 14:24
>> >>
>> >> >>> On 18.03.19 at 12:20, <paul.durrant@citrix.com> wrote:
>> >> > +    ASSERT(expiration - now > 0);
>> >> > +
>> >> > +    vs->expiration = expiration;
>> >> > +    timeout = (expiration - now) * 100ull;
>> >> > +
>> >> > +    vs->started = true;
>> >> > +    migrate_timer(&vs->timer, smp_processor_id());
>> >>
>> >> Why is this smp_processor_id() when viridian_time_vcpu_init() uses
>> >> v->processor? How relevant is it in the first place to trace the pCPU
>> >> the vCPU runs on for the timer?
>> >
>> > I was just following suit with other timer code. It seems to be the norm to
>> > migrate to the current pCPU just prior to starting a timer.
>> 
>> But wouldn't v->processor then be more visibly correct (besides
>> likely being cheaper to get at), as to the correlation to the vCPU
>> in question? I can't actually see why "migrate to the current pCPU"
>> would be the norm; I could only see this as an implication from
>> that other code you looked at simply acting on the current vCPU.
>> 
>> Then again I'm having trouble spotting why it would be important
>> for the timer to run on the same CPU the vCPU runs on. By the
>> time the timer fires, the vCPU may have gone elsewhere.
>> 
> 
> I have no problem dropping the migrate call. As I said, I was following 
> prior example e.g. in the VCPUOP_set_singleshot_timer handler and in 
> vcpu_periodic_timer_work(), which do indeed run on current... but then so 
> will start_timer() in the vast majority of invocations (the invocation in 
> viridian_time_vcpu_thaw() being the exception). I'm happy for you to swap it 
> for v->processor on commit unless you want me to send a v9 with the change?

No need to send v9 just for this.

Jan
Jan Beulich March 18, 2019, 4:22 p.m. UTC | #7
>>> On 18.03.19 at 16:46, <Paul.Durrant@citrix.com> wrote:
>> > > +    {
>> > > +        expiration = vs->count;
>> > > +        if ( expiration - now <= 0 )
>> > > +        {
>> > > +            vs->expiration = expiration;
>> > > +            stimer_expire(vs);
>> >
>> > Aren't you introducing a risk for races by calling the timer function
>> > directly from here? start_timer(), after all, gets called from quite a
>> > few places.
>> 
>> In practice I don't think there should be any problematic race, but I'll check again.
> 
> I think the 'periodic' name might be confusing things... The Xen timers are 
> all single-shot, it's just that start_stimer() is re-called after a 
> successful poll if the viridian timer is configured to be periodic. So I 
> don't think there is case where the underlying Xen timer could actually be 
> running when we enter start_stimer().

One of the callers of the function is the WRMSR handler. Why would
it be guaranteed that the timer isn't active when such a WRMSR
occurs? Of course, this alone is not enough for there to be a problem,
as we're fine as long as the guest can only harm itself. But I don't
think it goes without saying that there's no issue here; having looked
a 2nd time just now, I think I agree though that there's no risk for our
own health here.

Jan
Paul Durrant March 18, 2019, 4:26 p.m. UTC | #8
> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 18 March 2019 16:23
> To: Paul Durrant <Paul.Durrant@citrix.com>
> Cc: Julien Grall <julien.grall@arm.com>; Andrew Cooper <Andrew.Cooper3@citrix.com>; George Dunlap
> <George.Dunlap@citrix.com>; Ian Jackson <Ian.Jackson@citrix.com>; Roger Pau Monne
> <roger.pau@citrix.com>; Wei Liu <wei.liu2@citrix.com>; Stefano Stabellini <sstabellini@kernel.org>;
> xen-devel <xen-devel@lists.xenproject.org>; Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>; Tim
> (Xen.org) <tim@xen.org>
> Subject: Re: [Xen-devel] [PATCH v8 10/11] viridian: add implementation of synthetic timers
> 
> >>> On 18.03.19 at 16:46, <Paul.Durrant@citrix.com> wrote:
> >> > > +    {
> >> > > +        expiration = vs->count;
> >> > > +        if ( expiration - now <= 0 )
> >> > > +        {
> >> > > +            vs->expiration = expiration;
> >> > > +            stimer_expire(vs);
> >> >
> >> > Aren't you introducing a risk for races by calling the timer function
> >> > directly from here? start_timer(), after all, gets called from quite a
> >> > few places.
> >>
> >> In practice I don't think there should be any problematic race, but I'll check again.
> >
> > I think the 'periodic' name might be confusing things... The Xen timers are
> > all single-shot, it's just that start_stimer() is re-called after a
> > successful poll if the viridian timer is configured to be periodic. So I
> > don't think there is case where the underlying Xen timer could actually be
> > running when we enter start_stimer().
> 
> One of the callers of the function is the WRMSR handler. Why would
> it be guaranteed that the timer isn't active when such a WRMSR
> occurs?

It's not guaranteed on entry, but the WRMSR handler always calls stop_stimer() before calling start_stimer() which AFAICT should guarantee the timer is not running when start_stimer() is called.

  Paul

> Of course, this alone is not enough for there to be a problem,
> as we're fine as long as the guest can only harm itself. But I don't
> think it goes without saying that there's no issue here; having looked
> a 2nd time just now, I think I agree though that there's no risk for our
> own health here.
> 
> Jan
>
Jan Beulich March 18, 2019, 4:55 p.m. UTC | #9
>>> On 18.03.19 at 17:26, <Paul.Durrant@citrix.com> wrote:
>> From: Jan Beulich [mailto:JBeulich@suse.com]
>> Sent: 18 March 2019 16:23
>> 
>> >>> On 18.03.19 at 16:46, <Paul.Durrant@citrix.com> wrote:
>> >> > > +    {
>> >> > > +        expiration = vs->count;
>> >> > > +        if ( expiration - now <= 0 )
>> >> > > +        {
>> >> > > +            vs->expiration = expiration;
>> >> > > +            stimer_expire(vs);
>> >> >
>> >> > Aren't you introducing a risk for races by calling the timer function
>> >> > directly from here? start_timer(), after all, gets called from quite a
>> >> > few places.
>> >>
>> >> In practice I don't think there should be any problematic race, but I'll check again.
>> >
>> > I think the 'periodic' name might be confusing things... The Xen timers are
>> > all single-shot, it's just that start_stimer() is re-called after a
>> > successful poll if the viridian timer is configured to be periodic. So I
>> > don't think there is case where the underlying Xen timer could actually be
>> > running when we enter start_stimer().
>> 
>> One of the callers of the function is the WRMSR handler. Why would
>> it be guaranteed that the timer isn't active when such a WRMSR
>> occurs?
> 
> It's not guaranteed on entry, but the WRMSR handler always calls 
> stop_stimer() before calling start_stimer() which AFAICT should guarantee the 
> timer is not running when start_stimer() is called.

I've looked only briefly, but the stop_timer() -> deactivate_timer() call
chain doesn't look to wait for the timer handler to not be active anymore
on another CPU before returning.

Jan
Paul Durrant March 18, 2019, 5:06 p.m. UTC | #10
> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 18 March 2019 16:56
> To: Paul Durrant <Paul.Durrant@citrix.com>
> Cc: Julien Grall <julien.grall@arm.com>; Andrew Cooper <Andrew.Cooper3@citrix.com>; George Dunlap
> <George.Dunlap@citrix.com>; Ian Jackson <Ian.Jackson@citrix.com>; Roger Pau Monne
> <roger.pau@citrix.com>; Wei Liu <wei.liu2@citrix.com>; Stefano Stabellini <sstabellini@kernel.org>;
> xen-devel <xen-devel@lists.xenproject.org>; Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>; Tim
> (Xen.org) <tim@xen.org>
> Subject: Re: [Xen-devel] [PATCH v8 10/11] viridian: add implementation of synthetic timers
> 
> >>> On 18.03.19 at 17:26, <Paul.Durrant@citrix.com> wrote:
> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> Sent: 18 March 2019 16:23
> >>
> >> >>> On 18.03.19 at 16:46, <Paul.Durrant@citrix.com> wrote:
> >> >> > > +    {
> >> >> > > +        expiration = vs->count;
> >> >> > > +        if ( expiration - now <= 0 )
> >> >> > > +        {
> >> >> > > +            vs->expiration = expiration;
> >> >> > > +            stimer_expire(vs);
> >> >> >
> >> >> > Aren't you introducing a risk for races by calling the timer function
> >> >> > directly from here? start_timer(), after all, gets called from quite a
> >> >> > few places.
> >> >>
> >> >> In practice I don't think there should be any problematic race, but I'll check again.
> >> >
> >> > I think the 'periodic' name might be confusing things... The Xen timers are
> >> > all single-shot, it's just that start_stimer() is re-called after a
> >> > successful poll if the viridian timer is configured to be periodic. So I
> >> > don't think there is case where the underlying Xen timer could actually be
> >> > running when we enter start_stimer().
> >>
> >> One of the callers of the function is the WRMSR handler. Why would
> >> it be guaranteed that the timer isn't active when such a WRMSR
> >> occurs?
> >
> > It's not guaranteed on entry, but the WRMSR handler always calls
> > stop_stimer() before calling start_stimer() which AFAICT should guarantee the
> > timer is not running when start_stimer() is called.
> 
> I've looked only briefly, but the stop_timer() -> deactivate_timer() call
> chain doesn't look to wait for the timer handler to not be active anymore
> on another CPU before returning.

Oh, it looked to me like the locking would ensure the timer was deactivated and would not run... but I guess I may have misunderstood the code. Still even if both occurrences make it past the test of config.enabled all they'll do is both set the pending bit, as the prior version of the patch did. Although I guess there's now a possibility that, for one occurrence, the poll could occur before config.enabled is cleared and thus the timer may be rescheduled and immediately expire again. I'm not sure whether this is really a problem or not.

  Paul

> 
> Jan
>
Jan Beulich March 19, 2019, 8:03 a.m. UTC | #11
>>> On 18.03.19 at 18:06, <Paul.Durrant@citrix.com> wrote:
>>  -----Original Message-----
>> From: Jan Beulich [mailto:JBeulich@suse.com]
>> Sent: 18 March 2019 16:56
>> To: Paul Durrant <Paul.Durrant@citrix.com>
>> Cc: Julien Grall <julien.grall@arm.com>; Andrew Cooper 
><Andrew.Cooper3@citrix.com>; George Dunlap
>> <George.Dunlap@citrix.com>; Ian Jackson <Ian.Jackson@citrix.com>; Roger Pau 
> Monne
>> <roger.pau@citrix.com>; Wei Liu <wei.liu2@citrix.com>; Stefano Stabellini 
> <sstabellini@kernel.org>;
>> xen-devel <xen-devel@lists.xenproject.org>; Konrad Rzeszutek Wilk 
> <konrad.wilk@oracle.com>; Tim
>> (Xen.org) <tim@xen.org>
>> Subject: Re: [Xen-devel] [PATCH v8 10/11] viridian: add implementation of 
> synthetic timers
>> 
>> >>> On 18.03.19 at 17:26, <Paul.Durrant@citrix.com> wrote:
>> >> From: Jan Beulich [mailto:JBeulich@suse.com]
>> >> Sent: 18 March 2019 16:23
>> >>
>> >> >>> On 18.03.19 at 16:46, <Paul.Durrant@citrix.com> wrote:
>> >> >> > > +    {
>> >> >> > > +        expiration = vs->count;
>> >> >> > > +        if ( expiration - now <= 0 )
>> >> >> > > +        {
>> >> >> > > +            vs->expiration = expiration;
>> >> >> > > +            stimer_expire(vs);
>> >> >> >
>> >> >> > Aren't you introducing a risk for races by calling the timer function
>> >> >> > directly from here? start_timer(), after all, gets called from quite a
>> >> >> > few places.
>> >> >>
>> >> >> In practice I don't think there should be any problematic race, but I'll 
> check again.
>> >> >
>> >> > I think the 'periodic' name might be confusing things... The Xen timers 
> are
>> >> > all single-shot, it's just that start_stimer() is re-called after a
>> >> > successful poll if the viridian timer is configured to be periodic. So I
>> >> > don't think there is case where the underlying Xen timer could actually be
>> >> > running when we enter start_stimer().
>> >>
>> >> One of the callers of the function is the WRMSR handler. Why would
>> >> it be guaranteed that the timer isn't active when such a WRMSR
>> >> occurs?
>> >
>> > It's not guaranteed on entry, but the WRMSR handler always calls
>> > stop_stimer() before calling start_stimer() which AFAICT should guarantee 
> the
>> > timer is not running when start_stimer() is called.
>> 
>> I've looked only briefly, but the stop_timer() -> deactivate_timer() call
>> chain doesn't look to wait for the timer handler to not be active anymore
>> on another CPU before returning.
> 
> Oh, it looked to me like the locking would ensure the timer was deactivated 
> and would not run... but I guess I may have misunderstood the code.

The lock gets dropped around the call to the handler (in execute_timer()).

> Still 
> even if both occurrences make it past the test of config.enabled all they'll 
> do is both set the pending bit, as the prior version of the patch did. 
> Although I guess there's now a possibility that, for one occurrence, the poll 
> could occur before config.enabled is cleared and thus the timer may be 
> rescheduled and immediately expire again.

So perhaps config.enabled should get cleared before setting the bit
in stimer_pending? Would seem to also be better for this to happen
before vcpu_kick(), wouldn't it? (Moving the two lines up would again
be easy enough to do while committing, so long as you agree.)

> I'm not sure whether this is really a problem or not.

Neither am I.

Jan
Paul Durrant March 19, 2019, 8:31 a.m. UTC | #12
> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 19 March 2019 08:03
> To: Paul Durrant <Paul.Durrant@citrix.com>
> Cc: Julien Grall <julien.grall@arm.com>; Andrew Cooper <Andrew.Cooper3@citrix.com>; George Dunlap
> <George.Dunlap@citrix.com>; Ian Jackson <Ian.Jackson@citrix.com>; Roger Pau Monne
> <roger.pau@citrix.com>; Wei Liu <wei.liu2@citrix.com>; Stefano Stabellini <sstabellini@kernel.org>;
> xen-devel <xen-devel@lists.xenproject.org>; Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>; Tim
> (Xen.org) <tim@xen.org>
> Subject: Re: [Xen-devel] [PATCH v8 10/11] viridian: add implementation of synthetic timers
> 
> >>> On 18.03.19 at 18:06, <Paul.Durrant@citrix.com> wrote:
> >>  -----Original Message-----
> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> Sent: 18 March 2019 16:56
> >> To: Paul Durrant <Paul.Durrant@citrix.com>
> >> Cc: Julien Grall <julien.grall@arm.com>; Andrew Cooper
> ><Andrew.Cooper3@citrix.com>; George Dunlap
> >> <George.Dunlap@citrix.com>; Ian Jackson <Ian.Jackson@citrix.com>; Roger Pau
> > Monne
> >> <roger.pau@citrix.com>; Wei Liu <wei.liu2@citrix.com>; Stefano Stabellini
> > <sstabellini@kernel.org>;
> >> xen-devel <xen-devel@lists.xenproject.org>; Konrad Rzeszutek Wilk
> > <konrad.wilk@oracle.com>; Tim
> >> (Xen.org) <tim@xen.org>
> >> Subject: Re: [Xen-devel] [PATCH v8 10/11] viridian: add implementation of
> > synthetic timers
> >>
> >> >>> On 18.03.19 at 17:26, <Paul.Durrant@citrix.com> wrote:
> >> >> From: Jan Beulich [mailto:JBeulich@suse.com]
> >> >> Sent: 18 March 2019 16:23
> >> >>
> >> >> >>> On 18.03.19 at 16:46, <Paul.Durrant@citrix.com> wrote:
> >> >> >> > > +    {
> >> >> >> > > +        expiration = vs->count;
> >> >> >> > > +        if ( expiration - now <= 0 )
> >> >> >> > > +        {
> >> >> >> > > +            vs->expiration = expiration;
> >> >> >> > > +            stimer_expire(vs);
> >> >> >> >
> >> >> >> > Aren't you introducing a risk for races by calling the timer function
> >> >> >> > directly from here? start_timer(), after all, gets called from quite a
> >> >> >> > few places.
> >> >> >>
> >> >> >> In practice I don't think there should be any problematic race, but I'll
> > check again.
> >> >> >
> >> >> > I think the 'periodic' name might be confusing things... The Xen timers
> > are
> >> >> > all single-shot, it's just that start_stimer() is re-called after a
> >> >> > successful poll if the viridian timer is configured to be periodic. So I
> >> >> > don't think there is a case where the underlying Xen timer could actually be
> >> >> > running when we enter start_stimer().
> >> >>
> >> >> One of the callers of the function is the WRMSR handler. Why would
> >> >> it be guaranteed that the timer isn't active when such a WRMSR
> >> >> occurs?
> >> >
> >> > It's not guaranteed on entry, but the WRMSR handler always calls
> >> > stop_stimer() before calling start_stimer() which AFAICT should guarantee
> > the
> >> > timer is not running when start_stimer() is called.
> >>
> >> I've looked only briefly, but the stop_timer() -> deactivate_timer() call
> >> chain doesn't look to wait for the timer handler to not be active anymore
> >> on another CPU before returning.
> >
> > Oh, it looked to me like the locking would ensure the timer was deactivated
> > and would not run... but I guess I may have misunderstood the code.
> 
> The lock gets dropped around the call to the handler (in execute_timer()).
> 
> > Still
> > even if both occurrences make it past the test of config.enabled all they'll
> > do is both set the pending bit, as the prior version of the patch did.
> > Although I guess there's now a possibility that, for one occurrence, the poll
> > could occur before config.enabled is cleared and thus the timer may be
> > rescheduled and immediately expire again.
> 
> So perhaps config.enabled should get cleared before setting the bit
> in stimer_pending? Would seem to also be better for this to happen
> before vcpu_kick(), wouldn't it? (Moving the two lines up would again
> be easy enough to do while committing, so long as you agree.)
> 
> > I'm not sure whether this is really a problem or not.
> 
> Neither am I.

Ok. To err on the safe side it is probably best to only access the timer config from current context and have timer expiration simply set the pending bit and kick the vcpu. I'll send a v9.

  Paul

> 
> Jan
>
diff mbox series

Patch

diff --git a/docs/man/xl.cfg.5.pod.in b/docs/man/xl.cfg.5.pod.in
index ad81af1ed8..355c654693 100644
--- a/docs/man/xl.cfg.5.pod.in
+++ b/docs/man/xl.cfg.5.pod.in
@@ -2167,11 +2167,19 @@  This group incorporates the crash control MSRs. These enlightenments
 allow Windows to write crash information such that it can be logged
 by Xen.
 
+=item B<stimer>
+
+This set incorporates the SynIC and synthetic timer MSRs. Windows will
+use synthetic timers in preference to emulated HPET for a source of
+ticks and hence enabling this group will ensure that ticks will be
+consistent with use of an enlightened time source (B<time_ref_count> or
+B<reference_tsc>).
+
 =item B<defaults>
 
 This is a special value that enables the default set of groups, which
-is currently the B<base>, B<freq>, B<time_ref_count>, B<apic_assist>
-and B<crash_ctl> groups.
+is currently the B<base>, B<freq>, B<time_ref_count>, B<apic_assist>,
+B<crash_ctl> and B<stimer> groups.
 
 =item B<all>
 
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index a923a380d3..c8f219b0d3 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -324,6 +324,12 @@ 
  */
 #define LIBXL_HAVE_VIRIDIAN_SYNIC 1
 
+/*
+ * LIBXL_HAVE_VIRIDIAN_STIMER indicates that the 'stimer' value
+ * is present in the viridian enlightenment enumeration.
+ */
+#define LIBXL_HAVE_VIRIDIAN_STIMER 1
+
 /*
  * LIBXL_HAVE_BUILDINFO_HVM_ACPI_LAPTOP_SLATE indicates that
  * libxl_domain_build_info has the u.hvm.acpi_laptop_slate field.
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index fb758d2ac3..2ee0f82ee7 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -269,6 +269,7 @@  static int hvm_set_viridian_features(libxl__gc *gc, uint32_t domid,
         libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT);
         libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_APIC_ASSIST);
         libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CRASH_CTL);
+        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_STIMER);
     }
 
     libxl_for_each_set_bit(v, info->u.hvm.viridian_enable) {
@@ -320,6 +321,9 @@  static int hvm_set_viridian_features(libxl__gc *gc, uint32_t domid,
     if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_SYNIC))
         mask |= HVMPV_synic;
 
+    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_STIMER))
+        mask |= HVMPV_time_ref_count | HVMPV_synic | HVMPV_stimer;
+
     if (mask != 0 &&
         xc_hvm_param_set(CTX->xch,
                          domid,
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 9860bcaf5f..1cce249de4 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -236,6 +236,7 @@  libxl_viridian_enlightenment = Enumeration("viridian_enlightenment", [
     (5, "apic_assist"),
     (6, "crash_ctl"),
     (7, "synic"),
+    (8, "stimer"),
     ])
 
 libxl_hdtype = Enumeration("hdtype", [
diff --git a/xen/arch/x86/hvm/viridian/private.h b/xen/arch/x86/hvm/viridian/private.h
index 96a784b840..c272c34cda 100644
--- a/xen/arch/x86/hvm/viridian/private.h
+++ b/xen/arch/x86/hvm/viridian/private.h
@@ -74,6 +74,11 @@ 
 int viridian_synic_wrmsr(struct vcpu *v, uint32_t idx, uint64_t val);
 int viridian_synic_rdmsr(const struct vcpu *v, uint32_t idx, uint64_t *val);
 
+bool viridian_synic_deliver_timer_msg(struct vcpu *v, unsigned int sintx,
+                                      unsigned int index,
+                                      uint64_t expiration,
+                                      uint64_t delivery);
+
 int viridian_synic_vcpu_init(const struct vcpu *v);
 int viridian_synic_domain_init(const struct domain *d);
 
@@ -93,7 +98,9 @@  void viridian_synic_load_domain_ctxt(
 int viridian_time_wrmsr(struct vcpu *v, uint32_t idx, uint64_t val);
 int viridian_time_rdmsr(const struct vcpu *v, uint32_t idx, uint64_t *val);
 
-int viridian_time_vcpu_init(const struct vcpu *v);
+void viridian_time_poll_timers(struct vcpu *v);
+
+int viridian_time_vcpu_init(struct vcpu *v);
 int viridian_time_domain_init(const struct domain *d);
 
 void viridian_time_vcpu_deinit(const struct vcpu *v);
diff --git a/xen/arch/x86/hvm/viridian/synic.c b/xen/arch/x86/hvm/viridian/synic.c
index 84ab02694f..2791021bcc 100644
--- a/xen/arch/x86/hvm/viridian/synic.c
+++ b/xen/arch/x86/hvm/viridian/synic.c
@@ -346,9 +346,60 @@  void viridian_synic_domain_deinit(const struct domain *d)
 {
 }
 
-void viridian_synic_poll(const struct vcpu *v)
+void viridian_synic_poll(struct vcpu *v)
 {
-    /* There are currently no message sources */
+    viridian_time_poll_timers(v);
+}
+
+bool viridian_synic_deliver_timer_msg(struct vcpu *v, unsigned int sintx,
+                                      unsigned int index,
+                                      uint64_t expiration,
+                                      uint64_t delivery)
+{
+    struct viridian_vcpu *vv = v->arch.hvm.viridian;
+    const union viridian_sint_msr *vs = &vv->sint[sintx];
+    HV_MESSAGE *msg = vv->simp.ptr;
+    struct {
+        uint32_t TimerIndex;
+        uint32_t Reserved;
+        uint64_t ExpirationTime;
+        uint64_t DeliveryTime;
+    } payload = {
+        .TimerIndex = index,
+        .ExpirationTime = expiration,
+        .DeliveryTime = delivery,
+    };
+
+    if ( test_bit(sintx, &vv->msg_pending) )
+        return false;
+
+    /*
+     * To avoid using an atomic test-and-set, and barrier before calling
+     * vlapic_set_irq(), this function must be called in context of the
+     * vcpu receiving the message.
+     */
+    ASSERT(v == current);
+
+    msg += sintx;
+
+    if ( msg->Header.MessageType != HvMessageTypeNone )
+    {
+        msg->Header.MessageFlags.MessagePending = 1;
+        __set_bit(sintx, &vv->msg_pending);
+        return false;
+    }
+
+    msg->Header.MessageType = HvMessageTimerExpired;
+    msg->Header.MessageFlags.MessagePending = 0;
+    msg->Header.PayloadSize = sizeof(payload);
+
+    BUILD_BUG_ON(sizeof(payload) > sizeof(msg->Payload));
+    memcpy(msg->Payload, &payload, sizeof(payload));
+
+    if ( !vs->mask )
+        vlapic_set_irq(vcpu_vlapic(v), vs->vector, 0);
+
+    return true;
 }
 
 bool viridian_synic_is_auto_eoi_sint(const struct vcpu *v,
diff --git a/xen/arch/x86/hvm/viridian/time.c b/xen/arch/x86/hvm/viridian/time.c
index 71291d921c..692f014fc4 100644
--- a/xen/arch/x86/hvm/viridian/time.c
+++ b/xen/arch/x86/hvm/viridian/time.c
@@ -12,6 +12,7 @@ 
 #include <xen/version.h>
 
 #include <asm/apic.h>
+#include <asm/event.h>
 #include <asm/hvm/support.h>
 
 #include "private.h"
@@ -27,8 +28,10 @@  typedef struct _HV_REFERENCE_TSC_PAGE
 
 static void update_reference_tsc(struct domain *d, bool initialize)
 {
-    const struct viridian_page *rt = &d->arch.hvm.viridian->reference_tsc;
+    struct viridian_domain *vd = d->arch.hvm.viridian;
+    const struct viridian_page *rt = &vd->reference_tsc;
     HV_REFERENCE_TSC_PAGE *p = rt->ptr;
+    uint32_t seq;
 
     if ( initialize )
         clear_page(p);
@@ -59,6 +62,8 @@  static void update_reference_tsc(struct domain *d, bool initialize)
 
         printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: invalidated\n",
                d->domain_id);
+
+        vd->reference_tsc_valid = false;
         return;
     }
 
@@ -72,11 +77,14 @@  static void update_reference_tsc(struct domain *d, bool initialize)
      * ticks per 100ns shifted left by 64.
      */
     p->TscScale = ((10000ul << 32) / d->arch.tsc_khz) << 32;
+    smp_wmb();
+
+    seq = p->TscSequence + 1;
+    if ( seq == 0xFFFFFFFF || seq == 0 ) /* Avoid both 'invalid' values */
+        seq = 1;
 
-    p->TscSequence++;
-    if ( p->TscSequence == 0xFFFFFFFF ||
-         p->TscSequence == 0 ) /* Avoid both 'invalid' values */
-        p->TscSequence = 1;
+    p->TscSequence = seq;
+    vd->reference_tsc_valid = true;
 }
 
 static int64_t raw_trc_val(const struct domain *d)
@@ -118,18 +126,250 @@  static int64_t time_ref_count(const struct domain *d)
     return raw_trc_val(d) + trc->off;
 }
 
+/*
+ * The specification says: "The partition reference time is computed
+ * by the following formula:
+ *
+ * ReferenceTime = ((VirtualTsc * TscScale) >> 64) + TscOffset
+ *
+ * The multiplication is a 64 bit multiplication, which results in a
+ * 128 bit number which is then shifted 64 times to the right to obtain
+ * the high 64 bits."
+ */
+static uint64_t scale_tsc(uint64_t tsc, uint64_t scale, uint64_t offset)
+{
+    uint64_t result;
+
+    /*
+     * Quadword MUL takes an implicit operand in RAX, and puts the result
+     * in RDX:RAX. Because we only want the result of the multiplication
+     * after shifting right by 64 bits, we therefore only need the content
+     * of RDX.
+     */
+    asm ( "mulq %[scale]"
+          : "+a" (tsc), "=d" (result)
+          : [scale] "rm" (scale) );
+
+    return result + offset;
+}
+
+static uint64_t time_now(struct domain *d)
+{
+    uint64_t tsc, scale;
+
+    /*
+     * If the reference TSC page is not enabled, or has been invalidated
+     * fall back to the partition reference counter.
+     */
+    if ( !d->arch.hvm.viridian->reference_tsc_valid )
+        return time_ref_count(d);
+
+    /* Otherwise compute reference time in the same way the guest would */
+    tsc = hvm_get_guest_tsc(pt_global_vcpu_target(d));
+    scale = ((10000ul << 32) / d->arch.tsc_khz) << 32;
+
+    return scale_tsc(tsc, scale, 0);
+}
+
+static void stop_stimer(struct viridian_stimer *vs)
+{
+    const struct vcpu *v = vs->v;
+    struct viridian_vcpu *vv = v->arch.hvm.viridian;
+    unsigned int stimerx = vs - &vv->stimer[0];
+
+    if ( !vs->started )
+        return;
+
+    stop_timer(&vs->timer);
+    clear_bit(stimerx, &vv->stimer_pending);
+    vs->started = false;
+}
+
+static void stimer_expire(void *data)
+{
+    struct viridian_stimer *vs = data;
+    struct vcpu *v = vs->v;
+    struct viridian_vcpu *vv = v->arch.hvm.viridian;
+    unsigned int stimerx = vs - &vv->stimer[0];
+
+    if ( !vs->config.enabled )
+        return;
+
+    set_bit(stimerx, &vv->stimer_pending);
+    vcpu_kick(v);
+
+    if ( !vs->config.periodic )
+        vs->config.enabled = 0;
+}
+
+static void start_stimer(struct viridian_stimer *vs)
+{
+    const struct vcpu *v = vs->v;
+    struct viridian_vcpu *vv = v->arch.hvm.viridian;
+    unsigned int stimerx = vs - &vv->stimer[0];
+    int64_t now = time_now(v->domain);
+    int64_t expiration;
+    s_time_t timeout;
+
+    if ( !test_and_set_bit(stimerx, &vv->stimer_enabled) )
+        printk(XENLOG_G_INFO "%pv: VIRIDIAN STIMER%u: enabled\n", v,
+               stimerx);
+
+    if ( vs->config.periodic )
+    {
+        /*
+         * The specification says that if the timer is lazy then we
+         * skip over any missed expirations so we can treat this case
+         * as the same as if the timer is currently stopped, i.e. we
+         * just schedule expiration to be 'count' ticks from now.
+         */
+        if ( !vs->started || vs->config.lazy )
+            expiration = now + vs->count;
+        else
+        {
+            unsigned int missed = 0;
+
+            /*
+             * The timer is already started, so we're re-scheduling.
+             * Hence advance the timer expiration by one tick.
+             */
+            expiration = vs->expiration + vs->count;
+
+            /* Now check to see if any expirations have been missed */
+            if ( expiration - now <= 0 )
+                missed = ((now - expiration) / vs->count) + 1;
+
+            /*
+             * The specification says that if the timer is not lazy then
+             * a non-zero missed count should be used to reduce the period
+             * of the timer until it catches up, unless the count has
+             * reached a 'significant number', in which case the timer
+             * should be treated as lazy. Unfortunately the specification
+             * does not state what that number is so the choice of number
+             * here is a pure guess.
+             */
+            if ( missed > 3 )
+                expiration = now + vs->count;
+            else if ( missed )
+                expiration = now + (vs->count / missed);
+        }
+    }
+    else
+    {
+        expiration = vs->count;
+        if ( expiration - now <= 0 )
+        {
+            vs->expiration = expiration;
+            stimer_expire(vs);
+            return;
+        }
+    }
+    ASSERT(expiration - now > 0);
+
+    vs->expiration = expiration;
+    timeout = (expiration - now) * 100ull;
+
+    vs->started = true;
+    migrate_timer(&vs->timer, smp_processor_id());
+    set_timer(&vs->timer, timeout + NOW());
+}
+
+static void poll_stimer(struct vcpu *v, unsigned int stimerx)
+{
+    struct viridian_vcpu *vv = v->arch.hvm.viridian;
+    struct viridian_stimer *vs = &vv->stimer[stimerx];
+
+    if ( !test_bit(stimerx, &vv->stimer_pending) )
+        return;
+
+    if ( !viridian_synic_deliver_timer_msg(v, vs->config.sintx,
+                                           stimerx, vs->expiration,
+                                           time_now(v->domain)) )
+        return;
+
+    clear_bit(stimerx, &vv->stimer_pending);
+
+    if ( vs->config.enabled )
+        start_stimer(vs);
+}
+
+void viridian_time_poll_timers(struct vcpu *v)
+{
+    struct viridian_vcpu *vv = v->arch.hvm.viridian;
+    unsigned int i;
+
+    if ( !vv->stimer_pending )
+       return;
+
+    for ( i = 0; i < ARRAY_SIZE(vv->stimer); i++ )
+        poll_stimer(v, i);
+}
+
+void viridian_time_vcpu_freeze(struct vcpu *v)
+{
+    struct viridian_vcpu *vv = v->arch.hvm.viridian;
+    unsigned int i;
+
+    if ( !is_viridian_vcpu(v) ||
+         !(viridian_feature_mask(v->domain) & HVMPV_stimer) )
+        return;
+
+    for ( i = 0; i < ARRAY_SIZE(vv->stimer); i++ )
+    {
+        struct viridian_stimer *vs = &vv->stimer[i];
+
+        if ( vs->started )
+            stop_timer(&vs->timer);
+    }
+}
+
+void viridian_time_vcpu_thaw(struct vcpu *v)
+{
+    struct viridian_vcpu *vv = v->arch.hvm.viridian;
+    unsigned int i;
+
+    if ( !is_viridian_vcpu(v) ||
+         !(viridian_feature_mask(v->domain) & HVMPV_stimer) )
+        return;
+
+    for ( i = 0; i < ARRAY_SIZE(vv->stimer); i++ )
+    {
+        struct viridian_stimer *vs = &vv->stimer[i];
+
+        if ( vs->config.enabled )
+            start_stimer(vs);
+    }
+}
+
 void viridian_time_domain_freeze(const struct domain *d)
 {
+    struct vcpu *v;
+
+    if ( !is_viridian_domain(d) )
+        return;
+
+    for_each_vcpu ( d, v )
+        viridian_time_vcpu_freeze(v);
+
     time_ref_count_freeze(d);
 }
 
 void viridian_time_domain_thaw(const struct domain *d)
 {
+    struct vcpu *v;
+
+    if ( !is_viridian_domain(d) )
+        return;
+
     time_ref_count_thaw(d);
+
+    for_each_vcpu ( d, v )
+        viridian_time_vcpu_thaw(v);
 }
 
 int viridian_time_wrmsr(struct vcpu *v, uint32_t idx, uint64_t val)
 {
+    struct viridian_vcpu *vv = v->arch.hvm.viridian;
     struct domain *d = v->domain;
     struct viridian_domain *vd = d->arch.hvm.viridian;
 
@@ -149,6 +389,61 @@  int viridian_time_wrmsr(struct vcpu *v, uint32_t idx, uint64_t val)
         }
         break;
 
+    case HV_X64_MSR_TIME_REF_COUNT:
+        return X86EMUL_EXCEPTION;
+
+    case HV_X64_MSR_STIMER0_CONFIG:
+    case HV_X64_MSR_STIMER1_CONFIG:
+    case HV_X64_MSR_STIMER2_CONFIG:
+    case HV_X64_MSR_STIMER3_CONFIG:
+    {
+        unsigned int stimerx = (idx - HV_X64_MSR_STIMER0_CONFIG) / 2;
+        struct viridian_stimer *vs =
+            &array_access_nospec(vv->stimer, stimerx);
+
+        if ( !(viridian_feature_mask(d) & HVMPV_stimer) )
+            return X86EMUL_EXCEPTION;
+
+        stop_stimer(vs);
+
+        vs->config.raw = val;
+
+        if ( !vs->config.sintx )
+            vs->config.enabled = 0;
+
+        if ( vs->config.enabled )
+            start_stimer(vs);
+
+        break;
+    }
+
+    case HV_X64_MSR_STIMER0_COUNT:
+    case HV_X64_MSR_STIMER1_COUNT:
+    case HV_X64_MSR_STIMER2_COUNT:
+    case HV_X64_MSR_STIMER3_COUNT:
+    {
+        unsigned int stimerx = (idx - HV_X64_MSR_STIMER0_CONFIG) / 2;
+        struct viridian_stimer *vs =
+            &array_access_nospec(vv->stimer, stimerx);
+
+        if ( !(viridian_feature_mask(d) & HVMPV_stimer) )
+            return X86EMUL_EXCEPTION;
+
+        stop_stimer(vs);
+
+        vs->count = val;
+
+        if ( !vs->count  )
+            vs->config.enabled = 0;
+        else if ( vs->config.auto_enable )
+            vs->config.enabled = 1;
+
+        if ( vs->config.enabled )
+            start_stimer(vs);
+
+        break;
+    }
+
     default:
         gdprintk(XENLOG_INFO, "%s: unimplemented MSR %#x (%016"PRIx64")\n",
                  __func__, idx, val);
@@ -160,6 +455,7 @@  int viridian_time_wrmsr(struct vcpu *v, uint32_t idx, uint64_t val)
 
 int viridian_time_rdmsr(const struct vcpu *v, uint32_t idx, uint64_t *val)
 {
+    const struct viridian_vcpu *vv = v->arch.hvm.viridian;
     const struct domain *d = v->domain;
     struct viridian_domain *vd = d->arch.hvm.viridian;
 
@@ -201,6 +497,38 @@  int viridian_time_rdmsr(const struct vcpu *v, uint32_t idx, uint64_t *val)
         break;
     }
 
+    case HV_X64_MSR_STIMER0_CONFIG:
+    case HV_X64_MSR_STIMER1_CONFIG:
+    case HV_X64_MSR_STIMER2_CONFIG:
+    case HV_X64_MSR_STIMER3_CONFIG:
+    {
+        unsigned int stimerx = (idx - HV_X64_MSR_STIMER0_CONFIG) / 2;
+        const struct viridian_stimer *vs =
+            &array_access_nospec(vv->stimer, stimerx);
+
+        if ( !(viridian_feature_mask(d) & HVMPV_stimer) )
+            return X86EMUL_EXCEPTION;
+
+        *val = vs->config.raw;
+        break;
+    }
+
+    case HV_X64_MSR_STIMER0_COUNT:
+    case HV_X64_MSR_STIMER1_COUNT:
+    case HV_X64_MSR_STIMER2_COUNT:
+    case HV_X64_MSR_STIMER3_COUNT:
+    {
+        unsigned int stimerx = (idx - HV_X64_MSR_STIMER0_CONFIG) / 2;
+        const struct viridian_stimer *vs =
+            &array_access_nospec(vv->stimer, stimerx);
+
+        if ( !(viridian_feature_mask(d) & HVMPV_stimer) )
+            return X86EMUL_EXCEPTION;
+
+        *val = vs->count;
+        break;
+    }
+
     default:
         gdprintk(XENLOG_INFO, "%s: unimplemented MSR %#x\n", __func__, idx);
         return X86EMUL_EXCEPTION;
@@ -209,8 +537,19 @@  int viridian_time_rdmsr(const struct vcpu *v, uint32_t idx, uint64_t *val)
     return X86EMUL_OKAY;
 }
 
-int viridian_time_vcpu_init(const struct vcpu *v)
+int viridian_time_vcpu_init(struct vcpu *v)
 {
+    struct viridian_vcpu *vv = v->arch.hvm.viridian;
+    unsigned int i;
+
+    for ( i = 0; i < ARRAY_SIZE(vv->stimer); i++ )
+    {
+        struct viridian_stimer *vs = &vv->stimer[i];
+
+        vs->v = v;
+        init_timer(&vs->timer, stimer_expire, vs, v->processor);
+    }
+
     return 0;
 }
 
@@ -221,6 +560,16 @@  int viridian_time_domain_init(const struct domain *d)
 
 void viridian_time_vcpu_deinit(const struct vcpu *v)
 {
+    struct viridian_vcpu *vv = v->arch.hvm.viridian;
+    unsigned int i;
+
+    for ( i = 0; i < ARRAY_SIZE(vv->stimer); i++ )
+    {
+        struct viridian_stimer *vs = &vv->stimer[i];
+
+        kill_timer(&vs->timer);
+        vs->v = NULL;
+    }
 }
 
 void viridian_time_domain_deinit(const struct domain *d)
@@ -231,11 +580,36 @@  void viridian_time_domain_deinit(const struct domain *d)
 void viridian_time_save_vcpu_ctxt(
     const struct vcpu *v, struct hvm_viridian_vcpu_context *ctxt)
 {
+    const struct viridian_vcpu *vv = v->arch.hvm.viridian;
+    unsigned int i;
+
+    BUILD_BUG_ON(ARRAY_SIZE(vv->stimer) !=
+                 ARRAY_SIZE(ctxt->stimer_config_msr));
+    BUILD_BUG_ON(ARRAY_SIZE(vv->stimer) !=
+                 ARRAY_SIZE(ctxt->stimer_count_msr));
+
+    for ( i = 0; i < ARRAY_SIZE(vv->stimer); i++ )
+    {
+        const struct viridian_stimer *vs = &vv->stimer[i];
+
+        ctxt->stimer_config_msr[i] = vs->config.raw;
+        ctxt->stimer_count_msr[i] = vs->count;
+    }
 }
 
 void viridian_time_load_vcpu_ctxt(
     struct vcpu *v, const struct hvm_viridian_vcpu_context *ctxt)
 {
+    struct viridian_vcpu *vv = v->arch.hvm.viridian;
+    unsigned int i;
+
+    for ( i = 0; i < ARRAY_SIZE(vv->stimer); i++ )
+    {
+        struct viridian_stimer *vs = &vv->stimer[i];
+
+        vs->config.raw = ctxt->stimer_config_msr[i];
+        vs->count = ctxt->stimer_count_msr[i];
+    }
 }
 
 void viridian_time_save_domain_ctxt(
diff --git a/xen/arch/x86/hvm/viridian/viridian.c b/xen/arch/x86/hvm/viridian/viridian.c
index f3166fbcd0..dce648bb4e 100644
--- a/xen/arch/x86/hvm/viridian/viridian.c
+++ b/xen/arch/x86/hvm/viridian/viridian.c
@@ -181,6 +181,8 @@  void cpuid_viridian_leaves(const struct vcpu *v, uint32_t leaf,
             mask.AccessPartitionReferenceTsc = 1;
         if ( viridian_feature_mask(d) & HVMPV_synic )
             mask.AccessSynicRegs = 1;
+        if ( viridian_feature_mask(d) & HVMPV_stimer )
+            mask.AccessSyntheticTimerRegs = 1;
 
         u.mask = mask;
 
@@ -322,6 +324,8 @@  int guest_wrmsr_viridian(struct vcpu *v, uint32_t idx, uint64_t val)
     case HV_X64_MSR_TSC_FREQUENCY:
     case HV_X64_MSR_APIC_FREQUENCY:
     case HV_X64_MSR_REFERENCE_TSC:
+    case HV_X64_MSR_TIME_REF_COUNT:
+    case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
         return viridian_time_wrmsr(v, idx, val);
 
     case HV_X64_MSR_CRASH_P0:
@@ -403,6 +407,7 @@  int guest_rdmsr_viridian(const struct vcpu *v, uint32_t idx, uint64_t *val)
     case HV_X64_MSR_APIC_FREQUENCY:
     case HV_X64_MSR_REFERENCE_TSC:
     case HV_X64_MSR_TIME_REF_COUNT:
+    case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
         return viridian_time_rdmsr(v, idx, val);
 
     case HV_X64_MSR_CRASH_P0:
diff --git a/xen/include/asm-x86/hvm/viridian.h b/xen/include/asm-x86/hvm/viridian.h
index 03fc4c6b76..54e46cc4c4 100644
--- a/xen/include/asm-x86/hvm/viridian.h
+++ b/xen/include/asm-x86/hvm/viridian.h
@@ -40,6 +40,32 @@  union viridian_sint_msr
     };
 };
 
+union viridian_stimer_config_msr
+{
+    uint64_t raw;
+    struct
+    {
+        uint64_t enabled:1;
+        uint64_t periodic:1;
+        uint64_t lazy:1;
+        uint64_t auto_enable:1;
+        uint64_t vector:8;
+        uint64_t direct_mode:1;
+        uint64_t reserved_zero1:3;
+        uint64_t sintx:4;
+        uint64_t reserved_zero2:44;
+    };
+};
+
+struct viridian_stimer {
+    struct vcpu *v;
+    struct timer timer;
+    union viridian_stimer_config_msr config;
+    uint64_t count;
+    uint64_t expiration;
+    bool started;
+};
+
 struct viridian_vcpu
 {
     struct viridian_page vp_assist;
@@ -51,6 +77,9 @@  struct viridian_vcpu
     struct viridian_page simp;
     union viridian_sint_msr sint[16];
     uint8_t vector_to_sintx[256];
+    struct viridian_stimer stimer[4];
+    unsigned int stimer_enabled;
+    unsigned int stimer_pending;
     uint64_t crash_param[5];
 };
 
@@ -87,6 +116,7 @@  struct viridian_domain
     union viridian_page_msr hypercall_gpa;
     struct viridian_time_ref_count time_ref_count;
     struct viridian_page reference_tsc;
+    bool reference_tsc_valid;
 };
 
 void cpuid_viridian_leaves(const struct vcpu *v, uint32_t leaf,
@@ -111,7 +141,7 @@  void viridian_apic_assist_set(const struct vcpu *v);
 bool viridian_apic_assist_completed(const struct vcpu *v);
 void viridian_apic_assist_clear(const struct vcpu *v);
 
-void viridian_synic_poll(const struct vcpu *v);
+void viridian_synic_poll(struct vcpu *v);
 bool viridian_synic_is_auto_eoi_sint(const struct vcpu *v,
                                      unsigned int vector);
 void viridian_synic_ack_sint(const struct vcpu *v, unsigned int vector);
diff --git a/xen/include/public/arch-x86/hvm/save.h b/xen/include/public/arch-x86/hvm/save.h
index ec3e4df12c..8344aa471f 100644
--- a/xen/include/public/arch-x86/hvm/save.h
+++ b/xen/include/public/arch-x86/hvm/save.h
@@ -604,6 +604,8 @@  struct hvm_viridian_vcpu_context {
     uint8_t  _pad[7];
     uint64_t simp_msr;
     uint64_t sint_msr[16];
+    uint64_t stimer_config_msr[4];
+    uint64_t stimer_count_msr[4];
 };
 
 DECLARE_HVM_SAVE_TYPE(VIRIDIAN_VCPU, 17, struct hvm_viridian_vcpu_context);
diff --git a/xen/include/public/hvm/params.h b/xen/include/public/hvm/params.h
index e7e3c7c892..e06b0942d0 100644
--- a/xen/include/public/hvm/params.h
+++ b/xen/include/public/hvm/params.h
@@ -150,6 +150,10 @@ 
 #define _HVMPV_synic 7
 #define HVMPV_synic (1 << _HVMPV_synic)
 
+/* Enable STIMER MSRs */
+#define _HVMPV_stimer 8
+#define HVMPV_stimer (1 << _HVMPV_stimer)
+
 #define HVMPV_feature_mask \
         (HVMPV_base_freq | \
          HVMPV_no_freq | \
@@ -158,7 +162,8 @@ 
          HVMPV_hcall_remote_tlb_flush | \
          HVMPV_apic_assist | \
          HVMPV_crash_ctl | \
-         HVMPV_synic)
+         HVMPV_synic | \
+         HVMPV_stimer)
 
 #endif