diff mbox series

[v4,08/12] x86/vpt: switch interrupt injection model

Message ID 20210420140723.65321-9-roger.pau@citrix.com (mailing list archive)
State New, archived
Headers show
Series x86/intr: introduce EOI callbacks and fix vPT | expand

Commit Message

Roger Pau Monné April 20, 2021, 2:07 p.m. UTC
Currently vPT relies on timers being assigned to a vCPU and performing
checks on every return to HVM guest in order to check if an interrupt
from a vPT timer assigned to the vCPU is currently being injected.

This model doesn't work properly since the interrupt destination vCPU
of a vPT timer can be different from the vCPU where the timer is
currently assigned, in which case the timer would get stuck because it
never sees the interrupt as being injected.

Knowing when a vPT interrupt is injected is relevant for the guest
timer modes where missed vPT interrupts are not discarded and instead
are accumulated and injected when possible.

This change aims to modify the logic described above, so that vPT
doesn't need to check on every return to HVM guest if a vPT interrupt
is being injected. In order to achieve this the vPT code is modified
to make use of the new EOI callbacks, so that virtual timers can
detect when a interrupt has been serviced by the guest by waiting for
the EOI callback to execute.

This model also simplifies some of the logic, as when executing the
timer EOI callback Xen can try to inject another interrupt if the
timer has interrupts pending for delivery.

Note that timers are still bound to a vCPU for the time being, this
relation however doesn't limit the interrupt destination anymore, and
will be removed by further patches.

This model has been tested with Windows 7 guests without showing any
timer delay, even when the guest was limited to have very little CPU
capacity and pending virtual timer interrupts accumulate.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v3:
 - Rename pt_irq_fired to irq_eoi and adjust the logic.
 - Initialize v and cb_priv in eoi_callback.

Changes since v2:
 - Avoid and explicit != NULL check.
 - Use a switch in inject_interrupt to evaluate the timer mode.
 - Print the pt->source field on error in create_periodic_time.

Changes since v1:
 - New in this version.
---
 xen/arch/x86/hvm/svm/intr.c   |   3 -
 xen/arch/x86/hvm/vmx/intr.c   |  59 ------
 xen/arch/x86/hvm/vpt.c        | 352 +++++++++++++++-------------------
 xen/include/asm-x86/hvm/vpt.h |   5 +-
 4 files changed, 157 insertions(+), 262 deletions(-)

Comments

Jan Beulich May 4, 2021, 11 a.m. UTC | #1
On 20.04.2021 16:07, Roger Pau Monne wrote:
> @@ -295,188 +248,153 @@ static void pt_irq_fired(struct vcpu *v, struct periodic_time *pt)
>              list_del(&pt->list);
>          pt->on_list = false;
>          pt->pending_intr_nr = 0;
> +
> +        return;
>      }
> -    else if ( mode_is(v->domain, one_missed_tick_pending) ||
> -              mode_is(v->domain, no_missed_ticks_pending) )
> -    {
> -        pt->last_plt_gtime = hvm_get_guest_time(v);
> -        pt_process_missed_ticks(pt);
> -        pt->pending_intr_nr = 0; /* 'collapse' all missed ticks */
> -        set_timer(&pt->timer, pt->scheduled);
> -    }
> -    else
> +
> +    pt_process_missed_ticks(pt);
> +    /* 'collapse' missed ticks according to the selected mode. */
> +    switch ( pt->vcpu->domain->arch.hvm.params[HVM_PARAM_TIMER_MODE] )
>      {
> -        pt->last_plt_gtime += pt->period;
> -        if ( --pt->pending_intr_nr == 0 )
> -        {
> -            pt_process_missed_ticks(pt);
> -            if ( pt->pending_intr_nr == 0 )
> -                set_timer(&pt->timer, pt->scheduled);
> -        }
> +    case HVMPTM_one_missed_tick_pending:
> +        pt->pending_intr_nr = min(pt->pending_intr_nr, 1u);
> +        break;
> +
> +    case HVMPTM_no_missed_ticks_pending:
> +        pt->pending_intr_nr = 0;
> +        break;
>      }
>  
> -    if ( mode_is(v->domain, delay_for_missed_ticks) &&
> -         (hvm_get_guest_time(v) < pt->last_plt_gtime) )
> -        hvm_set_guest_time(v, pt->last_plt_gtime);
> +    if ( !pt->pending_intr_nr )
> +        set_timer(&pt->timer, pt->scheduled);
>  }
>  
> -int pt_update_irq(struct vcpu *v)
> +static void pt_timer_fn(void *data)
>  {
> -    struct list_head *head = &v->arch.hvm.tm_list;
> -    struct periodic_time *pt, *temp, *earliest_pt;
> -    uint64_t max_lag;
> -    int irq, pt_vector = -1;
> -    bool level;
> +    struct periodic_time *pt = data;
> +    struct vcpu *v;
> +    time_cb *cb = NULL;
> +    void *cb_priv;
> +    unsigned int irq;
>  
> -    pt_vcpu_lock(v);
> +    pt_lock(pt);
>  
> -    earliest_pt = NULL;
> -    max_lag = -1ULL;
> -    list_for_each_entry_safe ( pt, temp, head, list )
> +    v = pt->vcpu;
> +    irq = pt->irq;
> +
> +    if ( inject_interrupt(pt) )
>      {
> -        if ( pt->pending_intr_nr )
> -        {
> -            if ( pt_irq_masked(pt) &&
> -                 /* Level interrupts should be asserted even if masked. */
> -                 !pt->level )
> -            {
> -                /* suspend timer emulation */
> -                list_del(&pt->list);
> -                pt->on_list = 0;
> -            }
> -            else
> -            {
> -                if ( (pt->last_plt_gtime + pt->period) < max_lag )
> -                {
> -                    max_lag = pt->last_plt_gtime + pt->period;
> -                    earliest_pt = pt;
> -                }
> -            }
> -        }
> +        pt->scheduled += pt->period;
> +        pt->do_not_freeze = 0;

Nit: "false" please.

> +        cb = pt->cb;
> +        cb_priv = pt->priv;
>      }
> -
> -    if ( earliest_pt == NULL )
> +    else
>      {
> -        pt_vcpu_unlock(v);
> -        return -1;
> +        /* Masked. */
> +        if ( pt->on_list )
> +            list_del(&pt->list);
> +        pt->on_list = false;
> +        pt->pending_intr_nr++;
>      }

inject_interrupt() returns whether it was able to deliver the interrupt.
This in particular fails if the interrupt was masked and is edge
triggered. This, unexpectedly to me, reports success if a level triggered
interrupt was already pending. But in either event, the missed ticks
accounting is, as per my understanding of the comment in hvm/params.h,
supposed to be dealing with missing delivery due to preemption only. An
interrupt being masked / already pending may not be in this state due to
the guest having got preempted, though. A guest keeping a timer interrupt
masked for an extended period of time should not get a flood of
interrupts later on, no matter what HVM_PARAM_TIMER_MODE is set to.

However, I'm not going to exclude that little bit of doc is wrong, or
implementation and doc aren't in agreement already before your change.

> -    earliest_pt->irq_issued = 1;

This looks to be the only place where the field gets set to non-zero.
If the field is unused after this change, it wants deleting. I notice
patch 11 does so, but it may be worthwhile pointing out
- in the description here, that field removal will happen later,
- in the later patch, that this field was already unused (and doesn't
  become dead by the other removal done there).

> -    irq = earliest_pt->irq;
> -    level = earliest_pt->level;
> +    pt_unlock(pt);
>  
> -    pt_vcpu_unlock(v);
> +    if ( cb )
> +        cb(v, cb_priv);
> +}
>  
> -    switch ( earliest_pt->source )
> -    {
> -    case PTSRC_lapic:
> -        /*
> -         * If periodic timer interrupt is handled by lapic, its vector in
> -         * IRR is returned and used to set eoi_exit_bitmap for virtual
> -         * interrupt delivery case. Otherwise return -1 to do nothing.
> -         */
> -        vlapic_set_irq(vcpu_vlapic(v), irq, 0);
> -        pt_vector = irq;
> -        break;
> +static void eoi_callback(struct periodic_time *pt)
> +{
> +    struct vcpu *v = NULL;
> +    time_cb *cb = NULL;
> +    void *cb_priv = NULL;
>  
> -    case PTSRC_isa:
> -        hvm_isa_irq_deassert(v->domain, irq);
> -        if ( platform_legacy_irq(irq) && vlapic_accept_pic_intr(v) &&
> -             v->domain->arch.hvm.vpic[irq >> 3].int_output )
> -            hvm_isa_irq_assert(v->domain, irq, NULL);
> -        else
> +    pt_lock(pt);
> +
> +    irq_eoi(pt);
> +    if ( pt->pending_intr_nr )
> +    {
> +        if ( inject_interrupt(pt) )
>          {
> -            pt_vector = hvm_isa_irq_assert(v->domain, irq, vioapic_get_vector);
> -            /*
> -             * hvm_isa_irq_assert may not set the corresponding bit in vIRR
> -             * when mask field of IOAPIC RTE is set. Check it again.
> -             */
> -            if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) )
> -                pt_vector = -1;
> +            pt->pending_intr_nr--;
> +            cb = pt->cb;
> +            cb_priv = pt->priv;
> +            v = pt->vcpu;
>          }
> -        break;
> -
> -    case PTSRC_ioapic:
> -        pt_vector = hvm_ioapic_assert(v->domain, irq, level);
> -        if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) )
> +        else
>          {
> -            pt_vector = -1;
> -            if ( level )
> -            {
> -                /*
> -                 * Level interrupts are always asserted because the pin assert
> -                 * count is incremented regardless of whether the pin is masked
> -                 * or the vector latched in IRR, so also execute the callback
> -                 * associated with the timer.
> -                 */
> -                time_cb *cb = NULL;
> -                void *cb_priv = NULL;
> -
> -                pt_vcpu_lock(v);
> -                /* Make sure the timer is still on the list. */
> -                list_for_each_entry ( pt, &v->arch.hvm.tm_list, list )
> -                    if ( pt == earliest_pt )
> -                    {
> -                        pt_irq_fired(v, pt);
> -                        cb = pt->cb;
> -                        cb_priv = pt->priv;
> -                        break;
> -                    }
> -                pt_vcpu_unlock(v);
> -
> -                if ( cb != NULL )
> -                    cb(v, cb_priv);
> -            }
> +            /* Masked. */
> +            if ( pt->on_list )
> +                list_del(&pt->list);
> +            pt->on_list = false;
>          }
> -        break;
>      }
>  
> -    return pt_vector;
> +    pt_unlock(pt);
> +
> +    if ( cb )
> +        cb(v, cb_priv);
>  }
>  
> -static struct periodic_time *is_pt_irq(
> -    struct vcpu *v, struct hvm_intack intack)
> +static void vlapic_eoi_callback(struct vcpu *unused, unsigned int unused2,
> +                                void *data)
>  {
> -    struct list_head *head = &v->arch.hvm.tm_list;
> -    struct periodic_time *pt;
> -
> -    list_for_each_entry ( pt, head, list )
> -    {
> -        if ( pt->pending_intr_nr && pt->irq_issued &&
> -             (intack.vector == pt_irq_vector(pt, intack.source)) )
> -            return pt;
> -    }
> +    eoi_callback(data);
> +}
>  
> -    return NULL;
> +static void vioapic_eoi_callback(struct domain *unused, unsigned int unused2,
> +                                 void *data)
> +{
> +    eoi_callback(data);
>  }
>  
> -void pt_intr_post(struct vcpu *v, struct hvm_intack intack)
> +static bool inject_interrupt(struct periodic_time *pt)
>  {
> -    struct periodic_time *pt;
> -    time_cb *cb;
> -    void *cb_priv;
> +    struct vcpu *v = pt->vcpu;
> +    struct domain *d = v->domain;
> +    unsigned int irq = pt->irq;
>  
> -    if ( intack.source == hvm_intsrc_vector )
> -        return;
> +    /* Level interrupts should be asserted even if masked. */
> +    if ( pt_irq_masked(pt) && !pt->level )
> +        return false;
>  
> -    pt_vcpu_lock(v);
> -
> -    pt = is_pt_irq(v, intack);
> -    if ( pt == NULL )
> +    switch ( pt->source )
>      {
> -        pt_vcpu_unlock(v);
> -        return;
> +    case PTSRC_lapic:
> +        vlapic_set_irq_callback(vcpu_vlapic(v), pt->irq, 0, vlapic_eoi_callback,
> +                                pt);
> +        break;
> +
> +    case PTSRC_isa:
> +        hvm_isa_irq_deassert(d, irq);
> +        hvm_isa_irq_assert(d, irq, NULL);
> +        break;
> +
> +    case PTSRC_ioapic:
> +        hvm_ioapic_assert(d, irq, pt->level);
> +        break;
>      }

Why do ISA IRQs get de-asserted first, but IO-APIC ones don't? I
notice e.g. hvm_set_callback_irq_level() and hvm_set_pci_link_route()
have similar apparent asymmetries, so I guess I'm missing something.
In particular I can't spot - even prior to this change - where
hvm_irq->gsi_assert_count[gsi] would get decremented for a level
triggered IRQ, when hvm_ioapic_deassert() gets called only from
hvm/hpet.c:hpet_write(). I guess the main point is that that's the
only case of a level triggered timer interrupt for us?

> @@ -641,20 +590,29 @@ void pt_adjust_global_vcpu_target(struct vcpu *v)
>      write_unlock(&pl_time->vhpet.lock);
>  }
>  
> -
>  static void pt_resume(struct periodic_time *pt)
>  {
> +    struct vcpu *v;
> +    time_cb *cb = NULL;
> +    void *cb_priv;
> +
>      if ( pt->vcpu == NULL )
>          return;
>  
>      pt_lock(pt);
> -    if ( pt->pending_intr_nr && !pt->on_list )
> +    if ( pt->pending_intr_nr && !pt->on_list && inject_interrupt(pt) )
>      {
> +        pt->pending_intr_nr--;
> +        cb = pt->cb;
> +        cb_priv = pt->priv;
> +        v = pt->vcpu;
>          pt->on_list = 1;
>          list_add(&pt->list, &pt->vcpu->arch.hvm.tm_list);
> -        vcpu_kick(pt->vcpu);

Just for my own understanding: The replacement of this is what happens
down the call tree from inject_interrupt()?

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/hvm/svm/intr.c b/xen/arch/x86/hvm/svm/intr.c
index 7f815d23078..2ee2332253b 100644
--- a/xen/arch/x86/hvm/svm/intr.c
+++ b/xen/arch/x86/hvm/svm/intr.c
@@ -146,8 +146,6 @@  void svm_intr_assist(void)
         return;
 
     /* Crank the handle on interrupt state. */
-    pt_update_irq(v);
-
     do {
         intack = hvm_vcpu_has_pending_irq(v);
         if ( likely(intack.source == hvm_intsrc_none) )
@@ -219,7 +217,6 @@  void svm_intr_assist(void)
     {
         HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
         svm_inject_extint(v, intack.vector);
-        pt_intr_post(v, intack);
     }
 
     /* Is there another IRQ to queue up behind this one? */
diff --git a/xen/arch/x86/hvm/vmx/intr.c b/xen/arch/x86/hvm/vmx/intr.c
index 80bfbb47878..3fcc7073db2 100644
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -203,7 +203,6 @@  static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
             {
                 /* for now, duplicate the ack path in vmx_intr_assist */
                 hvm_vcpu_ack_pending_irq(v, intack);
-                pt_intr_post(v, intack);
 
                 intack = hvm_vcpu_has_pending_irq(v);
                 if ( unlikely(intack.source != hvm_intsrc_none) )
@@ -242,7 +241,6 @@  void vmx_intr_assist(void)
     struct vcpu *v = current;
     unsigned int tpr_threshold = 0;
     enum hvm_intblk intblk;
-    int pt_vector;
 
     /* Block event injection when single step with MTF. */
     if ( unlikely(v->arch.hvm.single_step) )
@@ -263,8 +261,6 @@  void vmx_intr_assist(void)
 #endif
 
     /* Crank the handle on interrupt state. */
-    pt_vector = pt_update_irq(v);
-
     do {
         unsigned long intr_info;
 
@@ -337,58 +333,6 @@  void vmx_intr_assist(void)
     {
         unsigned long status;
 
-       /*
-        * intack.vector is the highest priority vector. So we set eoi_exit_bitmap
-        * for intack.vector - give a chance to post periodic time interrupts when
-        * periodic time interrupts become the highest one
-        */
-        if ( pt_vector != -1 )
-        {
-#ifndef NDEBUG
-            /*
-             * We assert that intack.vector is the highest priority vector for
-             * only an interrupt from vlapic can reach this point and the
-             * highest vector is chosen in hvm_vcpu_has_pending_irq().
-             * But, in fact, the assertion failed sometimes. It is suspected
-             * that PIR is not synced to vIRR which makes pt_vector is left in
-             * PIR. In order to verify this suspicion, dump some information
-             * when the assertion fails.
-             */
-            if ( unlikely(intack.vector < pt_vector) )
-            {
-                const struct vlapic *vlapic;
-                const struct pi_desc *pi_desc;
-                const uint32_t *word;
-                unsigned int i;
-
-                printk(XENLOG_ERR "%pv: intack: %u:%02x pt: %02x\n",
-                       current, intack.source, intack.vector, pt_vector);
-
-                vlapic = vcpu_vlapic(v);
-                if ( vlapic && vlapic->regs )
-                {
-                    word = (const void *)&vlapic->regs->data[APIC_IRR];
-                    printk(XENLOG_ERR "vIRR:");
-                    for ( i = X86_NR_VECTORS / 32; i-- ; )
-                        printk(" %08x", word[i*4]);
-                    printk("\n");
-                }
-
-                pi_desc = &v->arch.hvm.vmx.pi_desc;
-                if ( pi_desc )
-                {
-                    word = (const void *)&pi_desc->pir;
-                    printk(XENLOG_ERR " PIR:");
-                    for ( i = X86_NR_VECTORS / 32; i-- ; )
-                        printk(" %08x", word[i]);
-                    printk("\n");
-                }
-            }
-#endif
-            ASSERT(intack.vector >= pt_vector);
-            vmx_set_eoi_exit_bitmap(v, intack.vector);
-        }
-
         /* we need update the RVI field */
         __vmread(GUEST_INTR_STATUS, &status);
         status &= ~VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK;
@@ -397,14 +341,11 @@  void vmx_intr_assist(void)
         __vmwrite(GUEST_INTR_STATUS, status);
 
         vmx_sync_exit_bitmap(v);
-
-        pt_intr_post(v, intack);
     }
     else
     {
         HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
         vmx_inject_extint(intack.vector, intack.source);
-        pt_intr_post(v, intack);
     }
 
     /* Is there another IRQ to queue up behind this one? */
diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c
index 24d90c0a186..6744b88d20c 100644
--- a/xen/arch/x86/hvm/vpt.c
+++ b/xen/arch/x86/hvm/vpt.c
@@ -26,6 +26,8 @@ 
 #define mode_is(d, name) \
     ((d)->arch.hvm.params[HVM_PARAM_TIMER_MODE] == HVMPTM_##name)
 
+static bool inject_interrupt(struct periodic_time *pt);
+
 void hvm_init_guest_time(struct domain *d)
 {
     struct pl_time *pl = d->arch.hvm.pl_time;
@@ -75,35 +77,6 @@  void hvm_set_guest_time(struct vcpu *v, u64 guest_time)
     }
 }
 
-static int pt_irq_vector(struct periodic_time *pt, enum hvm_intsrc src)
-{
-    struct vcpu *v = pt->vcpu;
-    unsigned int gsi, isa_irq;
-    int vector;
-
-    if ( pt->source == PTSRC_lapic )
-        return pt->irq;
-
-    isa_irq = pt->irq;
-
-    if ( src == hvm_intsrc_pic )
-        return (v->domain->arch.hvm.vpic[isa_irq >> 3].irq_base
-                + (isa_irq & 7));
-
-    ASSERT(src == hvm_intsrc_lapic);
-    gsi = pt->source == PTSRC_isa ? hvm_isa_irq_to_gsi(isa_irq) : pt->irq;
-    vector = vioapic_get_vector(v->domain, gsi);
-    if ( vector < 0 )
-    {
-        dprintk(XENLOG_WARNING, "d%u: invalid GSI (%u) for platform timer\n",
-                v->domain->domain_id, gsi);
-        domain_crash(v->domain);
-        return -1;
-    }
-
-    return vector;
-}
-
 static int pt_irq_masked(struct periodic_time *pt)
 {
     struct vcpu *v = pt->vcpu;
@@ -257,35 +230,15 @@  void pt_restore_timer(struct vcpu *v)
     pt_vcpu_lock(v);
 
     list_for_each_entry ( pt, head, list )
-    {
         if ( pt->pending_intr_nr == 0 )
-        {
-            pt_process_missed_ticks(pt);
             set_timer(&pt->timer, pt->scheduled);
-        }
-    }
 
     pt_thaw_time(v);
 
     pt_vcpu_unlock(v);
 }
 
-static void pt_timer_fn(void *data)
-{
-    struct periodic_time *pt = data;
-
-    pt_lock(pt);
-
-    pt->pending_intr_nr++;
-    pt->scheduled += pt->period;
-    pt->do_not_freeze = 0;
-
-    vcpu_kick(pt->vcpu);
-
-    pt_unlock(pt);
-}
-
-static void pt_irq_fired(struct vcpu *v, struct periodic_time *pt)
+static void irq_eoi(struct periodic_time *pt)
 {
     pt->irq_issued = false;
 
@@ -295,188 +248,153 @@  static void pt_irq_fired(struct vcpu *v, struct periodic_time *pt)
             list_del(&pt->list);
         pt->on_list = false;
         pt->pending_intr_nr = 0;
+
+        return;
     }
-    else if ( mode_is(v->domain, one_missed_tick_pending) ||
-              mode_is(v->domain, no_missed_ticks_pending) )
-    {
-        pt->last_plt_gtime = hvm_get_guest_time(v);
-        pt_process_missed_ticks(pt);
-        pt->pending_intr_nr = 0; /* 'collapse' all missed ticks */
-        set_timer(&pt->timer, pt->scheduled);
-    }
-    else
+
+    pt_process_missed_ticks(pt);
+    /* 'collapse' missed ticks according to the selected mode. */
+    switch ( pt->vcpu->domain->arch.hvm.params[HVM_PARAM_TIMER_MODE] )
     {
-        pt->last_plt_gtime += pt->period;
-        if ( --pt->pending_intr_nr == 0 )
-        {
-            pt_process_missed_ticks(pt);
-            if ( pt->pending_intr_nr == 0 )
-                set_timer(&pt->timer, pt->scheduled);
-        }
+    case HVMPTM_one_missed_tick_pending:
+        pt->pending_intr_nr = min(pt->pending_intr_nr, 1u);
+        break;
+
+    case HVMPTM_no_missed_ticks_pending:
+        pt->pending_intr_nr = 0;
+        break;
     }
 
-    if ( mode_is(v->domain, delay_for_missed_ticks) &&
-         (hvm_get_guest_time(v) < pt->last_plt_gtime) )
-        hvm_set_guest_time(v, pt->last_plt_gtime);
+    if ( !pt->pending_intr_nr )
+        set_timer(&pt->timer, pt->scheduled);
 }
 
-int pt_update_irq(struct vcpu *v)
+static void pt_timer_fn(void *data)
 {
-    struct list_head *head = &v->arch.hvm.tm_list;
-    struct periodic_time *pt, *temp, *earliest_pt;
-    uint64_t max_lag;
-    int irq, pt_vector = -1;
-    bool level;
+    struct periodic_time *pt = data;
+    struct vcpu *v;
+    time_cb *cb = NULL;
+    void *cb_priv;
+    unsigned int irq;
 
-    pt_vcpu_lock(v);
+    pt_lock(pt);
 
-    earliest_pt = NULL;
-    max_lag = -1ULL;
-    list_for_each_entry_safe ( pt, temp, head, list )
+    v = pt->vcpu;
+    irq = pt->irq;
+
+    if ( inject_interrupt(pt) )
     {
-        if ( pt->pending_intr_nr )
-        {
-            if ( pt_irq_masked(pt) &&
-                 /* Level interrupts should be asserted even if masked. */
-                 !pt->level )
-            {
-                /* suspend timer emulation */
-                list_del(&pt->list);
-                pt->on_list = 0;
-            }
-            else
-            {
-                if ( (pt->last_plt_gtime + pt->period) < max_lag )
-                {
-                    max_lag = pt->last_plt_gtime + pt->period;
-                    earliest_pt = pt;
-                }
-            }
-        }
+        pt->scheduled += pt->period;
+        pt->do_not_freeze = 0;
+        cb = pt->cb;
+        cb_priv = pt->priv;
     }
-
-    if ( earliest_pt == NULL )
+    else
     {
-        pt_vcpu_unlock(v);
-        return -1;
+        /* Masked. */
+        if ( pt->on_list )
+            list_del(&pt->list);
+        pt->on_list = false;
+        pt->pending_intr_nr++;
     }
 
-    earliest_pt->irq_issued = 1;
-    irq = earliest_pt->irq;
-    level = earliest_pt->level;
+    pt_unlock(pt);
 
-    pt_vcpu_unlock(v);
+    if ( cb )
+        cb(v, cb_priv);
+}
 
-    switch ( earliest_pt->source )
-    {
-    case PTSRC_lapic:
-        /*
-         * If periodic timer interrupt is handled by lapic, its vector in
-         * IRR is returned and used to set eoi_exit_bitmap for virtual
-         * interrupt delivery case. Otherwise return -1 to do nothing.
-         */
-        vlapic_set_irq(vcpu_vlapic(v), irq, 0);
-        pt_vector = irq;
-        break;
+static void eoi_callback(struct periodic_time *pt)
+{
+    struct vcpu *v = NULL;
+    time_cb *cb = NULL;
+    void *cb_priv = NULL;
 
-    case PTSRC_isa:
-        hvm_isa_irq_deassert(v->domain, irq);
-        if ( platform_legacy_irq(irq) && vlapic_accept_pic_intr(v) &&
-             v->domain->arch.hvm.vpic[irq >> 3].int_output )
-            hvm_isa_irq_assert(v->domain, irq, NULL);
-        else
+    pt_lock(pt);
+
+    irq_eoi(pt);
+    if ( pt->pending_intr_nr )
+    {
+        if ( inject_interrupt(pt) )
         {
-            pt_vector = hvm_isa_irq_assert(v->domain, irq, vioapic_get_vector);
-            /*
-             * hvm_isa_irq_assert may not set the corresponding bit in vIRR
-             * when mask field of IOAPIC RTE is set. Check it again.
-             */
-            if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) )
-                pt_vector = -1;
+            pt->pending_intr_nr--;
+            cb = pt->cb;
+            cb_priv = pt->priv;
+            v = pt->vcpu;
         }
-        break;
-
-    case PTSRC_ioapic:
-        pt_vector = hvm_ioapic_assert(v->domain, irq, level);
-        if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) )
+        else
         {
-            pt_vector = -1;
-            if ( level )
-            {
-                /*
-                 * Level interrupts are always asserted because the pin assert
-                 * count is incremented regardless of whether the pin is masked
-                 * or the vector latched in IRR, so also execute the callback
-                 * associated with the timer.
-                 */
-                time_cb *cb = NULL;
-                void *cb_priv = NULL;
-
-                pt_vcpu_lock(v);
-                /* Make sure the timer is still on the list. */
-                list_for_each_entry ( pt, &v->arch.hvm.tm_list, list )
-                    if ( pt == earliest_pt )
-                    {
-                        pt_irq_fired(v, pt);
-                        cb = pt->cb;
-                        cb_priv = pt->priv;
-                        break;
-                    }
-                pt_vcpu_unlock(v);
-
-                if ( cb != NULL )
-                    cb(v, cb_priv);
-            }
+            /* Masked. */
+            if ( pt->on_list )
+                list_del(&pt->list);
+            pt->on_list = false;
         }
-        break;
     }
 
-    return pt_vector;
+    pt_unlock(pt);
+
+    if ( cb )
+        cb(v, cb_priv);
 }
 
-static struct periodic_time *is_pt_irq(
-    struct vcpu *v, struct hvm_intack intack)
+static void vlapic_eoi_callback(struct vcpu *unused, unsigned int unused2,
+                                void *data)
 {
-    struct list_head *head = &v->arch.hvm.tm_list;
-    struct periodic_time *pt;
-
-    list_for_each_entry ( pt, head, list )
-    {
-        if ( pt->pending_intr_nr && pt->irq_issued &&
-             (intack.vector == pt_irq_vector(pt, intack.source)) )
-            return pt;
-    }
+    eoi_callback(data);
+}
 
-    return NULL;
+static void vioapic_eoi_callback(struct domain *unused, unsigned int unused2,
+                                 void *data)
+{
+    eoi_callback(data);
 }
 
-void pt_intr_post(struct vcpu *v, struct hvm_intack intack)
+static bool inject_interrupt(struct periodic_time *pt)
 {
-    struct periodic_time *pt;
-    time_cb *cb;
-    void *cb_priv;
+    struct vcpu *v = pt->vcpu;
+    struct domain *d = v->domain;
+    unsigned int irq = pt->irq;
 
-    if ( intack.source == hvm_intsrc_vector )
-        return;
+    /* Level interrupts should be asserted even if masked. */
+    if ( pt_irq_masked(pt) && !pt->level )
+        return false;
 
-    pt_vcpu_lock(v);
-
-    pt = is_pt_irq(v, intack);
-    if ( pt == NULL )
+    switch ( pt->source )
     {
-        pt_vcpu_unlock(v);
-        return;
+    case PTSRC_lapic:
+        vlapic_set_irq_callback(vcpu_vlapic(v), pt->irq, 0, vlapic_eoi_callback,
+                                pt);
+        break;
+
+    case PTSRC_isa:
+        hvm_isa_irq_deassert(d, irq);
+        hvm_isa_irq_assert(d, irq, NULL);
+        break;
+
+    case PTSRC_ioapic:
+        hvm_ioapic_assert(d, irq, pt->level);
+        break;
     }
 
-    pt_irq_fired(v, pt);
+    switch ( d->arch.hvm.params[HVM_PARAM_TIMER_MODE] )
+    {
+    case HVMPTM_one_missed_tick_pending:
+    case HVMPTM_no_missed_ticks_pending:
+        pt->last_plt_gtime = hvm_get_guest_time(v);
+        break;
 
-    cb = pt->cb;
-    cb_priv = pt->priv;
+    case HVMPTM_delay_for_missed_ticks:
+        pt->last_plt_gtime += pt->period;
+        if ( hvm_get_guest_time(v) < pt->last_plt_gtime )
+            hvm_set_guest_time(v, pt->last_plt_gtime);
+        break;
 
-    pt_vcpu_unlock(v);
+    default:
+        pt->last_plt_gtime += pt->period;
+        break;
+    }
 
-    if ( cb != NULL )
-        cb(v, cb_priv);
+    return true;
 }
 
 void pt_migrate(struct vcpu *v)
@@ -552,6 +470,24 @@  void create_periodic_time(
     pt->cb = cb;
     pt->priv = data;
 
+    switch ( pt->source )
+    {
+        int rc;
+
+    case PTSRC_isa:
+        irq = hvm_isa_irq_to_gsi(irq);
+        /* fallthrough */
+    case PTSRC_ioapic:
+        pt->eoi_cb.callback = vioapic_eoi_callback;
+        pt->eoi_cb.data = pt;
+        rc = hvm_gsi_register_callback(v->domain, irq, &pt->eoi_cb);
+        if ( rc )
+            gdprintk(XENLOG_WARNING,
+                     "unable to register callback for timer GSI %u source %u: %d\n",
+                     irq, pt->source, rc);
+        break;
+    }
+
     pt_vcpu_lock(v);
     pt->on_list = 1;
     list_add(&pt->list, &v->arch.hvm.tm_list);
@@ -565,6 +501,8 @@  void create_periodic_time(
 
 void destroy_periodic_time(struct periodic_time *pt)
 {
+    unsigned int gsi;
+
     /* Was this structure previously initialised by create_periodic_time()? */
     if ( pt->vcpu == NULL )
         return;
@@ -574,6 +512,17 @@  void destroy_periodic_time(struct periodic_time *pt)
         list_del(&pt->list);
     pt->on_list = 0;
     pt->pending_intr_nr = 0;
+
+    gsi = pt->irq;
+    switch ( pt->source )
+    {
+    case PTSRC_isa:
+        gsi = hvm_isa_irq_to_gsi(pt->irq);
+        /* fallthrough */
+    case PTSRC_ioapic:
+        hvm_gsi_unregister_callback(pt->vcpu->domain, gsi, &pt->eoi_cb);
+        break;
+    }
     pt_unlock(pt);
 
     /*
@@ -641,20 +590,29 @@  void pt_adjust_global_vcpu_target(struct vcpu *v)
     write_unlock(&pl_time->vhpet.lock);
 }
 
-
 static void pt_resume(struct periodic_time *pt)
 {
+    struct vcpu *v;
+    time_cb *cb = NULL;
+    void *cb_priv;
+
     if ( pt->vcpu == NULL )
         return;
 
     pt_lock(pt);
-    if ( pt->pending_intr_nr && !pt->on_list )
+    if ( pt->pending_intr_nr && !pt->on_list && inject_interrupt(pt) )
     {
+        pt->pending_intr_nr--;
+        cb = pt->cb;
+        cb_priv = pt->priv;
+        v = pt->vcpu;
         pt->on_list = 1;
         list_add(&pt->list, &pt->vcpu->arch.hvm.tm_list);
-        vcpu_kick(pt->vcpu);
     }
     pt_unlock(pt);
+
+    if ( cb )
+        cb(v, cb_priv);
 }
 
 void pt_may_unmask_irq(struct domain *d, struct periodic_time *vlapic_pt)
diff --git a/xen/include/asm-x86/hvm/vpt.h b/xen/include/asm-x86/hvm/vpt.h
index 74c0cedd11c..384d2e02039 100644
--- a/xen/include/asm-x86/hvm/vpt.h
+++ b/xen/include/asm-x86/hvm/vpt.h
@@ -23,6 +23,7 @@ 
 #include <xen/list.h>
 #include <xen/rwlock.h>
 #include <asm/hvm/hvm.h>
+#include <asm/hvm/irq.h>
 
 /*
  * Abstract layer of periodic time, one short time.
@@ -50,6 +51,7 @@  struct periodic_time {
     struct timer timer;         /* ac_timer */
     time_cb *cb;
     void *priv;                 /* point back to platform time source */
+    struct hvm_gsi_eoi_callback eoi_cb; /* EOI callback registration data */
 };
 
 
@@ -151,9 +153,6 @@  struct pl_time {    /* platform time */
 
 void pt_save_timer(struct vcpu *v);
 void pt_restore_timer(struct vcpu *v);
-int pt_update_irq(struct vcpu *v);
-struct hvm_intack;
-void pt_intr_post(struct vcpu *v, struct hvm_intack intack);
 void pt_migrate(struct vcpu *v);
 
 void pt_adjust_global_vcpu_target(struct vcpu *v);