
[v4,3/5] ARM: KVM: arch_timers: Add guest timer core support

Message ID 20121110154618.3274.53380.stgit@chazy-air (mailing list archive)
State New, archived

Commit Message

Christoffer Dall Nov. 10, 2012, 3:46 p.m. UTC
From: Marc Zyngier <marc.zyngier@arm.com>

We can inject a timer interrupt into the guest as a result of
three possible events:
- The virtual timer interrupt has fired while we were still
  executing the guest
- The timer interrupt hasn't fired, but it expired while we
  were doing the world switch
- A hrtimer we programmed earlier has fired

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 arch/arm/include/asm/kvm_arch_timer.h |   57 +++++++++
 arch/arm/kvm/reset.c                  |    9 +
 arch/arm/kvm/timer.c                  |  204 +++++++++++++++++++++++++++++++++
 3 files changed, 269 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm/kvm/timer.c

Comments

Will Deacon Nov. 23, 2012, 4:17 p.m. UTC | #1
On Sat, Nov 10, 2012 at 03:46:19PM +0000, Christoffer Dall wrote:
> From: Marc Zyngier <marc.zyngier@arm.com>
> 
> We can inject a timer interrupt into the guest as a result of
> three possible events:
> - The virtual timer interrupt has fired while we were still
>   executing the guest
> - The timer interrupt hasn't fired, but it expired while we
>   were doing the world switch
> - A hrtimer we programmed earlier has fired
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
> ---
>  arch/arm/include/asm/kvm_arch_timer.h |   57 +++++++++
>  arch/arm/kvm/reset.c                  |    9 +
>  arch/arm/kvm/timer.c                  |  204 +++++++++++++++++++++++++++++++++
>  3 files changed, 269 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm/kvm/timer.c
> 
> diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/arch/arm/include/asm/kvm_arch_timer.h
> index 513b852..bd5e501 100644
> --- a/arch/arm/include/asm/kvm_arch_timer.h
> +++ b/arch/arm/include/asm/kvm_arch_timer.h
> @@ -19,13 +19,68 @@
>  #ifndef __ASM_ARM_KVM_ARCH_TIMER_H
>  #define __ASM_ARM_KVM_ARCH_TIMER_H
> 
> +#include <linux/clocksource.h>
> +#include <linux/hrtimer.h>
> +#include <linux/workqueue.h>
> +
>  struct arch_timer_kvm {
> +#ifdef CONFIG_KVM_ARM_TIMER
> +       /* Is the timer enabled */
> +       bool                    enabled;
> +
> +       /*
> +        * Virtual offset (kernel access it through cntvoff, HYP code
> +        * access it as two 32bit values).
> +        */
> +       union {
> +               cycle_t         cntvoff;
> +               struct {
> +                       u32     low;    /* Restored only */
> +                       u32     high;   /* Restored only */
> +               } cntvoff32;
> +       };
> +#endif

Endianness?
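
i.e. the low/high overlay assumes a little-endian layout; a big-endian kernel
would need something along these lines (sketch only):

	union {
		cycle_t		cntvoff;
		struct {
#ifdef CONFIG_CPU_BIG_ENDIAN
			u32	high;	/* Restored only */
			u32	low;	/* Restored only */
#else
			u32	low;	/* Restored only */
			u32	high;	/* Restored only */
#endif
		} cntvoff32;
	};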

>  };
> 
>  struct arch_timer_cpu {
> +#ifdef CONFIG_KVM_ARM_TIMER
> +       /* Registers: control register, timer value */
> +       u32                             cntv_ctl;       /* Saved/restored */
> +       union {
> +               cycle_t                 cntv_cval;
> +               struct {
> +                       u32             low;            /* Saved/restored */
> +                       u32             high;           /* Saved/restored */
> +               } cntv_cval32;
> +       };

Similarly.

> +       /*
> +        * Anything that is not used directly from assembly code goes
> +        * here.
> +        */
> +
> +       /* Background timer used when the guest is not running */
> +       struct hrtimer                  timer;
> +
> +       /* Work queued when the above timer expires */
> +       struct work_struct              expired;
> +
> +       /* Background timer active */
> +       bool                            armed;
> +
> +       /* Timer IRQ */
> +       const struct kvm_irq_level      *irq;
> +#endif
>  };
> 
> -#ifndef CONFIG_KVM_ARM_TIMER
> +#ifdef CONFIG_KVM_ARM_TIMER
> +int kvm_timer_hyp_init(void);
> +int kvm_timer_init(struct kvm *kvm);
> +void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
> +void kvm_timer_sync_to_cpu(struct kvm_vcpu *vcpu);
> +void kvm_timer_sync_from_cpu(struct kvm_vcpu *vcpu);
> +void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
> +#else
>  static inline int kvm_timer_hyp_init(void)
>  {
>         return 0;
> diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
> index b80256b..7463f5b 100644
> --- a/arch/arm/kvm/reset.c
> +++ b/arch/arm/kvm/reset.c
> @@ -37,6 +37,12 @@ static struct kvm_regs a15_regs_reset = {
>         .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
>  };
> 
> +#ifdef CONFIG_KVM_ARM_TIMER
> +static const struct kvm_irq_level a15_virt_timer_ppi = {
> +       { .irq = 27 },  /* irq: A7/A15 specific */

This should be parameterised by the vCPU type.

> +       .level = 1      /* level */
> +};
> +#endif
> 
>  /*******************************************************************************
>   * Exported reset function
> @@ -59,6 +65,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
>                         return -EINVAL;
>                 cpu_reset = &a15_regs_reset;
>                 vcpu->arch.midr = read_cpuid_id();
> +#ifdef CONFIG_KVM_ARM_TIMER
> +               vcpu->arch.timer_cpu.irq = &a15_virt_timer_ppi;
> +#endif
>                 break;
>         default:
>                 return -ENODEV;
> diff --git a/arch/arm/kvm/timer.c b/arch/arm/kvm/timer.c
> new file mode 100644
> index 0000000..a241298
> --- /dev/null
> +++ b/arch/arm/kvm/timer.c

Maybe kvm/arch_timer.c since it seems to be specific to that device and
matches the naming for the main driver under kernel/? If we get new virtual
timer devices in the future, I guess this would need to be split up so the
arch-timer-specific bits are separate from generic hrtimer and kvm code.

> @@ -0,0 +1,204 @@
> +/*
> + * Copyright (C) 2012 ARM Ltd.
> + * Author: Marc Zyngier <marc.zyngier@arm.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> + */
> +
> +#include <linux/of_irq.h>
> +#include <linux/kvm.h>
> +#include <linux/kvm_host.h>
> +#include <linux/interrupt.h>
> +
> +#include <asm/arch_timer.h>
> +
> +#include <asm/kvm_vgic.h>
> +#include <asm/kvm_arch_timer.h>
> +
> +static struct timecounter *timecounter;
> +static struct workqueue_struct *wqueue;
> +
> +static cycle_t kvm_phys_timer_read(void)
> +{
> +       return timecounter->cc->read(timecounter->cc);
> +}
> +
> +static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
> +{
> +       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
> +
> +       timer->cntv_ctl |= 1 << 1; /* Mask the interrupt in the guest */

This is ARCH_TIMER_CTRL_IT_MASK right? We should make that definition
available if it is needed here.
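
Something like this, say, once those definitions are made shareable (sketch;
bit positions per the ARMv7 ARM):

	/* CNTV_CTL bits */
	#define ARCH_TIMER_CTRL_ENABLE		(1 << 0)
	#define ARCH_TIMER_CTRL_IT_MASK		(1 << 1)
	#define ARCH_TIMER_CTRL_IT_STAT		(1 << 2)

	timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; /* Mask the interrupt in the guest */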

> +       kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
> +                           vcpu->arch.timer_cpu.irq->irq,
> +                           vcpu->arch.timer_cpu.irq->level);
> +}
> +
> +static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
> +{
> +       struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
> +
> +       /*
> +        * We disable the timer in the world switch and let it be
> +        * handled by kvm_timer_sync_from_cpu(). Getting a timer
> +        * interrupt at this point is a sure sign of some major
> +        * breakage.
> +        */
> +       pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
> +       return IRQ_HANDLED;

IRQ_NONE?

> +}
> +
> +static void kvm_timer_inject_irq_work(struct work_struct *work)
> +{
> +       struct kvm_vcpu *vcpu;
> +
> +       vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
> +       vcpu->arch.timer_cpu.armed = false;
> +       kvm_timer_inject_irq(vcpu);
> +}
> +
> +static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
> +{
> +       struct arch_timer_cpu *timer;
> +       timer = container_of(hrt, struct arch_timer_cpu, timer);
> +       queue_work(wqueue, &timer->expired);
> +       return HRTIMER_NORESTART;
> +}
> +
> +void kvm_timer_sync_to_cpu(struct kvm_vcpu *vcpu)
> +{
> +       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
> +
> +       /*
> +        * We're about to run this vcpu again, so there is no need to
> +        * keep the background timer running, as we're about to
> +        * populate the CPU timer again.
> +        */
> +       if (timer->armed) {
> +               hrtimer_cancel(&timer->timer);
> +               cancel_work_sync(&timer->expired);
> +               timer->armed = false;
> +       }
> +}

I think some helper functions like timer_is_armed, timer_arm and
timer_disarm would make this more readable (resisting arm_timer, which
confuses things more!).
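
Something along these lines, say (untested sketch based on the code above):

	static inline bool timer_is_armed(struct arch_timer_cpu *timer)
	{
		return timer->armed;
	}

	static void timer_arm(struct arch_timer_cpu *timer, u64 ns)
	{
		timer->armed = true;
		hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns),
			      HRTIMER_MODE_ABS);
	}

	static void timer_disarm(struct arch_timer_cpu *timer)
	{
		if (timer_is_armed(timer)) {
			hrtimer_cancel(&timer->timer);
			cancel_work_sync(&timer->expired);
			timer->armed = false;
		}
	}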

> +
> +void kvm_timer_sync_from_cpu(struct kvm_vcpu *vcpu)
> +{
> +       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
> +       cycle_t cval, now;
> +       u64 ns;
> +
> +       /* Check if the timer is enabled and unmasked first */
> +       if ((timer->cntv_ctl & 3) != 1)
> +               return;

Again, we should create/use ARCH_TIMER #defines for these hardware bits.
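
With definitions like the ones suggested above, this could read:

	if ((timer->cntv_ctl &
	     (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_MASK)) !=
	    ARCH_TIMER_CTRL_ENABLE)
		return;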

> +       cval = timer->cntv_cval;
> +       now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;

We probably want to add an isb() *before* the mrrc in
arch_timer_counter_read if you want to do things like this, because the
counter can be speculated in advance.
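
i.e. roughly this, whatever the accessor ends up being called (sketch only,
CNTPCT shown):

	static inline u64 arch_timer_counter_read(void)
	{
		u64 cval;

		isb();	/* the CP15 counter read can otherwise be speculated early */
		asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval));
		return cval;
	}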

> +       BUG_ON(timer->armed);
> +
> +       if (cval <= now) {
> +               /*
> +                * Timer has already expired while we were not
> +                * looking. Inject the interrupt and carry on.
> +                */
> +               kvm_timer_inject_irq(vcpu);
> +               return;
> +       }

Does this buy you much? You still have to cope with the timer expiring here
anyway.

> +       timer->armed = true;
> +       ns = cyclecounter_cyc2ns(timecounter->cc, cval - now);
> +       hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns),
> +                     HRTIMER_MODE_ABS);
> +}
> +
> +void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
> +{
> +       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
> +
> +       INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
> +       hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
> +       timer->timer.function = kvm_timer_expire;
> +}
> +
> +static void kvm_timer_init_interrupt(void *info)
> +{
> +       unsigned int *irqp = info;
> +
> +       enable_percpu_irq(*irqp, 0);

IRQ_TYPE_NONE

> +}
> +
> +
> +static const struct of_device_id arch_timer_of_match[] = {
> +       { .compatible   = "arm,armv7-timer",    },
> +       {},
> +};
> +
> +int kvm_timer_hyp_init(void)
> +{
> +       struct device_node *np;
> +       unsigned int ppi;
> +       int err;
> +
> +       timecounter = arch_timer_get_timecounter();
> +       if (!timecounter)
> +               return -ENODEV;
> +
> +       np = of_find_matching_node(NULL, arch_timer_of_match);
> +       if (!np) {
> +               kvm_err("kvm_arch_timer: can't find DT node\n");
> +               return -ENODEV;
> +       }
> +
> +       ppi = irq_of_parse_and_map(np, 2);
> +       if (!ppi) {
> +               kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
> +               return -EINVAL;
> +       }
> +
> +       err = request_percpu_irq(ppi, kvm_arch_timer_handler,
> +                                "kvm guest timer", kvm_get_running_vcpus());
> +       if (err) {
> +               kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
> +                       ppi, err);
> +               return err;
> +       }
> +
> +       wqueue = create_singlethread_workqueue("kvm_arch_timer");
> +       if (!wqueue) {
> +               free_percpu_irq(ppi, kvm_get_running_vcpus());
> +               return -ENOMEM;
> +       }
> +
> +       kvm_info("%s IRQ%d\n", np->name, ppi);
> +       on_each_cpu(kvm_timer_init_interrupt, &ppi, 1);

on_each_cpu currently just returns 0, but you could use that as your return
value for good measure anyway.
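
i.e.:

	return on_each_cpu(kvm_timer_init_interrupt, &ppi, 1);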

> +       return 0;
> +}
> +
> +void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
> +{
> +       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
> +
> +       hrtimer_cancel(&timer->timer);
> +       cancel_work_sync(&timer->expired);
> +}
> +
> +int kvm_timer_init(struct kvm *kvm)
> +{
> +       if (timecounter && wqueue) {
> +               kvm->arch.timer.cntvoff = kvm_phys_timer_read();

Shouldn't this be initialised to 0 and then updated on world switch?

Will
Marc Zyngier Nov. 23, 2012, 4:52 p.m. UTC | #2
On 23/11/12 16:17, Will Deacon wrote:
> On Sat, Nov 10, 2012 at 03:46:19PM +0000, Christoffer Dall wrote:
>> From: Marc Zyngier <marc.zyngier@arm.com>
>>
>> We can inject a timer interrupt into the guest as a result of
>> three possible events:
>> - The virtual timer interrupt has fired while we were still
>>   executing the guest
>> - The timer interrupt hasn't fired, but it expired while we
>>   were doing the world switch
>> - A hrtimer we programmed earlier has fired
>>
>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>> Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
>> ---
>>  arch/arm/include/asm/kvm_arch_timer.h |   57 +++++++++
>>  arch/arm/kvm/reset.c                  |    9 +
>>  arch/arm/kvm/timer.c                  |  204 +++++++++++++++++++++++++++++++++
>>  3 files changed, 269 insertions(+), 1 deletion(-)
>>  create mode 100644 arch/arm/kvm/timer.c
>>
>> diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/arch/arm/include/asm/kvm_arch_timer.h
>> index 513b852..bd5e501 100644
>> --- a/arch/arm/include/asm/kvm_arch_timer.h
>> +++ b/arch/arm/include/asm/kvm_arch_timer.h
>> @@ -19,13 +19,68 @@
>>  #ifndef __ASM_ARM_KVM_ARCH_TIMER_H
>>  #define __ASM_ARM_KVM_ARCH_TIMER_H
>>
>> +#include <linux/clocksource.h>
>> +#include <linux/hrtimer.h>
>> +#include <linux/workqueue.h>
>> +
>>  struct arch_timer_kvm {
>> +#ifdef CONFIG_KVM_ARM_TIMER
>> +       /* Is the timer enabled */
>> +       bool                    enabled;
>> +
>> +       /*
>> +        * Virtual offset (kernel access it through cntvoff, HYP code
>> +        * access it as two 32bit values).
>> +        */
>> +       union {
>> +               cycle_t         cntvoff;
>> +               struct {
>> +                       u32     low;    /* Restored only */
>> +                       u32     high;   /* Restored only */
>> +               } cntvoff32;
>> +       };
>> +#endif
> 
> Endianness?

Yup. I'll fix that.

>>  };
>>
>>  struct arch_timer_cpu {
>> +#ifdef CONFIG_KVM_ARM_TIMER
>> +       /* Registers: control register, timer value */
>> +       u32                             cntv_ctl;       /* Saved/restored */
>> +       union {
>> +               cycle_t                 cntv_cval;
>> +               struct {
>> +                       u32             low;            /* Saved/restored */
>> +                       u32             high;           /* Saved/restored */
>> +               } cntv_cval32;
>> +       };
> 
> Similarly.

Ditto.

> 
>> +       /*
>> +        * Anything that is not used directly from assembly code goes
>> +        * here.
>> +        */
>> +
>> +       /* Background timer used when the guest is not running */
>> +       struct hrtimer                  timer;
>> +
>> +       /* Work queued when the above timer expires */
>> +       struct work_struct              expired;
>> +
>> +       /* Background timer active */
>> +       bool                            armed;
>> +
>> +       /* Timer IRQ */
>> +       const struct kvm_irq_level      *irq;
>> +#endif
>>  };
>>
>> -#ifndef CONFIG_KVM_ARM_TIMER
>> +#ifdef CONFIG_KVM_ARM_TIMER
>> +int kvm_timer_hyp_init(void);
>> +int kvm_timer_init(struct kvm *kvm);
>> +void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
>> +void kvm_timer_sync_to_cpu(struct kvm_vcpu *vcpu);
>> +void kvm_timer_sync_from_cpu(struct kvm_vcpu *vcpu);
>> +void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
>> +#else
>>  static inline int kvm_timer_hyp_init(void)
>>  {
>>         return 0;
>> diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
>> index b80256b..7463f5b 100644
>> --- a/arch/arm/kvm/reset.c
>> +++ b/arch/arm/kvm/reset.c
>> @@ -37,6 +37,12 @@ static struct kvm_regs a15_regs_reset = {
>>         .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
>>  };
>>
>> +#ifdef CONFIG_KVM_ARM_TIMER
>> +static const struct kvm_irq_level a15_virt_timer_ppi = {
>> +       { .irq = 27 },  /* irq: A7/A15 specific */
> 
> This should be parameterised by the vCPU type.

This is already A15 specific, and assigned in an A15 specific code
section below.

>> +       .level = 1      /* level */
>> +};
>> +#endif
>>
>>  /*******************************************************************************
>>   * Exported reset function
>> @@ -59,6 +65,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
>>                         return -EINVAL;
>>                 cpu_reset = &a15_regs_reset;
>>                 vcpu->arch.midr = read_cpuid_id();
>> +#ifdef CONFIG_KVM_ARM_TIMER
>> +               vcpu->arch.timer_cpu.irq = &a15_virt_timer_ppi;
>> +#endif
>>                 break;
>>         default:
>>                 return -ENODEV;
>> diff --git a/arch/arm/kvm/timer.c b/arch/arm/kvm/timer.c
>> new file mode 100644
>> index 0000000..a241298
>> --- /dev/null
>> +++ b/arch/arm/kvm/timer.c
> 
> Maybe kvm/arch_timer.c since it seems to be specific to that device and
> matches the naming for the main driver under kernel/? If we get new virtual
> timer devices in the future, I guess this would need to be split up so the
> arch-timer-specific bits are separate from generic hrtimer and kvm code.

Sure.

>> @@ -0,0 +1,204 @@
>> +/*
>> + * Copyright (C) 2012 ARM Ltd.
>> + * Author: Marc Zyngier <marc.zyngier@arm.com>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
>> + */
>> +
>> +#include <linux/of_irq.h>
>> +#include <linux/kvm.h>
>> +#include <linux/kvm_host.h>
>> +#include <linux/interrupt.h>
>> +
>> +#include <asm/arch_timer.h>
>> +
>> +#include <asm/kvm_vgic.h>
>> +#include <asm/kvm_arch_timer.h>
>> +
>> +static struct timecounter *timecounter;
>> +static struct workqueue_struct *wqueue;
>> +
>> +static cycle_t kvm_phys_timer_read(void)
>> +{
>> +       return timecounter->cc->read(timecounter->cc);
>> +}
>> +
>> +static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
>> +{
>> +       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
>> +
>> +       timer->cntv_ctl |= 1 << 1; /* Mask the interrupt in the guest */
> 
> This is ARCH_TIMER_CTRL_IT_MASK right? We should make that definition
> available if it is needed here.

OK.

>> +       kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
>> +                           vcpu->arch.timer_cpu.irq->irq,
>> +                           vcpu->arch.timer_cpu.irq->level);
>> +}
>> +
>> +static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
>> +{
>> +       struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
>> +
>> +       /*
>> +        * We disable the timer in the world switch and let it be
>> +        * handled by kvm_timer_sync_from_cpu(). Getting a timer
>> +        * interrupt at this point is a sure sign of some major
>> +        * breakage.
>> +        */
>> +       pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
>> +       return IRQ_HANDLED;
> 
> IRQ_NONE?

I don't think so. We're actually handling the interrupt (admittedly in a
very basic way), and as it is a per-cpu interrupt, there will be no one
else to take care of it.

>> +}
>> +
>> +static void kvm_timer_inject_irq_work(struct work_struct *work)
>> +{
>> +       struct kvm_vcpu *vcpu;
>> +
>> +       vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
>> +       vcpu->arch.timer_cpu.armed = false;
>> +       kvm_timer_inject_irq(vcpu);
>> +}
>> +
>> +static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
>> +{
>> +       struct arch_timer_cpu *timer;
>> +       timer = container_of(hrt, struct arch_timer_cpu, timer);
>> +       queue_work(wqueue, &timer->expired);
>> +       return HRTIMER_NORESTART;
>> +}
>> +
>> +void kvm_timer_sync_to_cpu(struct kvm_vcpu *vcpu)
>> +{
>> +       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
>> +
>> +       /*
>> +        * We're about to run this vcpu again, so there is no need to
>> +        * keep the background timer running, as we're about to
>> +        * populate the CPU timer again.
>> +        */
>> +       if (timer->armed) {
>> +               hrtimer_cancel(&timer->timer);
>> +               cancel_work_sync(&timer->expired);
>> +               timer->armed = false;
>> +       }
>> +}
> 
> I think some helper functions like timer_is_armed, timer_arm and
> timer_disarm would make this more readable (resisting arm_timer, which
> confuses things more!).

OK.

>> +
>> +void kvm_timer_sync_from_cpu(struct kvm_vcpu *vcpu)
>> +{
>> +       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
>> +       cycle_t cval, now;
>> +       u64 ns;
>> +
>> +       /* Check if the timer is enabled and unmasked first */
>> +       if ((timer->cntv_ctl & 3) != 1)
>> +               return;
> 
> Again, we should create/use ARCH_TIMER #defines for these hardware bits.
> 
>> +       cval = timer->cntv_cval;
>> +       now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
> 
> We probably want to add an isb() *before* the mrrc in
> arch_timer_counter_read if you want to do things like this, because the
> counter can be speculated in advance.

Indeed.

>> +       BUG_ON(timer->armed);
>> +
>> +       if (cval <= now) {
>> +               /*
>> +                * Timer has already expired while we were not
>> +                * looking. Inject the interrupt and carry on.
>> +                */
>> +               kvm_timer_inject_irq(vcpu);
>> +               return;
>> +       }
> 
> Does this buy you much? You still have to cope with the timer expiring here
> anyway.

It definitely does from a latency point of view. Programming a timer
that will expire right away, calling the interrupt handler, queuing the
work queue, waiting for the workqueue to be scheduled and finally
delivering the interrupt... If we can catch a few of these early (and we
do), it is worth it.

>> +       timer->armed = true;
>> +       ns = cyclecounter_cyc2ns(timecounter->cc, cval - now);
>> +       hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns),
>> +                     HRTIMER_MODE_ABS);
>> +}
>> +
>> +void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
>> +{
>> +       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
>> +
>> +       INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
>> +       hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
>> +       timer->timer.function = kvm_timer_expire;
>> +}
>> +
>> +static void kvm_timer_init_interrupt(void *info)
>> +{
>> +       unsigned int *irqp = info;
>> +
>> +       enable_percpu_irq(*irqp, 0);
> 
> IRQ_TYPE_NONE
> 
>> +}
>> +
>> +
>> +static const struct of_device_id arch_timer_of_match[] = {
>> +       { .compatible   = "arm,armv7-timer",    },
>> +       {},
>> +};
>> +
>> +int kvm_timer_hyp_init(void)
>> +{
>> +       struct device_node *np;
>> +       unsigned int ppi;
>> +       int err;
>> +
>> +       timecounter = arch_timer_get_timecounter();
>> +       if (!timecounter)
>> +               return -ENODEV;
>> +
>> +       np = of_find_matching_node(NULL, arch_timer_of_match);
>> +       if (!np) {
>> +               kvm_err("kvm_arch_timer: can't find DT node\n");
>> +               return -ENODEV;
>> +       }
>> +
>> +       ppi = irq_of_parse_and_map(np, 2);
>> +       if (!ppi) {
>> +               kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       err = request_percpu_irq(ppi, kvm_arch_timer_handler,
>> +                                "kvm guest timer", kvm_get_running_vcpus());
>> +       if (err) {
>> +               kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
>> +                       ppi, err);
>> +               return err;
>> +       }
>> +
>> +       wqueue = create_singlethread_workqueue("kvm_arch_timer");
>> +       if (!wqueue) {
>> +               free_percpu_irq(ppi, kvm_get_running_vcpus());
>> +               return -ENOMEM;
>> +       }
>> +
>> +       kvm_info("%s IRQ%d\n", np->name, ppi);
>> +       on_each_cpu(kvm_timer_init_interrupt, &ppi, 1);
> 
> on_each_cpu currently just returns 0, but you could use that as your return
> value for good measure anyway.

Sure.

>> +       return 0;
>> +}
>> +
>> +void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
>> +{
>> +       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
>> +
>> +       hrtimer_cancel(&timer->timer);
>> +       cancel_work_sync(&timer->expired);
>> +}
>> +
>> +int kvm_timer_init(struct kvm *kvm)
>> +{
>> +       if (timecounter && wqueue) {
>> +               kvm->arch.timer.cntvoff = kvm_phys_timer_read();
> 
> Shouldn't this be initialised to 0 and then updated on world switch?

No. You do not want your virtual offset to drift. Otherwise you'll
observe something like time dilatation, and your clocks will drift.
Plus, you really want all your vcpus to be synchronized. Allowing them
to drift apart could be an interesting experience... ;-)

	M.
Will Deacon Nov. 23, 2012, 5 p.m. UTC | #3
On Fri, Nov 23, 2012 at 04:52:12PM +0000, Marc Zyngier wrote:
> On 23/11/12 16:17, Will Deacon wrote:
> >> diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
> >> index b80256b..7463f5b 100644
> >> --- a/arch/arm/kvm/reset.c
> >> +++ b/arch/arm/kvm/reset.c
> >> @@ -37,6 +37,12 @@ static struct kvm_regs a15_regs_reset = {
> >>         .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
> >>  };
> >>
> >> +#ifdef CONFIG_KVM_ARM_TIMER
> >> +static const struct kvm_irq_level a15_virt_timer_ppi = {
> >> +       { .irq = 27 },  /* irq: A7/A15 specific */
> >
> > This should be parameterised by the vCPU type.
> 
> This is already A15 specific, and assigned in an A15 specific code
> section below.

Right, but we can take the interrupt number from the device-tree, like we do
for the host anyway.
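
Roughly something like this (untested sketch; it glosses over virq vs. hwirq
numbering, and the static timer_irq is only illustrative):

	static struct kvm_irq_level timer_irq = {
		.level	= 1,
	};

	/* in kvm_timer_hyp_init(), after irq_of_parse_and_map(np, 2): */
	timer_irq.irq = ppi;

	/* in kvm_timer_vcpu_init(), instead of the hardcoded PPI 27: */
	vcpu->arch.timer_cpu.irq = &timer_irq;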

> >> +static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
> >> +{
> >> +       struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
> >> +
> >> +       /*
> >> +        * We disable the timer in the world switch and let it be
> >> +        * handled by kvm_timer_sync_from_cpu(). Getting a timer
> >> +        * interrupt at this point is a sure sign of some major
> >> +        * breakage.
> >> +        */
> >> +       pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
> >> +       return IRQ_HANDLED;
> >
> > IRQ_NONE?
> 
> I don't think so. We're actually handling the interrupt (admittedly in a
> very basic way), and as it is a per-cpu interrupt, there will be noone
> else to take care of it.

For an SPI, returning IRQ_NONE would (eventually) silence a screaming
interrupt because the generic IRQ bits would disable it. I'm not sure if that
applies to PPIs or not but if it does, I'd say that's a good reason to use it.

> 
> >> +       BUG_ON(timer->armed);
> >> +
> >> +       if (cval <= now) {
> >> +               /*
> >> +                * Timer has already expired while we were not
> >> +                * looking. Inject the interrupt and carry on.
> >> +                */
> >> +               kvm_timer_inject_irq(vcpu);
> >> +               return;
> >> +       }
> >
> > Does this buy you much? You still have to cope with the timer expiring here
> > anyway.
> 
> It definitely does from a latency point of view. Programming a timer
> that will expire right away, calling the interrupt handler, queuing the
> work queue, waiting for the workqueue to be scheduled and finally
> delivering the interrupt... If we can catch a few of these early (and we
> do), it is worth it.

Ok, interesting. I wasn't sure how often that happened in practice.

> >> +int kvm_timer_init(struct kvm *kvm)
> >> +{
> >> +       if (timecounter && wqueue) {
> >> +               kvm->arch.timer.cntvoff = kvm_phys_timer_read();
> >
> > Shouldn't this be initialised to 0 and then updated on world switch?
> 
> No. You do not want your virtual offset to drift. Otherwise you'll
> observe something like time dilatation, and your clocks will drift.
> Plus, you really want all your vcpus to be synchronized. Allowing them
> to drift apart could be an interesting experience... ;-)

In which case, why do we initialise it to the physical timer in the first
place? Surely the value doesn't matter, as long as everybody agrees on what
it is?

Will
Marc Zyngier Nov. 23, 2012, 5:11 p.m. UTC | #4
On 23/11/12 17:00, Will Deacon wrote:
> On Fri, Nov 23, 2012 at 04:52:12PM +0000, Marc Zyngier wrote:
>> On 23/11/12 16:17, Will Deacon wrote:
>>>> diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
>>>> index b80256b..7463f5b 100644
>>>> --- a/arch/arm/kvm/reset.c
>>>> +++ b/arch/arm/kvm/reset.c
>>>> @@ -37,6 +37,12 @@ static struct kvm_regs a15_regs_reset = {
>>>>         .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
>>>>  };
>>>>
>>>> +#ifdef CONFIG_KVM_ARM_TIMER
>>>> +static const struct kvm_irq_level a15_virt_timer_ppi = {
>>>> +       { .irq = 27 },  /* irq: A7/A15 specific */
>>>
>>> This should be parameterised by the vCPU type.
>>
>> This is already A15 specific, and assigned in an A15 specific code
>> section below.
> 
> Right, but we can take the interrupt number from the device-tree, like we do
> for the host anyway.

Certainly. I'll update this bit.

>>>> +static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
>>>> +{
>>>> +       struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
>>>> +
>>>> +       /*
>>>> +        * We disable the timer in the world switch and let it be
>>>> +        * handled by kvm_timer_sync_from_cpu(). Getting a timer
>>>> +        * interrupt at this point is a sure sign of some major
>>>> +        * breakage.
>>>> +        */
>>>> +       pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
>>>> +       return IRQ_HANDLED;
>>>
>>> IRQ_NONE?
>>
>> I don't think so. We're actually handling the interrupt (admittedly in a
>> very basic way), and as it is a per-cpu interrupt, there will be no one
>> else to take care of it.
> 
> For an SPI, returning IRQ_NONE would (eventually) silence a screaming
> interrupt because the generic IRQ bits would disable it. I'm not sure if that
> applies to PPIs or not but if it does, I'd say that's a good reason to use it.
> 
>>
>>>> +       BUG_ON(timer->armed);
>>>> +
>>>> +       if (cval <= now) {
>>>> +               /*
>>>> +                * Timer has already expired while we were not
>>>> +                * looking. Inject the interrupt and carry on.
>>>> +                */
>>>> +               kvm_timer_inject_irq(vcpu);
>>>> +               return;
>>>> +       }
>>>
>>> Does this buy you much? You still have to cope with the timer expiring here
>>> anyway.
>>
>> It definitely does from a latency point of view. Programming a timer
>> that will expire right away, calling the interrupt handler, queuing the
>> work queue, waiting for the workqueue to be scheduled and finally
>> delivering the interrupt... If we can catch a few of these early (and we
>> do), it is worth it.
> 
> Ok, interesting. I wasn't sure how often that happened in practice.
> 
>>>> +int kvm_timer_init(struct kvm *kvm)
>>>> +{
>>>> +       if (timecounter && wqueue) {
>>>> +               kvm->arch.timer.cntvoff = kvm_phys_timer_read();
>>>
>>> Shouldn't this be initialised to 0 and then updated on world switch?
>>
>> No. You do not want your virtual offset to drift. Otherwise you'll
>> observe something like time dilatation, and your clocks will drift.
>> Plus, you really want all your vcpus to be synchronized. Allowing them
>> to drift apart could be an interesting experience... ;-)
> 
> In which case, why do we initialise it to the physical timer in the first
> place? Surely the value doesn't matter, as long as everybody agrees on what
> it is?

This ensures that the VM's virtual counter starts as close to 0 as
possible. As good a convention as any.
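
For reference, the relation the architecture defines is:

	CNTVCT = CNTPCT - CNTVOFF

so snapshotting CNTVOFF once at VM creation starts the guest's virtual
counter near zero and keeps all vcpus on a single, non-drifting timeline.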

	M.
Christoffer Dall Nov. 30, 2012, 11:11 p.m. UTC | #5
On Fri, Nov 23, 2012 at 12:11 PM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 23/11/12 17:00, Will Deacon wrote:
>> On Fri, Nov 23, 2012 at 04:52:12PM +0000, Marc Zyngier wrote:
>>> On 23/11/12 16:17, Will Deacon wrote:
>>>>> diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
>>>>> index b80256b..7463f5b 100644
>>>>> --- a/arch/arm/kvm/reset.c
>>>>> +++ b/arch/arm/kvm/reset.c
>>>>> @@ -37,6 +37,12 @@ static struct kvm_regs a15_regs_reset = {
>>>>>         .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
>>>>>  };
>>>>>
>>>>> +#ifdef CONFIG_KVM_ARM_TIMER
>>>>> +static const struct kvm_irq_level a15_virt_timer_ppi = {
>>>>> +       { .irq = 27 },  /* irq: A7/A15 specific */
>>>>
>>>> This should be parameterised by the vCPU type.
>>>
>>> This is already A15 specific, and assigned in an A15 specific code
>>> section below.
>>
>> Right, but we can take the interrupt number from the device-tree, like we do
>> for the host anyway.
>
> Certainly. I'll update this bit.
>
So we lock ourselves into only ever supporting emulation of the same CPU
in the guest as on the host?

(I know we assume this elsewhere and that even the A7 uses the same
IRQ number, but they don't necessarily have to stay the same in the
future, there's no need to make such a task more difficult, and we
are not really making things more stable/simple by taking it from the
DT - are we?)

-Christoffer

Patch

diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/arch/arm/include/asm/kvm_arch_timer.h
index 513b852..bd5e501 100644
--- a/arch/arm/include/asm/kvm_arch_timer.h
+++ b/arch/arm/include/asm/kvm_arch_timer.h
@@ -19,13 +19,68 @@ 
 #ifndef __ASM_ARM_KVM_ARCH_TIMER_H
 #define __ASM_ARM_KVM_ARCH_TIMER_H
 
+#include <linux/clocksource.h>
+#include <linux/hrtimer.h>
+#include <linux/workqueue.h>
+
 struct arch_timer_kvm {
+#ifdef CONFIG_KVM_ARM_TIMER
+	/* Is the timer enabled */
+	bool			enabled;
+
+	/*
+	 * Virtual offset (kernel access it through cntvoff, HYP code
+	 * access it as two 32bit values).
+	 */
+	union {
+		cycle_t		cntvoff;
+		struct {
+			u32	low; 	/* Restored only */
+			u32	high;  	/* Restored only */
+		} cntvoff32;
+	};
+#endif
 };
 
 struct arch_timer_cpu {
+#ifdef CONFIG_KVM_ARM_TIMER
+	/* Registers: control register, timer value */
+	u32				cntv_ctl;	/* Saved/restored */
+	union {
+		cycle_t			cntv_cval;
+		struct {
+			u32		low;		/* Saved/restored */
+			u32		high;		/* Saved/restored */
+		} cntv_cval32;
+	};
+
+	/*
+	 * Anything that is not used directly from assembly code goes
+	 * here.
+	 */
+
+	/* Background timer used when the guest is not running */
+	struct hrtimer			timer;
+
+	/* Work queued when the above timer expires */
+	struct work_struct		expired;
+
+	/* Background timer active */
+	bool				armed;
+
+	/* Timer IRQ */
+	const struct kvm_irq_level	*irq;
+#endif
 };
 
-#ifndef CONFIG_KVM_ARM_TIMER
+#ifdef CONFIG_KVM_ARM_TIMER
+int kvm_timer_hyp_init(void);
+int kvm_timer_init(struct kvm *kvm);
+void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_timer_sync_to_cpu(struct kvm_vcpu *vcpu);
+void kvm_timer_sync_from_cpu(struct kvm_vcpu *vcpu);
+void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
+#else
 static inline int kvm_timer_hyp_init(void)
 {
 	return 0;
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index b80256b..7463f5b 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -37,6 +37,12 @@  static struct kvm_regs a15_regs_reset = {
 	.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
 };
 
+#ifdef CONFIG_KVM_ARM_TIMER
+static const struct kvm_irq_level a15_virt_timer_ppi = {
+	{ .irq = 27 },	/* irq: A7/A15 specific */
+	.level = 1	/* level */
+};
+#endif
 
 /*******************************************************************************
  * Exported reset function
@@ -59,6 +65,9 @@  int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 			return -EINVAL;
 		cpu_reset = &a15_regs_reset;
 		vcpu->arch.midr = read_cpuid_id();
+#ifdef CONFIG_KVM_ARM_TIMER
+		vcpu->arch.timer_cpu.irq = &a15_virt_timer_ppi;
+#endif
 		break;
 	default:
 		return -ENODEV;
diff --git a/arch/arm/kvm/timer.c b/arch/arm/kvm/timer.c
new file mode 100644
index 0000000..a241298
--- /dev/null
+++ b/arch/arm/kvm/timer.c
@@ -0,0 +1,204 @@ 
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/of_irq.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+
+#include <asm/arch_timer.h>
+
+#include <asm/kvm_vgic.h>
+#include <asm/kvm_arch_timer.h>
+
+static struct timecounter *timecounter;
+static struct workqueue_struct *wqueue;
+
+static cycle_t kvm_phys_timer_read(void)
+{
+	return timecounter->cc->read(timecounter->cc);
+}
+
+static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	timer->cntv_ctl |= 1 << 1; /* Mask the interrupt in the guest */
+	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+			    vcpu->arch.timer_cpu.irq->irq,
+			    vcpu->arch.timer_cpu.irq->level);
+}
+
+static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
+{
+	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
+
+	/*
+	 * We disable the timer in the world switch and let it be
+	 * handled by kvm_timer_sync_from_cpu(). Getting a timer
+	 * interrupt at this point is a sure sign of some major
+	 * breakage.
+	 */
+	pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
+	return IRQ_HANDLED;
+}
+
+static void kvm_timer_inject_irq_work(struct work_struct *work)
+{
+	struct kvm_vcpu *vcpu;
+
+	vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
+	vcpu->arch.timer_cpu.armed = false;
+	kvm_timer_inject_irq(vcpu);
+}
+
+static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
+{
+	struct arch_timer_cpu *timer;
+	timer = container_of(hrt, struct arch_timer_cpu, timer);
+	queue_work(wqueue, &timer->expired);
+	return HRTIMER_NORESTART;
+}
+
+void kvm_timer_sync_to_cpu(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	/*
+	 * We're about to run this vcpu again, so there is no need to
+	 * keep the background timer running, as we're about to
+	 * populate the CPU timer again.
+	 */
+	if (timer->armed) {
+		hrtimer_cancel(&timer->timer);
+		cancel_work_sync(&timer->expired);
+		timer->armed = false;
+	}
+}
+
+void kvm_timer_sync_from_cpu(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	cycle_t cval, now;
+	u64 ns;
+
+	/* Check if the timer is enabled and unmasked first */
+	if ((timer->cntv_ctl & 3) != 1)
+		return;
+
+	cval = timer->cntv_cval;
+	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+
+	BUG_ON(timer->armed);
+
+	if (cval <= now) {
+		/*
+		 * Timer has already expired while we were not
+		 * looking. Inject the interrupt and carry on.
+		 */
+		kvm_timer_inject_irq(vcpu);
+		return;
+	}
+
+	timer->armed = true;
+	ns = cyclecounter_cyc2ns(timecounter->cc, cval - now);
+	hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns),
+		      HRTIMER_MODE_ABS);
+}
+
+void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
+	hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	timer->timer.function = kvm_timer_expire;
+}
+
+static void kvm_timer_init_interrupt(void *info)
+{
+	unsigned int *irqp = info;
+
+	enable_percpu_irq(*irqp, 0);
+}
+
+
+static const struct of_device_id arch_timer_of_match[] = {
+	{ .compatible	= "arm,armv7-timer",	},
+	{},
+};
+
+int kvm_timer_hyp_init(void)
+{
+	struct device_node *np;
+	unsigned int ppi;
+	int err;
+
+	timecounter = arch_timer_get_timecounter();
+	if (!timecounter)
+		return -ENODEV;
+
+	np = of_find_matching_node(NULL, arch_timer_of_match);
+	if (!np) {
+		kvm_err("kvm_arch_timer: can't find DT node\n");
+		return -ENODEV;
+	}
+
+	ppi = irq_of_parse_and_map(np, 2);
+	if (!ppi) {
+		kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
+		return -EINVAL;
+	}
+
+	err = request_percpu_irq(ppi, kvm_arch_timer_handler,
+				 "kvm guest timer", kvm_get_running_vcpus());
+	if (err) {
+		kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
+			ppi, err);
+		return err;
+	}
+
+	wqueue = create_singlethread_workqueue("kvm_arch_timer");
+	if (!wqueue) {
+		free_percpu_irq(ppi, kvm_get_running_vcpus());
+		return -ENOMEM;
+	}
+
+	kvm_info("%s IRQ%d\n", np->name, ppi);
+	on_each_cpu(kvm_timer_init_interrupt, &ppi, 1);
+
+	return 0;
+}
+
+void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	hrtimer_cancel(&timer->timer);
+	cancel_work_sync(&timer->expired);
+}
+
+int kvm_timer_init(struct kvm *kvm)
+{
+	if (timecounter && wqueue) {
+		kvm->arch.timer.cntvoff = kvm_phys_timer_read();
+		kvm->arch.timer.enabled = 1;
+	}
+
+	return 0;
+}