Message ID | 20170405092815.22503-4-cdall@linaro.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 05.04.17 11:28, Christoffer Dall wrote: > From: Alexander Graf <agraf@suse.de> > > If you're running with a userspace gic or other interrupt constroller > (that is no vgic in the kernel), then you have so far not been able to > use the architected timers, because the output of the architected > timers, which are driven inside the kernel, was a kernel-only construct > between the arch timer code and the vgic. > > This patch implements the new KVM_CAP_ARM_USER_IRQ feature, where we use a > side channel on the kvm_run structure, run->s.regs.device_irq_level, to > always notify userspace of the timer output levels when using a userspace > irqchip. > > This works by ensureing that before we enter the guest, if the timer > output level has changed compared to what we last told userspace, we > don't enter the guest, but instead return to userspace to notify it of > the new level. If we are exiting, because of an MMIO for example, and > the level changed at the same time, the value is also updated and > userspace can sample the line as it needs. This is nicely achieved > simply always updating the timer_irq_level field after the main run > loop. > > Note that the kvm_timer_update_irq trace event is changed to show the > host IRQ number for the timer instead of the guest IRQ number, because > the kernel no longer know which IRQ userspace wires up the timer signal > to. > > Also note that this patch implements all required functionality but does > not yet advertise the capability. > > Signed-off-by: Alexander Graf <agraf@suse.de> > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> > --- > arch/arm/kvm/arm.c | 18 +++---- > include/kvm/arm_arch_timer.h | 2 + > virt/kvm/arm/arch_timer.c | 122 +++++++++++++++++++++++++++++++++++-------- > 3 files changed, 110 insertions(+), 32 deletions(-) > > diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c > index 7fa4898..efb16e5 100644 > --- a/arch/arm/kvm/arm.c > +++ b/arch/arm/kvm/arm.c > @@ -515,13 +515,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) > return ret; > } > > - /* > - * Enable the arch timers only if we have an in-kernel VGIC > - * and it has been properly initialized, since we cannot handle > - * interrupts from the virtual timer with a userspace gic. > - */ > - if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) > - ret = kvm_timer_enable(vcpu); > + ret = kvm_timer_enable(vcpu); > > return ret; > } > @@ -640,9 +634,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) > local_irq_disable(); > > /* > - * Re-check atomic conditions > + * If we have a singal pending, or need to notify a userspace > + * irqchip about timer level changes, then we exit (and update > + * the timer level state in kvm_timer_update_run below). > */ > - if (signal_pending(current)) { > + if (signal_pending(current) || > + kvm_timer_should_notify_user(vcpu)) { > ret = -EINTR; > run->exit_reason = KVM_EXIT_INTR; > } > @@ -714,6 +711,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) > ret = handle_exit(vcpu, run, ret); > } > > + /* Tell userspace about in-kernel device output levels */ > + kvm_timer_update_run(vcpu); > + > if (vcpu->sigset_active) > sigprocmask(SIG_SETMASK, &sigsaved, NULL); > return ret; > diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h > index fe797d6..295584f 100644 > --- a/include/kvm/arm_arch_timer.h > +++ b/include/kvm/arm_arch_timer.h > @@ -63,6 +63,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, > void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); > void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); > void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); > +bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu); > +void kvm_timer_update_run(struct kvm_vcpu *vcpu); > void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); > > u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); > diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c > index 363f0d2..5dc2167 100644 > --- a/virt/kvm/arm/arch_timer.c > +++ b/virt/kvm/arm/arch_timer.c > @@ -184,6 +184,27 @@ bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) > return cval <= now; > } > > +/* > + * Reflect the timer output level into the kvm_run structure > + */ > +void kvm_timer_update_run(struct kvm_vcpu *vcpu) > +{ > + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); > + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); > + struct kvm_sync_regs *regs = &vcpu->run->s.regs; > + > + if (likely(irqchip_in_kernel(vcpu->kvm))) > + return; > + > + /* Populate the device bitmap with the timer states */ > + regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER | > + KVM_ARM_DEV_EL1_PTIMER); > + if (vtimer->irq.level) > + regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER; > + if (ptimer->irq.level) > + regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER; > +} > + > static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, > struct arch_timer_context *timer_ctx) > { > @@ -194,9 +215,12 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, > trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, > timer_ctx->irq.level); > > - ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, > - timer_ctx->irq.level); > - WARN_ON(ret); > + if (likely(irqchip_in_kernel(vcpu->kvm))) { > + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, > + timer_ctx->irq.irq, > + timer_ctx->irq.level); > + WARN_ON(ret); > + } > } > > /* > @@ -215,7 +239,7 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) > * because the guest would never see the interrupt. Instead wait > * until we call this function from kvm_timer_flush_hwstate. > */ > - if (!timer->enabled) > + if (unlikely(!timer->enabled)) > return; > > if (kvm_timer_should_fire(vtimer) != vtimer->irq.level) > @@ -282,28 +306,12 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu) > timer_disarm(timer); > } > > -/** > - * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu > - * @vcpu: The vcpu pointer > - * > - * Check if the virtual timer has expired while we were running in the host, > - * and inject an interrupt if that was the case. > - */ > -void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) > +static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu) > { > - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; > struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); > bool phys_active; > int ret; > > - if (unlikely(!timer->enabled)) > - return; > - > - kvm_timer_update_state(vcpu); > - > - /* Set the background timer for the physical timer emulation. */ > - kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu)); > - > /* > * If we enter the guest with the virtual input level to the VGIC > * asserted, then we have already told the VGIC what we need to, and > @@ -355,11 +363,72 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) > vtimer->active_cleared_last = !phys_active; > } > > +bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) > +{ > + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); > + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); > + struct kvm_sync_regs *sregs = &vcpu->run->s.regs; > + bool vlevel, plevel; > + > + if (likely(irqchip_in_kernel(vcpu->kvm))) > + return false; > + > + vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER; > + plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER; > + > + return vtimer->irq.level != vlevel || > + ptimer->irq.level != plevel; > +} > + > +static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu) > +{ > + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); > + > + /* > + * To prevent continuously exiting from the guest, we mask the > + * physical interrupt such that the guest can make forward progress. > + * Once we detect the output level being deasserted, we unmask the > + * interrupt again so that we exit from the guest when the timer > + * fires. > + */ > + if (vtimer->irq.level) > + disable_percpu_irq(host_vtimer_irq); > + else > + enable_percpu_irq(host_vtimer_irq, 0); > +} > + > +/** > + * kvm_timer_flush_hwstate - prepare timers before running the vcpu > + * @vcpu: The vcpu pointer > + * > + * Check if the virtual timer has expired while we were running in the host, > + * and inject an interrupt if that was the case, making sure the timer is > + * masked or disabled on the host so that we keep executing. Also schedule a > + * software timer for the physical timer if it is enabled. > + */ > +void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) > +{ > + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; > + > + if (unlikely(!timer->enabled)) > + return; > + > + kvm_timer_update_state(vcpu); > + > + /* Set the background timer for the physical timer emulation. */ > + kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu)); > + > + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) > + kvm_timer_flush_hwstate_user(vcpu); > + else > + kvm_timer_flush_hwstate_vgic(vcpu); > +} > + > /** > * kvm_timer_sync_hwstate - sync timer state from cpu > * @vcpu: The vcpu pointer > * > - * Check if the virtual timer has expired while we were running in the guest, > + * Check if any of the timers have expired while we were running in the guest, > * and inject an interrupt if that was the case. > */ > void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) > @@ -559,6 +628,13 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) > if (timer->enabled) > return 0; > > + /* Without a VGIC we do not map virtual IRQs to physical IRQs */ > + if (!irqchip_in_kernel(vcpu->kvm)) > + goto no_vgic; > + > + if (!vgic_initialized(vcpu->kvm)) > + return -ENODEV; > + > /* > * Find the physical IRQ number corresponding to the host_vtimer_irq > */ > @@ -582,8 +658,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) > if (ret) > return ret; > > +no_vgic: > timer->enabled = 1; What happens if 1) User space spawns a VM with user space irqchip 2) Runs the VM 3) Then adds a virtual gic device Would we then access wrong data because we don't run through kvm_timer_enable() because timer->enabled is set? Would thus (unprivileged) user space be able to DOS the host system? Alex
On 06/04/17 09:16, Alexander Graf wrote: > > > On 05.04.17 11:28, Christoffer Dall wrote: >> From: Alexander Graf <agraf@suse.de> >> >> If you're running with a userspace gic or other interrupt constroller >> (that is no vgic in the kernel), then you have so far not been able to >> use the architected timers, because the output of the architected >> timers, which are driven inside the kernel, was a kernel-only construct >> between the arch timer code and the vgic. >> >> This patch implements the new KVM_CAP_ARM_USER_IRQ feature, where we use a >> side channel on the kvm_run structure, run->s.regs.device_irq_level, to >> always notify userspace of the timer output levels when using a userspace >> irqchip. >> >> This works by ensureing that before we enter the guest, if the timer >> output level has changed compared to what we last told userspace, we >> don't enter the guest, but instead return to userspace to notify it of >> the new level. If we are exiting, because of an MMIO for example, and >> the level changed at the same time, the value is also updated and >> userspace can sample the line as it needs. This is nicely achieved >> simply always updating the timer_irq_level field after the main run >> loop. >> >> Note that the kvm_timer_update_irq trace event is changed to show the >> host IRQ number for the timer instead of the guest IRQ number, because >> the kernel no longer know which IRQ userspace wires up the timer signal >> to. >> >> Also note that this patch implements all required functionality but does >> not yet advertise the capability. >> >> Signed-off-by: Alexander Graf <agraf@suse.de> >> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> >> --- >> arch/arm/kvm/arm.c | 18 +++---- >> include/kvm/arm_arch_timer.h | 2 + >> virt/kvm/arm/arch_timer.c | 122 +++++++++++++++++++++++++++++++++++-------- >> 3 files changed, 110 insertions(+), 32 deletions(-) >> >> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c >> index 7fa4898..efb16e5 100644 >> --- a/arch/arm/kvm/arm.c >> +++ b/arch/arm/kvm/arm.c >> @@ -515,13 +515,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) >> return ret; >> } >> >> - /* >> - * Enable the arch timers only if we have an in-kernel VGIC >> - * and it has been properly initialized, since we cannot handle >> - * interrupts from the virtual timer with a userspace gic. >> - */ >> - if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) >> - ret = kvm_timer_enable(vcpu); >> + ret = kvm_timer_enable(vcpu); >> >> return ret; >> } >> @@ -640,9 +634,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) >> local_irq_disable(); >> >> /* >> - * Re-check atomic conditions >> + * If we have a singal pending, or need to notify a userspace >> + * irqchip about timer level changes, then we exit (and update >> + * the timer level state in kvm_timer_update_run below). >> */ >> - if (signal_pending(current)) { >> + if (signal_pending(current) || >> + kvm_timer_should_notify_user(vcpu)) { >> ret = -EINTR; >> run->exit_reason = KVM_EXIT_INTR; >> } >> @@ -714,6 +711,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) >> ret = handle_exit(vcpu, run, ret); >> } >> >> + /* Tell userspace about in-kernel device output levels */ >> + kvm_timer_update_run(vcpu); >> + >> if (vcpu->sigset_active) >> sigprocmask(SIG_SETMASK, &sigsaved, NULL); >> return ret; >> diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h >> index fe797d6..295584f 100644 >> --- a/include/kvm/arm_arch_timer.h >> +++ b/include/kvm/arm_arch_timer.h >> @@ -63,6 +63,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, >> void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); >> void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); >> void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); >> +bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu); >> +void kvm_timer_update_run(struct kvm_vcpu *vcpu); >> void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); >> >> u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); >> diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c >> index 363f0d2..5dc2167 100644 >> --- a/virt/kvm/arm/arch_timer.c >> +++ b/virt/kvm/arm/arch_timer.c >> @@ -184,6 +184,27 @@ bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) >> return cval <= now; >> } >> >> +/* >> + * Reflect the timer output level into the kvm_run structure >> + */ >> +void kvm_timer_update_run(struct kvm_vcpu *vcpu) >> +{ >> + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); >> + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); >> + struct kvm_sync_regs *regs = &vcpu->run->s.regs; >> + >> + if (likely(irqchip_in_kernel(vcpu->kvm))) >> + return; >> + >> + /* Populate the device bitmap with the timer states */ >> + regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER | >> + KVM_ARM_DEV_EL1_PTIMER); >> + if (vtimer->irq.level) >> + regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER; >> + if (ptimer->irq.level) >> + regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER; >> +} >> + >> static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, >> struct arch_timer_context *timer_ctx) >> { >> @@ -194,9 +215,12 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, >> trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, >> timer_ctx->irq.level); >> >> - ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, >> - timer_ctx->irq.level); >> - WARN_ON(ret); >> + if (likely(irqchip_in_kernel(vcpu->kvm))) { >> + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, >> + timer_ctx->irq.irq, >> + timer_ctx->irq.level); >> + WARN_ON(ret); >> + } >> } >> >> /* >> @@ -215,7 +239,7 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) >> * because the guest would never see the interrupt. Instead wait >> * until we call this function from kvm_timer_flush_hwstate. >> */ >> - if (!timer->enabled) >> + if (unlikely(!timer->enabled)) >> return; >> >> if (kvm_timer_should_fire(vtimer) != vtimer->irq.level) >> @@ -282,28 +306,12 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu) >> timer_disarm(timer); >> } >> >> -/** >> - * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu >> - * @vcpu: The vcpu pointer >> - * >> - * Check if the virtual timer has expired while we were running in the host, >> - * and inject an interrupt if that was the case. >> - */ >> -void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) >> +static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu) >> { >> - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; >> struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); >> bool phys_active; >> int ret; >> >> - if (unlikely(!timer->enabled)) >> - return; >> - >> - kvm_timer_update_state(vcpu); >> - >> - /* Set the background timer for the physical timer emulation. */ >> - kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu)); >> - >> /* >> * If we enter the guest with the virtual input level to the VGIC >> * asserted, then we have already told the VGIC what we need to, and >> @@ -355,11 +363,72 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) >> vtimer->active_cleared_last = !phys_active; >> } >> >> +bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) >> +{ >> + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); >> + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); >> + struct kvm_sync_regs *sregs = &vcpu->run->s.regs; >> + bool vlevel, plevel; >> + >> + if (likely(irqchip_in_kernel(vcpu->kvm))) >> + return false; >> + >> + vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER; >> + plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER; >> + >> + return vtimer->irq.level != vlevel || >> + ptimer->irq.level != plevel; >> +} >> + >> +static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu) >> +{ >> + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); >> + >> + /* >> + * To prevent continuously exiting from the guest, we mask the >> + * physical interrupt such that the guest can make forward progress. >> + * Once we detect the output level being deasserted, we unmask the >> + * interrupt again so that we exit from the guest when the timer >> + * fires. >> + */ >> + if (vtimer->irq.level) >> + disable_percpu_irq(host_vtimer_irq); >> + else >> + enable_percpu_irq(host_vtimer_irq, 0); >> +} >> + >> +/** >> + * kvm_timer_flush_hwstate - prepare timers before running the vcpu >> + * @vcpu: The vcpu pointer >> + * >> + * Check if the virtual timer has expired while we were running in the host, >> + * and inject an interrupt if that was the case, making sure the timer is >> + * masked or disabled on the host so that we keep executing. Also schedule a >> + * software timer for the physical timer if it is enabled. >> + */ >> +void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) >> +{ >> + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; >> + >> + if (unlikely(!timer->enabled)) >> + return; >> + >> + kvm_timer_update_state(vcpu); >> + >> + /* Set the background timer for the physical timer emulation. */ >> + kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu)); >> + >> + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) >> + kvm_timer_flush_hwstate_user(vcpu); >> + else >> + kvm_timer_flush_hwstate_vgic(vcpu); >> +} >> + >> /** >> * kvm_timer_sync_hwstate - sync timer state from cpu >> * @vcpu: The vcpu pointer >> * >> - * Check if the virtual timer has expired while we were running in the guest, >> + * Check if any of the timers have expired while we were running in the guest, >> * and inject an interrupt if that was the case. >> */ >> void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) >> @@ -559,6 +628,13 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) >> if (timer->enabled) >> return 0; >> >> + /* Without a VGIC we do not map virtual IRQs to physical IRQs */ >> + if (!irqchip_in_kernel(vcpu->kvm)) >> + goto no_vgic; >> + >> + if (!vgic_initialized(vcpu->kvm)) >> + return -ENODEV; >> + >> /* >> * Find the physical IRQ number corresponding to the host_vtimer_irq >> */ >> @@ -582,8 +658,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) >> if (ret) >> return ret; >> >> +no_vgic: >> timer->enabled = 1; > > What happens if > > 1) User space spawns a VM with user space irqchip > 2) Runs the VM > 3) Then adds a virtual gic device As soon as a vcpu has run once, it is not possible to instantiate a vgic (see virt/kvm/arm/vgic/vgic-init.c:kvm_vgic_create around line 101). Thanks, M.
On 06.04.17 10:25, Marc Zyngier wrote: > On 06/04/17 09:16, Alexander Graf wrote: >> >> >> On 05.04.17 11:28, Christoffer Dall wrote: >>> From: Alexander Graf <agraf@suse.de> >>> >>> @@ -559,6 +628,13 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) >>> if (timer->enabled) >>> return 0; >>> >>> + /* Without a VGIC we do not map virtual IRQs to physical IRQs */ >>> + if (!irqchip_in_kernel(vcpu->kvm)) >>> + goto no_vgic; >>> + >>> + if (!vgic_initialized(vcpu->kvm)) >>> + return -ENODEV; >>> + >>> /* >>> * Find the physical IRQ number corresponding to the host_vtimer_irq >>> */ >>> @@ -582,8 +658,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) >>> if (ret) >>> return ret; >>> >>> +no_vgic: >>> timer->enabled = 1; >> >> What happens if >> >> 1) User space spawns a VM with user space irqchip >> 2) Runs the VM >> 3) Then adds a virtual gic device > > As soon as a vcpu has run once, it is not possible to instantiate a vgic > (see virt/kvm/arm/vgic/vgic-init.c:kvm_vgic_create around line 101). Ah, I was missing that part. Awesome, all problems solved :). Alex
On 05/04/17 10:28, Christoffer Dall wrote: > From: Alexander Graf <agraf@suse.de> > > If you're running with a userspace gic or other interrupt constroller controller > (that is no vgic in the kernel), then you have so far not been able to > use the architected timers, because the output of the architected > timers, which are driven inside the kernel, was a kernel-only construct > between the arch timer code and the vgic. > > This patch implements the new KVM_CAP_ARM_USER_IRQ feature, where we use a > side channel on the kvm_run structure, run->s.regs.device_irq_level, to > always notify userspace of the timer output levels when using a userspace > irqchip. > > This works by ensureing that before we enter the guest, if the timer ensuring > output level has changed compared to what we last told userspace, we > don't enter the guest, but instead return to userspace to notify it of > the new level. If we are exiting, because of an MMIO for example, and > the level changed at the same time, the value is also updated and > userspace can sample the line as it needs. This is nicely achieved > simply always updating the timer_irq_level field after the main run > loop. > > Note that the kvm_timer_update_irq trace event is changed to show the > host IRQ number for the timer instead of the guest IRQ number, because > the kernel no longer know which IRQ userspace wires up the timer signal > to. > > Also note that this patch implements all required functionality but does > not yet advertise the capability. > > Signed-off-by: Alexander Graf <agraf@suse.de> > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> Otherwise looks good. M.
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 7fa4898..efb16e5 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -515,13 +515,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) return ret; } - /* - * Enable the arch timers only if we have an in-kernel VGIC - * and it has been properly initialized, since we cannot handle - * interrupts from the virtual timer with a userspace gic. - */ - if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) - ret = kvm_timer_enable(vcpu); + ret = kvm_timer_enable(vcpu); return ret; } @@ -640,9 +634,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) local_irq_disable(); /* - * Re-check atomic conditions + * If we have a singal pending, or need to notify a userspace + * irqchip about timer level changes, then we exit (and update + * the timer level state in kvm_timer_update_run below). */ - if (signal_pending(current)) { + if (signal_pending(current) || + kvm_timer_should_notify_user(vcpu)) { ret = -EINTR; run->exit_reason = KVM_EXIT_INTR; } @@ -714,6 +711,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = handle_exit(vcpu, run, ret); } + /* Tell userspace about in-kernel device output levels */ + kvm_timer_update_run(vcpu); + if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); return ret; diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index fe797d6..295584f 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -63,6 +63,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); +bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu); +void kvm_timer_update_run(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 363f0d2..5dc2167 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -184,6 +184,27 @@ bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) return cval <= now; } +/* + * Reflect the timer output level into the kvm_run structure + */ +void kvm_timer_update_run(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct kvm_sync_regs *regs = &vcpu->run->s.regs; + + if (likely(irqchip_in_kernel(vcpu->kvm))) + return; + + /* Populate the device bitmap with the timer states */ + regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER | + KVM_ARM_DEV_EL1_PTIMER); + if (vtimer->irq.level) + regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER; + if (ptimer->irq.level) + regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER; +} + static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, struct arch_timer_context *timer_ctx) { @@ -194,9 +215,12 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, timer_ctx->irq.level); - ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, - timer_ctx->irq.level); - WARN_ON(ret); + if (likely(irqchip_in_kernel(vcpu->kvm))) { + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + timer_ctx->irq.irq, + timer_ctx->irq.level); + WARN_ON(ret); + } } /* @@ -215,7 +239,7 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) * because the guest would never see the interrupt. Instead wait * until we call this function from kvm_timer_flush_hwstate. */ - if (!timer->enabled) + if (unlikely(!timer->enabled)) return; if (kvm_timer_should_fire(vtimer) != vtimer->irq.level) @@ -282,28 +306,12 @@ void kvm_timer_unschedule(struct kvm_vcpu *vcpu) timer_disarm(timer); } -/** - * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu - * @vcpu: The vcpu pointer - * - * Check if the virtual timer has expired while we were running in the host, - * and inject an interrupt if that was the case. - */ -void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) +static void kvm_timer_flush_hwstate_vgic(struct kvm_vcpu *vcpu) { - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); bool phys_active; int ret; - if (unlikely(!timer->enabled)) - return; - - kvm_timer_update_state(vcpu); - - /* Set the background timer for the physical timer emulation. */ - kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu)); - /* * If we enter the guest with the virtual input level to the VGIC * asserted, then we have already told the VGIC what we need to, and @@ -355,11 +363,72 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) vtimer->active_cleared_last = !phys_active; } +bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct kvm_sync_regs *sregs = &vcpu->run->s.regs; + bool vlevel, plevel; + + if (likely(irqchip_in_kernel(vcpu->kvm))) + return false; + + vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER; + plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER; + + return vtimer->irq.level != vlevel || + ptimer->irq.level != plevel; +} + +static void kvm_timer_flush_hwstate_user(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + + /* + * To prevent continuously exiting from the guest, we mask the + * physical interrupt such that the guest can make forward progress. + * Once we detect the output level being deasserted, we unmask the + * interrupt again so that we exit from the guest when the timer + * fires. + */ + if (vtimer->irq.level) + disable_percpu_irq(host_vtimer_irq); + else + enable_percpu_irq(host_vtimer_irq, 0); +} + +/** + * kvm_timer_flush_hwstate - prepare timers before running the vcpu + * @vcpu: The vcpu pointer + * + * Check if the virtual timer has expired while we were running in the host, + * and inject an interrupt if that was the case, making sure the timer is + * masked or disabled on the host so that we keep executing. Also schedule a + * software timer for the physical timer if it is enabled. + */ +void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + if (unlikely(!timer->enabled)) + return; + + kvm_timer_update_state(vcpu); + + /* Set the background timer for the physical timer emulation. */ + kvm_timer_emulate(vcpu, vcpu_ptimer(vcpu)); + + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + kvm_timer_flush_hwstate_user(vcpu); + else + kvm_timer_flush_hwstate_vgic(vcpu); +} + /** * kvm_timer_sync_hwstate - sync timer state from cpu * @vcpu: The vcpu pointer * - * Check if the virtual timer has expired while we were running in the guest, + * Check if any of the timers have expired while we were running in the guest, * and inject an interrupt if that was the case. */ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) @@ -559,6 +628,13 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (timer->enabled) return 0; + /* Without a VGIC we do not map virtual IRQs to physical IRQs */ + if (!irqchip_in_kernel(vcpu->kvm)) + goto no_vgic; + + if (!vgic_initialized(vcpu->kvm)) + return -ENODEV; + /* * Find the physical IRQ number corresponding to the host_vtimer_irq */ @@ -582,8 +658,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (ret) return ret; +no_vgic: timer->enabled = 1; - return 0; }