diff mbox

[v3,2/3] Detect vGIC presence at runtime

Message ID cad42f98c38c042430da47bf69ac2db81cb957af.1438770954.git.p.fedin@samsung.com (mailing list archive)
State New, archived
Headers show

Commit Message

Pavel Fedin Aug. 5, 2015, 10:53 a.m. UTC
Before commit 662d9715840aef44dcb573b0f9fab9e8319c868a is was possible to
compile the kernel without vGIC and vTimer support. Commit message says
about possibility to detect vGIC support in runtine, but this has never
been implemented.

This patch introdices runtime check, restoring the lost functionality. It
again allows to use KVM on hardware without vGIC. Interrupt controller has
to be emulated in userspace in this case.

-ENODEV return code from probe function means there's no GIC at all.
-ENXIO happens when, for example, there is GIC node in the device tree,
but it does not specify vGIC resources. Normally this means that vGIC
hardware is defunct. Any other error code is still treated as full stop
because it might mean some really serious problems.

This patch does not touch any virtual timer code, suggesting that timer
hardware is actually in place. Normally on boards in question it is true,
however since vGIC is missing, it is impossible to correctly utilize
interrupts from the virtual timer. Since virtual timer handling is in
active redevelopment now, handling in it userspace is out of scope at
the moment. The guest is currently suggested to use some memory-mapped
timer which can be emulated in userspace.

Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
---
 arch/arm/kvm/arm.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

Comments

Marc Zyngier Aug. 14, 2015, 11:26 a.m. UTC | #1
On 05/08/15 11:53, Pavel Fedin wrote:
> Before commit 662d9715840aef44dcb573b0f9fab9e8319c868a is was possible to
> compile the kernel without vGIC and vTimer support. Commit message says
> about possibility to detect vGIC support in runtine, but this has never
> been implemented.
> 
> This patch introdices runtime check, restoring the lost functionality. It
> again allows to use KVM on hardware without vGIC. Interrupt controller has
> to be emulated in userspace in this case.
> 
> -ENODEV return code from probe function means there's no GIC at all.
> -ENXIO happens when, for example, there is GIC node in the device tree,
> but it does not specify vGIC resources. Normally this means that vGIC
> hardware is defunct. Any other error code is still treated as full stop
> because it might mean some really serious problems.
> 
> This patch does not touch any virtual timer code, suggesting that timer

And that's a problem, see below.

> hardware is actually in place. Normally on boards in question it is true,
> however since vGIC is missing, it is impossible to correctly utilize
> interrupts from the virtual timer. Since virtual timer handling is in
> active redevelopment now, handling in it userspace is out of scope at
> the moment. The guest is currently suggested to use some memory-mapped
> timer which can be emulated in userspace.
> 
> Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
> ---
>  arch/arm/kvm/arm.c | 17 +++++++++++++++--
>  1 file changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 199a50a..1039161 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -61,6 +61,8 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
>  static u8 kvm_next_vmid;
>  static DEFINE_SPINLOCK(kvm_vmid_lock);
>  
> +static bool vgic_present;
> +
>  static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
>  {
>  	BUG_ON(preemptible());
> @@ -131,7 +133,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  	kvm->arch.vmid_gen = 0;
>  
>  	/* The maximum number of VCPUs is limited by the host's GIC model */
> -	kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus();
> +	kvm->arch.max_vcpus = vgic_present ?
> +				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
>  
>  	return ret;
>  out_free_stage2_pgd:
> @@ -171,6 +174,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	int r;
>  	switch (ext) {
>  	case KVM_CAP_IRQCHIP:
> +		r = vgic_present;
> +		break;
>  	case KVM_CAP_IOEVENTFD:
>  	case KVM_CAP_DEVICE_CTRL:
>  	case KVM_CAP_USER_MEMORY:
> @@ -849,6 +854,8 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
>  
>  	switch (dev_id) {
>  	case KVM_ARM_DEVICE_VGIC_V2:
> +		if (!vgic_present)
> +			return -ENXIO;
>  		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
>  	default:
>  		return -ENODEV;
> @@ -863,6 +870,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  
>  	switch (ioctl) {
>  	case KVM_CREATE_IRQCHIP: {
> +		if (!vgic_present)
> +			return -ENXIO;
>  		return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
>  	}
>  	case KVM_ARM_SET_DEVICE_ADDR: {
> @@ -1045,8 +1054,12 @@ static int init_hyp_mode(void)
>  	 * Init HYP view of VGIC
>  	 */
>  	err = kvm_vgic_hyp_init();
> -	if (err)
> +	if (err == -ENODEV || err == -ENXIO)
> +		vgic_present = false;

Which is the default value, isn't it?

> +	else if (err)
>  		goto out_free_context;
> +	else
> +		vgic_present = true;

This is fairly unreadable. Please use a switch statement instead.

>  
>  	/*
>  	 * Init HYP architected timer support
> 

And here, we're going to assume that the arch timer still usable. We
definitely need a way to *prevent* the timer to be used when there is no
GIC. Otherwise, we're going to start trying to setup the mapping for the
active state, and the guest may start poking it.

Timer and GIC are really tied to each other. If you start making one
optional, you need to carry on working the dependency chain.

Thanks,

	M.
Pavel Fedin Aug. 14, 2015, 12:26 p.m. UTC | #2
Hello! Thank you for quick response.

> This is fairly unreadable. Please use a switch statement instead.

 Christoffer disliked it in v1, so i thought a bit and changed it. Ok, will change it back.

> And here, we're going to assume that the arch timer still usable. We
> definitely need a way to *prevent* the timer to be used when there is no
> GIC. Otherwise, we're going to start trying to setup the mapping for the
> active state, and the guest may start poking it.

But, this seems to be already done, isn't it?
According to http://lxr.free-electrons.com/source/arch/arm/kvm/arm.c#L439:
--- cut ---
459         /*
460          * Enable the arch timers only if we have an in-kernel VGIC
461          * and it has been properly initialized, since we cannot handle
462          * interrupts from the virtual timer with a userspace gic.
463          */
464         if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
465                 kvm_timer_enable(kvm);
--- cut ---
 
 Without kvm->arch.timer.enabled set to 1 by kvm_timer_enable() VM context save/restore code will
not actually touch timer registers. Therefore the host part of the code will not do anything.
 As to guest itself, only userspace can stop it from accessing timer registers. My experimental qemu
does this by removing generic timer node from guest's device tree. Virtual timer access simply
cannot be trapped, otherwise there would be no problem at all. But, OK, even if the guest programs
timer, we will just see "Unexpected IRQ 27" on the console, and the guest will not work, so it's not
terribly fatal.

 You know, i actually looked at it before posting v3. I tried to omit kvm_timer_hyp_init() call too,
and got lots of crashes because:
1. kvm_timer_init() is called unconditionally
2. qemu does some initialization of timer registers unconditionally using ioctl, and they end up in
kvm_arm_timer_set_reg()
 Both of these points end up in kvm_phys_timer_read() which dereferences timecounter == NULL.
 Well, i could make kvm_phys_timer_read() just returning 0 in this case, but this could mis-trigger
kvm_timer_should_fire() in some circumstances. I would have to patch it too... At this point i
decided to stop because the result perhaps does not worth the effort and amount of patching.

 While writing this message i was walking through this code once again, and... I have a suggestion.
Actually, if we are really paranoid, we could be afraid of kvm_vgic_inject_irq() being called, which
would do some weird things without vGIC. It is possible to add a check for kvm->arch.timer.enabled
in kvm_timer_sync_hwstate() and kvm_timer_flush_hwstate(). If the timer is disabled those functions
will simply return doing nothing. This would guarantee that interrupt injection is never attempted.

 What do you think?

 And some more. Actually, it is possible to emulate generic timer in userspace, just not the virtual
one. IIRC access to physical timer can be trapped. So, if we modify guest's device tree by removing
virtual timer IRQ, the guest will fall back to physical timer. And this will be caught by the
hypervisor. After this all we have to do is to add corresponding exit code which would allow the
userspace to emulate missing CP15 (or system in case of ARM64) registers. So, this timer issue is
not grave, just i postpone implementing it until GIC issues are settled down.

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier Aug. 14, 2015, 1:10 p.m. UTC | #3
On 14/08/15 13:26, Pavel Fedin wrote:
>  Hello! Thank you for quick response.
> 
>> This is fairly unreadable. Please use a switch statement instead.
> 
>  Christoffer disliked it in v1, so i thought a bit and changed it. Ok, will change it back.
> 
>> And here, we're going to assume that the arch timer still usable. We
>> definitely need a way to *prevent* the timer to be used when there is no
>> GIC. Otherwise, we're going to start trying to setup the mapping for the
>> active state, and the guest may start poking it.
> 
> But, this seems to be already done, isn't it?
> According to http://lxr.free-electrons.com/source/arch/arm/kvm/arm.c#L439:
> --- cut ---
> 459         /*
> 460          * Enable the arch timers only if we have an in-kernel VGIC
> 461          * and it has been properly initialized, since we cannot handle
> 462          * interrupts from the virtual timer with a userspace gic.
> 463          */
> 464         if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
> 465                 kvm_timer_enable(kvm);
> --- cut ---

Right, I failed to remember that one. Sorry. It should be safe then.
Hopefully.

[...]

>  And some more. Actually, it is possible to emulate generic timer in userspace, just not the virtual
> one. IIRC access to physical timer can be trapped. So, if we modify guest's device tree by removing
> virtual timer IRQ, the guest will fall back to physical timer. And this will be caught by the
> hypervisor. After this all we have to do is to add corresponding exit code which would allow the
> userspace to emulate missing CP15 (or system in case of ARM64) registers. So, this timer issue is
> not grave, just i postpone implementing it until GIC issues are settled down.

This is completely Linux-specific, unfortunately. And it relies on
userpace to expose a modified DT, so you need to be able to report back
to userspace that you can't deal with the virtual timer.

Which brings me to the next point: how do you tell userspace that your
timers are non-functional?

Thanks,

	M.
Pavel Fedin Aug. 14, 2015, 1:41 p.m. UTC | #4
Hello!

> This is completely Linux-specific, unfortunately.

 Yes. But better than nothing.

> And it relies on
> userpace to expose a modified DT, so you need to be able to report back
> to userspace that you can't deal with the virtual timer.

 Easy. If KVM_CAP_IRQCHIP == 0, then we apparently don't have vGIC, and since we know that vGIC and
vTimer are paired, we know that there is no vTimer too.

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 199a50a..1039161 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -61,6 +61,8 @@  static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
+static bool vgic_present;
+
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
 {
 	BUG_ON(preemptible());
@@ -131,7 +133,8 @@  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.vmid_gen = 0;
 
 	/* The maximum number of VCPUs is limited by the host's GIC model */
-	kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus();
+	kvm->arch.max_vcpus = vgic_present ?
+				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
 
 	return ret;
 out_free_stage2_pgd:
@@ -171,6 +174,8 @@  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	int r;
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
+		r = vgic_present;
+		break;
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_USER_MEMORY:
@@ -849,6 +854,8 @@  static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 
 	switch (dev_id) {
 	case KVM_ARM_DEVICE_VGIC_V2:
+		if (!vgic_present)
+			return -ENXIO;
 		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
 	default:
 		return -ENODEV;
@@ -863,6 +870,8 @@  long kvm_arch_vm_ioctl(struct file *filp,
 
 	switch (ioctl) {
 	case KVM_CREATE_IRQCHIP: {
+		if (!vgic_present)
+			return -ENXIO;
 		return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
 	}
 	case KVM_ARM_SET_DEVICE_ADDR: {
@@ -1045,8 +1054,12 @@  static int init_hyp_mode(void)
 	 * Init HYP view of VGIC
 	 */
 	err = kvm_vgic_hyp_init();
-	if (err)
+	if (err == -ENODEV || err == -ENXIO)
+		vgic_present = false;
+	else if (err)
 		goto out_free_context;
+	else
+		vgic_present = true;
 
 	/*
 	 * Init HYP architected timer support