diff mbox

[v3,09/11] KVM: arm/arm64: vgic: Prevent userspace injection of a mapped interrupt

Message ID 1437753309-17989-10-git-send-email-marc.zyngier@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Marc Zyngier July 24, 2015, 3:55 p.m. UTC
Virtual interrupts mapped to a HW interrupt should only be triggered
from inside the kernel. Otherwise, you could end up confusing the
kernel (and the GIC's) state machine.

Rearrange the injection path so that kvm_vgic_inject_irq is
used for non-mapped interrupts, and kvm_vgic_inject_mapped_irq is
used for mapped interrupts. The latter should only be called from
inside the kernel (timer, VFIO).

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/kvm/arm_vgic.h |  2 +
 virt/kvm/arm/vgic.c    | 99 ++++++++++++++++++++++++++++++++++----------------
 2 files changed, 70 insertions(+), 31 deletions(-)

Comments

Christoffer Dall Aug. 4, 2015, 1:45 p.m. UTC | #1
On Fri, Jul 24, 2015 at 04:55:07PM +0100, Marc Zyngier wrote:
> Virtual interrupts mapped to a HW interrupt should only be triggered
> from inside the kernel. Otherwise, you could end up confusing the
> kernel (and the GIC's) state machine.
> 
> Rearrange the injection path so that kvm_vgic_inject_irq is
> used for non-mapped interrupts, and kvm_vgic_inject_mapped_irq is
> used for mapped interrupts. The latter should only be called from
> inside the kernel (timer, VFIO).
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  include/kvm/arm_vgic.h |  2 +
>  virt/kvm/arm/vgic.c    | 99 ++++++++++++++++++++++++++++++++++----------------
>  2 files changed, 70 insertions(+), 31 deletions(-)
> 
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 7306b4b..f6bfd79 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -351,6 +351,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>  			bool level);
> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
> +			       struct irq_phys_map *map, bool level);
>  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> index 3f7b690..e40ef70 100644
> --- a/virt/kvm/arm/vgic.c
> +++ b/virt/kvm/arm/vgic.c
> @@ -1533,7 +1533,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
>  }
>  
>  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
> -				  unsigned int irq_num, bool level)
> +				   struct irq_phys_map *map,
> +				   unsigned int irq_num, bool level)
>  {
>  	struct vgic_dist *dist = &kvm->arch.vgic;
>  	struct kvm_vcpu *vcpu;
> @@ -1541,6 +1542,9 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>  	int enabled;
>  	bool ret = true, can_inject = true;
>  
> +	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
> +		return -EINVAL;
> +
>  	spin_lock(&dist->lock);
>  
>  	vcpu = kvm_get_vcpu(kvm, cpuid);
> @@ -1603,14 +1607,42 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>  out:
>  	spin_unlock(&dist->lock);
>  
> -	return ret ? cpuid : -EINVAL;
> +	if (!ret) {

don't you mean if (ret) here?  hint: ret is a bool

> +		/* kick the specified vcpu */
> +		kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid));
> +	}
> +
> +	return 0;

isn't this a change in the internal API?
Before, we would return -EINVAL when ret is false.  Not sure if this
has any consequences though?

> +}
> +
> +static int vgic_lazy_init(struct kvm *kvm)
> +{
> +	int ret = 0;
> +
> +	if (unlikely(!vgic_initialized(kvm))) {
> +		/*
> +		 * We only provide the automatic initialization of the VGIC
> +		 * for the legacy case of a GICv2. Any other type must
> +		 * be explicitly initialized once setup with the respective
> +		 * KVM device call.
> +		 */
> +		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
> +			return -EBUSY;
> +
> +		mutex_lock(&kvm->lock);
> +		ret = vgic_init(kvm);
> +		mutex_unlock(&kvm->lock);
> +	}
> +
> +	return ret;
>  }
>  
>  /**
>   * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
>   * @kvm:     The VM structure pointer
>   * @cpuid:   The CPU for PPIs
> - * @irq_num: The IRQ number that is assigned to the device
> + * @irq_num: The IRQ number that is assigned to the device. This IRQ
> + *           must not be mapped to a HW interrupt.
>   * @level:   Edge-triggered:  true:  to trigger the interrupt
>   *			      false: to ignore the call
>   *	     Level-sensitive  true:  activates an interrupt
> @@ -1623,39 +1655,44 @@ out:
>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>  			bool level)
>  {
> -	int ret = 0;
> -	int vcpu_id;
> -
> -	if (unlikely(!vgic_initialized(kvm))) {
> -		/*
> -		 * We only provide the automatic initialization of the VGIC
> -		 * for the legacy case of a GICv2. Any other type must
> -		 * be explicitly initialized once setup with the respective
> -		 * KVM device call.
> -		 */
> -		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) {
> -			ret = -EBUSY;
> -			goto out;
> -		}
> -		mutex_lock(&kvm->lock);
> -		ret = vgic_init(kvm);
> -		mutex_unlock(&kvm->lock);
> +	struct irq_phys_map *map;
> +	int ret;
>  
> -		if (ret)
> -			goto out;
> -	}
> +	ret = vgic_lazy_init(kvm);
> +	if (ret)
> +		return ret;
>  
> -	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
> +	map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num);
> +	if (map)
>  		return -EINVAL;
>  
> -	vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
> -	if (vcpu_id >= 0) {
> -		/* kick the specified vcpu */
> -		kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
> -	}
> +	return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level);
> +}
>  
> -out:
> -	return ret;
> +/**
> + * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic
> + * @kvm:     The VM structure pointer
> + * @cpuid:   The CPU for PPIs
> + * @map:     Pointer to a irq_phys_map structure describing the mapping
> + * @level:   Edge-triggered:  true:  to trigger the interrupt
> + *			      false: to ignore the call
> + *	     Level-sensitive  true:  activates an interrupt
> + *			      false: deactivates an interrupt

just noticed this unfortunate use of the words 'activate/deactivate'
here, which is not true, it just raises/lowers the input signal...

> + *
> + * The GIC is not concerned with devices being active-LOW or active-HIGH for
> + * level-sensitive interrupts.  You can think of the level parameter as 1
> + * being HIGH and 0 being LOW and all devices being active-HIGH.
> + */
> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
> +			       struct irq_phys_map *map, bool level)
> +{
> +	int ret;
> +
> +	ret = vgic_lazy_init(kvm);
> +	if (ret)
> +		return ret;
> +
> +	return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level);
>  }
>  
>  static irqreturn_t vgic_maintenance_handler(int irq, void *data)
> -- 
> 2.1.4
> 

Thanks,
-Christoffer
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier Aug. 4, 2015, 4:02 p.m. UTC | #2
On 04/08/15 14:45, Christoffer Dall wrote:
> On Fri, Jul 24, 2015 at 04:55:07PM +0100, Marc Zyngier wrote:
>> Virtual interrupts mapped to a HW interrupt should only be triggered
>> from inside the kernel. Otherwise, you could end up confusing the
>> kernel (and the GIC's) state machine.
>>
>> Rearrange the injection path so that kvm_vgic_inject_irq is
>> used for non-mapped interrupts, and kvm_vgic_inject_mapped_irq is
>> used for mapped interrupts. The latter should only be called from
>> inside the kernel (timer, VFIO).
>>
>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>> ---
>>  include/kvm/arm_vgic.h |  2 +
>>  virt/kvm/arm/vgic.c    | 99 ++++++++++++++++++++++++++++++++++----------------
>>  2 files changed, 70 insertions(+), 31 deletions(-)
>>
>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>> index 7306b4b..f6bfd79 100644
>> --- a/include/kvm/arm_vgic.h
>> +++ b/include/kvm/arm_vgic.h
>> @@ -351,6 +351,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
>>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
>>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>>  			bool level);
>> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
>> +			       struct irq_phys_map *map, bool level);
>>  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
>>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
>> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
>> index 3f7b690..e40ef70 100644
>> --- a/virt/kvm/arm/vgic.c
>> +++ b/virt/kvm/arm/vgic.c
>> @@ -1533,7 +1533,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
>>  }
>>  
>>  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>> -				  unsigned int irq_num, bool level)
>> +				   struct irq_phys_map *map,
>> +				   unsigned int irq_num, bool level)
>>  {
>>  	struct vgic_dist *dist = &kvm->arch.vgic;
>>  	struct kvm_vcpu *vcpu;
>> @@ -1541,6 +1542,9 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>>  	int enabled;
>>  	bool ret = true, can_inject = true;
>>  
>> +	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
>> +		return -EINVAL;
>> +
>>  	spin_lock(&dist->lock);
>>  
>>  	vcpu = kvm_get_vcpu(kvm, cpuid);
>> @@ -1603,14 +1607,42 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>>  out:
>>  	spin_unlock(&dist->lock);
>>  
>> -	return ret ? cpuid : -EINVAL;
>> +	if (!ret) {
> 
> don't you mean if (ret) here?  hint: ret is a bool

Ouch. Nice catch!

> 
>> +		/* kick the specified vcpu */
>> +		kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid));
>> +	}
>> +
>> +	return 0;
> 
> isn't this a change in the internal API?
> Before, we would return -EINVAL when ret is false.  Not sure if this
> has any consequences though?

I don't think this is a change in API. Before this patch, we would
either return a vcpuid or -EINVAL. But the error would not be propagated
beyond kvm_vgic_inject_irq, effectively discarding the error code.

Also, it is a bit odd to return an error because the toggling of the
line wasn't significant (like bringing the line down on an
edge-triggered interrupt).

> 
>> +}
>> +
>> +static int vgic_lazy_init(struct kvm *kvm)
>> +{
>> +	int ret = 0;
>> +
>> +	if (unlikely(!vgic_initialized(kvm))) {
>> +		/*
>> +		 * We only provide the automatic initialization of the VGIC
>> +		 * for the legacy case of a GICv2. Any other type must
>> +		 * be explicitly initialized once setup with the respective
>> +		 * KVM device call.
>> +		 */
>> +		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
>> +			return -EBUSY;
>> +
>> +		mutex_lock(&kvm->lock);
>> +		ret = vgic_init(kvm);
>> +		mutex_unlock(&kvm->lock);
>> +	}
>> +
>> +	return ret;
>>  }
>>  
>>  /**
>>   * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
>>   * @kvm:     The VM structure pointer
>>   * @cpuid:   The CPU for PPIs
>> - * @irq_num: The IRQ number that is assigned to the device
>> + * @irq_num: The IRQ number that is assigned to the device. This IRQ
>> + *           must not be mapped to a HW interrupt.
>>   * @level:   Edge-triggered:  true:  to trigger the interrupt
>>   *			      false: to ignore the call
>>   *	     Level-sensitive  true:  activates an interrupt
>> @@ -1623,39 +1655,44 @@ out:
>>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>>  			bool level)
>>  {
>> -	int ret = 0;
>> -	int vcpu_id;
>> -
>> -	if (unlikely(!vgic_initialized(kvm))) {
>> -		/*
>> -		 * We only provide the automatic initialization of the VGIC
>> -		 * for the legacy case of a GICv2. Any other type must
>> -		 * be explicitly initialized once setup with the respective
>> -		 * KVM device call.
>> -		 */
>> -		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) {
>> -			ret = -EBUSY;
>> -			goto out;
>> -		}
>> -		mutex_lock(&kvm->lock);
>> -		ret = vgic_init(kvm);
>> -		mutex_unlock(&kvm->lock);
>> +	struct irq_phys_map *map;
>> +	int ret;
>>  
>> -		if (ret)
>> -			goto out;
>> -	}
>> +	ret = vgic_lazy_init(kvm);
>> +	if (ret)
>> +		return ret;
>>  
>> -	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
>> +	map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num);
>> +	if (map)
>>  		return -EINVAL;
>>  
>> -	vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
>> -	if (vcpu_id >= 0) {
>> -		/* kick the specified vcpu */
>> -		kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
>> -	}
>> +	return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level);
>> +}
>>  
>> -out:
>> -	return ret;
>> +/**
>> + * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic
>> + * @kvm:     The VM structure pointer
>> + * @cpuid:   The CPU for PPIs
>> + * @map:     Pointer to a irq_phys_map structure describing the mapping
>> + * @level:   Edge-triggered:  true:  to trigger the interrupt
>> + *			      false: to ignore the call
>> + *	     Level-sensitive  true:  activates an interrupt
>> + *			      false: deactivates an interrupt
> 
> just noticed this unfortunate use of the words 'activate/deactivate'
> here, which is not true, it just raises/lowers the input signal...
> 

I'll clean that up.

>> + *
>> + * The GIC is not concerned with devices being active-LOW or active-HIGH for
>> + * level-sensitive interrupts.  You can think of the level parameter as 1
>> + * being HIGH and 0 being LOW and all devices being active-HIGH.
>> + */
>> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
>> +			       struct irq_phys_map *map, bool level)
>> +{
>> +	int ret;
>> +
>> +	ret = vgic_lazy_init(kvm);
>> +	if (ret)
>> +		return ret;
>> +
>> +	return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level);
>>  }
>>  
>>  static irqreturn_t vgic_maintenance_handler(int irq, void *data)
>> -- 
>> 2.1.4
>>
> 
> Thanks,
> -Christoffer
> 

Thanks,

	M.
Eric Auger Aug. 4, 2015, 4:21 p.m. UTC | #3
Hi Marc,
On 07/24/2015 05:55 PM, Marc Zyngier wrote:
> Virtual interrupts mapped to a HW interrupt should only be triggered
> from inside the kernel. Otherwise, you could end up confusing the
> kernel (and the GIC's) state machine.
> 
> Rearrange the injection path so that kvm_vgic_inject_irq is
> used for non-mapped interrupts, and kvm_vgic_inject_mapped_irq is
> used for mapped interrupts. The latter should only be called from
> inside the kernel (timer, VFIO).
nit: I would replace VFIO by irqfd.
VFIO just triggers the eventfd/irqfd. This is KVM/irqfd that injects the
virtual irq upon the irqfd signaling and he irqfd adaptation/ARM
currently is implemented in vgic.c
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  include/kvm/arm_vgic.h |  2 +
>  virt/kvm/arm/vgic.c    | 99 ++++++++++++++++++++++++++++++++++----------------
>  2 files changed, 70 insertions(+), 31 deletions(-)
> 
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 7306b4b..f6bfd79 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -351,6 +351,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>  			bool level);
> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
> +			       struct irq_phys_map *map, bool level);
>  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> index 3f7b690..e40ef70 100644
> --- a/virt/kvm/arm/vgic.c
> +++ b/virt/kvm/arm/vgic.c
> @@ -1533,7 +1533,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
>  }
>  
>  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
> -				  unsigned int irq_num, bool level)
> +				   struct irq_phys_map *map,
> +				   unsigned int irq_num, bool level)
>  {
In vgic_update_irq_pending, I needed to modify the following line and
add the "&& !map".

        if (!vgic_validate_injection(vcpu, irq_num, level) && !map) {

Without that, the level being not properly modeled for level sensitive
forwarded IRQs, the 2d injection fails.

Eric


>  	struct vgic_dist *dist = &kvm->arch.vgic;
>  	struct kvm_vcpu *vcpu;
> @@ -1541,6 +1542,9 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>  	int enabled;
>  	bool ret = true, can_inject = true;
>  
> +	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
> +		return -EINVAL;
> +
>  	spin_lock(&dist->lock);
>  
>  	vcpu = kvm_get_vcpu(kvm, cpuid);
> @@ -1603,14 +1607,42 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>  out:
>  	spin_unlock(&dist->lock);
>  
> -	return ret ? cpuid : -EINVAL;
> +	if (!ret) {
> +		/* kick the specified vcpu */
> +		kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid));
> +	}
> +
> +	return 0;
> +}
> +
> +static int vgic_lazy_init(struct kvm *kvm)
> +{
> +	int ret = 0;
> +
> +	if (unlikely(!vgic_initialized(kvm))) {
> +		/*
> +		 * We only provide the automatic initialization of the VGIC
> +		 * for the legacy case of a GICv2. Any other type must
> +		 * be explicitly initialized once setup with the respective
> +		 * KVM device call.
> +		 */
> +		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
> +			return -EBUSY;
> +
> +		mutex_lock(&kvm->lock);
> +		ret = vgic_init(kvm);
> +		mutex_unlock(&kvm->lock);
> +	}
> +
> +	return ret;
>  }
>  
>  /**
>   * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
>   * @kvm:     The VM structure pointer
>   * @cpuid:   The CPU for PPIs
> - * @irq_num: The IRQ number that is assigned to the device
> + * @irq_num: The IRQ number that is assigned to the device. This IRQ
> + *           must not be mapped to a HW interrupt.
>   * @level:   Edge-triggered:  true:  to trigger the interrupt
>   *			      false: to ignore the call
>   *	     Level-sensitive  true:  activates an interrupt
> @@ -1623,39 +1655,44 @@ out:
>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>  			bool level)
>  {
> -	int ret = 0;
> -	int vcpu_id;
> -
> -	if (unlikely(!vgic_initialized(kvm))) {
> -		/*
> -		 * We only provide the automatic initialization of the VGIC
> -		 * for the legacy case of a GICv2. Any other type must
> -		 * be explicitly initialized once setup with the respective
> -		 * KVM device call.
> -		 */
> -		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) {
> -			ret = -EBUSY;
> -			goto out;
> -		}
> -		mutex_lock(&kvm->lock);
> -		ret = vgic_init(kvm);
> -		mutex_unlock(&kvm->lock);
> +	struct irq_phys_map *map;
> +	int ret;
>  
> -		if (ret)
> -			goto out;
> -	}
> +	ret = vgic_lazy_init(kvm);
> +	if (ret)
> +		return ret;
>  
> -	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
> +	map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num);
> +	if (map)
>  		return -EINVAL;
>  
> -	vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
> -	if (vcpu_id >= 0) {
> -		/* kick the specified vcpu */
> -		kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
> -	}
> +	return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level);
> +}
>  
> -out:
> -	return ret;
> +/**
> + * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic
> + * @kvm:     The VM structure pointer
> + * @cpuid:   The CPU for PPIs
> + * @map:     Pointer to a irq_phys_map structure describing the mapping
> + * @level:   Edge-triggered:  true:  to trigger the interrupt
> + *			      false: to ignore the call
> + *	     Level-sensitive  true:  activates an interrupt
> + *			      false: deactivates an interrupt
> + *
> + * The GIC is not concerned with devices being active-LOW or active-HIGH for
> + * level-sensitive interrupts.  You can think of the level parameter as 1
> + * being HIGH and 0 being LOW and all devices being active-HIGH.
> + */
> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
> +			       struct irq_phys_map *map, bool level)
> +{
> +	int ret;
> +
> +	ret = vgic_lazy_init(kvm);
> +	if (ret)
> +		return ret;
> +
> +	return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level);
>  }
>  
>  static irqreturn_t vgic_maintenance_handler(int irq, void *data)
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier Aug. 4, 2015, 4:44 p.m. UTC | #4
On 04/08/15 17:21, Eric Auger wrote:
> Hi Marc,
> On 07/24/2015 05:55 PM, Marc Zyngier wrote:
>> Virtual interrupts mapped to a HW interrupt should only be triggered
>> from inside the kernel. Otherwise, you could end up confusing the
>> kernel (and the GIC's) state machine.
>>
>> Rearrange the injection path so that kvm_vgic_inject_irq is
>> used for non-mapped interrupts, and kvm_vgic_inject_mapped_irq is
>> used for mapped interrupts. The latter should only be called from
>> inside the kernel (timer, VFIO).
> nit: I would replace VFIO by irqfd.
> VFIO just triggers the eventfd/irqfd. This is KVM/irqfd that injects the
> virtual irq upon the irqfd signaling and he irqfd adaptation/ARM
> currently is implemented in vgic.c

Ah, thanks for reminding me of the right terminology, I tend to think of
it as one big bag of nasty tricks... ;-)

I'll update the commit message.

>>
>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>> ---
>>  include/kvm/arm_vgic.h |  2 +
>>  virt/kvm/arm/vgic.c    | 99 ++++++++++++++++++++++++++++++++++----------------
>>  2 files changed, 70 insertions(+), 31 deletions(-)
>>
>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>> index 7306b4b..f6bfd79 100644
>> --- a/include/kvm/arm_vgic.h
>> +++ b/include/kvm/arm_vgic.h
>> @@ -351,6 +351,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
>>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
>>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>>  			bool level);
>> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
>> +			       struct irq_phys_map *map, bool level);
>>  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
>>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
>> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
>> index 3f7b690..e40ef70 100644
>> --- a/virt/kvm/arm/vgic.c
>> +++ b/virt/kvm/arm/vgic.c
>> @@ -1533,7 +1533,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
>>  }
>>  
>>  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>> -				  unsigned int irq_num, bool level)
>> +				   struct irq_phys_map *map,
>> +				   unsigned int irq_num, bool level)
>>  {
> In vgic_update_irq_pending, I needed to modify the following line and
> add the "&& !map".
> 
>         if (!vgic_validate_injection(vcpu, irq_num, level) && !map) {
> 
> Without that, the level being not properly modeled for level sensitive
> forwarded IRQs, the 2d injection fails.

Ah! Is that because we never see the line being reset to zero, and the
VGIC still sees the line as pending at the distributor level?

Thanks,

	M.
Christoffer Dall Aug. 4, 2015, 5:38 p.m. UTC | #5
On Tue, Aug 04, 2015 at 05:02:41PM +0100, Marc Zyngier wrote:
> On 04/08/15 14:45, Christoffer Dall wrote:
> > On Fri, Jul 24, 2015 at 04:55:07PM +0100, Marc Zyngier wrote:
> >> Virtual interrupts mapped to a HW interrupt should only be triggered
> >> from inside the kernel. Otherwise, you could end up confusing the
> >> kernel (and the GIC's) state machine.
> >>
> >> Rearrange the injection path so that kvm_vgic_inject_irq is
> >> used for non-mapped interrupts, and kvm_vgic_inject_mapped_irq is
> >> used for mapped interrupts. The latter should only be called from
> >> inside the kernel (timer, VFIO).
> >>
> >> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> >> ---
> >>  include/kvm/arm_vgic.h |  2 +
> >>  virt/kvm/arm/vgic.c    | 99 ++++++++++++++++++++++++++++++++++----------------
> >>  2 files changed, 70 insertions(+), 31 deletions(-)
> >>
> >> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> >> index 7306b4b..f6bfd79 100644
> >> --- a/include/kvm/arm_vgic.h
> >> +++ b/include/kvm/arm_vgic.h
> >> @@ -351,6 +351,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
> >>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
> >>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
> >>  			bool level);
> >> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
> >> +			       struct irq_phys_map *map, bool level);
> >>  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
> >>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
> >>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
> >> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> >> index 3f7b690..e40ef70 100644
> >> --- a/virt/kvm/arm/vgic.c
> >> +++ b/virt/kvm/arm/vgic.c
> >> @@ -1533,7 +1533,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
> >>  }
> >>  
> >>  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
> >> -				  unsigned int irq_num, bool level)
> >> +				   struct irq_phys_map *map,
> >> +				   unsigned int irq_num, bool level)
> >>  {
> >>  	struct vgic_dist *dist = &kvm->arch.vgic;
> >>  	struct kvm_vcpu *vcpu;
> >> @@ -1541,6 +1542,9 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
> >>  	int enabled;
> >>  	bool ret = true, can_inject = true;
> >>  
> >> +	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
> >> +		return -EINVAL;
> >> +
> >>  	spin_lock(&dist->lock);
> >>  
> >>  	vcpu = kvm_get_vcpu(kvm, cpuid);
> >> @@ -1603,14 +1607,42 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
> >>  out:
> >>  	spin_unlock(&dist->lock);
> >>  
> >> -	return ret ? cpuid : -EINVAL;
> >> +	if (!ret) {
> > 
> > don't you mean if (ret) here?  hint: ret is a bool
> 
> Ouch. Nice catch!
> 
> > 
> >> +		/* kick the specified vcpu */
> >> +		kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid));
> >> +	}
> >> +
> >> +	return 0;
> > 
> > isn't this a change in the internal API?
> > Before, we would return -EINVAL when ret is false.  Not sure if this
> > has any consequences though?
> 
> I don't think this is a change in API. Before this patch, we would
> either return a vcpuid or -EINVAL. But the error would not be propagated
> beyond kvm_vgic_inject_irq, effectively discarding the error code.
> 
> Also, it is a bit odd to return an error because the toggling of the
> line wasn't significant (like bringing the line down on an
> edge-triggered interrupt).
> 

true, indeed, my brain was too fried to think it through.

(why does coming back from vacation always involve paging in weird vgic
stuff for me?)

> > 
> >> +}
> >> +
> >> +static int vgic_lazy_init(struct kvm *kvm)
> >> +{
> >> +	int ret = 0;
> >> +
> >> +	if (unlikely(!vgic_initialized(kvm))) {
> >> +		/*
> >> +		 * We only provide the automatic initialization of the VGIC
> >> +		 * for the legacy case of a GICv2. Any other type must
> >> +		 * be explicitly initialized once setup with the respective
> >> +		 * KVM device call.
> >> +		 */
> >> +		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
> >> +			return -EBUSY;
> >> +
> >> +		mutex_lock(&kvm->lock);
> >> +		ret = vgic_init(kvm);
> >> +		mutex_unlock(&kvm->lock);
> >> +	}
> >> +
> >> +	return ret;
> >>  }
> >>  
> >>  /**
> >>   * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
> >>   * @kvm:     The VM structure pointer
> >>   * @cpuid:   The CPU for PPIs
> >> - * @irq_num: The IRQ number that is assigned to the device
> >> + * @irq_num: The IRQ number that is assigned to the device. This IRQ
> >> + *           must not be mapped to a HW interrupt.
> >>   * @level:   Edge-triggered:  true:  to trigger the interrupt
> >>   *			      false: to ignore the call
> >>   *	     Level-sensitive  true:  activates an interrupt
> >> @@ -1623,39 +1655,44 @@ out:
> >>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
> >>  			bool level)
> >>  {
> >> -	int ret = 0;
> >> -	int vcpu_id;
> >> -
> >> -	if (unlikely(!vgic_initialized(kvm))) {
> >> -		/*
> >> -		 * We only provide the automatic initialization of the VGIC
> >> -		 * for the legacy case of a GICv2. Any other type must
> >> -		 * be explicitly initialized once setup with the respective
> >> -		 * KVM device call.
> >> -		 */
> >> -		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) {
> >> -			ret = -EBUSY;
> >> -			goto out;
> >> -		}
> >> -		mutex_lock(&kvm->lock);
> >> -		ret = vgic_init(kvm);
> >> -		mutex_unlock(&kvm->lock);
> >> +	struct irq_phys_map *map;
> >> +	int ret;
> >>  
> >> -		if (ret)
> >> -			goto out;
> >> -	}
> >> +	ret = vgic_lazy_init(kvm);
> >> +	if (ret)
> >> +		return ret;
> >>  
> >> -	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
> >> +	map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num);
> >> +	if (map)
> >>  		return -EINVAL;
> >>  
> >> -	vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
> >> -	if (vcpu_id >= 0) {
> >> -		/* kick the specified vcpu */
> >> -		kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
> >> -	}
> >> +	return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level);
> >> +}
> >>  
> >> -out:
> >> -	return ret;
> >> +/**
> >> + * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic
> >> + * @kvm:     The VM structure pointer
> >> + * @cpuid:   The CPU for PPIs
> >> + * @map:     Pointer to a irq_phys_map structure describing the mapping
> >> + * @level:   Edge-triggered:  true:  to trigger the interrupt
> >> + *			      false: to ignore the call
> >> + *	     Level-sensitive  true:  activates an interrupt
> >> + *			      false: deactivates an interrupt
> > 
> > just noticed this unfortunate use of the words 'activate/deactivate'
> > here, which is not true, it just raises/lowers the input signal...
> > 
> 
> I'll clean that up.
> 

Thanks,
-Christoffer
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Auger Aug. 5, 2015, 7:32 a.m. UTC | #6
Hi Marc,
On 08/04/2015 06:44 PM, Marc Zyngier wrote:
> On 04/08/15 17:21, Eric Auger wrote:
>> Hi Marc,
>> On 07/24/2015 05:55 PM, Marc Zyngier wrote:
>>> Virtual interrupts mapped to a HW interrupt should only be triggered
>>> from inside the kernel. Otherwise, you could end up confusing the
>>> kernel (and the GIC's) state machine.
>>>
>>> Rearrange the injection path so that kvm_vgic_inject_irq is
>>> used for non-mapped interrupts, and kvm_vgic_inject_mapped_irq is
>>> used for mapped interrupts. The latter should only be called from
>>> inside the kernel (timer, VFIO).
>> nit: I would replace VFIO by irqfd.
>> VFIO just triggers the eventfd/irqfd. This is KVM/irqfd that injects the
>> virtual irq upon the irqfd signaling and he irqfd adaptation/ARM
>> currently is implemented in vgic.c
> 
> Ah, thanks for reminding me of the right terminology, I tend to think of
> it as one big bag of nasty tricks... ;-)
> 
> I'll update the commit message.
> 
>>>
>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>> ---
>>>  include/kvm/arm_vgic.h |  2 +
>>>  virt/kvm/arm/vgic.c    | 99 ++++++++++++++++++++++++++++++++++----------------
>>>  2 files changed, 70 insertions(+), 31 deletions(-)
>>>
>>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>>> index 7306b4b..f6bfd79 100644
>>> --- a/include/kvm/arm_vgic.h
>>> +++ b/include/kvm/arm_vgic.h
>>> @@ -351,6 +351,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
>>>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
>>>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>>>  			bool level);
>>> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
>>> +			       struct irq_phys_map *map, bool level);
>>>  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
>>>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>>>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
>>> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
>>> index 3f7b690..e40ef70 100644
>>> --- a/virt/kvm/arm/vgic.c
>>> +++ b/virt/kvm/arm/vgic.c
>>> @@ -1533,7 +1533,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
>>>  }
>>>  
>>>  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>>> -				  unsigned int irq_num, bool level)
>>> +				   struct irq_phys_map *map,
>>> +				   unsigned int irq_num, bool level)
>>>  {
>> In vgic_update_irq_pending, I needed to modify the following line and
>> add the "&& !map".
>>
>>         if (!vgic_validate_injection(vcpu, irq_num, level) && !map) {
>>
>> Without that, the level being not properly modeled for level sensitive
>> forwarded IRQs, the 2d injection fails.
> 
> Ah! Is that because we never see the line being reset to zero, and the
> VGIC still sees the line as pending at the distributor level?
yes indeed

Eric
> 
> Thanks,
> 
> 	M.
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier Aug. 5, 2015, 9:44 a.m. UTC | #7
On 05/08/15 08:32, Eric Auger wrote:
> Hi Marc,
> On 08/04/2015 06:44 PM, Marc Zyngier wrote:
>> On 04/08/15 17:21, Eric Auger wrote:
>>> Hi Marc,
>>> On 07/24/2015 05:55 PM, Marc Zyngier wrote:
>>>> Virtual interrupts mapped to a HW interrupt should only be triggered
>>>> from inside the kernel. Otherwise, you could end up confusing the
>>>> kernel (and the GIC's) state machine.
>>>>
>>>> Rearrange the injection path so that kvm_vgic_inject_irq is
>>>> used for non-mapped interrupts, and kvm_vgic_inject_mapped_irq is
>>>> used for mapped interrupts. The latter should only be called from
>>>> inside the kernel (timer, VFIO).
>>> nit: I would replace VFIO by irqfd.
>>> VFIO just triggers the eventfd/irqfd. This is KVM/irqfd that injects the
>>> virtual irq upon the irqfd signaling and he irqfd adaptation/ARM
>>> currently is implemented in vgic.c
>>
>> Ah, thanks for reminding me of the right terminology, I tend to think of
>> it as one big bag of nasty tricks... ;-)
>>
>> I'll update the commit message.
>>
>>>>
>>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>>> ---
>>>>  include/kvm/arm_vgic.h |  2 +
>>>>  virt/kvm/arm/vgic.c    | 99 ++++++++++++++++++++++++++++++++++----------------
>>>>  2 files changed, 70 insertions(+), 31 deletions(-)
>>>>
>>>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>>>> index 7306b4b..f6bfd79 100644
>>>> --- a/include/kvm/arm_vgic.h
>>>> +++ b/include/kvm/arm_vgic.h
>>>> @@ -351,6 +351,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
>>>>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
>>>>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>>>>  			bool level);
>>>> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
>>>> +			       struct irq_phys_map *map, bool level);
>>>>  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
>>>>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>>>>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
>>>> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
>>>> index 3f7b690..e40ef70 100644
>>>> --- a/virt/kvm/arm/vgic.c
>>>> +++ b/virt/kvm/arm/vgic.c
>>>> @@ -1533,7 +1533,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
>>>>  }
>>>>  
>>>>  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>>>> -				  unsigned int irq_num, bool level)
>>>> +				   struct irq_phys_map *map,
>>>> +				   unsigned int irq_num, bool level)
>>>>  {
>>> In vgic_update_irq_pending, I needed to modify the following line and
>>> add the "&& !map".
>>>
>>>         if (!vgic_validate_injection(vcpu, irq_num, level) && !map) {
>>>
>>> Without that, the level being not properly modeled for level sensitive
>>> forwarded IRQs, the 2d injection fails.
>>
>> Ah! Is that because we never see the line being reset to zero, and the
>> VGIC still sees the line as pending at the distributor level?
> yes indeed

Then it is a bigger problem we need to solve, and your solution just
papers over the issue.

The main problem is that irqfd is essentially an edge-triggered
signalling. Fire and forget. Given that we're dealing with a level
triggered interrupt, we end up with the interrupt still marked as
pending (nobody took the signal down).

The usual way to get out of that mess in is to evaluate the state of the
level on EOI. But we can't trap on EOI for a HW interrupt.

So it raises the question: should we instead consider the HW pending
state instead of the software one for mapped interrupts? It is
expensive, but it feels more correct.

Thoughts?

	M.
Christoffer Dall Aug. 5, 2015, 10:53 a.m. UTC | #8
On Wed, Aug 05, 2015 at 10:44:09AM +0100, Marc Zyngier wrote:
> On 05/08/15 08:32, Eric Auger wrote:
> > Hi Marc,
> > On 08/04/2015 06:44 PM, Marc Zyngier wrote:
> >> On 04/08/15 17:21, Eric Auger wrote:
> >>> Hi Marc,
> >>> On 07/24/2015 05:55 PM, Marc Zyngier wrote:
> >>>> Virtual interrupts mapped to a HW interrupt should only be triggered
> >>>> from inside the kernel. Otherwise, you could end up confusing the
> >>>> kernel (and the GIC's) state machine.
> >>>>
> >>>> Rearrange the injection path so that kvm_vgic_inject_irq is
> >>>> used for non-mapped interrupts, and kvm_vgic_inject_mapped_irq is
> >>>> used for mapped interrupts. The latter should only be called from
> >>>> inside the kernel (timer, VFIO).
> >>> nit: I would replace VFIO by irqfd.
> >>> VFIO just triggers the eventfd/irqfd. This is KVM/irqfd that injects the
> >>> virtual irq upon the irqfd signaling and he irqfd adaptation/ARM
> >>> currently is implemented in vgic.c
> >>
> >> Ah, thanks for reminding me of the right terminology, I tend to think of
> >> it as one big bag of nasty tricks... ;-)
> >>
> >> I'll update the commit message.
> >>
> >>>>
> >>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> >>>> ---
> >>>>  include/kvm/arm_vgic.h |  2 +
> >>>>  virt/kvm/arm/vgic.c    | 99 ++++++++++++++++++++++++++++++++++----------------
> >>>>  2 files changed, 70 insertions(+), 31 deletions(-)
> >>>>
> >>>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> >>>> index 7306b4b..f6bfd79 100644
> >>>> --- a/include/kvm/arm_vgic.h
> >>>> +++ b/include/kvm/arm_vgic.h
> >>>> @@ -351,6 +351,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
> >>>>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
> >>>>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
> >>>>  			bool level);
> >>>> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
> >>>> +			       struct irq_phys_map *map, bool level);
> >>>>  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
> >>>>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
> >>>>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
> >>>> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> >>>> index 3f7b690..e40ef70 100644
> >>>> --- a/virt/kvm/arm/vgic.c
> >>>> +++ b/virt/kvm/arm/vgic.c
> >>>> @@ -1533,7 +1533,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
> >>>>  }
> >>>>  
> >>>>  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
> >>>> -				  unsigned int irq_num, bool level)
> >>>> +				   struct irq_phys_map *map,
> >>>> +				   unsigned int irq_num, bool level)
> >>>>  {
> >>> In vgic_update_irq_pending, I needed to modify the following line and
> >>> add the "&& !map".
> >>>
> >>>         if (!vgic_validate_injection(vcpu, irq_num, level) && !map) {
> >>>
> >>> Without that, the level being not properly modeled for level sensitive
> >>> forwarded IRQs, the 2d injection fails.
> >>
> >> Ah! Is that because we never see the line being reset to zero, and the
> >> VGIC still sees the line as pending at the distributor level?
> > yes indeed
> 
> Then it is a bigger problem we need to solve, and your solution just
> papers over the issue.
> 
> The main problem is that irqfd is essentially an edge-triggered
> signalling. Fire and forget. Given that we're dealing with a level
> triggered interrupt, we end up with the interrupt still marked as
> pending (nobody took the signal down).
> 
> The usual way to get out of that mess in is to evaluate the state of the
> level on EOI. But we can't trap on EOI for a HW interrupt.
> 
> So it raises the question: should we instead consider the HW pending
> state instead of the software one for mapped interrupts? It is
> expensive, but it feels more correct.
> 
I thought we already covered this at LCA.  For mapped interrupts
(forwarded) we should never consider the software pending state, because
that state is managed by the hardware.  Or am I confusing concepts here?

Thanks,
-Christoffer
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Auger Aug. 5, 2015, 11:47 a.m. UTC | #9
On 08/05/2015 12:53 PM, Christoffer Dall wrote:
> On Wed, Aug 05, 2015 at 10:44:09AM +0100, Marc Zyngier wrote:
>> On 05/08/15 08:32, Eric Auger wrote:
>>> Hi Marc,
>>> On 08/04/2015 06:44 PM, Marc Zyngier wrote:
>>>> On 04/08/15 17:21, Eric Auger wrote:
>>>>> Hi Marc,
>>>>> On 07/24/2015 05:55 PM, Marc Zyngier wrote:
>>>>>> Virtual interrupts mapped to a HW interrupt should only be triggered
>>>>>> from inside the kernel. Otherwise, you could end up confusing the
>>>>>> kernel (and the GIC's) state machine.
>>>>>>
>>>>>> Rearrange the injection path so that kvm_vgic_inject_irq is
>>>>>> used for non-mapped interrupts, and kvm_vgic_inject_mapped_irq is
>>>>>> used for mapped interrupts. The latter should only be called from
>>>>>> inside the kernel (timer, VFIO).
>>>>> nit: I would replace VFIO by irqfd.
>>>>> VFIO just triggers the eventfd/irqfd. This is KVM/irqfd that injects the
>>>>> virtual irq upon the irqfd signaling and he irqfd adaptation/ARM
>>>>> currently is implemented in vgic.c
>>>>
>>>> Ah, thanks for reminding me of the right terminology, I tend to think of
>>>> it as one big bag of nasty tricks... ;-)
>>>>
>>>> I'll update the commit message.
>>>>
>>>>>>
>>>>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>>>>> ---
>>>>>>  include/kvm/arm_vgic.h |  2 +
>>>>>>  virt/kvm/arm/vgic.c    | 99 ++++++++++++++++++++++++++++++++++----------------
>>>>>>  2 files changed, 70 insertions(+), 31 deletions(-)
>>>>>>
>>>>>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>>>>>> index 7306b4b..f6bfd79 100644
>>>>>> --- a/include/kvm/arm_vgic.h
>>>>>> +++ b/include/kvm/arm_vgic.h
>>>>>> @@ -351,6 +351,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
>>>>>>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
>>>>>>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>>>>>>  			bool level);
>>>>>> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
>>>>>> +			       struct irq_phys_map *map, bool level);
>>>>>>  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
>>>>>>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>>>>>>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
>>>>>> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
>>>>>> index 3f7b690..e40ef70 100644
>>>>>> --- a/virt/kvm/arm/vgic.c
>>>>>> +++ b/virt/kvm/arm/vgic.c
>>>>>> @@ -1533,7 +1533,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
>>>>>>  }
>>>>>>  
>>>>>>  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>>>>>> -				  unsigned int irq_num, bool level)
>>>>>> +				   struct irq_phys_map *map,
>>>>>> +				   unsigned int irq_num, bool level)
>>>>>>  {
>>>>> In vgic_update_irq_pending, I needed to modify the following line and
>>>>> add the "&& !map".
>>>>>
>>>>>         if (!vgic_validate_injection(vcpu, irq_num, level) && !map) {
>>>>>
>>>>> Without that, the level being not properly modeled for level sensitive
>>>>> forwarded IRQs, the 2d injection fails.
>>>>
>>>> Ah! Is that because we never see the line being reset to zero, and the
>>>> VGIC still sees the line as pending at the distributor level?
>>> yes indeed
>>
>> Then it is a bigger problem we need to solve, and your solution just
>> papers over the issue.
>>
>> The main problem is that irqfd is essentially an edge-triggered
>> signalling. Fire and forget. Given that we're dealing with a level
>> triggered interrupt, we end up with the interrupt still marked as
>> pending (nobody took the signal down).
this does not really relate to irqfd: irqfd also comes with the concept
of resamplefd. in case the IRQ is not forwarded, this is the
irqfd_resampler_ack function that toggles the IRQ down (eventfd.c). Its
execution is triggered in vgic_process_maintenance. With forwarding the
EOI is not trappable anymore so this disappears.


>> The usual way to get out of that mess in is to evaluate the state of the
>> level on EOI. But we can't trap on EOI for a HW interrupt.
>>
>> So it raises the question: should we instead consider the HW pending
>> state instead of the software one for mapped interrupts? It is
>> expensive, but it feels more correct.
>>
> I thought we already covered this at LCA.  For mapped interrupts
> (forwarded) we should never consider the software pending state, because
> that state is managed by the hardware.  Or am I confusing concepts here?

Yes we discussed we should bypass most of the SW states.

in my previous integration I proposed a patch "KVM: arm: vgic: fix state
machine for forwarded IRQ". What's wrong with the below approach?


    Fix multiple injection of level sensitive forwarded IRQs.
    With current code, the second injection fails since the
    state bitmaps are not reset (process_maintenance is not
    called anymore).

    New implementation follows those principles:
    - A forwarded IRQ only can be sampled when it is pending
    - when queueing the IRQ (programming the LR), the pending state
      is removed as for edge sensitive IRQs
    - an injection of a forwarded IRQ is considered always valid since
      coming from the HW and level always is 1.

Eric

> 
> Thanks,
> -Christoffer
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoffer Dall Aug. 5, 2015, 1:47 p.m. UTC | #10
On Wed, Aug 05, 2015 at 01:47:27PM +0200, Eric Auger wrote:
> On 08/05/2015 12:53 PM, Christoffer Dall wrote:
> > On Wed, Aug 05, 2015 at 10:44:09AM +0100, Marc Zyngier wrote:
> >> On 05/08/15 08:32, Eric Auger wrote:
> >>> Hi Marc,
> >>> On 08/04/2015 06:44 PM, Marc Zyngier wrote:
> >>>> On 04/08/15 17:21, Eric Auger wrote:
> >>>>> Hi Marc,
> >>>>> On 07/24/2015 05:55 PM, Marc Zyngier wrote:
> >>>>>> Virtual interrupts mapped to a HW interrupt should only be triggered
> >>>>>> from inside the kernel. Otherwise, you could end up confusing the
> >>>>>> kernel (and the GIC's) state machine.
> >>>>>>
> >>>>>> Rearrange the injection path so that kvm_vgic_inject_irq is
> >>>>>> used for non-mapped interrupts, and kvm_vgic_inject_mapped_irq is
> >>>>>> used for mapped interrupts. The latter should only be called from
> >>>>>> inside the kernel (timer, VFIO).
> >>>>> nit: I would replace VFIO by irqfd.
> >>>>> VFIO just triggers the eventfd/irqfd. This is KVM/irqfd that injects the
> >>>>> virtual irq upon the irqfd signaling and he irqfd adaptation/ARM
> >>>>> currently is implemented in vgic.c
> >>>>
> >>>> Ah, thanks for reminding me of the right terminology, I tend to think of
> >>>> it as one big bag of nasty tricks... ;-)
> >>>>
> >>>> I'll update the commit message.
> >>>>
> >>>>>>
> >>>>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> >>>>>> ---
> >>>>>>  include/kvm/arm_vgic.h |  2 +
> >>>>>>  virt/kvm/arm/vgic.c    | 99 ++++++++++++++++++++++++++++++++++----------------
> >>>>>>  2 files changed, 70 insertions(+), 31 deletions(-)
> >>>>>>
> >>>>>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> >>>>>> index 7306b4b..f6bfd79 100644
> >>>>>> --- a/include/kvm/arm_vgic.h
> >>>>>> +++ b/include/kvm/arm_vgic.h
> >>>>>> @@ -351,6 +351,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
> >>>>>>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
> >>>>>>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
> >>>>>>  			bool level);
> >>>>>> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
> >>>>>> +			       struct irq_phys_map *map, bool level);
> >>>>>>  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
> >>>>>>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
> >>>>>>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
> >>>>>> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> >>>>>> index 3f7b690..e40ef70 100644
> >>>>>> --- a/virt/kvm/arm/vgic.c
> >>>>>> +++ b/virt/kvm/arm/vgic.c
> >>>>>> @@ -1533,7 +1533,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
> >>>>>>  }
> >>>>>>  
> >>>>>>  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
> >>>>>> -				  unsigned int irq_num, bool level)
> >>>>>> +				   struct irq_phys_map *map,
> >>>>>> +				   unsigned int irq_num, bool level)
> >>>>>>  {
> >>>>> In vgic_update_irq_pending, I needed to modify the following line and
> >>>>> add the "&& !map".
> >>>>>
> >>>>>         if (!vgic_validate_injection(vcpu, irq_num, level) && !map) {
> >>>>>
> >>>>> Without that, the level being not properly modeled for level sensitive
> >>>>> forwarded IRQs, the 2d injection fails.
> >>>>
> >>>> Ah! Is that because we never see the line being reset to zero, and the
> >>>> VGIC still sees the line as pending at the distributor level?
> >>> yes indeed
> >>
> >> Then it is a bigger problem we need to solve, and your solution just
> >> papers over the issue.
> >>
> >> The main problem is that irqfd is essentially an edge-triggered
> >> signalling. Fire and forget. Given that we're dealing with a level
> >> triggered interrupt, we end up with the interrupt still marked as
> >> pending (nobody took the signal down).
> this does not really relate to irqfd: irqfd also comes with the concept
> of resamplefd. in case the IRQ is not forwarded, this is the
> irqfd_resampler_ack function that toggles the IRQ down (eventfd.c). Its
> execution is triggered in vgic_process_maintenance. With forwarding the
> EOI is not trappable anymore so this disappears.
> 
> 
> >> The usual way to get out of that mess in is to evaluate the state of the
> >> level on EOI. But we can't trap on EOI for a HW interrupt.
> >>
> >> So it raises the question: should we instead consider the HW pending
> >> state instead of the software one for mapped interrupts? It is
> >> expensive, but it feels more correct.
> >>
> > I thought we already covered this at LCA.  For mapped interrupts
> > (forwarded) we should never consider the software pending state, because
> > that state is managed by the hardware.  Or am I confusing concepts here?
> 
> Yes we discussed we should bypass most of the SW states.
> 
> in my previous integration I proposed a patch "KVM: arm: vgic: fix state
> machine for forwarded IRQ". What's wrong with the below approach?
> 
> 
>     Fix multiple injection of level sensitive forwarded IRQs.
>     With current code, the second injection fails since the
>     state bitmaps are not reset (process_maintenance is not
>     called anymore).
> 
>     New implementation follows those principles:
>     - A forwarded IRQ only can be sampled when it is pending
>     - when queueing the IRQ (programming the LR), the pending state
>       is removed as for edge sensitive IRQs
>     - an injection of a forwarded IRQ is considered always valid since
>       coming from the HW and level always is 1.
> 
I don't see anything wrong, and I thought this was what we discussed.

-Christoffer
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier Aug. 6, 2015, 4:44 p.m. UTC | #11
On 05/08/15 14:47, Christoffer Dall wrote:
> On Wed, Aug 05, 2015 at 01:47:27PM +0200, Eric Auger wrote:
>> On 08/05/2015 12:53 PM, Christoffer Dall wrote:
>>> On Wed, Aug 05, 2015 at 10:44:09AM +0100, Marc Zyngier wrote:
>>>> On 05/08/15 08:32, Eric Auger wrote:
>>>>> Hi Marc,
>>>>> On 08/04/2015 06:44 PM, Marc Zyngier wrote:
>>>>>> On 04/08/15 17:21, Eric Auger wrote:
>>>>>>> Hi Marc,
>>>>>>> On 07/24/2015 05:55 PM, Marc Zyngier wrote:
>>>>>>>> Virtual interrupts mapped to a HW interrupt should only be triggered
>>>>>>>> from inside the kernel. Otherwise, you could end up confusing the
>>>>>>>> kernel (and the GIC's) state machine.
>>>>>>>>
>>>>>>>> Rearrange the injection path so that kvm_vgic_inject_irq is
>>>>>>>> used for non-mapped interrupts, and kvm_vgic_inject_mapped_irq is
>>>>>>>> used for mapped interrupts. The latter should only be called from
>>>>>>>> inside the kernel (timer, VFIO).
>>>>>>> nit: I would replace VFIO by irqfd.
>>>>>>> VFIO just triggers the eventfd/irqfd. This is KVM/irqfd that injects the
>>>>>>> virtual irq upon the irqfd signaling and he irqfd adaptation/ARM
>>>>>>> currently is implemented in vgic.c
>>>>>>
>>>>>> Ah, thanks for reminding me of the right terminology, I tend to think of
>>>>>> it as one big bag of nasty tricks... ;-)
>>>>>>
>>>>>> I'll update the commit message.
>>>>>>
>>>>>>>>
>>>>>>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>>>>>>> ---
>>>>>>>>  include/kvm/arm_vgic.h |  2 +
>>>>>>>>  virt/kvm/arm/vgic.c    | 99 ++++++++++++++++++++++++++++++++++----------------
>>>>>>>>  2 files changed, 70 insertions(+), 31 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>>>>>>>> index 7306b4b..f6bfd79 100644
>>>>>>>> --- a/include/kvm/arm_vgic.h
>>>>>>>> +++ b/include/kvm/arm_vgic.h
>>>>>>>> @@ -351,6 +351,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
>>>>>>>>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
>>>>>>>>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>>>>>>>>  			bool level);
>>>>>>>> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
>>>>>>>> +			       struct irq_phys_map *map, bool level);
>>>>>>>>  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
>>>>>>>>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>>>>>>>>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
>>>>>>>> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
>>>>>>>> index 3f7b690..e40ef70 100644
>>>>>>>> --- a/virt/kvm/arm/vgic.c
>>>>>>>> +++ b/virt/kvm/arm/vgic.c
>>>>>>>> @@ -1533,7 +1533,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
>>>>>>>>  }
>>>>>>>>  
>>>>>>>>  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>>>>>>>> -				  unsigned int irq_num, bool level)
>>>>>>>> +				   struct irq_phys_map *map,
>>>>>>>> +				   unsigned int irq_num, bool level)
>>>>>>>>  {
>>>>>>> In vgic_update_irq_pending, I needed to modify the following line and
>>>>>>> add the "&& !map".
>>>>>>>
>>>>>>>         if (!vgic_validate_injection(vcpu, irq_num, level) && !map) {
>>>>>>>
>>>>>>> Without that, the level being not properly modeled for level sensitive
>>>>>>> forwarded IRQs, the 2d injection fails.
>>>>>>
>>>>>> Ah! Is that because we never see the line being reset to zero, and the
>>>>>> VGIC still sees the line as pending at the distributor level?
>>>>> yes indeed
>>>>
>>>> Then it is a bigger problem we need to solve, and your solution just
>>>> papers over the issue.
>>>>
>>>> The main problem is that irqfd is essentially an edge-triggered
>>>> signalling. Fire and forget. Given that we're dealing with a level
>>>> triggered interrupt, we end up with the interrupt still marked as
>>>> pending (nobody took the signal down).
>> this does not really relate to irqfd: irqfd also comes with the concept
>> of resamplefd. in case the IRQ is not forwarded, this is the
>> irqfd_resampler_ack function that toggles the IRQ down (eventfd.c). Its
>> execution is triggered in vgic_process_maintenance. With forwarding the
>> EOI is not trappable anymore so this disappears.
>>
>>
>>>> The usual way to get out of that mess in is to evaluate the state of the
>>>> level on EOI. But we can't trap on EOI for a HW interrupt.
>>>>
>>>> So it raises the question: should we instead consider the HW pending
>>>> state instead of the software one for mapped interrupts? It is
>>>> expensive, but it feels more correct.
>>>>
>>> I thought we already covered this at LCA.  For mapped interrupts
>>> (forwarded) we should never consider the software pending state, because
>>> that state is managed by the hardware.  Or am I confusing concepts here?
>>
>> Yes we discussed we should bypass most of the SW states.
>>
>> in my previous integration I proposed a patch "KVM: arm: vgic: fix state
>> machine for forwarded IRQ". What's wrong with the below approach?
>>
>>
>>     Fix multiple injection of level sensitive forwarded IRQs.
>>     With current code, the second injection fails since the
>>     state bitmaps are not reset (process_maintenance is not
>>     called anymore).
>>
>>     New implementation follows those principles:
>>     - A forwarded IRQ only can be sampled when it is pending
>>     - when queueing the IRQ (programming the LR), the pending state
>>       is removed as for edge sensitive IRQs
>>     - an injection of a forwarded IRQ is considered always valid since
>>       coming from the HW and level always is 1.
>>
> I don't see anything wrong, and I thought this was what we discussed.

I think I just lost track of what we've discussed at LCA. Ignore me.

I'll repost the updated first 10 patches tomorrow. Eric, are you willing
to take custody of patch 11 as part of one of your series?

Thanks,

	M.
Eric Auger Aug. 7, 2015, 7:05 a.m. UTC | #12
Hi Marc,
On 08/06/2015 06:44 PM, Marc Zyngier wrote:
> On 05/08/15 14:47, Christoffer Dall wrote:
>> On Wed, Aug 05, 2015 at 01:47:27PM +0200, Eric Auger wrote:
>>> On 08/05/2015 12:53 PM, Christoffer Dall wrote:
>>>> On Wed, Aug 05, 2015 at 10:44:09AM +0100, Marc Zyngier wrote:
>>>>> On 05/08/15 08:32, Eric Auger wrote:
>>>>>> Hi Marc,
>>>>>> On 08/04/2015 06:44 PM, Marc Zyngier wrote:
>>>>>>> On 04/08/15 17:21, Eric Auger wrote:
>>>>>>>> Hi Marc,
>>>>>>>> On 07/24/2015 05:55 PM, Marc Zyngier wrote:
>>>>>>>>> Virtual interrupts mapped to a HW interrupt should only be triggered
>>>>>>>>> from inside the kernel. Otherwise, you could end up confusing the
>>>>>>>>> kernel (and the GIC's) state machine.
>>>>>>>>>
>>>>>>>>> Rearrange the injection path so that kvm_vgic_inject_irq is
>>>>>>>>> used for non-mapped interrupts, and kvm_vgic_inject_mapped_irq is
>>>>>>>>> used for mapped interrupts. The latter should only be called from
>>>>>>>>> inside the kernel (timer, VFIO).
>>>>>>>> nit: I would replace VFIO by irqfd.
>>>>>>>> VFIO just triggers the eventfd/irqfd. This is KVM/irqfd that injects the
>>>>>>>> virtual irq upon the irqfd signaling and he irqfd adaptation/ARM
>>>>>>>> currently is implemented in vgic.c
>>>>>>>
>>>>>>> Ah, thanks for reminding me of the right terminology, I tend to think of
>>>>>>> it as one big bag of nasty tricks... ;-)
>>>>>>>
>>>>>>> I'll update the commit message.
>>>>>>>
>>>>>>>>>
>>>>>>>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>>>>>>>> ---
>>>>>>>>>  include/kvm/arm_vgic.h |  2 +
>>>>>>>>>  virt/kvm/arm/vgic.c    | 99 ++++++++++++++++++++++++++++++++++----------------
>>>>>>>>>  2 files changed, 70 insertions(+), 31 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>>>>>>>>> index 7306b4b..f6bfd79 100644
>>>>>>>>> --- a/include/kvm/arm_vgic.h
>>>>>>>>> +++ b/include/kvm/arm_vgic.h
>>>>>>>>> @@ -351,6 +351,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
>>>>>>>>>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
>>>>>>>>>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>>>>>>>>>  			bool level);
>>>>>>>>> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
>>>>>>>>> +			       struct irq_phys_map *map, bool level);
>>>>>>>>>  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
>>>>>>>>>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>>>>>>>>>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
>>>>>>>>> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
>>>>>>>>> index 3f7b690..e40ef70 100644
>>>>>>>>> --- a/virt/kvm/arm/vgic.c
>>>>>>>>> +++ b/virt/kvm/arm/vgic.c
>>>>>>>>> @@ -1533,7 +1533,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
>>>>>>>>>  }
>>>>>>>>>  
>>>>>>>>>  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>>>>>>>>> -				  unsigned int irq_num, bool level)
>>>>>>>>> +				   struct irq_phys_map *map,
>>>>>>>>> +				   unsigned int irq_num, bool level)
>>>>>>>>>  {
>>>>>>>> In vgic_update_irq_pending, I needed to modify the following line and
>>>>>>>> add the "&& !map".
>>>>>>>>
>>>>>>>>         if (!vgic_validate_injection(vcpu, irq_num, level) && !map) {
>>>>>>>>
>>>>>>>> Without that, the level being not properly modeled for level sensitive
>>>>>>>> forwarded IRQs, the 2d injection fails.
>>>>>>>
>>>>>>> Ah! Is that because we never see the line being reset to zero, and the
>>>>>>> VGIC still sees the line as pending at the distributor level?
>>>>>> yes indeed
>>>>>
>>>>> Then it is a bigger problem we need to solve, and your solution just
>>>>> papers over the issue.
>>>>>
>>>>> The main problem is that irqfd is essentially an edge-triggered
>>>>> signalling. Fire and forget. Given that we're dealing with a level
>>>>> triggered interrupt, we end up with the interrupt still marked as
>>>>> pending (nobody took the signal down).
>>> this does not really relate to irqfd: irqfd also comes with the concept
>>> of resamplefd. in case the IRQ is not forwarded, this is the
>>> irqfd_resampler_ack function that toggles the IRQ down (eventfd.c). Its
>>> execution is triggered in vgic_process_maintenance. With forwarding the
>>> EOI is not trappable anymore so this disappears.
>>>
>>>
>>>>> The usual way to get out of that mess in is to evaluate the state of the
>>>>> level on EOI. But we can't trap on EOI for a HW interrupt.
>>>>>
>>>>> So it raises the question: should we instead consider the HW pending
>>>>> state instead of the software one for mapped interrupts? It is
>>>>> expensive, but it feels more correct.
>>>>>
>>>> I thought we already covered this at LCA.  For mapped interrupts
>>>> (forwarded) we should never consider the software pending state, because
>>>> that state is managed by the hardware.  Or am I confusing concepts here?
>>>
>>> Yes we discussed we should bypass most of the SW states.
>>>
>>> in my previous integration I proposed a patch "KVM: arm: vgic: fix state
>>> machine for forwarded IRQ". What's wrong with the below approach?
>>>
>>>
>>>     Fix multiple injection of level sensitive forwarded IRQs.
>>>     With current code, the second injection fails since the
>>>     state bitmaps are not reset (process_maintenance is not
>>>     called anymore).
>>>
>>>     New implementation follows those principles:
>>>     - A forwarded IRQ only can be sampled when it is pending
>>>     - when queueing the IRQ (programming the LR), the pending state
>>>       is removed as for edge sensitive IRQs
>>>     - an injection of a forwarded IRQ is considered always valid since
>>>       coming from the HW and level always is 1.
>>>
>> I don't see anything wrong, and I thought this was what we discussed.
> 
> I think I just lost track of what we've discussed at LCA. Ignore me.
> 
> I'll repost the updated first 10 patches tomorrow. Eric, are you willing
> to take custody of patch 11 as part of one of your series?
Yes no problem I can take this over if you prefer. I can easily test it.
May I put it in the irq forwarding series?

Best Regards

Eric
> 
> Thanks,
> 
> 	M.
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier Aug. 7, 2015, 8:29 a.m. UTC | #13
On 07/08/15 08:05, Eric Auger wrote:
> Hi Marc,
> On 08/06/2015 06:44 PM, Marc Zyngier wrote:
>> On 05/08/15 14:47, Christoffer Dall wrote:
>>> On Wed, Aug 05, 2015 at 01:47:27PM +0200, Eric Auger wrote:
>>>> On 08/05/2015 12:53 PM, Christoffer Dall wrote:
>>>>> On Wed, Aug 05, 2015 at 10:44:09AM +0100, Marc Zyngier wrote:
>>>>>> On 05/08/15 08:32, Eric Auger wrote:
>>>>>>> Hi Marc,
>>>>>>> On 08/04/2015 06:44 PM, Marc Zyngier wrote:
>>>>>>>> On 04/08/15 17:21, Eric Auger wrote:
>>>>>>>>> Hi Marc,
>>>>>>>>> On 07/24/2015 05:55 PM, Marc Zyngier wrote:
>>>>>>>>>> Virtual interrupts mapped to a HW interrupt should only be triggered
>>>>>>>>>> from inside the kernel. Otherwise, you could end up confusing the
>>>>>>>>>> kernel (and the GIC's) state machine.
>>>>>>>>>>
>>>>>>>>>> Rearrange the injection path so that kvm_vgic_inject_irq is
>>>>>>>>>> used for non-mapped interrupts, and kvm_vgic_inject_mapped_irq is
>>>>>>>>>> used for mapped interrupts. The latter should only be called from
>>>>>>>>>> inside the kernel (timer, VFIO).
>>>>>>>>> nit: I would replace VFIO by irqfd.
>>>>>>>>> VFIO just triggers the eventfd/irqfd. This is KVM/irqfd that injects the
>>>>>>>>> virtual irq upon the irqfd signaling and he irqfd adaptation/ARM
>>>>>>>>> currently is implemented in vgic.c
>>>>>>>>
>>>>>>>> Ah, thanks for reminding me of the right terminology, I tend to think of
>>>>>>>> it as one big bag of nasty tricks... ;-)
>>>>>>>>
>>>>>>>> I'll update the commit message.
>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>>>>>>>>> ---
>>>>>>>>>>  include/kvm/arm_vgic.h |  2 +
>>>>>>>>>>  virt/kvm/arm/vgic.c    | 99 ++++++++++++++++++++++++++++++++++----------------
>>>>>>>>>>  2 files changed, 70 insertions(+), 31 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>>>>>>>>>> index 7306b4b..f6bfd79 100644
>>>>>>>>>> --- a/include/kvm/arm_vgic.h
>>>>>>>>>> +++ b/include/kvm/arm_vgic.h
>>>>>>>>>> @@ -351,6 +351,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
>>>>>>>>>>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
>>>>>>>>>>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>>>>>>>>>>  			bool level);
>>>>>>>>>> +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
>>>>>>>>>> +			       struct irq_phys_map *map, bool level);
>>>>>>>>>>  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
>>>>>>>>>>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>>>>>>>>>>  int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
>>>>>>>>>> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
>>>>>>>>>> index 3f7b690..e40ef70 100644
>>>>>>>>>> --- a/virt/kvm/arm/vgic.c
>>>>>>>>>> +++ b/virt/kvm/arm/vgic.c
>>>>>>>>>> @@ -1533,7 +1533,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
>>>>>>>>>>  }
>>>>>>>>>>  
>>>>>>>>>>  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>>>>>>>>>> -				  unsigned int irq_num, bool level)
>>>>>>>>>> +				   struct irq_phys_map *map,
>>>>>>>>>> +				   unsigned int irq_num, bool level)
>>>>>>>>>>  {
>>>>>>>>> In vgic_update_irq_pending, I needed to modify the following line and
>>>>>>>>> add the "&& !map".
>>>>>>>>>
>>>>>>>>>         if (!vgic_validate_injection(vcpu, irq_num, level) && !map) {
>>>>>>>>>
>>>>>>>>> Without that, the level being not properly modeled for level sensitive
>>>>>>>>> forwarded IRQs, the 2d injection fails.
>>>>>>>>
>>>>>>>> Ah! Is that because we never see the line being reset to zero, and the
>>>>>>>> VGIC still sees the line as pending at the distributor level?
>>>>>>> yes indeed
>>>>>>
>>>>>> Then it is a bigger problem we need to solve, and your solution just
>>>>>> papers over the issue.
>>>>>>
>>>>>> The main problem is that irqfd is essentially an edge-triggered
>>>>>> signalling. Fire and forget. Given that we're dealing with a level
>>>>>> triggered interrupt, we end up with the interrupt still marked as
>>>>>> pending (nobody took the signal down).
>>>> this does not really relate to irqfd: irqfd also comes with the concept
>>>> of resamplefd. in case the IRQ is not forwarded, this is the
>>>> irqfd_resampler_ack function that toggles the IRQ down (eventfd.c). Its
>>>> execution is triggered in vgic_process_maintenance. With forwarding the
>>>> EOI is not trappable anymore so this disappears.
>>>>
>>>>
>>>>>> The usual way to get out of that mess in is to evaluate the state of the
>>>>>> level on EOI. But we can't trap on EOI for a HW interrupt.
>>>>>>
>>>>>> So it raises the question: should we instead consider the HW pending
>>>>>> state instead of the software one for mapped interrupts? It is
>>>>>> expensive, but it feels more correct.
>>>>>>
>>>>> I thought we already covered this at LCA.  For mapped interrupts
>>>>> (forwarded) we should never consider the software pending state, because
>>>>> that state is managed by the hardware.  Or am I confusing concepts here?
>>>>
>>>> Yes we discussed we should bypass most of the SW states.
>>>>
>>>> in my previous integration I proposed a patch "KVM: arm: vgic: fix state
>>>> machine for forwarded IRQ". What's wrong with the below approach?
>>>>
>>>>
>>>>     Fix multiple injection of level sensitive forwarded IRQs.
>>>>     With current code, the second injection fails since the
>>>>     state bitmaps are not reset (process_maintenance is not
>>>>     called anymore).
>>>>
>>>>     New implementation follows those principles:
>>>>     - A forwarded IRQ only can be sampled when it is pending
>>>>     - when queueing the IRQ (programming the LR), the pending state
>>>>       is removed as for edge sensitive IRQs
>>>>     - an injection of a forwarded IRQ is considered always valid since
>>>>       coming from the HW and level always is 1.
>>>>
>>> I don't see anything wrong, and I thought this was what we discussed.
>>
>> I think I just lost track of what we've discussed at LCA. Ignore me.
>>
>> I'll repost the updated first 10 patches tomorrow. Eric, are you willing
>> to take custody of patch 11 as part of one of your series?
> Yes no problem I can take this over if you prefer. I can easily test it.
> May I put it in the irq forwarding series?

That would be its natural location.

Thanks,

	M.
diff mbox

Patch

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 7306b4b..f6bfd79 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -351,6 +351,8 @@  void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level);
+int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
+			       struct irq_phys_map *map, bool level);
 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 3f7b690..e40ef70 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1533,7 +1533,8 @@  static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 }
 
 static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
-				  unsigned int irq_num, bool level)
+				   struct irq_phys_map *map,
+				   unsigned int irq_num, bool level)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct kvm_vcpu *vcpu;
@@ -1541,6 +1542,9 @@  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 	int enabled;
 	bool ret = true, can_inject = true;
 
+	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
+		return -EINVAL;
+
 	spin_lock(&dist->lock);
 
 	vcpu = kvm_get_vcpu(kvm, cpuid);
@@ -1603,14 +1607,42 @@  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 out:
 	spin_unlock(&dist->lock);
 
-	return ret ? cpuid : -EINVAL;
+	if (!ret) {
+		/* kick the specified vcpu */
+		kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid));
+	}
+
+	return 0;
+}
+
+static int vgic_lazy_init(struct kvm *kvm)
+{
+	int ret = 0;
+
+	if (unlikely(!vgic_initialized(kvm))) {
+		/*
+		 * We only provide the automatic initialization of the VGIC
+		 * for the legacy case of a GICv2. Any other type must
+		 * be explicitly initialized once setup with the respective
+		 * KVM device call.
+		 */
+		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
+			return -EBUSY;
+
+		mutex_lock(&kvm->lock);
+		ret = vgic_init(kvm);
+		mutex_unlock(&kvm->lock);
+	}
+
+	return ret;
 }
 
 /**
  * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
  * @kvm:     The VM structure pointer
  * @cpuid:   The CPU for PPIs
- * @irq_num: The IRQ number that is assigned to the device
+ * @irq_num: The IRQ number that is assigned to the device. This IRQ
+ *           must not be mapped to a HW interrupt.
  * @level:   Edge-triggered:  true:  to trigger the interrupt
  *			      false: to ignore the call
  *	     Level-sensitive  true:  activates an interrupt
@@ -1623,39 +1655,44 @@  out:
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level)
 {
-	int ret = 0;
-	int vcpu_id;
-
-	if (unlikely(!vgic_initialized(kvm))) {
-		/*
-		 * We only provide the automatic initialization of the VGIC
-		 * for the legacy case of a GICv2. Any other type must
-		 * be explicitly initialized once setup with the respective
-		 * KVM device call.
-		 */
-		if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) {
-			ret = -EBUSY;
-			goto out;
-		}
-		mutex_lock(&kvm->lock);
-		ret = vgic_init(kvm);
-		mutex_unlock(&kvm->lock);
+	struct irq_phys_map *map;
+	int ret;
 
-		if (ret)
-			goto out;
-	}
+	ret = vgic_lazy_init(kvm);
+	if (ret)
+		return ret;
 
-	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
+	map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num);
+	if (map)
 		return -EINVAL;
 
-	vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
-	if (vcpu_id >= 0) {
-		/* kick the specified vcpu */
-		kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
-	}
+	return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level);
+}
 
-out:
-	return ret;
+/**
+ * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic
+ * @kvm:     The VM structure pointer
+ * @cpuid:   The CPU for PPIs
+ * @map:     Pointer to a irq_phys_map structure describing the mapping
+ * @level:   Edge-triggered:  true:  to trigger the interrupt
+ *			      false: to ignore the call
+ *	     Level-sensitive  true:  activates an interrupt
+ *			      false: deactivates an interrupt
+ *
+ * The GIC is not concerned with devices being active-LOW or active-HIGH for
+ * level-sensitive interrupts.  You can think of the level parameter as 1
+ * being HIGH and 0 being LOW and all devices being active-HIGH.
+ */
+int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
+			       struct irq_phys_map *map, bool level)
+{
+	int ret;
+
+	ret = vgic_lazy_init(kvm);
+	if (ret)
+		return ret;
+
+	return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level);
 }
 
 static irqreturn_t vgic_maintenance_handler(int irq, void *data)