diff mbox

[v7,10/17] KVM: arm64: introduce new KVM ITS device

Message ID 20160628123230.26255-11-andre.przywara@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andre Przywara June 28, 2016, 12:32 p.m. UTC
Introduce a new KVM device that represents an ARM Interrupt Translation
Service (ITS) controller. Since there can be multiple of this per guest,
we can't piggy back on the existing GICv3 distributor device, but create
a new type of KVM device.
On the KVM_CREATE_DEVICE ioctl we allocate and initialize the ITS data
structure and store the pointer in the kvm_device data.
Upon an explicit init ioctl from userland (after having setup the MMIO
address) we register the handlers with the kvm_io_bus framework.
Any reference to an ITS thus has to go via this interface.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 Documentation/virtual/kvm/devices/arm-vgic.txt |  25 +++--
 arch/arm/kvm/arm.c                             |   1 +
 arch/arm64/include/uapi/asm/kvm.h              |   2 +
 include/kvm/vgic/vgic.h                        |   1 +
 include/uapi/linux/kvm.h                       |   2 +
 virt/kvm/arm/vgic/vgic-its.c                   | 127 ++++++++++++++++++++++++-
 virt/kvm/arm/vgic/vgic-kvm-device.c            |   7 +-
 virt/kvm/arm/vgic/vgic-mmio-v3.c               |   2 +-
 virt/kvm/arm/vgic/vgic.h                       |   8 ++
 9 files changed, 165 insertions(+), 10 deletions(-)

Comments

Eric Auger July 4, 2016, 9 a.m. UTC | #1
Hi Andre,

On 28/06/2016 14:32, Andre Przywara wrote:
> Introduce a new KVM device that represents an ARM Interrupt Translation
> Service (ITS) controller. Since there can be multiple of this per guest,
> we can't piggy back on the existing GICv3 distributor device, but create
> a new type of KVM device.
> On the KVM_CREATE_DEVICE ioctl we allocate and initialize the ITS data
> structure and store the pointer in the kvm_device data.
> Upon an explicit init ioctl from userland (after having setup the MMIO
> address) we register the handlers with the kvm_io_bus framework.
> Any reference to an ITS thus has to go via this interface.
> 
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  Documentation/virtual/kvm/devices/arm-vgic.txt |  25 +++--
>  arch/arm/kvm/arm.c                             |   1 +
>  arch/arm64/include/uapi/asm/kvm.h              |   2 +
>  include/kvm/vgic/vgic.h                        |   1 +
>  include/uapi/linux/kvm.h                       |   2 +
>  virt/kvm/arm/vgic/vgic-its.c                   | 127 ++++++++++++++++++++++++-
>  virt/kvm/arm/vgic/vgic-kvm-device.c            |   7 +-
>  virt/kvm/arm/vgic/vgic-mmio-v3.c               |   2 +-
>  virt/kvm/arm/vgic/vgic.h                       |   8 ++
>  9 files changed, 165 insertions(+), 10 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
> index 59541d4..89182f8 100644
> --- a/Documentation/virtual/kvm/devices/arm-vgic.txt
> +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
> @@ -4,16 +4,22 @@ ARM Virtual Generic Interrupt Controller (VGIC)
>  Device types supported:
>    KVM_DEV_TYPE_ARM_VGIC_V2     ARM Generic Interrupt Controller v2.0
>    KVM_DEV_TYPE_ARM_VGIC_V3     ARM Generic Interrupt Controller v3.0
> +  KVM_DEV_TYPE_ARM_VGIC_ITS    ARM Interrupt Translation Service Controller
>  
> -Only one VGIC instance may be instantiated through either this API or the
> -legacy KVM_CREATE_IRQCHIP api.  The created VGIC will act as the VM interrupt
> -controller, requiring emulated user-space devices to inject interrupts to the
> -VGIC instead of directly to CPUs.
> +Only one VGIC instance of the V2/V3 types above may be instantiated through
> +either this API or the legacy KVM_CREATE_IRQCHIP api.  The created VGIC will
> +act as the VM interrupt controller, requiring emulated user-space devices to
> +inject interrupts to the VGIC instead of directly to CPUs.
>  
>  Creating a guest GICv3 device requires a host GICv3 as well.
>  GICv3 implementations with hardware compatibility support allow a guest GICv2
>  as well.
>  
> +Creating a virtual ITS controller requires a host GICv3 (but does not depend
> +on having physical ITS controllers).
> +There can be multiple ITS controllers per guest, each of them has to have
> +a separate, non-overlapping MMIO region.
> +
>  Groups:
>    KVM_DEV_ARM_VGIC_GRP_ADDR
>    Attributes:
> @@ -39,6 +45,13 @@ Groups:
>        Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
>        This address needs to be 64K aligned.
>  
> +    KVM_VGIC_V3_ADDR_TYPE_ITS (rw, 64-bit)
> +      Base address in the guest physical address space of the GICv3 ITS
> +      control register frame. The ITS allows MSI(-X) interrupts to be
> +      injected into guests. This extension is optional. If the kernel
> +      does not support the ITS, the call returns -ENODEV.
> +      Only valid for KVM_DEV_TYPE_ARM_VGIC_ITS.
> +      This address needs to be 64K aligned and the region covers 128K.
>  
>    KVM_DEV_ARM_VGIC_GRP_DIST_REGS
>    Attributes:
> @@ -109,8 +122,8 @@ Groups:
>    KVM_DEV_ARM_VGIC_GRP_CTRL
>    Attributes:
>      KVM_DEV_ARM_VGIC_CTRL_INIT
> -      request the initialization of the VGIC, no additional parameter in
> -      kvm_device_attr.addr.
> +      request the initialization of the VGIC or ITS, no additional parameter
> +      in kvm_device_attr.addr.
>    Errors:
>      -ENXIO: VGIC not properly configured as required prior to calling
>       this attribute
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index a268c85..f4a953e 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -20,6 +20,7 @@
>  #include <linux/errno.h>
>  #include <linux/err.h>
>  #include <linux/kvm_host.h>
> +#include <linux/list.h>
>  #include <linux/module.h>
>  #include <linux/vmalloc.h>
>  #include <linux/fs.h>
> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> index f209ea1..f8c257b 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -87,9 +87,11 @@ struct kvm_regs {
>  /* Supported VGICv3 address types  */
>  #define KVM_VGIC_V3_ADDR_TYPE_DIST	2
>  #define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
> +#define KVM_VGIC_ITS_ADDR_TYPE		4
>  
>  #define KVM_VGIC_V3_DIST_SIZE		SZ_64K
>  #define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
> +#define KVM_VGIC_V3_ITS_SIZE		SZ_64K
>  
>  #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
>  #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
> diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h
> index 949a0e1..8cec203 100644
> --- a/include/kvm/vgic/vgic.h
> +++ b/include/kvm/vgic/vgic.h
> @@ -159,6 +159,7 @@ struct vgic_dist {
>  
>  	struct vgic_io_device	dist_iodev;
>  
> +	bool			has_its;
>  	/*
>  	 * Contains the address of the LPI configuration table.
>  	 * Since we report GICR_TYPER.CommonLPIAff as 0b00, we can share
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 7de96f5..d8c4c32 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1077,6 +1077,8 @@ enum kvm_device_type {
>  #define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
>  	KVM_DEV_TYPE_ARM_VGIC_V3,
>  #define KVM_DEV_TYPE_ARM_VGIC_V3	KVM_DEV_TYPE_ARM_VGIC_V3
> +	KVM_DEV_TYPE_ARM_VGIC_ITS,
> +#define KVM_DEV_TYPE_ARM_VGIC_ITS	KVM_DEV_TYPE_ARM_VGIC_ITS
>  	KVM_DEV_TYPE_MAX,
>  };
>  
> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
> index ab8d244..62d7484 100644
> --- a/virt/kvm/arm/vgic/vgic-its.c
> +++ b/virt/kvm/arm/vgic/vgic-its.c
> @@ -21,6 +21,7 @@
>  #include <linux/kvm.h>
>  #include <linux/kvm_host.h>
>  #include <linux/interrupt.h>
> +#include <linux/uaccess.h>
>  
>  #include <linux/irqchip/arm-gic-v3.h>
>  
> @@ -80,7 +81,7 @@ static struct vgic_register_region its_registers[] = {
>  		VGIC_ACCESS_32bit),
>  };
>  
> -int vits_register(struct kvm *kvm, struct vgic_its *its)
> +static int vits_register(struct kvm *kvm, struct vgic_its *its)
>  {
>  	struct vgic_io_device *iodev = &its->iodev;
>  	int ret;
> @@ -98,3 +99,127 @@ int vits_register(struct kvm *kvm, struct vgic_its *its)
>  
>  	return ret;
>  }
> +
> +static int vgic_its_create(struct kvm_device *dev, u32 type)
> +{
> +	struct vgic_its *its;
> +
> +	if (type != KVM_DEV_TYPE_ARM_VGIC_ITS)
> +		return -ENODEV;
> +
> +	its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL);
> +	if (!its)
> +		return -ENOMEM;
> +
> +	its->vgic_its_base = VGIC_ADDR_UNDEF;
> +
> +	dev->kvm->arch.vgic.has_its = true;
> +	its->enabled = false;
> +
> +	dev->private = its;
> +
> +	return 0;
> +}
> +
> +static void vgic_its_destroy(struct kvm_device *kvm_dev)
> +{
> +	struct vgic_its *its = kvm_dev->private;
> +
> +	kfree(its);
> +}
> +
> +static int vgic_its_has_attr(struct kvm_device *dev,
> +			     struct kvm_device_attr *attr)
> +{
> +	switch (attr->group) {
> +	case KVM_DEV_ARM_VGIC_GRP_ADDR:
> +		switch (attr->attr) {
> +		case KVM_VGIC_ITS_ADDR_TYPE:
> +			return 0;
> +		}
> +		break;
> +	case KVM_DEV_ARM_VGIC_GRP_CTRL:
> +		switch (attr->attr) {
> +		case KVM_DEV_ARM_VGIC_CTRL_INIT:
> +			return 0;
> +		}
> +		break;
> +	}
> +	return -ENXIO;
> +}
> +
> +static int vgic_its_set_attr(struct kvm_device *dev,
> +			     struct kvm_device_attr *attr)
> +{
> +	struct vgic_its *its = dev->private;
> +	int ret;
> +
> +	switch (attr->group) {
> +	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
> +		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
> +		unsigned long type = (unsigned long)attr->attr;
> +		u64 addr;
> +
> +		if (type != KVM_VGIC_ITS_ADDR_TYPE)
> +			return -ENODEV;
> +
> +		if (copy_from_user(&addr, uaddr, sizeof(addr)))
> +			return -EFAULT;
> +
> +		ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base,
> +					addr, SZ_64K);
> +		if (ret)
> +			return ret;
> +
> +		its->vgic_its_base = addr;
> +
> +		return 0;
> +	}
> +	case KVM_DEV_ARM_VGIC_GRP_CTRL:
> +		switch (attr->attr) {
> +		case KVM_DEV_ARM_VGIC_CTRL_INIT:
> +			return vits_register(dev->kvm, its);
This does not look homogeneous with the GICv2/3 code init sequence.

On vgic GICv2/v3, for KVM_DEV_ARM_VGIC_GRP_CTRL/KVM_DEV_ARM_VGIC_CTRL_INIT
we call vgic_init/kvm_vgic_dist_init/kvm_vgic_vcpu_init.

kvm_vgic_map_resources was responsible for registering the iodevs.
This was called on kvm_vcpu_first_run_init.

Here, for the ITS, you propose to do the iodev registration on
KVM_DEV_ARM_VGIC_CTRL_INIT.

From a QEMU integration point of view this means the init sequence used
for KVM GIC interrupt controllers cannot be reused for the ITS and, more
importantly, it is not straightforward to get the proper sequence
ordering (hence the previously reported case). Why not offer a
similar mechanism?

Thanks

Eric





> +		}
> +		break;
> +	}
> +	return -ENXIO;
> +}
> +
> +static int vgic_its_get_attr(struct kvm_device *dev,
> +			     struct kvm_device_attr *attr)
> +{
> +	switch (attr->group) {
> +	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
> +		struct vgic_its *its = dev->private;
> +		u64 addr = its->vgic_its_base;
> +		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
> +		unsigned long type = (unsigned long)attr->attr;
> +
> +		if (type != KVM_VGIC_ITS_ADDR_TYPE)
> +			return -ENODEV;
> +
> +		if (copy_to_user(uaddr, &addr, sizeof(addr)))
> +			return -EFAULT;
> +		break;
> +	default:
> +		return -ENXIO;
> +	}
> +	}
> +
> +	return 0;
> +}
> +
> +struct kvm_device_ops kvm_arm_vgic_its_ops = {
> +	.name = "kvm-arm-vgic-its",
> +	.create = vgic_its_create,
> +	.destroy = vgic_its_destroy,
> +	.set_attr = vgic_its_set_attr,
> +	.get_attr = vgic_its_get_attr,
> +	.has_attr = vgic_its_has_attr,
> +};
> +
> +int kvm_vgic_register_its_device(void)
> +{
> +	return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
> +				       KVM_DEV_TYPE_ARM_VGIC_ITS);
> +}
> diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
> index 2f24f13..1813f93 100644
> --- a/virt/kvm/arm/vgic/vgic-kvm-device.c
> +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
> @@ -21,8 +21,8 @@
>  
>  /* common helpers */
>  
> -static int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
> -			     phys_addr_t addr, phys_addr_t alignment)
> +int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
> +		      phys_addr_t addr, phys_addr_t alignment)
>  {
>  	if (addr & ~KVM_PHYS_MASK)
>  		return -E2BIG;
> @@ -223,6 +223,9 @@ int kvm_register_vgic_device(unsigned long type)
>  	case KVM_DEV_TYPE_ARM_VGIC_V3:
>  		ret = kvm_register_device_ops(&kvm_arm_vgic_v3_ops,
>  					      KVM_DEV_TYPE_ARM_VGIC_V3);
> +		if (ret)
> +			break;
> +		ret = kvm_vgic_register_its_device();
>  		break;
>  #endif
>  	}
> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> index 5fcc33a..9bcffa6 100644
> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> @@ -49,7 +49,7 @@ bool vgic_has_its(struct kvm *kvm)
>  	if (dist->vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3)
>  		return false;
>  
> -	return false;
> +	return dist->has_its;
>  }
>  
>  static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
> index 31807c1..9dc7207 100644
> --- a/virt/kvm/arm/vgic/vgic.h
> +++ b/virt/kvm/arm/vgic/vgic.h
> @@ -42,6 +42,9 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
>  bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq);
>  void vgic_kick_vcpus(struct kvm *kvm);
>  
> +int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
> +		      phys_addr_t addr, phys_addr_t alignment);
> +
>  void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu);
>  void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
>  void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
> @@ -73,6 +76,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info);
>  int vgic_v3_map_resources(struct kvm *kvm);
>  int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
>  bool vgic_has_its(struct kvm *kvm);
> +int kvm_vgic_register_its_device(void);
>  #else
>  static inline void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu)
>  {
> @@ -130,6 +134,10 @@ static inline bool vgic_has_its(struct kvm *kvm)
>  	return false;
>  }
>  
> +static inline int kvm_vgic_register_its_device(void)
> +{
> +	return -ENODEV;
> +}
>  #endif
>  
>  int kvm_register_vgic_device(unsigned long type);
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andre Przywara July 4, 2016, 2:05 p.m. UTC | #2
Hi,

On 04/07/16 10:00, Auger Eric wrote:
> Hi Andre,
> 
> On 28/06/2016 14:32, Andre Przywara wrote:
>> Introduce a new KVM device that represents an ARM Interrupt Translation
>> Service (ITS) controller. Since there can be multiple of this per guest,
>> we can't piggy back on the existing GICv3 distributor device, but create
>> a new type of KVM device.
>> On the KVM_CREATE_DEVICE ioctl we allocate and initialize the ITS data
>> structure and store the pointer in the kvm_device data.
>> Upon an explicit init ioctl from userland (after having setup the MMIO
>> address) we register the handlers with the kvm_io_bus framework.
>> Any reference to an ITS thus has to go via this interface.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>> ---
>>  Documentation/virtual/kvm/devices/arm-vgic.txt |  25 +++--
>>  arch/arm/kvm/arm.c                             |   1 +
>>  arch/arm64/include/uapi/asm/kvm.h              |   2 +
>>  include/kvm/vgic/vgic.h                        |   1 +
>>  include/uapi/linux/kvm.h                       |   2 +
>>  virt/kvm/arm/vgic/vgic-its.c                   | 127 ++++++++++++++++++++++++-
>>  virt/kvm/arm/vgic/vgic-kvm-device.c            |   7 +-
>>  virt/kvm/arm/vgic/vgic-mmio-v3.c               |   2 +-
>>  virt/kvm/arm/vgic/vgic.h                       |   8 ++
>>  9 files changed, 165 insertions(+), 10 deletions(-)
>>
>> diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
>> index 59541d4..89182f8 100644
>> --- a/Documentation/virtual/kvm/devices/arm-vgic.txt
>> +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
>> @@ -4,16 +4,22 @@ ARM Virtual Generic Interrupt Controller (VGIC)
>>  Device types supported:
>>    KVM_DEV_TYPE_ARM_VGIC_V2     ARM Generic Interrupt Controller v2.0
>>    KVM_DEV_TYPE_ARM_VGIC_V3     ARM Generic Interrupt Controller v3.0
>> +  KVM_DEV_TYPE_ARM_VGIC_ITS    ARM Interrupt Translation Service Controller
>>  
>> -Only one VGIC instance may be instantiated through either this API or the
>> -legacy KVM_CREATE_IRQCHIP api.  The created VGIC will act as the VM interrupt
>> -controller, requiring emulated user-space devices to inject interrupts to the
>> -VGIC instead of directly to CPUs.
>> +Only one VGIC instance of the V2/V3 types above may be instantiated through
>> +either this API or the legacy KVM_CREATE_IRQCHIP api.  The created VGIC will
>> +act as the VM interrupt controller, requiring emulated user-space devices to
>> +inject interrupts to the VGIC instead of directly to CPUs.
>>  
>>  Creating a guest GICv3 device requires a host GICv3 as well.
>>  GICv3 implementations with hardware compatibility support allow a guest GICv2
>>  as well.
>>  
>> +Creating a virtual ITS controller requires a host GICv3 (but does not depend
>> +on having physical ITS controllers).
>> +There can be multiple ITS controllers per guest, each of them has to have
>> +a separate, non-overlapping MMIO region.
>> +
>>  Groups:
>>    KVM_DEV_ARM_VGIC_GRP_ADDR
>>    Attributes:
>> @@ -39,6 +45,13 @@ Groups:
>>        Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
>>        This address needs to be 64K aligned.
>>  
>> +    KVM_VGIC_V3_ADDR_TYPE_ITS (rw, 64-bit)
>> +      Base address in the guest physical address space of the GICv3 ITS
>> +      control register frame. The ITS allows MSI(-X) interrupts to be
>> +      injected into guests. This extension is optional. If the kernel
>> +      does not support the ITS, the call returns -ENODEV.
>> +      Only valid for KVM_DEV_TYPE_ARM_VGIC_ITS.
>> +      This address needs to be 64K aligned and the region covers 128K.
>>  
>>    KVM_DEV_ARM_VGIC_GRP_DIST_REGS
>>    Attributes:
>> @@ -109,8 +122,8 @@ Groups:
>>    KVM_DEV_ARM_VGIC_GRP_CTRL
>>    Attributes:
>>      KVM_DEV_ARM_VGIC_CTRL_INIT
>> -      request the initialization of the VGIC, no additional parameter in
>> -      kvm_device_attr.addr.
>> +      request the initialization of the VGIC or ITS, no additional parameter
>> +      in kvm_device_attr.addr.
>>    Errors:
>>      -ENXIO: VGIC not properly configured as required prior to calling
>>       this attribute
>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>> index a268c85..f4a953e 100644
>> --- a/arch/arm/kvm/arm.c
>> +++ b/arch/arm/kvm/arm.c
>> @@ -20,6 +20,7 @@
>>  #include <linux/errno.h>
>>  #include <linux/err.h>
>>  #include <linux/kvm_host.h>
>> +#include <linux/list.h>
>>  #include <linux/module.h>
>>  #include <linux/vmalloc.h>
>>  #include <linux/fs.h>
>> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
>> index f209ea1..f8c257b 100644
>> --- a/arch/arm64/include/uapi/asm/kvm.h
>> +++ b/arch/arm64/include/uapi/asm/kvm.h
>> @@ -87,9 +87,11 @@ struct kvm_regs {
>>  /* Supported VGICv3 address types  */
>>  #define KVM_VGIC_V3_ADDR_TYPE_DIST	2
>>  #define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
>> +#define KVM_VGIC_ITS_ADDR_TYPE		4
>>  
>>  #define KVM_VGIC_V3_DIST_SIZE		SZ_64K
>>  #define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
>> +#define KVM_VGIC_V3_ITS_SIZE		SZ_64K
>>  
>>  #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
>>  #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
>> diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h
>> index 949a0e1..8cec203 100644
>> --- a/include/kvm/vgic/vgic.h
>> +++ b/include/kvm/vgic/vgic.h
>> @@ -159,6 +159,7 @@ struct vgic_dist {
>>  
>>  	struct vgic_io_device	dist_iodev;
>>  
>> +	bool			has_its;
>>  	/*
>>  	 * Contains the address of the LPI configuration table.
>>  	 * Since we report GICR_TYPER.CommonLPIAff as 0b00, we can share
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index 7de96f5..d8c4c32 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -1077,6 +1077,8 @@ enum kvm_device_type {
>>  #define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
>>  	KVM_DEV_TYPE_ARM_VGIC_V3,
>>  #define KVM_DEV_TYPE_ARM_VGIC_V3	KVM_DEV_TYPE_ARM_VGIC_V3
>> +	KVM_DEV_TYPE_ARM_VGIC_ITS,
>> +#define KVM_DEV_TYPE_ARM_VGIC_ITS	KVM_DEV_TYPE_ARM_VGIC_ITS
>>  	KVM_DEV_TYPE_MAX,
>>  };
>>  
>> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
>> index ab8d244..62d7484 100644
>> --- a/virt/kvm/arm/vgic/vgic-its.c
>> +++ b/virt/kvm/arm/vgic/vgic-its.c
>> @@ -21,6 +21,7 @@
>>  #include <linux/kvm.h>
>>  #include <linux/kvm_host.h>
>>  #include <linux/interrupt.h>
>> +#include <linux/uaccess.h>
>>  
>>  #include <linux/irqchip/arm-gic-v3.h>
>>  
>> @@ -80,7 +81,7 @@ static struct vgic_register_region its_registers[] = {
>>  		VGIC_ACCESS_32bit),
>>  };
>>  
>> -int vits_register(struct kvm *kvm, struct vgic_its *its)
>> +static int vits_register(struct kvm *kvm, struct vgic_its *its)
>>  {
>>  	struct vgic_io_device *iodev = &its->iodev;
>>  	int ret;
>> @@ -98,3 +99,127 @@ int vits_register(struct kvm *kvm, struct vgic_its *its)
>>  
>>  	return ret;
>>  }
>> +
>> +static int vgic_its_create(struct kvm_device *dev, u32 type)
>> +{
>> +	struct vgic_its *its;
>> +
>> +	if (type != KVM_DEV_TYPE_ARM_VGIC_ITS)
>> +		return -ENODEV;
>> +
>> +	its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL);
>> +	if (!its)
>> +		return -ENOMEM;
>> +
>> +	its->vgic_its_base = VGIC_ADDR_UNDEF;
>> +
>> +	dev->kvm->arch.vgic.has_its = true;
>> +	its->enabled = false;
>> +
>> +	dev->private = its;
>> +
>> +	return 0;
>> +}
>> +
>> +static void vgic_its_destroy(struct kvm_device *kvm_dev)
>> +{
>> +	struct vgic_its *its = kvm_dev->private;
>> +
>> +	kfree(its);
>> +}
>> +
>> +static int vgic_its_has_attr(struct kvm_device *dev,
>> +			     struct kvm_device_attr *attr)
>> +{
>> +	switch (attr->group) {
>> +	case KVM_DEV_ARM_VGIC_GRP_ADDR:
>> +		switch (attr->attr) {
>> +		case KVM_VGIC_ITS_ADDR_TYPE:
>> +			return 0;
>> +		}
>> +		break;
>> +	case KVM_DEV_ARM_VGIC_GRP_CTRL:
>> +		switch (attr->attr) {
>> +		case KVM_DEV_ARM_VGIC_CTRL_INIT:
>> +			return 0;
>> +		}
>> +		break;
>> +	}
>> +	return -ENXIO;
>> +}
>> +
>> +static int vgic_its_set_attr(struct kvm_device *dev,
>> +			     struct kvm_device_attr *attr)
>> +{
>> +	struct vgic_its *its = dev->private;
>> +	int ret;
>> +
>> +	switch (attr->group) {
>> +	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
>> +		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
>> +		unsigned long type = (unsigned long)attr->attr;
>> +		u64 addr;
>> +
>> +		if (type != KVM_VGIC_ITS_ADDR_TYPE)
>> +			return -ENODEV;
>> +
>> +		if (copy_from_user(&addr, uaddr, sizeof(addr)))
>> +			return -EFAULT;
>> +
>> +		ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base,
>> +					addr, SZ_64K);
>> +		if (ret)
>> +			return ret;
>> +
>> +		its->vgic_its_base = addr;
>> +
>> +		return 0;
>> +	}
>> +	case KVM_DEV_ARM_VGIC_GRP_CTRL:
>> +		switch (attr->attr) {
>> +		case KVM_DEV_ARM_VGIC_CTRL_INIT:
>> +			return vits_register(dev->kvm, its);
> This does not look homogeneous with the GICv2/3 code init sequence
> 
> on vgic GICv2/v3 KVM_DEV_ARM_VGIC_GRP_CTRL/KVM_DEV_ARM_VGIC_CTRL_INIT
> we call vgic_init/kvm_vgic_dist_init/kvm_vgic_vcpu_init.
> 
> the kvm_vgic_map_resources was responsible for registering the iodevs.
> this was called on kvm_vcpu_first_run_init.

Which I think is something that we do for keeping compatibility with the
older lazy VGIC init sequence only?

> Here for ITS you propose to do the iodev registration on
> KVM_DEV_ARM_VGIC_CTRL_INIT

I think it's more logical to do it then. With CTRL_INIT userland
signals that it's done with the setup, so we can set up everything.

> From a QEMU integration point of view this means the init sequence used
> for KVM GIC interrupt controllers cannot be reused for ITS and more
> importantly this is not straightforward to have the proper sequence
> ordering (hence the previously reported case).

I am confused, can you please elaborate on what the problem is?
Or alternatively sketch what you would ideally like the ITS init sequence
to look like? I am totally open to any changes, I just need to know what
you/QEMU need.

Cheers,
Andre.


> Why not offering a similar mechanism?
> 
> Thanks
> 
> Eric
> 
> 
> 
> 
> 
>> +		}
>> +		break;
>> +	}
>> +	return -ENXIO;
>> +}
>> +
>> +static int vgic_its_get_attr(struct kvm_device *dev,
>> +			     struct kvm_device_attr *attr)
>> +{
>> +	switch (attr->group) {
>> +	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
>> +		struct vgic_its *its = dev->private;
>> +		u64 addr = its->vgic_its_base;
>> +		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
>> +		unsigned long type = (unsigned long)attr->attr;
>> +
>> +		if (type != KVM_VGIC_ITS_ADDR_TYPE)
>> +			return -ENODEV;
>> +
>> +		if (copy_to_user(uaddr, &addr, sizeof(addr)))
>> +			return -EFAULT;
>> +		break;
>> +	default:
>> +		return -ENXIO;
>> +	}
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +struct kvm_device_ops kvm_arm_vgic_its_ops = {
>> +	.name = "kvm-arm-vgic-its",
>> +	.create = vgic_its_create,
>> +	.destroy = vgic_its_destroy,
>> +	.set_attr = vgic_its_set_attr,
>> +	.get_attr = vgic_its_get_attr,
>> +	.has_attr = vgic_its_has_attr,
>> +};
>> +
>> +int kvm_vgic_register_its_device(void)
>> +{
>> +	return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
>> +				       KVM_DEV_TYPE_ARM_VGIC_ITS);
>> +}
>> diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
>> index 2f24f13..1813f93 100644
>> --- a/virt/kvm/arm/vgic/vgic-kvm-device.c
>> +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
>> @@ -21,8 +21,8 @@
>>  
>>  /* common helpers */
>>  
>> -static int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
>> -			     phys_addr_t addr, phys_addr_t alignment)
>> +int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
>> +		      phys_addr_t addr, phys_addr_t alignment)
>>  {
>>  	if (addr & ~KVM_PHYS_MASK)
>>  		return -E2BIG;
>> @@ -223,6 +223,9 @@ int kvm_register_vgic_device(unsigned long type)
>>  	case KVM_DEV_TYPE_ARM_VGIC_V3:
>>  		ret = kvm_register_device_ops(&kvm_arm_vgic_v3_ops,
>>  					      KVM_DEV_TYPE_ARM_VGIC_V3);
>> +		if (ret)
>> +			break;
>> +		ret = kvm_vgic_register_its_device();
>>  		break;
>>  #endif
>>  	}
>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>> index 5fcc33a..9bcffa6 100644
>> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>> @@ -49,7 +49,7 @@ bool vgic_has_its(struct kvm *kvm)
>>  	if (dist->vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3)
>>  		return false;
>>  
>> -	return false;
>> +	return dist->has_its;
>>  }
>>  
>>  static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
>> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
>> index 31807c1..9dc7207 100644
>> --- a/virt/kvm/arm/vgic/vgic.h
>> +++ b/virt/kvm/arm/vgic/vgic.h
>> @@ -42,6 +42,9 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
>>  bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq);
>>  void vgic_kick_vcpus(struct kvm *kvm);
>>  
>> +int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
>> +		      phys_addr_t addr, phys_addr_t alignment);
>> +
>>  void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu);
>>  void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
>>  void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
>> @@ -73,6 +76,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info);
>>  int vgic_v3_map_resources(struct kvm *kvm);
>>  int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
>>  bool vgic_has_its(struct kvm *kvm);
>> +int kvm_vgic_register_its_device(void);
>>  #else
>>  static inline void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu)
>>  {
>> @@ -130,6 +134,10 @@ static inline bool vgic_has_its(struct kvm *kvm)
>>  	return false;
>>  }
>>  
>> +static inline int kvm_vgic_register_its_device(void)
>> +{
>> +	return -ENODEV;
>> +}
>>  #endif
>>  
>>  int kvm_register_vgic_device(unsigned long type);
>>
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Auger July 4, 2016, 2:27 p.m. UTC | #3
Andre,

On 04/07/2016 16:05, Andre Przywara wrote:
> Hi,
> 
> On 04/07/16 10:00, Auger Eric wrote:
>> Hi Andre,
>>
>> On 28/06/2016 14:32, Andre Przywara wrote:
>>> Introduce a new KVM device that represents an ARM Interrupt Translation
>>> Service (ITS) controller. Since there can be multiple of this per guest,
>>> we can't piggy back on the existing GICv3 distributor device, but create
>>> a new type of KVM device.
>>> On the KVM_CREATE_DEVICE ioctl we allocate and initialize the ITS data
>>> structure and store the pointer in the kvm_device data.
>>> Upon an explicit init ioctl from userland (after having setup the MMIO
>>> address) we register the handlers with the kvm_io_bus framework.
>>> Any reference to an ITS thus has to go via this interface.
>>>
>>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>>> ---
>>>  Documentation/virtual/kvm/devices/arm-vgic.txt |  25 +++--
>>>  arch/arm/kvm/arm.c                             |   1 +
>>>  arch/arm64/include/uapi/asm/kvm.h              |   2 +
>>>  include/kvm/vgic/vgic.h                        |   1 +
>>>  include/uapi/linux/kvm.h                       |   2 +
>>>  virt/kvm/arm/vgic/vgic-its.c                   | 127 ++++++++++++++++++++++++-
>>>  virt/kvm/arm/vgic/vgic-kvm-device.c            |   7 +-
>>>  virt/kvm/arm/vgic/vgic-mmio-v3.c               |   2 +-
>>>  virt/kvm/arm/vgic/vgic.h                       |   8 ++
>>>  9 files changed, 165 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
>>> index 59541d4..89182f8 100644
>>> --- a/Documentation/virtual/kvm/devices/arm-vgic.txt
>>> +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
>>> @@ -4,16 +4,22 @@ ARM Virtual Generic Interrupt Controller (VGIC)
>>>  Device types supported:
>>>    KVM_DEV_TYPE_ARM_VGIC_V2     ARM Generic Interrupt Controller v2.0
>>>    KVM_DEV_TYPE_ARM_VGIC_V3     ARM Generic Interrupt Controller v3.0
>>> +  KVM_DEV_TYPE_ARM_VGIC_ITS    ARM Interrupt Translation Service Controller
>>>  
>>> -Only one VGIC instance may be instantiated through either this API or the
>>> -legacy KVM_CREATE_IRQCHIP api.  The created VGIC will act as the VM interrupt
>>> -controller, requiring emulated user-space devices to inject interrupts to the
>>> -VGIC instead of directly to CPUs.
>>> +Only one VGIC instance of the V2/V3 types above may be instantiated through
>>> +either this API or the legacy KVM_CREATE_IRQCHIP api.  The created VGIC will
>>> +act as the VM interrupt controller, requiring emulated user-space devices to
>>> +inject interrupts to the VGIC instead of directly to CPUs.
>>>  
>>>  Creating a guest GICv3 device requires a host GICv3 as well.
>>>  GICv3 implementations with hardware compatibility support allow a guest GICv2
>>>  as well.
>>>  
>>> +Creating a virtual ITS controller requires a host GICv3 (but does not depend
>>> +on having physical ITS controllers).
>>> +There can be multiple ITS controllers per guest, each of them has to have
>>> +a separate, non-overlapping MMIO region.
>>> +
>>>  Groups:
>>>    KVM_DEV_ARM_VGIC_GRP_ADDR
>>>    Attributes:
>>> @@ -39,6 +45,13 @@ Groups:
>>>        Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
>>>        This address needs to be 64K aligned.
>>>  
>>> +    KVM_VGIC_V3_ADDR_TYPE_ITS (rw, 64-bit)
>>> +      Base address in the guest physical address space of the GICv3 ITS
>>> +      control register frame. The ITS allows MSI(-X) interrupts to be
>>> +      injected into guests. This extension is optional. If the kernel
>>> +      does not support the ITS, the call returns -ENODEV.
>>> +      Only valid for KVM_DEV_TYPE_ARM_VGIC_ITS.
>>> +      This address needs to be 64K aligned and the region covers 128K.
>>>  
>>>    KVM_DEV_ARM_VGIC_GRP_DIST_REGS
>>>    Attributes:
>>> @@ -109,8 +122,8 @@ Groups:
>>>    KVM_DEV_ARM_VGIC_GRP_CTRL
>>>    Attributes:
>>>      KVM_DEV_ARM_VGIC_CTRL_INIT
>>> -      request the initialization of the VGIC, no additional parameter in
>>> -      kvm_device_attr.addr.
>>> +      request the initialization of the VGIC or ITS, no additional parameter
>>> +      in kvm_device_attr.addr.
>>>    Errors:
>>>      -ENXIO: VGIC not properly configured as required prior to calling
>>>       this attribute
>>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>>> index a268c85..f4a953e 100644
>>> --- a/arch/arm/kvm/arm.c
>>> +++ b/arch/arm/kvm/arm.c
>>> @@ -20,6 +20,7 @@
>>>  #include <linux/errno.h>
>>>  #include <linux/err.h>
>>>  #include <linux/kvm_host.h>
>>> +#include <linux/list.h>
>>>  #include <linux/module.h>
>>>  #include <linux/vmalloc.h>
>>>  #include <linux/fs.h>
>>> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
>>> index f209ea1..f8c257b 100644
>>> --- a/arch/arm64/include/uapi/asm/kvm.h
>>> +++ b/arch/arm64/include/uapi/asm/kvm.h
>>> @@ -87,9 +87,11 @@ struct kvm_regs {
>>>  /* Supported VGICv3 address types  */
>>>  #define KVM_VGIC_V3_ADDR_TYPE_DIST	2
>>>  #define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
>>> +#define KVM_VGIC_ITS_ADDR_TYPE		4
>>>  
>>>  #define KVM_VGIC_V3_DIST_SIZE		SZ_64K
>>>  #define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
>>> +#define KVM_VGIC_V3_ITS_SIZE		SZ_64K
>>>  
>>>  #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
>>>  #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
>>> diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h
>>> index 949a0e1..8cec203 100644
>>> --- a/include/kvm/vgic/vgic.h
>>> +++ b/include/kvm/vgic/vgic.h
>>> @@ -159,6 +159,7 @@ struct vgic_dist {
>>>  
>>>  	struct vgic_io_device	dist_iodev;
>>>  
>>> +	bool			has_its;
>>>  	/*
>>>  	 * Contains the address of the LPI configuration table.
>>>  	 * Since we report GICR_TYPER.CommonLPIAff as 0b00, we can share
>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>> index 7de96f5..d8c4c32 100644
>>> --- a/include/uapi/linux/kvm.h
>>> +++ b/include/uapi/linux/kvm.h
>>> @@ -1077,6 +1077,8 @@ enum kvm_device_type {
>>>  #define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
>>>  	KVM_DEV_TYPE_ARM_VGIC_V3,
>>>  #define KVM_DEV_TYPE_ARM_VGIC_V3	KVM_DEV_TYPE_ARM_VGIC_V3
>>> +	KVM_DEV_TYPE_ARM_VGIC_ITS,
>>> +#define KVM_DEV_TYPE_ARM_VGIC_ITS	KVM_DEV_TYPE_ARM_VGIC_ITS
>>>  	KVM_DEV_TYPE_MAX,
>>>  };
>>>  
>>> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
>>> index ab8d244..62d7484 100644
>>> --- a/virt/kvm/arm/vgic/vgic-its.c
>>> +++ b/virt/kvm/arm/vgic/vgic-its.c
>>> @@ -21,6 +21,7 @@
>>>  #include <linux/kvm.h>
>>>  #include <linux/kvm_host.h>
>>>  #include <linux/interrupt.h>
>>> +#include <linux/uaccess.h>
>>>  
>>>  #include <linux/irqchip/arm-gic-v3.h>
>>>  
>>> @@ -80,7 +81,7 @@ static struct vgic_register_region its_registers[] = {
>>>  		VGIC_ACCESS_32bit),
>>>  };
>>>  
>>> -int vits_register(struct kvm *kvm, struct vgic_its *its)
>>> +static int vits_register(struct kvm *kvm, struct vgic_its *its)
>>>  {
>>>  	struct vgic_io_device *iodev = &its->iodev;
>>>  	int ret;
>>> @@ -98,3 +99,127 @@ int vits_register(struct kvm *kvm, struct vgic_its *its)
>>>  
>>>  	return ret;
>>>  }
>>> +
>>> +static int vgic_its_create(struct kvm_device *dev, u32 type)
>>> +{
>>> +	struct vgic_its *its;
>>> +
>>> +	if (type != KVM_DEV_TYPE_ARM_VGIC_ITS)
>>> +		return -ENODEV;
>>> +
>>> +	its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL);
>>> +	if (!its)
>>> +		return -ENOMEM;
>>> +
>>> +	its->vgic_its_base = VGIC_ADDR_UNDEF;
>>> +
>>> +	dev->kvm->arch.vgic.has_its = true;
>>> +	its->enabled = false;
>>> +
>>> +	dev->private = its;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static void vgic_its_destroy(struct kvm_device *kvm_dev)
>>> +{
>>> +	struct vgic_its *its = kvm_dev->private;
>>> +
>>> +	kfree(its);
>>> +}
>>> +
>>> +static int vgic_its_has_attr(struct kvm_device *dev,
>>> +			     struct kvm_device_attr *attr)
>>> +{
>>> +	switch (attr->group) {
>>> +	case KVM_DEV_ARM_VGIC_GRP_ADDR:
>>> +		switch (attr->attr) {
>>> +		case KVM_VGIC_ITS_ADDR_TYPE:
>>> +			return 0;
>>> +		}
>>> +		break;
>>> +	case KVM_DEV_ARM_VGIC_GRP_CTRL:
>>> +		switch (attr->attr) {
>>> +		case KVM_DEV_ARM_VGIC_CTRL_INIT:
>>> +			return 0;
>>> +		}
>>> +		break;
>>> +	}
>>> +	return -ENXIO;
>>> +}
>>> +
>>> +static int vgic_its_set_attr(struct kvm_device *dev,
>>> +			     struct kvm_device_attr *attr)
>>> +{
>>> +	struct vgic_its *its = dev->private;
>>> +	int ret;
>>> +
>>> +	switch (attr->group) {
>>> +	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
>>> +		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
>>> +		unsigned long type = (unsigned long)attr->attr;
>>> +		u64 addr;
>>> +
>>> +		if (type != KVM_VGIC_ITS_ADDR_TYPE)
>>> +			return -ENODEV;
>>> +
>>> +		if (copy_from_user(&addr, uaddr, sizeof(addr)))
>>> +			return -EFAULT;
>>> +
>>> +		ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base,
>>> +					addr, SZ_64K);
>>> +		if (ret)
>>> +			return ret;
>>> +
>>> +		its->vgic_its_base = addr;
>>> +
>>> +		return 0;
>>> +	}
>>> +	case KVM_DEV_ARM_VGIC_GRP_CTRL:
>>> +		switch (attr->attr) {
>>> +		case KVM_DEV_ARM_VGIC_CTRL_INIT:
>>> +			return vits_register(dev->kvm, its);
>> This does not look homogeneous with the GICv2/3 code init sequence
>>
>> on vgic GICv2/v3 KVM_DEV_ARM_VGIC_GRP_CTRL/KVM_DEV_ARM_VGIC_CTRL_INIT
>> we call vgic_init/kvm_vgic_dist_init/kvm_vgic_vcpu_init.
>>
>> the kvm_vgic_map_resources was responsible for registering the iodevs.
>> this was called on kvm_vcpu_first_run_init.
> 
> Which I think is something that we do for keeping compatibility with the
> older lazy VGIC init sequence only?
> 
>> Here for ITS you propose to do the iodev registration on
>> KVM_DEV_ARM_VGIC_CTRL_INIT
> 
> I think it's more logical to do it then. With CTRL_INIT userland
> signalizes that it's done with the setup, so we can setup everything.
> 
>> From a QEMU integration point of view this means the init sequence used
>> for KVM GIC interrupt controllers cannot be reused for ITS and more
>> importantly this is not straightforward to have the proper sequence
>> ordering (hence the previously reported case).
> 
> I am confused, can you please elaborate what the problem is?
> Or alternatively sketch what you ideally would the ITS init sequence to
> look like? I am totally open to any changes, just need to know what
> you/QEMU needs.

in QEMU the address setting is done on a so-called qemu
"machine_init_done_notifier", ie. a callback that is registered at ITS
device init, to be called once the virt machine code has executed. This
callback calls  kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);

If userspace needs to explicitly "init" the ITS (effectively a
map_resources step), this must happen after the KVM_SET_DEVICE_ATTR. So you
must also register a callback in the same way. However, while QEMU has an
existing framework for registering KVM device addresses, there is no
equivalent for setting attributes other than device addresses.

This is feasible I think but this does not fit qemu nicely. So can't the
map_resources happen implicitly on the first VCPU run?

Thanks

Eric

> 
> Cheers,
> Andre.
> 
> 
>> Why not offering a similar mechanism?
>>
>> Thanks
>>
>> Eric
>>
>>
>>
>>
>>
>>> +		}
>>> +		break;
>>> +	}
>>> +	return -ENXIO;
>>> +}
>>> +
>>> +static int vgic_its_get_attr(struct kvm_device *dev,
>>> +			     struct kvm_device_attr *attr)
>>> +{
>>> +	switch (attr->group) {
>>> +	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
>>> +		struct vgic_its *its = dev->private;
>>> +		u64 addr = its->vgic_its_base;
>>> +		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
>>> +		unsigned long type = (unsigned long)attr->attr;
>>> +
>>> +		if (type != KVM_VGIC_ITS_ADDR_TYPE)
>>> +			return -ENODEV;
>>> +
>>> +		if (copy_to_user(uaddr, &addr, sizeof(addr)))
>>> +			return -EFAULT;
>>> +		break;
>>> +	default:
>>> +		return -ENXIO;
>>> +	}
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +struct kvm_device_ops kvm_arm_vgic_its_ops = {
>>> +	.name = "kvm-arm-vgic-its",
>>> +	.create = vgic_its_create,
>>> +	.destroy = vgic_its_destroy,
>>> +	.set_attr = vgic_its_set_attr,
>>> +	.get_attr = vgic_its_get_attr,
>>> +	.has_attr = vgic_its_has_attr,
>>> +};
>>> +
>>> +int kvm_vgic_register_its_device(void)
>>> +{
>>> +	return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
>>> +				       KVM_DEV_TYPE_ARM_VGIC_ITS);
>>> +}
>>> diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
>>> index 2f24f13..1813f93 100644
>>> --- a/virt/kvm/arm/vgic/vgic-kvm-device.c
>>> +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
>>> @@ -21,8 +21,8 @@
>>>  
>>>  /* common helpers */
>>>  
>>> -static int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
>>> -			     phys_addr_t addr, phys_addr_t alignment)
>>> +int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
>>> +		      phys_addr_t addr, phys_addr_t alignment)
>>>  {
>>>  	if (addr & ~KVM_PHYS_MASK)
>>>  		return -E2BIG;
>>> @@ -223,6 +223,9 @@ int kvm_register_vgic_device(unsigned long type)
>>>  	case KVM_DEV_TYPE_ARM_VGIC_V3:
>>>  		ret = kvm_register_device_ops(&kvm_arm_vgic_v3_ops,
>>>  					      KVM_DEV_TYPE_ARM_VGIC_V3);
>>> +		if (ret)
>>> +			break;
>>> +		ret = kvm_vgic_register_its_device();
>>>  		break;
>>>  #endif
>>>  	}
>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>> index 5fcc33a..9bcffa6 100644
>>> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>> @@ -49,7 +49,7 @@ bool vgic_has_its(struct kvm *kvm)
>>>  	if (dist->vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3)
>>>  		return false;
>>>  
>>> -	return false;
>>> +	return dist->has_its;
>>>  }
>>>  
>>>  static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
>>> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
>>> index 31807c1..9dc7207 100644
>>> --- a/virt/kvm/arm/vgic/vgic.h
>>> +++ b/virt/kvm/arm/vgic/vgic.h
>>> @@ -42,6 +42,9 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
>>>  bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq);
>>>  void vgic_kick_vcpus(struct kvm *kvm);
>>>  
>>> +int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
>>> +		      phys_addr_t addr, phys_addr_t alignment);
>>> +
>>>  void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu);
>>>  void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
>>>  void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
>>> @@ -73,6 +76,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info);
>>>  int vgic_v3_map_resources(struct kvm *kvm);
>>>  int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
>>>  bool vgic_has_its(struct kvm *kvm);
>>> +int kvm_vgic_register_its_device(void);
>>>  #else
>>>  static inline void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu)
>>>  {
>>> @@ -130,6 +134,10 @@ static inline bool vgic_has_its(struct kvm *kvm)
>>>  	return false;
>>>  }
>>>  
>>> +static inline int kvm_vgic_register_its_device(void)
>>> +{
>>> +	return -ENODEV;
>>> +}
>>>  #endif
>>>  
>>>  int kvm_register_vgic_device(unsigned long type);
>>>
>>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Maydell July 4, 2016, 2:32 p.m. UTC | #4
On 4 July 2016 at 15:27, Auger Eric <eric.auger@redhat.com> wrote:
> Andre,
>
> On 04/07/2016 16:05, Andre Przywara wrote:
>> Hi,
>>
>> On 04/07/16 10:00, Auger Eric wrote:
>>> From a QEMU integration point of view this means the init sequence used
>>> for KVM GIC interrupt controllers cannot be reused for ITS and more
>>> importantly this is not straightforward to have the proper sequence
>>> ordering (hence the previously reported case).
>>
>> I am confused, can you please elaborate what the problem is?
>> Or alternatively sketch what you ideally would the ITS init sequence to
>> look like? I am totally open to any changes, just need to know what
>> you/QEMU needs.
>
> in QEMU the address setting is done on a so-called qemu
> "machine_init_done_notifier", ie. a callback that is registered at ITS
> device init, to be called once the virt machine code has executed. This
> callback calls  kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
>
> In case the userspace needs to explicitly "init" the ITS (actually ~
> map_resources) this must happen after the KVM_SET_DEVICE_ATTR. So you
> also must register a callback in the same way. However there is a
> framework existing to register kvm device addresses but this does not
> exist to set other attributes than device addresses.
>
> This is feasible I think but this does not fit qemu nicely. So can't the
> map_resources happen implicitly on the first VCPU run?

I'm not clear what you think the problem here for QEMU is.
We definitely want the API for the kernel to be:
 create device
 set attributes
 explicitly complete init of the device
 [attribute setting after this is illegal]
 run CPUs

so I'm not sure why QEMU would care if the kernel does things at
"final init" rather than "run CPUs".

This is how the GICv3 init works and how the ITS should work too;
we don't want to extend the GICv2 mistake of "no explicit complete
init" to anything else, because then you end up with ad-hoc
"do this when we first run the vCPU; oh, but also do it if
userspace tries to write a register content; and also if...".

thanks
-- PMM
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Auger July 4, 2016, 3 p.m. UTC | #5
Hi Peter,

On 04/07/2016 16:32, Peter Maydell wrote:
> On 4 July 2016 at 15:27, Auger Eric <eric.auger@redhat.com> wrote:
>> Andre,
>>
>> On 04/07/2016 16:05, Andre Przywara wrote:
>>> Hi,
>>>
>>> On 04/07/16 10:00, Auger Eric wrote:
>>>> From a QEMU integration point of view this means the init sequence used
>>>> for KVM GIC interrupt controllers cannot be reused for ITS and more
>>>> importantly this is not straightforward to have the proper sequence
>>>> ordering (hence the previously reported case).
>>>
>>> I am confused, can you please elaborate what the problem is?
>>> Or alternatively sketch what you ideally would the ITS init sequence to
>>> look like? I am totally open to any changes, just need to know what
>>> you/QEMU needs.
>>
>> in QEMU the address setting is done on a so-called qemu
>> "machine_init_done_notifier", ie. a callback that is registered at ITS
>> device init, to be called once the virt machine code has executed. This
>> callback calls  kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
>>
>> In case the userspace needs to explicitly "init" the ITS (actually ~
>> map_resources) this must happen after the KVM_SET_DEVICE_ATTR. So you
>> also must register a callback in the same way. However there is a
>> framework existing to register kvm device addresses but this does not
>> exist to set other attributes than device addresses.
>>
>> This is feasible I think but this does not fit qemu nicely. So can't the
>> map_resources happen implicitly on the first VCPU run?
> 
> I'm not clear what you think the problem here for QEMU is.
> We definitely want the API for the kernel to be:
>  create device
>  set attributes
>  explicitly complete init of the device
>  [attribute setting after this is illegal]
>  run CPUs
> 
> so I'm not sure why QEMU would care if the kernel does things at
> "final init" rather than "run CPUs".
> 
> This is how the GICv3 init works and how the ITS should work too;
The GICv3 explicit init does not do the same thing as the ITS init.
The GICv3 init does not map the resources (KVM iodevice registration); this
is done at the first VCPU run.
The ITS init does map the resources. If we call the ITS init at the same
place as we call the GICv3 init, in the realization function, the region
mapping is not yet done, so you will map resources at an undefined location.

I am definitely not opposed to calling the ITS init function explicitly
from the user side, but this must happen after the KVM_SET_DEVICE_ATTR. So
another machine_init_done function must be registered and the notifier
must be called AFTER the notifier that calls the KVM_SET_DEVICE_ATTR
ioctl. However, you cannot easily control the machine init done notifier
registration order because in target-arm/kvm.c there is a single
notifier that issues the KVM_SET_DEVICE_ATTR calls for all the KVM devices
(kvm_arm_machine_init_done). So it is not possible to register the ITS
init notifier before the "kvm_arm_set_device_addr" notifier.

So my understanding is one must do things outside of the existing framework?

Hope this clarifies

Thanks

Eric


> we don't want to extend the GICv2 mistake of "no explicit complete
> init" to anything else, because then you end up with ad-hoc
> "do this when we first run the vCPU; oh, but also do it if
> userspace tries to write a register content; and also if...".
> 
> thanks
> -- PMM
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andre Przywara July 4, 2016, 5:40 p.m. UTC | #6
Hi Eric,

On 04/07/16 16:00, Auger Eric wrote:
> Hi Peter,
> 
> On 04/07/2016 16:32, Peter Maydell wrote:
>> On 4 July 2016 at 15:27, Auger Eric <eric.auger@redhat.com> wrote:
>>> Andre,
>>>
>>> On 04/07/2016 16:05, Andre Przywara wrote:
>>>> Hi,
>>>>
>>>> On 04/07/16 10:00, Auger Eric wrote:
>>>>> From a QEMU integration point of view this means the init sequence used
>>>>> for KVM GIC interrupt controllers cannot be reused for ITS and more
>>>>> importantly this is not straightforward to have the proper sequence
>>>>> ordering (hence the previously reported case).
>>>>
>>>> I am confused, can you please elaborate what the problem is?
>>>> Or alternatively sketch what you ideally would the ITS init sequence to
>>>> look like? I am totally open to any changes, just need to know what
>>>> you/QEMU needs.
>>>
>>> in QEMU the address setting is done on a so-called qemu
>>> "machine_init_done_notifier", ie. a callback that is registered at ITS
>>> device init, to be called once the virt machine code has executed. This
>>> callback calls  kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
>>>
>>> In case the userspace needs to explicitly "init" the ITS (actually ~
>>> map_resources) this must happen after the KVM_SET_DEVICE_ATTR. So you
>>> also must register a callback in the same way. However there is a
>>> framework existing to register kvm device addresses but this does not
>>> exist to set other attributes than device addresses.
>>>
>>> This is feasible I think but this does not fit qemu nicely. So can't the
>>> map_resources happen implicitly on the first VCPU run?
>>
>> I'm not clear what you think the problem here for QEMU is.
>> We definitely want the API for the kernel to be:
>>  create device
>>  set attributes
>>  explicitly complete init of the device
>>  [attribute setting after this is illegal]
>>  run CPUs
>>
>> so I'm not sure why QEMU would care if the kernel does things at
>> "final init" rather than "run CPUs".
>>
>> This is how the GICv3 init works and how the ITS should work too;
> The GICv3 explicit does not do the same as the ITS init.
> GICv3 init does not map the resources (KVM iodevice registration). This
> is done at 1st VCPU run.
> ITS init does map the resources. If we call the ITS init at the same
> place as we call the GICv3 init, in the realization function, the region
> mapping is not yet done so you will map resources at undefined location.

What do you mean by "region mapping"? QEMU's internal mapping?

But you set the GICv3 redist/dist  addresses (or the ITS address, for
that matter) before calling CTRL_INIT, right? So are you concerned that
the kernel "maps" the region before QEMU connects the memory region? Is
that really a problem? This "map_resources" equivalent for the ITS just
creates a kvm_io_bus mapping, which would never fire without either a
guest running (which we clearly don't at this point) or userland
explicitly requesting access (which would require QEMU to have done the
mapping?).

Is that about right or do I miss something again?
Sorry for my ignorance on the QEMU internals in that matter ;-)

> I am definitively not opposed to call the ITS init function explicitly
> from user side but this must happen after the KVM_SET_DEVICE_ATTR. So
> another machine_init_done function must be registered and the notifier
> must be called AFTER the notifier that calls the KVM_SET_DEVICE_ATTR
> ioctl. However you cannot easily master the machine init done notifier
> registration order because in target-arm/kvm.c there is a single
> notifier that calls all the KVM_SET_DEVICE_ATTR for all the KVM devices
> (kvm_arm_machine_init_done). So it is not possible to register the ITS
> init notifier before the "kvm_arm_set_device_addr" notifier.
> 
> So my understanding is one must do things outside of the existing framework?

While I am certainly not interested in making QEMU's (or the QEMU patch
author's) life harder than needed, I am wondering if we should really
model the userland/kernel interface according to QEMU's current
framework design.
Is the current approach a leftover of the initial vGICv2 code, that was
just slightly adjusted to support GICv3?

Cheers,
Andre

> 
>> we don't want to extend the GICv2 mistake of "no explicit complete
>> init" to anything else, because then you end up with ad-hoc
>> "do this when we first run the vCPU; oh, but also do it if
>> userspace tries to write a register content; and also if...".
>>
>> thanks
>> -- PMM
>>
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Auger July 5, 2016, 7:40 a.m. UTC | #7
Hi Andre,
On 04/07/2016 19:40, Andre Przywara wrote:
> Hi Eric,
> 
> On 04/07/16 16:00, Auger Eric wrote:
>> Hi Peter,
>>
>> On 04/07/2016 16:32, Peter Maydell wrote:
>>> On 4 July 2016 at 15:27, Auger Eric <eric.auger@redhat.com> wrote:
>>>> Andre,
>>>>
>>>> On 04/07/2016 16:05, Andre Przywara wrote:
>>>>> Hi,
>>>>>
>>>>> On 04/07/16 10:00, Auger Eric wrote:
>>>>>> From a QEMU integration point of view this means the init sequence used
>>>>>> for KVM GIC interrupt controllers cannot be reused for ITS and more
>>>>>> importantly this is not straightforward to have the proper sequence
>>>>>> ordering (hence the previously reported case).
>>>>>
>>>>> I am confused, can you please elaborate what the problem is?
>>>>> Or alternatively sketch what you ideally would the ITS init sequence to
>>>>> look like? I am totally open to any changes, just need to know what
>>>>> you/QEMU needs.
>>>>
>>>> in QEMU the address setting is done on a so-called qemu
>>>> "machine_init_done_notifier", ie. a callback that is registered at ITS
>>>> device init, to be called once the virt machine code has executed. This
>>>> callback calls  kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
>>>>
>>>> In case the userspace needs to explicitly "init" the ITS (actually ~
>>>> map_resources) this must happen after the KVM_SET_DEVICE_ATTR. So you
>>>> also must register a callback in the same way. However there is a
>>>> framework existing to register kvm device addresses but this does not
>>>> exist to set other attributes than device addresses.
>>>>
>>>> This is feasible I think but this does not fit qemu nicely. So can't the
>>>> map_resources happen implicitly on the first VCPU run?
>>>
>>> I'm not clear what you think the problem here for QEMU is.
>>> We definitely want the API for the kernel to be:
>>>  create device
>>>  set attributes
>>>  explicitly complete init of the device
>>>  [attribute setting after this is illegal]
>>>  run CPUs
>>>
>>> so I'm not sure why QEMU would care if the kernel does things at
>>> "final init" rather than "run CPUs".
>>>
>>> This is how the GICv3 init works and how the ITS should work too;
>> The GICv3 explicit does not do the same as the ITS init.
>> GICv3 init does not map the resources (KVM iodevice registration). This
>> is done at 1st VCPU run.
>> ITS init does map the resources. If we call the ITS init at the same
>> place as we call the GICv3 init, in the realization function, the region
>> mapping is not yet done so you will map resources at undefined location.
> 
> What do you mean with "region mapping"? QEMU's internal mapping?
1) First, the device regions are created. 2) Second, they are attached at
some place in the guest physical address space; step 2) is what I call
"region mapping". 3) Third, kvm_device_ioctl(kd->dev_fd,
KVM_SET_DEVICE_ATTR, attr) is called, setting the ITS base address on the
kernel side. This is done in the so-called machine_init_done_notifier.
So if we follow that scheme, the explicit INIT must happen after 3).
> 
> But you set the GICv3 redist/dist  addresses (or the ITS address, for
> that matter) before calling CTRL_INIT, right?
No. Similarly, the GICv3 base addresses are provided to the kernel very
late (at the same place as step 3, in the machine init notifier). For the
VGIC, CTRL_INIT is called at device creation 1), after freezing the number
of SPIs. If you look at the vgic_init function that is called upon
CTRL_INIT, it never uses base addresses. It just uses dimensioning
parameters such as the number of SPIs. For the VGIC, map_resources is
called implicitly on the first VCPU run, i.e. after 3). For the ITS, with the
current patch, this needs to be done in a machine_init_done_notifier after 3).

> So are you concerned that
> the kernel "maps" the region before QEMU connects the memory region? Is
> that really a problem? This "map_resources" equivalent for the ITS just
> creates a kvm_io_bus mapping, which would never fire without either a
> guest running (which we clearly don't at this point) or userland
> explicitly requesting access (which would require QEMU to have done the
> mapping?).
> 
> Is that about right or do I miss something again?
> Sorry for my ignorance on the QEMU internals in that matter ;-)
> 
>> I am definitively not opposed to call the ITS init function explicitly
>> from user side but this must happen after the KVM_SET_DEVICE_ATTR. So
>> another machine_init_done function must be registered and the notifier
>> must be called AFTER the notifier that calls the KVM_SET_DEVICE_ATTR
>> ioctl. However you cannot easily master the machine init done notifier
>> registration order because in target-arm/kvm.c there is a single
>> notifier that calls all the KVM_SET_DEVICE_ATTR for all the KVM devices
>> (kvm_arm_machine_init_done). So it is not possible to register the ITS
>> init notifier before the "kvm_arm_set_device_addr" notifier.
>>
>> So my understanding is one must do things outside of the existing framework?
> 
> While I am certainly not interested in making QEMU's (or the QEMU patch
> author's) life harder than needed, I am wondering if we should really
> model the userland/kernel interface according to QEMU's current
> framework design.
> Is the current approach a leftover of the initial vGICv2 code, that was
> just slightly adjusted to support GICv3?

Well, I don't think we need to devise the kernel API according to the
QEMU framework. However, from a kernel point of view, I wanted to shed light
on the differences: vgic_init and its_init are not homogeneous in terms of
the actions they perform, and map_resources is called implicitly in one
case but must be called explicitly in the other, with an impact on the
QEMU framework.

Best Regards

Eric
> 
> Cheers,
> Andre
> 
>>
>>> we don't want to extend the GICv2 mistake of "no explicit complete
>>> init" to anything else, because then you end up with ad-hoc
>>> "do this when we first run the vCPU; oh, but also do it if
>>> userspace tries to write a register content; and also if...".
>>>
>>> thanks
>>> -- PMM
>>>
>>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Auger July 5, 2016, 8:34 a.m. UTC | #8
Hi Andre,

On 04/07/2016 19:40, Andre Przywara wrote:
> Hi Eric,
> 
> On 04/07/16 16:00, Auger Eric wrote:
>> Hi Peter,
>>
>> On 04/07/2016 16:32, Peter Maydell wrote:
>>> On 4 July 2016 at 15:27, Auger Eric <eric.auger@redhat.com> wrote:
>>>> Andre,
>>>>
>>>> On 04/07/2016 16:05, Andre Przywara wrote:
>>>>> Hi,
>>>>>
>>>>> On 04/07/16 10:00, Auger Eric wrote:
>>>>>> From a QEMU integration point of view this means the init sequence used
>>>>>> for KVM GIC interrupt controllers cannot be reused for ITS and more
>>>>>> importantly this is not straightforward to have the proper sequence
>>>>>> ordering (hence the previously reported case).
>>>>>
>>>>> I am confused, can you please elaborate what the problem is?
>>>>> Or alternatively sketch what you ideally would the ITS init sequence to
>>>>> look like? I am totally open to any changes, just need to know what
>>>>> you/QEMU needs.
>>>>
>>>> in QEMU the address setting is done on a so-called qemu
>>>> "machine_init_done_notifier", ie. a callback that is registered at ITS
>>>> device init, to be called once the virt machine code has executed. This
>>>> callback calls  kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
>>>>
>>>> In case the userspace needs to explicitly "init" the ITS (actually ~
>>>> map_resources) this must happen after the KVM_SET_DEVICE_ATTR. So you
>>>> also must register a callback in the same way. However there is a
>>>> framework existing to register kvm device addresses but this does not
>>>> exist to set other attributes than device addresses.
>>>>
>>>> This is feasible I think but this does not fit qemu nicely. So can't the
>>>> map_resources happen implicitly on the first VCPU run?
>>>
>>> I'm not clear what you think the problem here for QEMU is.
>>> We definitely want the API for the kernel to be:
>>>  create device
>>>  set attributes
>>>  explicitly complete init of the device
>>>  [attribute setting after this is illegal]
>>>  run CPUs
>>>
>>> so I'm not sure why QEMU would care if the kernel does things at
>>> "final init" rather than "run CPUs".
>>>
>>> This is how the GICv3 init works and how the ITS should work too;
>> The GICv3 explicit does not do the same as the ITS init.
>> GICv3 init does not map the resources (KVM iodevice registration). This
>> is done at 1st VCPU run.
>> ITS init does map the resources. If we call the ITS init at the same
>> place as we call the GICv3 init, in the realization function, the region
>> mapping is not yet done so you will map resources at undefined location.
> 
> What do you mean with "region mapping"? QEMU's internal mapping?
> 
> But you set the GICv3 redist/dist  addresses (or the ITS address, for
> that matter) before calling CTRL_INIT, right? So are you concerned that
> the kernel "maps" the region before QEMU connects the memory region? Is
> that really a problem? This "map_resources" equivalent for the ITS just
> creates a kvm_io_bus mapping, which would never fire without either a
> guest running (which we clearly don't at this point) or userland
> explicitly requesting access (which would require QEMU to have done the
> mapping?).
> 
> Is that about right or do I miss something again?
> Sorry for my ignorance on the QEMU internals in that matter ;-)
> 
>> I am definitively not opposed to call the ITS init function explicitly
>> from user side but this must happen after the KVM_SET_DEVICE_ATTR. So
>> another machine_init_done function must be registered and the notifier
>> must be called AFTER the notifier that calls the KVM_SET_DEVICE_ATTR
>> ioctl. However you cannot easily master the machine init done notifier
>> registration order because in target-arm/kvm.c there is a single
>> notifier that calls all the KVM_SET_DEVICE_ATTR for all the KVM devices
>> (kvm_arm_machine_init_done). So it is not possible to register the ITS
>> init notifier before the "kvm_arm_set_device_addr" notifier.
>>
>> So my understanding is one must do things outside of the existing framework?
> 
> While I am certainly not interested in making QEMU's (or the QEMU patch
> author's) life harder than needed, I am wondering if we should really
> model the userland/kernel interface according to QEMU's current
> framework design.
> Is the current approach a leftover of the initial vGICv2 code, that was
> just slightly adjusted to support GICv3?

I have a solution to work around the issue on the QEMU side, and I can now
see the guest ITS properly initialized, so please proceed according to your
preference and the consensus.

Eric
> 
> Cheers,
> Andre
> 
>>
>>> we don't want to extend the GICv2 mistake of "no explicit complete
>>> init" to anything else, because then you end up with ad-hoc
>>> "do this when we first run the vCPU; oh, but also do it if
>>> userspace tries to write a register content; and also if...".
>>>
>>> thanks
>>> -- PMM
>>>
>>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andre Przywara July 5, 2016, 8:59 a.m. UTC | #9
Hi Eric,

thank you very much for the elaborate explanation!

On 05/07/16 08:40, Auger Eric wrote:
> Hi Andre,
> On 04/07/2016 19:40, Andre Przywara wrote:
>> Hi Eric,
>>
>> On 04/07/16 16:00, Auger Eric wrote:
>>> Hi Peter,
>>>
>>> On 04/07/2016 16:32, Peter Maydell wrote:
>>>> On 4 July 2016 at 15:27, Auger Eric <eric.auger@redhat.com> wrote:
>>>>> Andre,
>>>>>
>>>>> On 04/07/2016 16:05, Andre Przywara wrote:
>>>>>> Hi,
>>>>>>
>>>>>> On 04/07/16 10:00, Auger Eric wrote:
>>>>>>> From a QEMU integration point of view this means the init sequence used
>>>>>>> for KVM GIC interrupt controllers cannot be reused for ITS and more
>>>>>>> importantly this is not straightforward to have the proper sequence
>>>>>>> ordering (hence the previously reported case).
>>>>>>
>>>>>> I am confused, can you please elaborate what the problem is?
>>>>>> Or alternatively sketch what you ideally would the ITS init sequence to
>>>>>> look like? I am totally open to any changes, just need to know what
>>>>>> you/QEMU needs.
>>>>>
>>>>> in QEMU the address setting is done on a so-called qemu
>>>>> "machine_init_done_notifier", ie. a callback that is registered at ITS
>>>>> device init, to be called once the virt machine code has executed. This
>>>>> callback calls  kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
>>>>>
>>>>> In case the userspace needs to explicitly "init" the ITS (actually ~
>>>>> map_resources) this must happen after the KVM_SET_DEVICE_ATTR. So you
>>>>> also must register a callback in the same way. However there is a
>>>>> framework existing to register kvm device addresses but this does not
>>>>> exist to set other attributes than device addresses.
>>>>>
>>>>> This is feasible I think but this does not fit qemu nicely. So can't the
>>>>> map_resources happen implicitly on the first VCPU run?
>>>>
>>>> I'm not clear what you think the problem here for QEMU is.
>>>> We definitely want the API for the kernel to be:
>>>>  create device
>>>>  set attributes
>>>>  explicitly complete init of the device
>>>>  [attribute setting after this is illegal]
>>>>  run CPUs
>>>>
>>>> so I'm not sure why QEMU would care if the kernel does things at
>>>> "final init" rather than "run CPUs".
>>>>
>>>> This is how the GICv3 init works and how the ITS should work too;
>>> The GICv3 explicit does not do the same as the ITS init.
>>> GICv3 init does not map the resources (KVM iodevice registration). This
>>> is done at 1st VCPU run.
>>> ITS init does map the resources. If we call the ITS init at the same
>>> place as we call the GICv3 init, in the realization function, the region
>>> mapping is not yet done so you will map resources at undefined location.
>>
>> What do you mean with "region mapping"? QEMU's internal mapping?
> First, the device regions are created. Second, they are attached at some
> place in the guest physical address space; this second step is what I call
> "region mapping". Third, kvm_device_ioctl(kd->dev_fd,
> KVM_SET_DEVICE_ATTR, attr) is called, setting the ITS base address on the
> kernel side. This is done in the so-called machine_init_done_notifier.
> So if we follow that scheme, the explicit INIT must happen after step 3.

Ah, OK, so you _do_ the address setup _after_ the INIT.
My understanding of the KVM API was that this isn't allowed, as by the time
of the INIT _everything_ should have been set up. kvmtool works this way.

So we obviously can't change this for GICv3, but I wonder if we should
make this explicit with the ITS:
1) Create the device
2) setup _all_ parameters (address, number of ...)
3) call INIT, any setup calls from here on are denied

That sounds like the proper setup sequence to me.
I don't know the details of the workaround you just mentioned, but maybe it's
worth doing the GICv2/v3 initialization in the same way? This would
avoid having two paths for ITS and GICv2/v3 setup in the QEMU code.

Sorry if that means more work for you!

>> But you set the GICv3 redist/dist  addresses (or the ITS address, for
>> that matter) before calling CTRL_INIT, right?
> No, similarly the GICv3 base addresses are provided to the kernel very
> late (at the same place as step 3, in the machine init notifier). For the
> VGIC, CTRL_INIT is called at device creation (step 1), after freezing the
> number of SPIs. If you look at the vgic_init function that is called upon
> CTRL_INIT, it never uses base addresses. It just uses dimensioning
> parameters such as the number of SPIs. For the VGIC, map_resources is
> called implicitly on the first vcpu run, i.e. after step 3. For the ITS with
> the current patch, this needs to be done in a machine_init_done_notifier
> after step 3.

I see. Doing the map_resources on the first vcpu run sounds like an
anachronism from the implicit GICv2 init days to me, which we just
couldn't change for compatibility reasons and had no real reason to
change for GICv3.

>  So are you concerned that
>> the kernel "maps" the region before QEMU connects the memory region? Is
>> that really a problem? This "map_resources" equivalent for the ITS just
>> creates a kvm_io_bus mapping, which would never fire without either a
>> guest running (which we clearly don't at this point) or userland
>> explicitly requesting access (which would require QEMU to have done the
>> mapping?).
>>
>> Is that about right or do I miss something again?
>> Sorry for my ignorance on the QEMU internals in that matter ;-)
>>
>>> I am definitively not opposed to call the ITS init function explicitly
>>> from user side but this must happen after the KVM_SET_DEVICE_ATTR. So
>>> another machine_init_done function must be registered and the notifier
>>> must be called AFTER the notifier that calls the KVM_SET_DEVICE_ATTR
>>> ioctl. However you cannot easily master the machine init done notifier
>>> registration order because in target-arm/kvm.c there is a single
>>> notifier that calls all the KVM_SET_DEVICE_ATTR for all the KVM devices
>>> (kvm_arm_machine_init_done). So it is not possible to register the ITS
>>> init notifier before the "kvm_arm_set_device_addr" notifier.
>>>
>>> So my understanding is one must do things outside of the existing framework?
>>
>> While I am certainly not interested in making QEMU's (or the QEMU patch
>> author's) life harder than needed, I am wondering if we should really
>> model the userland/kernel interface according to QEMU's current
>> framework design.
>> Is the current approach a leftover of the initial vGICv2 code, that was
>> just slightly adjusted to support GICv3?
> 
> Well I don't think we need to devise the kernel API according to the
> QEMU framework. However from a kernel pov I wanted to shed the light on
> the difference between vgic_init and its_init which are not homogeneous
> in terms of actions and map_resources which in one case is called
> implicitly and in the other case must be called explicitly, with impact
> on qemu framework.

OK, got it. I see that the documentation doesn't require all setup
activities to be finished before INIT, though I understood it that way.
I will document this requirement for the ITS INIT call.

Let me send out another revision, but still with the current sequence.
If the workaround you mentioned is too involved, I might still change
the kernel.

Cheers,
Andre.

>>>> we don't want to extend the GICv2 mistake of "no explicit complete
>>>> init" to anything else, because then you end up with ad-hoc
>>>> "do this when we first run the vCPU; oh, but also do it if
>>>> userspace tries to write a register content; and also if...".
>>>>
>>>> thanks
>>>> -- PMM
>>>>
>>>
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Auger July 5, 2016, 9:13 a.m. UTC | #10
On 05/07/2016 10:59, Andre Przywara wrote:
> Hi Eric,
> 
> thank you very much for the elaborate explanation!
> 
> On 05/07/16 08:40, Auger Eric wrote:
>> Hi Andre,
>> On 04/07/2016 19:40, Andre Przywara wrote:
>>> Hi Eric,
>>>
>>> On 04/07/16 16:00, Auger Eric wrote:
>>>> Hi Peter,
>>>>
>>>> On 04/07/2016 16:32, Peter Maydell wrote:
>>>>> On 4 July 2016 at 15:27, Auger Eric <eric.auger@redhat.com> wrote:
>>>>>> Andre,
>>>>>>
>>>>>> On 04/07/2016 16:05, Andre Przywara wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> On 04/07/16 10:00, Auger Eric wrote:
>>>>>>>> From a QEMU integration point of view this means the init sequence used
>>>>>>>> for KVM GIC interrupt controllers cannot be reused for ITS and more
>>>>>>>> importantly this is not straightforward to have the proper sequence
>>>>>>>> ordering (hence the previously reported case).
>>>>>>>
>>>>>>> I am confused, can you please elaborate what the problem is?
>>>>>>> Or alternatively sketch what you ideally would the ITS init sequence to
>>>>>>> look like? I am totally open to any changes, just need to know what
>>>>>>> you/QEMU needs.
>>>>>>
>>>>>> in QEMU the address setting is done on a so-called qemu
>>>>>> "machine_init_done_notifier", ie. a callback that is registered at ITS
>>>>>> device init, to be called once the virt machine code has executed. This
>>>>>> callback calls  kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
>>>>>>
>>>>>> In case the userspace needs to explicitly "init" the ITS (actually ~
>>>>>> map_resources) this must happen after the KVM_SET_DEVICE_ATTR. So you
>>>>>> also must register a callback in the same way. However there is a
>>>>>> framework existing to register kvm device addresses but this does not
>>>>>> exist to set other attributes than device addresses.
>>>>>>
>>>>>> This is feasible I think but this does not fit qemu nicely. So can't the
>>>>>> map_resources happen implicitly on the first VCPU run?
>>>>>
>>>>> I'm not clear what you think the problem here for QEMU is.
>>>>> We definitely want the API for the kernel to be:
>>>>>  create device
>>>>>  set attributes
>>>>>  explicitly complete init of the device
>>>>>  [attribute setting after this is illegal]
>>>>>  run CPUs
>>>>>
>>>>> so I'm not sure why QEMU would care if the kernel does things at
>>>>> "final init" rather than "run CPUs".
>>>>>
>>>>> This is how the GICv3 init works and how the ITS should work too;
>>>> The GICv3 explicit does not do the same as the ITS init.
>>>> GICv3 init does not map the resources (KVM iodevice registration). This
>>>> is done at 1st VCPU run.
>>>> ITS init does map the resources. If we call the ITS init at the same
>>>> place as we call the GICv3 init, in the realization function, the region
>>>> mapping is not yet done so you will map resources at undefined location.
>>>
>>> What do you mean with "region mapping"? QEMU's internal mapping?
>> 1st the device regions are created. 2d they are attached at some place
>> in the guest memory PA address space. 2d is what I call "region
>> mapping". and 3d you have the kvm_device_ioctl(kd->dev_fd,
>> KVM_SET_DEVICE_ATTR, attr) called, setting the ITS base address on
>> kernel side. This is done on the so-called machine_init_done_notifier.
>> So if we follow that scheme the explicit INIT must happen after 3).
> 
> Ah, OK, so you _do_ the address setup _after_ the INIT.
> My understanding of the KVM API was that this isn't allowed, as with the
> INIT _everything_ should have been setup. kvmtool works this way.
Yep. QEMU does not do it that way for either GICv2 or GICv3.

Besides, the KVM device doc is not that detailed, and I am the one to blame:
    KVM_DEV_ARM_VGIC_CTRL_INIT
      request the initialization of the VGIC or ITS, no additional
      parameter in kvm_device_attr.addr.


> 
> So we obviously can't change this for GICv3, but I wonder if we should
> make this explicit with the ITS:
> 1) Create the device
> 2) setup _all_ parameters (address, number of ...)
> 3) call INIT, any setup calls from here on are denied
> 
> That sounds like the proper setup sequence to me.
> I don't know about your workaround you just mentioned, but maybe it's
> worth to do the GICv2/v3 initialization in the same way? This would
> avoid having two paths for ITS and GICv2/v3 setup in the QEMU code.
Yes, that makes sense. Currently I am not using the QEMU framework (basically
the kvm_arm_register_device function that both VGIC v2 and v3 use);
instead I am using a custom machine-init-done callback. That said, I am not
yet 100% sure this will not break something else ;-)

My own callback implements the sequence you describe above.

Cheers

Eric
> 
> Sorry if that means more work to you!
> 
>>> But you set the GICv3 redist/dist  addresses (or the ITS address, for
>>> that matter) before calling CTRL_INIT, right?
>> No similarly the GICv3 base addresses are provided to the kernel very
>> late (at the same place as 3, on machine init notifier). for VGIC the
>> CTRL_INIT is called at device creation 1), after freezing the number of
>> SPIs. if you look at the vgic_init function that is called upon
>> CTRL_INIT, it never uses base addresses. It just uses dimensionning
>> parameters such as the number of SPIs. for VGIC, the map_resources is
>> called implicitly on first vcpu run, ie. after 3). For ITS with current
>> patch this needs to be done on a machine_init_done_notifier after 3).
> 
> I see. Doing the map_resources on the first vcpu run sounds like an
> anachronism from the implicit GICv2 init days to me, which we just
> couldn't change for compatibility reasons and had no real reason to
> change for GICv3.
> 
>>  So are you concerned that
>>> the kernel "maps" the region before QEMU connects the memory region? Is
>>> that really a problem? This "map_resources" equivalent for the ITS just
>>> creates a kvm_io_bus mapping, which would never fire without either a
>>> guest running (which we clearly don't at this point) or userland
>>> explicitly requesting access (which would require QEMU to have done the
>>> mapping?).
>>>
>>> Is that about right or do I miss something again?
>>> Sorry for my ignorance on the QEMU internals in that matter ;-)
>>>
>>>> I am definitively not opposed to call the ITS init function explicitly
>>>> from user side but this must happen after the KVM_SET_DEVICE_ATTR. So
>>>> another machine_init_done function must be registered and the notifier
>>>> must be called AFTER the notifier that calls the KVM_SET_DEVICE_ATTR
>>>> ioctl. However you cannot easily master the machine init done notifier
>>>> registration order because in target-arm/kvm.c there is a single
>>>> notifier that calls all the KVM_SET_DEVICE_ATTR for all the KVM devices
>>>> (kvm_arm_machine_init_done). So it is not possible to register the ITS
>>>> init notifier before the "kvm_arm_set_device_addr" notifier.
>>>>
>>>> So my understanding is one must do things outside of the existing framework?
>>>
>>> While I am certainly not interested in making QEMU's (or the QEMU patch
>>> author's) life harder than needed, I am wondering if we should really
>>> model the userland/kernel interface according to QEMU's current
>>> framework design.
>>> Is the current approach a leftover of the initial vGICv2 code, that was
>>> just slightly adjusted to support GICv3?
>>
>> Well I don't think we need to devise the kernel API according to the
>> QEMU framework. However from a kernel pov I wanted to shed the light on
>> the difference between vgic_init and its_init which are not homogeneous
>> in terms of actions and map_resources which in one case is called
>> implicitly and in the other case must be called explicitly, with impact
>> on qemu framework.
> 
> OK, got it. I see that the documentation doesn't demand any setup
> activities to be finished upon INIT, though I understood it that way.
> I will document the demand for the ITS INIT call.
> 
> Let me send out another revision, but still with the current sequence.
> If the workaround you mentioned is too involved, I might still change
> the kernel.
> 
> Cheers,
> Andre.
> 
>>>>> we don't want to extend the GICv2 mistake of "no explicit complete
>>>>> init" to anything else, because then you end up with ad-hoc
>>>>> "do this when we first run the vCPU; oh, but also do it if
>>>>> userspace tries to write a register content; and also if...".
>>>>>
>>>>> thanks
>>>>> -- PMM
>>>>>
>>>>
>>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Peter Maydell July 5, 2016, 9:55 a.m. UTC | #11
On 5 July 2016 at 09:59, Andre Przywara <andre.przywara@arm.com> wrote:
> Ah, OK, so you _do_ the address setup _after_ the INIT.
> My understanding of the KVM API was that this isn't allowed, as with the
> INIT _everything_ should have been setup. kvmtool works this way.
>
> So we obviously can't change this for GICv3, but I wonder if we should
> make this explicit with the ITS:
> 1) Create the device
> 2) setup _all_ parameters (address, number of ...)
> 3) call INIT, any setup calls from here on are denied
>
> That sounds like the proper setup sequence to me.

So we shouldn't necessarily let QEMU drive the kernel API
design here, but:

(1) consistency with GICv2-with-explicit-init and GICv3
seems worth preserving
(2) there is a coherent model here which maps onto QEMU's
device model:

 (i) create device
 (ii) configure device
 (iii) explicitly finish init of device (in QEMU's
   object model this is called "realize")
 (iv) wire completed device into the system, by
   plugging in its IRQs, mapping its memory regions
   into the address space, etc
 (v) at this point it is ok to either read/write registers
   or to start vcpus

Which I think is why "set register base addresses"
looks like an odd-one-out from the kernel's point of view.

So I think for the ITS we should continue to do the same
thing we do for GICv2 and v3 (but not the GICv2
"explicit completion of init is optional" part).

I'd forgotten exactly how this worked; sorry for any
confusion in my earlier email.

thanks
-- PMM
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index 59541d4..89182f8 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -4,16 +4,22 @@  ARM Virtual Generic Interrupt Controller (VGIC)
 Device types supported:
   KVM_DEV_TYPE_ARM_VGIC_V2     ARM Generic Interrupt Controller v2.0
   KVM_DEV_TYPE_ARM_VGIC_V3     ARM Generic Interrupt Controller v3.0
+  KVM_DEV_TYPE_ARM_VGIC_ITS    ARM Interrupt Translation Service Controller
 
-Only one VGIC instance may be instantiated through either this API or the
-legacy KVM_CREATE_IRQCHIP api.  The created VGIC will act as the VM interrupt
-controller, requiring emulated user-space devices to inject interrupts to the
-VGIC instead of directly to CPUs.
+Only one VGIC instance of the V2/V3 types above may be instantiated through
+either this API or the legacy KVM_CREATE_IRQCHIP api.  The created VGIC will
+act as the VM interrupt controller, requiring emulated user-space devices to
+inject interrupts to the VGIC instead of directly to CPUs.
 
 Creating a guest GICv3 device requires a host GICv3 as well.
 GICv3 implementations with hardware compatibility support allow a guest GICv2
 as well.
 
+Creating a virtual ITS controller requires a host GICv3 (but does not depend
+on having physical ITS controllers).
+There can be multiple ITS controllers per guest, each of them has to have
+a separate, non-overlapping MMIO region.
+
 Groups:
   KVM_DEV_ARM_VGIC_GRP_ADDR
   Attributes:
@@ -39,6 +45,13 @@  Groups:
       Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
       This address needs to be 64K aligned.
 
+    KVM_VGIC_ITS_ADDR_TYPE (rw, 64-bit)
+      Base address in the guest physical address space of the GICv3 ITS
+      control register frame. The ITS allows MSI(-X) interrupts to be
+      injected into guests. This extension is optional. If the kernel
+      does not support the ITS, the call returns -ENODEV.
+      Only valid for KVM_DEV_TYPE_ARM_VGIC_ITS.
+      This address needs to be 64K aligned and the region covers 128K.
 
   KVM_DEV_ARM_VGIC_GRP_DIST_REGS
   Attributes:
@@ -109,8 +122,8 @@  Groups:
   KVM_DEV_ARM_VGIC_GRP_CTRL
   Attributes:
     KVM_DEV_ARM_VGIC_CTRL_INIT
-      request the initialization of the VGIC, no additional parameter in
-      kvm_device_attr.addr.
+      request the initialization of the VGIC or ITS, no additional parameter
+      in kvm_device_attr.addr.
   Errors:
     -ENXIO: VGIC not properly configured as required prior to calling
      this attribute
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a268c85..f4a953e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -20,6 +20,7 @@ 
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
+#include <linux/list.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index f209ea1..f8c257b 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -87,9 +87,11 @@  struct kvm_regs {
 /* Supported VGICv3 address types  */
 #define KVM_VGIC_V3_ADDR_TYPE_DIST	2
 #define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
+#define KVM_VGIC_ITS_ADDR_TYPE		4
 
 #define KVM_VGIC_V3_DIST_SIZE		SZ_64K
 #define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
+#define KVM_VGIC_V3_ITS_SIZE		SZ_64K
 
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h
index 949a0e1..8cec203 100644
--- a/include/kvm/vgic/vgic.h
+++ b/include/kvm/vgic/vgic.h
@@ -159,6 +159,7 @@  struct vgic_dist {
 
 	struct vgic_io_device	dist_iodev;
 
+	bool			has_its;
 	/*
 	 * Contains the address of the LPI configuration table.
 	 * Since we report GICR_TYPER.CommonLPIAff as 0b00, we can share
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7de96f5..d8c4c32 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1077,6 +1077,8 @@  enum kvm_device_type {
 #define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
 	KVM_DEV_TYPE_ARM_VGIC_V3,
 #define KVM_DEV_TYPE_ARM_VGIC_V3	KVM_DEV_TYPE_ARM_VGIC_V3
+	KVM_DEV_TYPE_ARM_VGIC_ITS,
+#define KVM_DEV_TYPE_ARM_VGIC_ITS	KVM_DEV_TYPE_ARM_VGIC_ITS
 	KVM_DEV_TYPE_MAX,
 };
 
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index ab8d244..62d7484 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -21,6 +21,7 @@ 
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/interrupt.h>
+#include <linux/uaccess.h>
 
 #include <linux/irqchip/arm-gic-v3.h>
 
@@ -80,7 +81,7 @@  static struct vgic_register_region its_registers[] = {
 		VGIC_ACCESS_32bit),
 };
 
-int vits_register(struct kvm *kvm, struct vgic_its *its)
+static int vits_register(struct kvm *kvm, struct vgic_its *its)
 {
 	struct vgic_io_device *iodev = &its->iodev;
 	int ret;
@@ -98,3 +99,127 @@  int vits_register(struct kvm *kvm, struct vgic_its *its)
 
 	return ret;
 }
+
+static int vgic_its_create(struct kvm_device *dev, u32 type)
+{
+	struct vgic_its *its;
+
+	if (type != KVM_DEV_TYPE_ARM_VGIC_ITS)
+		return -ENODEV;
+
+	its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL);
+	if (!its)
+		return -ENOMEM;
+
+	its->vgic_its_base = VGIC_ADDR_UNDEF;
+
+	dev->kvm->arch.vgic.has_its = true;
+	its->enabled = false;
+
+	dev->private = its;
+
+	return 0;
+}
+
+static void vgic_its_destroy(struct kvm_device *kvm_dev)
+{
+	struct vgic_its *its = kvm_dev->private;
+
+	kfree(its);
+}
+
+static int vgic_its_has_attr(struct kvm_device *dev,
+			     struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR:
+		switch (attr->attr) {
+		case KVM_VGIC_ITS_ADDR_TYPE:
+			return 0;
+		}
+		break;
+	case KVM_DEV_ARM_VGIC_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_ARM_VGIC_CTRL_INIT:
+			return 0;
+		}
+		break;
+	}
+	return -ENXIO;
+}
+
+static int vgic_its_set_attr(struct kvm_device *dev,
+			     struct kvm_device_attr *attr)
+{
+	struct vgic_its *its = dev->private;
+	int ret;
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
+		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+		unsigned long type = (unsigned long)attr->attr;
+		u64 addr;
+
+		if (type != KVM_VGIC_ITS_ADDR_TYPE)
+			return -ENODEV;
+
+		if (copy_from_user(&addr, uaddr, sizeof(addr)))
+			return -EFAULT;
+
+		ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base,
+					addr, SZ_64K);
+		if (ret)
+			return ret;
+
+		its->vgic_its_base = addr;
+
+		return 0;
+	}
+	case KVM_DEV_ARM_VGIC_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_ARM_VGIC_CTRL_INIT:
+			return vits_register(dev->kvm, its);
+		}
+		break;
+	}
+	return -ENXIO;
+}
+
+static int vgic_its_get_attr(struct kvm_device *dev,
+			     struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
+		struct vgic_its *its = dev->private;
+		u64 addr = its->vgic_its_base;
+		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+		unsigned long type = (unsigned long)attr->attr;
+
+		if (type != KVM_VGIC_ITS_ADDR_TYPE)
+			return -ENODEV;
+
+		if (copy_to_user(uaddr, &addr, sizeof(addr)))
+			return -EFAULT;
+		break;
+	default:
+		return -ENXIO;
+	}
+	}
+
+	return 0;
+}
+
+struct kvm_device_ops kvm_arm_vgic_its_ops = {
+	.name = "kvm-arm-vgic-its",
+	.create = vgic_its_create,
+	.destroy = vgic_its_destroy,
+	.set_attr = vgic_its_set_attr,
+	.get_attr = vgic_its_get_attr,
+	.has_attr = vgic_its_has_attr,
+};
+
+int kvm_vgic_register_its_device(void)
+{
+	return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
+				       KVM_DEV_TYPE_ARM_VGIC_ITS);
+}
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
index 2f24f13..1813f93 100644
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -21,8 +21,8 @@ 
 
 /* common helpers */
 
-static int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
-			     phys_addr_t addr, phys_addr_t alignment)
+int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
+		      phys_addr_t addr, phys_addr_t alignment)
 {
 	if (addr & ~KVM_PHYS_MASK)
 		return -E2BIG;
@@ -223,6 +223,9 @@  int kvm_register_vgic_device(unsigned long type)
 	case KVM_DEV_TYPE_ARM_VGIC_V3:
 		ret = kvm_register_device_ops(&kvm_arm_vgic_v3_ops,
 					      KVM_DEV_TYPE_ARM_VGIC_V3);
+		if (ret)
+			break;
+		ret = kvm_vgic_register_its_device();
 		break;
 #endif
 	}
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 5fcc33a..9bcffa6 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -49,7 +49,7 @@  bool vgic_has_its(struct kvm *kvm)
 	if (dist->vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3)
 		return false;
 
-	return false;
+	return dist->has_its;
 }
 
 static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 31807c1..9dc7207 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -42,6 +42,9 @@  void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
 bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq);
 void vgic_kick_vcpus(struct kvm *kvm);
 
+int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
+		      phys_addr_t addr, phys_addr_t alignment);
+
 void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu);
 void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
 void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
@@ -73,6 +76,7 @@  int vgic_v3_probe(const struct gic_kvm_info *info);
 int vgic_v3_map_resources(struct kvm *kvm);
 int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
 bool vgic_has_its(struct kvm *kvm);
+int kvm_vgic_register_its_device(void);
 #else
 static inline void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu)
 {
@@ -130,6 +134,10 @@  static inline bool vgic_has_its(struct kvm *kvm)
 	return false;
 }
 
+static inline int kvm_vgic_register_its_device(void)
+{
+	return -ENODEV;
+}
 #endif
 
 int kvm_register_vgic_device(unsigned long type);