diff mbox

[v7,19/19] KVM: ARM64: Add a new kvm ARM PMU device

Message ID 1450169379-12336-20-git-send-email-zhaoshenglong@huawei.com (mailing list archive)
State New, archived
Headers show

Commit Message

Shannon Zhao Dec. 15, 2015, 8:49 a.m. UTC
From: Shannon Zhao <shannon.zhao@linaro.org>

Add a new kvm device type KVM_DEV_TYPE_ARM_PMU_V3 for ARM PMU. Implement
the kvm_device_ops for it.

Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
---
 Documentation/virtual/kvm/devices/arm-pmu.txt |  16 ++++
 arch/arm64/include/uapi/asm/kvm.h             |   3 +
 include/linux/kvm_host.h                      |   1 +
 include/uapi/linux/kvm.h                      |   2 +
 virt/kvm/arm/pmu.c                            | 115 ++++++++++++++++++++++++++
 virt/kvm/kvm_main.c                           |   4 +
 6 files changed, 141 insertions(+)
 create mode 100644 Documentation/virtual/kvm/devices/arm-pmu.txt

Comments

Marc Zyngier Dec. 15, 2015, 3:33 p.m. UTC | #1
On 15/12/15 08:49, Shannon Zhao wrote:
> From: Shannon Zhao <shannon.zhao@linaro.org>
> 
> Add a new kvm device type KVM_DEV_TYPE_ARM_PMU_V3 for ARM PMU. Implement
> the kvm_device_ops for it.
> 
> Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
> ---
>  Documentation/virtual/kvm/devices/arm-pmu.txt |  16 ++++
>  arch/arm64/include/uapi/asm/kvm.h             |   3 +
>  include/linux/kvm_host.h                      |   1 +
>  include/uapi/linux/kvm.h                      |   2 +
>  virt/kvm/arm/pmu.c                            | 115 ++++++++++++++++++++++++++
>  virt/kvm/kvm_main.c                           |   4 +
>  6 files changed, 141 insertions(+)
>  create mode 100644 Documentation/virtual/kvm/devices/arm-pmu.txt
> 
> diff --git a/Documentation/virtual/kvm/devices/arm-pmu.txt b/Documentation/virtual/kvm/devices/arm-pmu.txt
> new file mode 100644
> index 0000000..5121f1f
> --- /dev/null
> +++ b/Documentation/virtual/kvm/devices/arm-pmu.txt
> @@ -0,0 +1,16 @@
> +ARM Virtual Performance Monitor Unit (vPMU)
> +===========================================
> +
> +Device types supported:
> +  KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
> +
> +Instantiate one PMU instance for per VCPU through this API.
> +
> +Groups:
> +  KVM_DEV_ARM_PMU_GRP_IRQ
> +  Attributes:
> +    A value describing the interrupt number of PMU overflow interrupt. This
> +    interrupt should be a PPI.
> +
> +  Errors:
> +    -EINVAL: Value set is out of the expected range (from 16 to 31)
> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> index 2d4ca4b..568afa2 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -204,6 +204,9 @@ struct kvm_arch_memory_slot {
>  #define KVM_DEV_ARM_VGIC_GRP_CTRL	4
>  #define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
>  
> +/* Device Control API: ARM PMU */
> +#define KVM_DEV_ARM_PMU_GRP_IRQ		0
> +
>  /* KVM_IRQ_LINE irq field index values */
>  #define KVM_ARM_IRQ_TYPE_SHIFT		24
>  #define KVM_ARM_IRQ_TYPE_MASK		0xff
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index c923350..608dea6 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1161,6 +1161,7 @@ extern struct kvm_device_ops kvm_mpic_ops;
>  extern struct kvm_device_ops kvm_xics_ops;
>  extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
>  extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
> +extern struct kvm_device_ops kvm_arm_pmu_ops;
>  
>  #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
>  
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 03f3618..4ba6fdd 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1032,6 +1032,8 @@ enum kvm_device_type {
>  #define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
>  	KVM_DEV_TYPE_ARM_VGIC_V3,
>  #define KVM_DEV_TYPE_ARM_VGIC_V3	KVM_DEV_TYPE_ARM_VGIC_V3
> +	KVM_DEV_TYPE_ARM_PMU_V3,
> +#define	KVM_DEV_TYPE_ARM_PMU_V3		KVM_DEV_TYPE_ARM_PMU_V3
>  	KVM_DEV_TYPE_MAX,
>  };
>  
> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> index d113ee4..1965d0d 100644
> --- a/virt/kvm/arm/pmu.c
> +++ b/virt/kvm/arm/pmu.c
> @@ -19,6 +19,7 @@
>  #include <linux/kvm.h>
>  #include <linux/kvm_host.h>
>  #include <linux/perf_event.h>
> +#include <linux/uaccess.h>
>  #include <asm/kvm_emulate.h>
>  #include <kvm/arm_pmu.h>
>  #include <kvm/arm_vgic.h>
> @@ -357,3 +358,117 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
>  
>  	pmc->perf_event = event;
>  }
> +
> +static inline bool kvm_arm_pmu_initialized(struct kvm_vcpu *vcpu)
> +{
> +	return vcpu->arch.pmu.irq_num != -1;
> +}
> +
> +static int kvm_arm_pmu_irq_access(struct kvm *kvm, int *irq, bool is_set)
> +{
> +	int j;
> +	struct kvm_vcpu *vcpu;
> +
> +	kvm_for_each_vcpu(j, vcpu, kvm) {
> +		struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +
> +		if (!is_set) {
> +			if (!kvm_arm_pmu_initialized(vcpu))
> +				return -EBUSY;

Returning -EBUSY is a bit odd. Maybe -EINVAL? But this seems weird
anyway. Actually, why would you return an error in this case?

> +
> +			*irq = pmu->irq_num;
> +			break;
> +		}
> +
> +		if (kvm_arm_pmu_initialized(vcpu))
> +			return -EBUSY;
> +
> +		kvm_debug("Set kvm ARM PMU irq: %d\n", *irq);
> +		pmu->irq_num = *irq;
> +	}
> +
> +	return 0;
> +}
> +
> +static int kvm_arm_pmu_create(struct kvm_device *dev, u32 type)
> +{
> +	int i;
> +	struct kvm_vcpu *vcpu;
> +	struct kvm *kvm = dev->kvm;
> +
> +	kvm_for_each_vcpu(i, vcpu, kvm) {
> +		struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +
> +		memset(pmu, 0, sizeof(*pmu));
> +		kvm_pmu_vcpu_reset(vcpu);
> +		pmu->irq_num = -1;
> +	}
> +
> +	return 0;
> +}
> +
> +static void kvm_arm_pmu_destroy(struct kvm_device *dev)
> +{
> +	kfree(dev);
> +}
> +
> +static int kvm_arm_pmu_set_attr(struct kvm_device *dev,
> +				struct kvm_device_attr *attr)
> +{
> +	switch (attr->group) {
> +	case KVM_DEV_ARM_PMU_GRP_IRQ: {
> +		int __user *uaddr = (int __user *)(long)attr->addr;
> +		int reg;
> +
> +		if (get_user(reg, uaddr))
> +			return -EFAULT;
> +
> +		if (reg < VGIC_NR_SGIS || reg >= VGIC_NR_PRIVATE_IRQS)
> +			return -EINVAL;
> +
> +		return kvm_arm_pmu_irq_access(dev->kvm, &reg, true);
> +	}
> +	}
> +
> +	return -ENXIO;
> +}
> +
> +static int kvm_arm_pmu_get_attr(struct kvm_device *dev,
> +				struct kvm_device_attr *attr)
> +{
> +	int ret;
> +
> +	switch (attr->group) {
> +	case KVM_DEV_ARM_PMU_GRP_IRQ: {
> +		int __user *uaddr = (int __user *)(long)attr->addr;
> +		int reg = -1;
> +
> +		ret = kvm_arm_pmu_irq_access(dev->kvm, &reg, false);
> +		if (ret)
> +			return ret;
> +		return put_user(reg, uaddr);
> +	}
> +	}
> +
> +	return -ENXIO;
> +}
> +
> +static int kvm_arm_pmu_has_attr(struct kvm_device *dev,
> +				struct kvm_device_attr *attr)
> +{
> +	switch (attr->group) {
> +	case KVM_DEV_ARM_PMU_GRP_IRQ:
> +		return 0;
> +	}
> +
> +	return -ENXIO;
> +}
> +
> +struct kvm_device_ops kvm_arm_pmu_ops = {
> +	.name = "kvm-arm-pmu",
> +	.create = kvm_arm_pmu_create,
> +	.destroy = kvm_arm_pmu_destroy,
> +	.set_attr = kvm_arm_pmu_set_attr,
> +	.get_attr = kvm_arm_pmu_get_attr,
> +	.has_attr = kvm_arm_pmu_has_attr,
> +};
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 484079e..81a42cc 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2647,6 +2647,10 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
>  #ifdef CONFIG_KVM_XICS
>  	[KVM_DEV_TYPE_XICS]		= &kvm_xics_ops,
>  #endif
> +
> +#ifdef CONFIG_KVM_ARM_PMU
> +	[KVM_DEV_TYPE_ARM_PMU_V3]	= &kvm_arm_pmu_ops,
> +#endif
>  };
>  
>  int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
> 

Thanks,

	M.
Shannon Zhao Dec. 15, 2015, 3:50 p.m. UTC | #2
On 2015/12/15 23:33, Marc Zyngier wrote:
> On 15/12/15 08:49, Shannon Zhao wrote:
>> >From: Shannon Zhao<shannon.zhao@linaro.org>
>> >
>> >Add a new kvm device type KVM_DEV_TYPE_ARM_PMU_V3 for ARM PMU. Implement
>> >the kvm_device_ops for it.
>> >
>> >Signed-off-by: Shannon Zhao<shannon.zhao@linaro.org>
>> >---
>> >  Documentation/virtual/kvm/devices/arm-pmu.txt |  16 ++++
>> >  arch/arm64/include/uapi/asm/kvm.h             |   3 +
>> >  include/linux/kvm_host.h                      |   1 +
>> >  include/uapi/linux/kvm.h                      |   2 +
>> >  virt/kvm/arm/pmu.c                            | 115 ++++++++++++++++++++++++++
>> >  virt/kvm/kvm_main.c                           |   4 +
>> >  6 files changed, 141 insertions(+)
>> >  create mode 100644 Documentation/virtual/kvm/devices/arm-pmu.txt
>> >
>> >diff --git a/Documentation/virtual/kvm/devices/arm-pmu.txt b/Documentation/virtual/kvm/devices/arm-pmu.txt
>> >new file mode 100644
>> >index 0000000..5121f1f
>> >--- /dev/null
>> >+++ b/Documentation/virtual/kvm/devices/arm-pmu.txt
>> >@@ -0,0 +1,16 @@
>> >+ARM Virtual Performance Monitor Unit (vPMU)
>> >+===========================================
>> >+
>> >+Device types supported:
>> >+  KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
>> >+
>> >+Instantiate one PMU instance for per VCPU through this API.
>> >+
>> >+Groups:
>> >+  KVM_DEV_ARM_PMU_GRP_IRQ
>> >+  Attributes:
>> >+    A value describing the interrupt number of PMU overflow interrupt. This
>> >+    interrupt should be a PPI.
>> >+
>> >+  Errors:
>> >+    -EINVAL: Value set is out of the expected range (from 16 to 31)
>> >diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
>> >index 2d4ca4b..568afa2 100644
>> >--- a/arch/arm64/include/uapi/asm/kvm.h
>> >+++ b/arch/arm64/include/uapi/asm/kvm.h
>> >@@ -204,6 +204,9 @@ struct kvm_arch_memory_slot {
>> >  #define KVM_DEV_ARM_VGIC_GRP_CTRL	4
>> >  #define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
>> >
>> >+/* Device Control API: ARM PMU */
>> >+#define KVM_DEV_ARM_PMU_GRP_IRQ		0
>> >+
>> >  /* KVM_IRQ_LINE irq field index values */
>> >  #define KVM_ARM_IRQ_TYPE_SHIFT		24
>> >  #define KVM_ARM_IRQ_TYPE_MASK		0xff
>> >diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> >index c923350..608dea6 100644
>> >--- a/include/linux/kvm_host.h
>> >+++ b/include/linux/kvm_host.h
>> >@@ -1161,6 +1161,7 @@ extern struct kvm_device_ops kvm_mpic_ops;
>> >  extern struct kvm_device_ops kvm_xics_ops;
>> >  extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
>> >  extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
>> >+extern struct kvm_device_ops kvm_arm_pmu_ops;
>> >
>> >  #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
>> >
>> >diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> >index 03f3618..4ba6fdd 100644
>> >--- a/include/uapi/linux/kvm.h
>> >+++ b/include/uapi/linux/kvm.h
>> >@@ -1032,6 +1032,8 @@ enum kvm_device_type {
>> >  #define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
>> >  	KVM_DEV_TYPE_ARM_VGIC_V3,
>> >  #define KVM_DEV_TYPE_ARM_VGIC_V3	KVM_DEV_TYPE_ARM_VGIC_V3
>> >+	KVM_DEV_TYPE_ARM_PMU_V3,
>> >+#define	KVM_DEV_TYPE_ARM_PMU_V3		KVM_DEV_TYPE_ARM_PMU_V3
>> >  	KVM_DEV_TYPE_MAX,
>> >  };
>> >
>> >diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
>> >index d113ee4..1965d0d 100644
>> >--- a/virt/kvm/arm/pmu.c
>> >+++ b/virt/kvm/arm/pmu.c
>> >@@ -19,6 +19,7 @@
>> >  #include <linux/kvm.h>
>> >  #include <linux/kvm_host.h>
>> >  #include <linux/perf_event.h>
>> >+#include <linux/uaccess.h>
>> >  #include <asm/kvm_emulate.h>
>> >  #include <kvm/arm_pmu.h>
>> >  #include <kvm/arm_vgic.h>
>> >@@ -357,3 +358,117 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
>> >
>> >  	pmc->perf_event = event;
>> >  }
>> >+
>> >+static inline bool kvm_arm_pmu_initialized(struct kvm_vcpu *vcpu)
>> >+{
>> >+	return vcpu->arch.pmu.irq_num != -1;
>> >+}
>> >+
>> >+static int kvm_arm_pmu_irq_access(struct kvm *kvm, int *irq, bool is_set)
>> >+{
>> >+	int j;
>> >+	struct kvm_vcpu *vcpu;
>> >+
>> >+	kvm_for_each_vcpu(j, vcpu, kvm) {
>> >+		struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> >+
>> >+		if (!is_set) {
>> >+			if (!kvm_arm_pmu_initialized(vcpu))
>> >+				return -EBUSY;
> Returning -EBUSY is a bit odd. Maybe -EINVAL? But this seems weird
> anyway. Actually, why would you return an error in this case?
>
While this is a unexpected operation from user space and it's already 
initialized and working, so I think it should return an error to user 
and tell user that it's already initialized and working (this should 
mean "busy" ?).
Marc Zyngier Dec. 15, 2015, 3:59 p.m. UTC | #3
On 15/12/15 15:50, Shannon Zhao wrote:
> 
> 
> On 2015/12/15 23:33, Marc Zyngier wrote:
>> On 15/12/15 08:49, Shannon Zhao wrote:
>>>> From: Shannon Zhao<shannon.zhao@linaro.org>
>>>>
>>>> Add a new kvm device type KVM_DEV_TYPE_ARM_PMU_V3 for ARM PMU. Implement
>>>> the kvm_device_ops for it.
>>>>
>>>> Signed-off-by: Shannon Zhao<shannon.zhao@linaro.org>
>>>> ---
>>>>  Documentation/virtual/kvm/devices/arm-pmu.txt |  16 ++++
>>>>  arch/arm64/include/uapi/asm/kvm.h             |   3 +
>>>>  include/linux/kvm_host.h                      |   1 +
>>>>  include/uapi/linux/kvm.h                      |   2 +
>>>>  virt/kvm/arm/pmu.c                            | 115 ++++++++++++++++++++++++++
>>>>  virt/kvm/kvm_main.c                           |   4 +
>>>>  6 files changed, 141 insertions(+)
>>>>  create mode 100644 Documentation/virtual/kvm/devices/arm-pmu.txt
>>>>
>>>> diff --git a/Documentation/virtual/kvm/devices/arm-pmu.txt b/Documentation/virtual/kvm/devices/arm-pmu.txt
>>>> new file mode 100644
>>>> index 0000000..5121f1f
>>>> --- /dev/null
>>>> +++ b/Documentation/virtual/kvm/devices/arm-pmu.txt
>>>> @@ -0,0 +1,16 @@
>>>> +ARM Virtual Performance Monitor Unit (vPMU)
>>>> +===========================================
>>>> +
>>>> +Device types supported:
>>>> +  KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
>>>> +
>>>> +Instantiate one PMU instance for per VCPU through this API.
>>>> +
>>>> +Groups:
>>>> +  KVM_DEV_ARM_PMU_GRP_IRQ
>>>> +  Attributes:
>>>> +    A value describing the interrupt number of PMU overflow interrupt. This
>>>> +    interrupt should be a PPI.
>>>> +
>>>> +  Errors:
>>>> +    -EINVAL: Value set is out of the expected range (from 16 to 31)
>>>> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
>>>> index 2d4ca4b..568afa2 100644
>>>> --- a/arch/arm64/include/uapi/asm/kvm.h
>>>> +++ b/arch/arm64/include/uapi/asm/kvm.h
>>>> @@ -204,6 +204,9 @@ struct kvm_arch_memory_slot {
>>>>  #define KVM_DEV_ARM_VGIC_GRP_CTRL	4
>>>>  #define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
>>>>
>>>> +/* Device Control API: ARM PMU */
>>>> +#define KVM_DEV_ARM_PMU_GRP_IRQ		0
>>>> +
>>>>  /* KVM_IRQ_LINE irq field index values */
>>>>  #define KVM_ARM_IRQ_TYPE_SHIFT		24
>>>>  #define KVM_ARM_IRQ_TYPE_MASK		0xff
>>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>>> index c923350..608dea6 100644
>>>> --- a/include/linux/kvm_host.h
>>>> +++ b/include/linux/kvm_host.h
>>>> @@ -1161,6 +1161,7 @@ extern struct kvm_device_ops kvm_mpic_ops;
>>>>  extern struct kvm_device_ops kvm_xics_ops;
>>>>  extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
>>>>  extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
>>>> +extern struct kvm_device_ops kvm_arm_pmu_ops;
>>>>
>>>>  #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
>>>>
>>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>>> index 03f3618..4ba6fdd 100644
>>>> --- a/include/uapi/linux/kvm.h
>>>> +++ b/include/uapi/linux/kvm.h
>>>> @@ -1032,6 +1032,8 @@ enum kvm_device_type {
>>>>  #define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
>>>>  	KVM_DEV_TYPE_ARM_VGIC_V3,
>>>>  #define KVM_DEV_TYPE_ARM_VGIC_V3	KVM_DEV_TYPE_ARM_VGIC_V3
>>>> +	KVM_DEV_TYPE_ARM_PMU_V3,
>>>> +#define	KVM_DEV_TYPE_ARM_PMU_V3		KVM_DEV_TYPE_ARM_PMU_V3
>>>>  	KVM_DEV_TYPE_MAX,
>>>>  };
>>>>
>>>> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
>>>> index d113ee4..1965d0d 100644
>>>> --- a/virt/kvm/arm/pmu.c
>>>> +++ b/virt/kvm/arm/pmu.c
>>>> @@ -19,6 +19,7 @@
>>>>  #include <linux/kvm.h>
>>>>  #include <linux/kvm_host.h>
>>>>  #include <linux/perf_event.h>
>>>> +#include <linux/uaccess.h>
>>>>  #include <asm/kvm_emulate.h>
>>>>  #include <kvm/arm_pmu.h>
>>>>  #include <kvm/arm_vgic.h>
>>>> @@ -357,3 +358,117 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
>>>>
>>>>  	pmc->perf_event = event;
>>>>  }
>>>> +
>>>> +static inline bool kvm_arm_pmu_initialized(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +	return vcpu->arch.pmu.irq_num != -1;
>>>> +}
>>>> +
>>>> +static int kvm_arm_pmu_irq_access(struct kvm *kvm, int *irq, bool is_set)
>>>> +{
>>>> +	int j;
>>>> +	struct kvm_vcpu *vcpu;
>>>> +
>>>> +	kvm_for_each_vcpu(j, vcpu, kvm) {
>>>> +		struct kvm_pmu *pmu = &vcpu->arch.pmu;
>>>> +
>>>> +		if (!is_set) {
>>>> +			if (!kvm_arm_pmu_initialized(vcpu))
>>>> +				return -EBUSY;
>> Returning -EBUSY is a bit odd. Maybe -EINVAL? But this seems weird
>> anyway. Actually, why would you return an error in this case?
>>
> While this is a unexpected operation from user space and it's already 
> initialized and working, so I think it should return an error to user 
> and tell user that it's already initialized and working (this should 
> mean "busy" ?).

But in this case, you're returning an error if it is *not* initialized.
I understand that in that case you cannot return an interrupt number (-1
would be weird), but returning -EBUSY feels even more weird.

I'd settle for -ENOXIO, or something similar. Anyone having a better idea?

Thanks,

	M.
Andrew Jones Dec. 15, 2015, 5:50 p.m. UTC | #4
On Tue, Dec 15, 2015 at 03:59:31PM +0000, Marc Zyngier wrote:
> On 15/12/15 15:50, Shannon Zhao wrote:
> > 
> > 
> > On 2015/12/15 23:33, Marc Zyngier wrote:
> >> On 15/12/15 08:49, Shannon Zhao wrote:
> >>>> From: Shannon Zhao<shannon.zhao@linaro.org>
> >>>>
> >>>> Add a new kvm device type KVM_DEV_TYPE_ARM_PMU_V3 for ARM PMU. Implement
> >>>> the kvm_device_ops for it.
> >>>>
> >>>> Signed-off-by: Shannon Zhao<shannon.zhao@linaro.org>
> >>>> ---
> >>>>  Documentation/virtual/kvm/devices/arm-pmu.txt |  16 ++++
> >>>>  arch/arm64/include/uapi/asm/kvm.h             |   3 +
> >>>>  include/linux/kvm_host.h                      |   1 +
> >>>>  include/uapi/linux/kvm.h                      |   2 +
> >>>>  virt/kvm/arm/pmu.c                            | 115 ++++++++++++++++++++++++++
> >>>>  virt/kvm/kvm_main.c                           |   4 +
> >>>>  6 files changed, 141 insertions(+)
> >>>>  create mode 100644 Documentation/virtual/kvm/devices/arm-pmu.txt
> >>>>
> >>>> diff --git a/Documentation/virtual/kvm/devices/arm-pmu.txt b/Documentation/virtual/kvm/devices/arm-pmu.txt
> >>>> new file mode 100644
> >>>> index 0000000..5121f1f
> >>>> --- /dev/null
> >>>> +++ b/Documentation/virtual/kvm/devices/arm-pmu.txt
> >>>> @@ -0,0 +1,16 @@
> >>>> +ARM Virtual Performance Monitor Unit (vPMU)
> >>>> +===========================================
> >>>> +
> >>>> +Device types supported:
> >>>> +  KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
> >>>> +
> >>>> +Instantiate one PMU instance for per VCPU through this API.
> >>>> +
> >>>> +Groups:
> >>>> +  KVM_DEV_ARM_PMU_GRP_IRQ
> >>>> +  Attributes:
> >>>> +    A value describing the interrupt number of PMU overflow interrupt. This
> >>>> +    interrupt should be a PPI.
> >>>> +
> >>>> +  Errors:
> >>>> +    -EINVAL: Value set is out of the expected range (from 16 to 31)
> >>>> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> >>>> index 2d4ca4b..568afa2 100644
> >>>> --- a/arch/arm64/include/uapi/asm/kvm.h
> >>>> +++ b/arch/arm64/include/uapi/asm/kvm.h
> >>>> @@ -204,6 +204,9 @@ struct kvm_arch_memory_slot {
> >>>>  #define KVM_DEV_ARM_VGIC_GRP_CTRL	4
> >>>>  #define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
> >>>>
> >>>> +/* Device Control API: ARM PMU */
> >>>> +#define KVM_DEV_ARM_PMU_GRP_IRQ		0
> >>>> +
> >>>>  /* KVM_IRQ_LINE irq field index values */
> >>>>  #define KVM_ARM_IRQ_TYPE_SHIFT		24
> >>>>  #define KVM_ARM_IRQ_TYPE_MASK		0xff
> >>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> >>>> index c923350..608dea6 100644
> >>>> --- a/include/linux/kvm_host.h
> >>>> +++ b/include/linux/kvm_host.h
> >>>> @@ -1161,6 +1161,7 @@ extern struct kvm_device_ops kvm_mpic_ops;
> >>>>  extern struct kvm_device_ops kvm_xics_ops;
> >>>>  extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
> >>>>  extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
> >>>> +extern struct kvm_device_ops kvm_arm_pmu_ops;
> >>>>
> >>>>  #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
> >>>>
> >>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> >>>> index 03f3618..4ba6fdd 100644
> >>>> --- a/include/uapi/linux/kvm.h
> >>>> +++ b/include/uapi/linux/kvm.h
> >>>> @@ -1032,6 +1032,8 @@ enum kvm_device_type {
> >>>>  #define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
> >>>>  	KVM_DEV_TYPE_ARM_VGIC_V3,
> >>>>  #define KVM_DEV_TYPE_ARM_VGIC_V3	KVM_DEV_TYPE_ARM_VGIC_V3
> >>>> +	KVM_DEV_TYPE_ARM_PMU_V3,
> >>>> +#define	KVM_DEV_TYPE_ARM_PMU_V3		KVM_DEV_TYPE_ARM_PMU_V3
> >>>>  	KVM_DEV_TYPE_MAX,
> >>>>  };
> >>>>
> >>>> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> >>>> index d113ee4..1965d0d 100644
> >>>> --- a/virt/kvm/arm/pmu.c
> >>>> +++ b/virt/kvm/arm/pmu.c
> >>>> @@ -19,6 +19,7 @@
> >>>>  #include <linux/kvm.h>
> >>>>  #include <linux/kvm_host.h>
> >>>>  #include <linux/perf_event.h>
> >>>> +#include <linux/uaccess.h>
> >>>>  #include <asm/kvm_emulate.h>
> >>>>  #include <kvm/arm_pmu.h>
> >>>>  #include <kvm/arm_vgic.h>
> >>>> @@ -357,3 +358,117 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
> >>>>
> >>>>  	pmc->perf_event = event;
> >>>>  }
> >>>> +
> >>>> +static inline bool kvm_arm_pmu_initialized(struct kvm_vcpu *vcpu)
> >>>> +{
> >>>> +	return vcpu->arch.pmu.irq_num != -1;
> >>>> +}
> >>>> +
> >>>> +static int kvm_arm_pmu_irq_access(struct kvm *kvm, int *irq, bool is_set)
> >>>> +{
> >>>> +	int j;
> >>>> +	struct kvm_vcpu *vcpu;
> >>>> +
> >>>> +	kvm_for_each_vcpu(j, vcpu, kvm) {
> >>>> +		struct kvm_pmu *pmu = &vcpu->arch.pmu;
> >>>> +
> >>>> +		if (!is_set) {
> >>>> +			if (!kvm_arm_pmu_initialized(vcpu))
> >>>> +				return -EBUSY;
> >> Returning -EBUSY is a bit odd. Maybe -EINVAL? But this seems weird
> >> anyway. Actually, why would you return an error in this case?
> >>
> > While this is a unexpected operation from user space and it's already 
> > initialized and working, so I think it should return an error to user 
> > and tell user that it's already initialized and working (this should 
> > mean "busy" ?).
> 
> But in this case, you're returning an error if it is *not* initialized.
> I understand that in that case you cannot return an interrupt number (-1
> would be weird), but returning -EBUSY feels even more weird.
> 
> I'd settle for -ENOXIO, or something similar. Anyone having a better idea?

Added qemu-devel (Peter). I think for the most part QEMU just stringifies
errnos and outputs them. So, in the absence of a kernel message, having
more unique errnos does help zero-in on a problem. However EINVAL plus a
kernel message is probably even better, and it doesn't start with people
trying to understand why a particular errno (which makes about as much
sense as EINVAL, but seems to be attempting to provide something more...)
was used.

Thanks,
drew
Christoffer Dall Dec. 15, 2015, 8:47 p.m. UTC | #5
On Tue, Dec 15, 2015 at 03:59:31PM +0000, Marc Zyngier wrote:
> On 15/12/15 15:50, Shannon Zhao wrote:
> > 
> > 
> > On 2015/12/15 23:33, Marc Zyngier wrote:
> >> On 15/12/15 08:49, Shannon Zhao wrote:
> >>>> From: Shannon Zhao<shannon.zhao@linaro.org>
> >>>>
> >>>> Add a new kvm device type KVM_DEV_TYPE_ARM_PMU_V3 for ARM PMU. Implement
> >>>> the kvm_device_ops for it.
> >>>>
> >>>> Signed-off-by: Shannon Zhao<shannon.zhao@linaro.org>
> >>>> ---
> >>>>  Documentation/virtual/kvm/devices/arm-pmu.txt |  16 ++++
> >>>>  arch/arm64/include/uapi/asm/kvm.h             |   3 +
> >>>>  include/linux/kvm_host.h                      |   1 +
> >>>>  include/uapi/linux/kvm.h                      |   2 +
> >>>>  virt/kvm/arm/pmu.c                            | 115 ++++++++++++++++++++++++++
> >>>>  virt/kvm/kvm_main.c                           |   4 +
> >>>>  6 files changed, 141 insertions(+)
> >>>>  create mode 100644 Documentation/virtual/kvm/devices/arm-pmu.txt
> >>>>
> >>>> diff --git a/Documentation/virtual/kvm/devices/arm-pmu.txt b/Documentation/virtual/kvm/devices/arm-pmu.txt
> >>>> new file mode 100644
> >>>> index 0000000..5121f1f
> >>>> --- /dev/null
> >>>> +++ b/Documentation/virtual/kvm/devices/arm-pmu.txt
> >>>> @@ -0,0 +1,16 @@
> >>>> +ARM Virtual Performance Monitor Unit (vPMU)
> >>>> +===========================================
> >>>> +
> >>>> +Device types supported:
> >>>> +  KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
> >>>> +
> >>>> +Instantiate one PMU instance for per VCPU through this API.
> >>>> +
> >>>> +Groups:
> >>>> +  KVM_DEV_ARM_PMU_GRP_IRQ
> >>>> +  Attributes:
> >>>> +    A value describing the interrupt number of PMU overflow interrupt. This
> >>>> +    interrupt should be a PPI.
> >>>> +
> >>>> +  Errors:
> >>>> +    -EINVAL: Value set is out of the expected range (from 16 to 31)
> >>>> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> >>>> index 2d4ca4b..568afa2 100644
> >>>> --- a/arch/arm64/include/uapi/asm/kvm.h
> >>>> +++ b/arch/arm64/include/uapi/asm/kvm.h
> >>>> @@ -204,6 +204,9 @@ struct kvm_arch_memory_slot {
> >>>>  #define KVM_DEV_ARM_VGIC_GRP_CTRL	4
> >>>>  #define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
> >>>>
> >>>> +/* Device Control API: ARM PMU */
> >>>> +#define KVM_DEV_ARM_PMU_GRP_IRQ		0
> >>>> +
> >>>>  /* KVM_IRQ_LINE irq field index values */
> >>>>  #define KVM_ARM_IRQ_TYPE_SHIFT		24
> >>>>  #define KVM_ARM_IRQ_TYPE_MASK		0xff
> >>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> >>>> index c923350..608dea6 100644
> >>>> --- a/include/linux/kvm_host.h
> >>>> +++ b/include/linux/kvm_host.h
> >>>> @@ -1161,6 +1161,7 @@ extern struct kvm_device_ops kvm_mpic_ops;
> >>>>  extern struct kvm_device_ops kvm_xics_ops;
> >>>>  extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
> >>>>  extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
> >>>> +extern struct kvm_device_ops kvm_arm_pmu_ops;
> >>>>
> >>>>  #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
> >>>>
> >>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> >>>> index 03f3618..4ba6fdd 100644
> >>>> --- a/include/uapi/linux/kvm.h
> >>>> +++ b/include/uapi/linux/kvm.h
> >>>> @@ -1032,6 +1032,8 @@ enum kvm_device_type {
> >>>>  #define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
> >>>>  	KVM_DEV_TYPE_ARM_VGIC_V3,
> >>>>  #define KVM_DEV_TYPE_ARM_VGIC_V3	KVM_DEV_TYPE_ARM_VGIC_V3
> >>>> +	KVM_DEV_TYPE_ARM_PMU_V3,
> >>>> +#define	KVM_DEV_TYPE_ARM_PMU_V3		KVM_DEV_TYPE_ARM_PMU_V3
> >>>>  	KVM_DEV_TYPE_MAX,
> >>>>  };
> >>>>
> >>>> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> >>>> index d113ee4..1965d0d 100644
> >>>> --- a/virt/kvm/arm/pmu.c
> >>>> +++ b/virt/kvm/arm/pmu.c
> >>>> @@ -19,6 +19,7 @@
> >>>>  #include <linux/kvm.h>
> >>>>  #include <linux/kvm_host.h>
> >>>>  #include <linux/perf_event.h>
> >>>> +#include <linux/uaccess.h>
> >>>>  #include <asm/kvm_emulate.h>
> >>>>  #include <kvm/arm_pmu.h>
> >>>>  #include <kvm/arm_vgic.h>
> >>>> @@ -357,3 +358,117 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
> >>>>
> >>>>  	pmc->perf_event = event;
> >>>>  }
> >>>> +
> >>>> +static inline bool kvm_arm_pmu_initialized(struct kvm_vcpu *vcpu)
> >>>> +{
> >>>> +	return vcpu->arch.pmu.irq_num != -1;
> >>>> +}
> >>>> +
> >>>> +static int kvm_arm_pmu_irq_access(struct kvm *kvm, int *irq, bool is_set)
> >>>> +{
> >>>> +	int j;
> >>>> +	struct kvm_vcpu *vcpu;
> >>>> +
> >>>> +	kvm_for_each_vcpu(j, vcpu, kvm) {
> >>>> +		struct kvm_pmu *pmu = &vcpu->arch.pmu;
> >>>> +
> >>>> +		if (!is_set) {
> >>>> +			if (!kvm_arm_pmu_initialized(vcpu))
> >>>> +				return -EBUSY;
> >> Returning -EBUSY is a bit odd. Maybe -EINVAL? But this seems weird
> >> anyway. Actually, why would you return an error in this case?
> >>
> > While this is a unexpected operation from user space and it's already 
> > initialized and working, so I think it should return an error to user 
> > and tell user that it's already initialized and working (this should 
> > mean "busy" ?).
> 
> But in this case, you're returning an error if it is *not* initialized.
> I understand that in that case you cannot return an interrupt number (-1
> would be weird), but returning -EBUSY feels even more weird.
> 
> I'd settle for -ENOXIO, or something similar. Anyone having a better idea?
> 
ENXIO or ENODEV would be my choice too, and add that to the
Documentation clearly describing when this error code is used.

By the way, why do you loop over all VCPUS to set the same value when
you can't do anything per VCPU anyway?  It seems to me it's either a
per-VM property (that you can store on the VM data structure) or it's a
true per-VCPU property?

-Christoffer
Shannon Zhao Dec. 16, 2015, 7:31 a.m. UTC | #6
On 2015/12/16 4:47, Christoffer Dall wrote:
> On Tue, Dec 15, 2015 at 03:59:31PM +0000, Marc Zyngier wrote:
>> > On 15/12/15 15:50, Shannon Zhao wrote:
>>> > > 
>>> > > 
>>> > > On 2015/12/15 23:33, Marc Zyngier wrote:
>>>> > >> On 15/12/15 08:49, Shannon Zhao wrote:
>>>>>> > >>>> From: Shannon Zhao<shannon.zhao@linaro.org>
>>>>>> > >>>>
>>>>>> > >>>> Add a new kvm device type KVM_DEV_TYPE_ARM_PMU_V3 for ARM PMU. Implement
>>>>>> > >>>> the kvm_device_ops for it.
>>>>>> > >>>>
>>>>>> > >>>> Signed-off-by: Shannon Zhao<shannon.zhao@linaro.org>
>>>>>> > >>>> ---
>>>>>> > >>>>  Documentation/virtual/kvm/devices/arm-pmu.txt |  16 ++++
>>>>>> > >>>>  arch/arm64/include/uapi/asm/kvm.h             |   3 +
>>>>>> > >>>>  include/linux/kvm_host.h                      |   1 +
>>>>>> > >>>>  include/uapi/linux/kvm.h                      |   2 +
>>>>>> > >>>>  virt/kvm/arm/pmu.c                            | 115 ++++++++++++++++++++++++++
>>>>>> > >>>>  virt/kvm/kvm_main.c                           |   4 +
>>>>>> > >>>>  6 files changed, 141 insertions(+)
>>>>>> > >>>>  create mode 100644 Documentation/virtual/kvm/devices/arm-pmu.txt
>>>>>> > >>>>
>>>>>> > >>>> diff --git a/Documentation/virtual/kvm/devices/arm-pmu.txt b/Documentation/virtual/kvm/devices/arm-pmu.txt
>>>>>> > >>>> new file mode 100644
>>>>>> > >>>> index 0000000..5121f1f
>>>>>> > >>>> --- /dev/null
>>>>>> > >>>> +++ b/Documentation/virtual/kvm/devices/arm-pmu.txt
>>>>>> > >>>> @@ -0,0 +1,16 @@
>>>>>> > >>>> +ARM Virtual Performance Monitor Unit (vPMU)
>>>>>> > >>>> +===========================================
>>>>>> > >>>> +
>>>>>> > >>>> +Device types supported:
>>>>>> > >>>> +  KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
>>>>>> > >>>> +
>>>>>> > >>>> +Instantiate one PMU instance for per VCPU through this API.
>>>>>> > >>>> +
>>>>>> > >>>> +Groups:
>>>>>> > >>>> +  KVM_DEV_ARM_PMU_GRP_IRQ
>>>>>> > >>>> +  Attributes:
>>>>>> > >>>> +    A value describing the interrupt number of PMU overflow interrupt. This
>>>>>> > >>>> +    interrupt should be a PPI.
>>>>>> > >>>> +
>>>>>> > >>>> +  Errors:
>>>>>> > >>>> +    -EINVAL: Value set is out of the expected range (from 16 to 31)
>>>>>> > >>>> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
>>>>>> > >>>> index 2d4ca4b..568afa2 100644
>>>>>> > >>>> --- a/arch/arm64/include/uapi/asm/kvm.h
>>>>>> > >>>> +++ b/arch/arm64/include/uapi/asm/kvm.h
>>>>>> > >>>> @@ -204,6 +204,9 @@ struct kvm_arch_memory_slot {
>>>>>> > >>>>  #define KVM_DEV_ARM_VGIC_GRP_CTRL	4
>>>>>> > >>>>  #define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
>>>>>> > >>>>
>>>>>> > >>>> +/* Device Control API: ARM PMU */
>>>>>> > >>>> +#define KVM_DEV_ARM_PMU_GRP_IRQ		0
>>>>>> > >>>> +
>>>>>> > >>>>  /* KVM_IRQ_LINE irq field index values */
>>>>>> > >>>>  #define KVM_ARM_IRQ_TYPE_SHIFT		24
>>>>>> > >>>>  #define KVM_ARM_IRQ_TYPE_MASK		0xff
>>>>>> > >>>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>>>>>> > >>>> index c923350..608dea6 100644
>>>>>> > >>>> --- a/include/linux/kvm_host.h
>>>>>> > >>>> +++ b/include/linux/kvm_host.h
>>>>>> > >>>> @@ -1161,6 +1161,7 @@ extern struct kvm_device_ops kvm_mpic_ops;
>>>>>> > >>>>  extern struct kvm_device_ops kvm_xics_ops;
>>>>>> > >>>>  extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
>>>>>> > >>>>  extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
>>>>>> > >>>> +extern struct kvm_device_ops kvm_arm_pmu_ops;
>>>>>> > >>>>
>>>>>> > >>>>  #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
>>>>>> > >>>>
>>>>>> > >>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>>>>> > >>>> index 03f3618..4ba6fdd 100644
>>>>>> > >>>> --- a/include/uapi/linux/kvm.h
>>>>>> > >>>> +++ b/include/uapi/linux/kvm.h
>>>>>> > >>>> @@ -1032,6 +1032,8 @@ enum kvm_device_type {
>>>>>> > >>>>  #define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
>>>>>> > >>>>  	KVM_DEV_TYPE_ARM_VGIC_V3,
>>>>>> > >>>>  #define KVM_DEV_TYPE_ARM_VGIC_V3	KVM_DEV_TYPE_ARM_VGIC_V3
>>>>>> > >>>> +	KVM_DEV_TYPE_ARM_PMU_V3,
>>>>>> > >>>> +#define	KVM_DEV_TYPE_ARM_PMU_V3		KVM_DEV_TYPE_ARM_PMU_V3
>>>>>> > >>>>  	KVM_DEV_TYPE_MAX,
>>>>>> > >>>>  };
>>>>>> > >>>>
>>>>>> > >>>> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
>>>>>> > >>>> index d113ee4..1965d0d 100644
>>>>>> > >>>> --- a/virt/kvm/arm/pmu.c
>>>>>> > >>>> +++ b/virt/kvm/arm/pmu.c
>>>>>> > >>>> @@ -19,6 +19,7 @@
>>>>>> > >>>>  #include <linux/kvm.h>
>>>>>> > >>>>  #include <linux/kvm_host.h>
>>>>>> > >>>>  #include <linux/perf_event.h>
>>>>>> > >>>> +#include <linux/uaccess.h>
>>>>>> > >>>>  #include <asm/kvm_emulate.h>
>>>>>> > >>>>  #include <kvm/arm_pmu.h>
>>>>>> > >>>>  #include <kvm/arm_vgic.h>
>>>>>> > >>>> @@ -357,3 +358,117 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
>>>>>> > >>>>
>>>>>> > >>>>  	pmc->perf_event = event;
>>>>>> > >>>>  }
>>>>>> > >>>> +
>>>>>> > >>>> +static inline bool kvm_arm_pmu_initialized(struct kvm_vcpu *vcpu)
>>>>>> > >>>> +{
>>>>>> > >>>> +	return vcpu->arch.pmu.irq_num != -1;
>>>>>> > >>>> +}
>>>>>> > >>>> +
>>>>>> > >>>> +static int kvm_arm_pmu_irq_access(struct kvm *kvm, int *irq, bool is_set)
>>>>>> > >>>> +{
>>>>>> > >>>> +	int j;
>>>>>> > >>>> +	struct kvm_vcpu *vcpu;
>>>>>> > >>>> +
>>>>>> > >>>> +	kvm_for_each_vcpu(j, vcpu, kvm) {
>>>>>> > >>>> +		struct kvm_pmu *pmu = &vcpu->arch.pmu;
>>>>>> > >>>> +
>>>>>> > >>>> +		if (!is_set) {
>>>>>> > >>>> +			if (!kvm_arm_pmu_initialized(vcpu))
>>>>>> > >>>> +				return -EBUSY;
>>>> > >> Returning -EBUSY is a bit odd. Maybe -EINVAL? But this seems weird
>>>> > >> anyway. Actually, why would you return an error in this case?
>>>> > >>
>>> > > While this is a unexpected operation from user space and it's already 
>>> > > initialized and working, so I think it should return an error to user 
>>> > > and tell user that it's already initialized and working (this should 
>>> > > mean "busy" ?).
>> > 
>> > But in this case, you're returning an error if it is *not* initialized.
>> > I understand that in that case you cannot return an interrupt number (-1
>> > would be weird), but returning -EBUSY feels even more weird.
>> > 
>> > I'd settle for -ENOXIO, or something similar. Anyone having a better idea?
>> > 
> ENXIO or ENODEV would be my choice too, and add that to the
> Documentation clearly describing when this error code is used.
> 
> By the way, why do you loop over all VCPUS to set the same value when
> you can't do anything per VCPU anyway?  It seems to me it's either a
> per-VM property (that you can store on the VM data structure) or it's a
> true per-VCPU property?

This is a per-VCPU property. PMU interrupt could be PPI or SPI. For PPI
the interrupt numbers are same for each vcpu, while for SPI they are
different, so it needs to set them separately. I planned to support both
PPI and SPI. I think I should add support for SPI at this moment and let
users (QEMU) to set these interrupts for each one.

Thanks,
Shannon Zhao Dec. 16, 2015, 8:06 a.m. UTC | #7
Hi,

On 2015/12/16 15:31, Shannon Zhao wrote:
>>>> >> > But in this case, you're returning an error if it is *not* initialized.
>>>> >> > I understand that in that case you cannot return an interrupt number (-1
>>>> >> > would be weird), but returning -EBUSY feels even more weird.
>>>> >> > 
>>>> >> > I'd settle for -ENOXIO, or something similar. Anyone having a better idea?
>>>> >> > 
>> > ENXIO or ENODEV would be my choice too, and add that to the
>> > Documentation clearly describing when this error code is used.
>> > 
>> > By the way, why do you loop over all VCPUS to set the same value when
>> > you can't do anything per VCPU anyway?  It seems to me it's either a
>> > per-VM property (that you can store on the VM data structure) or it's a
>> > true per-VCPU property?
> This is a per-VCPU property. PMU interrupt could be PPI or SPI. For PPI
> the interrupt numbers are same for each vcpu, while for SPI they are
> different, so it needs to set them separately. I planned to support both
> PPI and SPI. I think I should add support for SPI at this moment and let
> users (QEMU) to set these interrupts for each one.

How about below vPMU Documentation?

ARM Virtual Performance Monitor Unit (vPMU)
===========================================

Device types supported:
  KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3

Instantiate one PMU instance for per VCPU through this API.

Groups:
  KVM_DEV_ARM_PMU_GRP_IRQ
  Attributes:
    The attr field of kvm_device_attr encodes two values:
    bits:     | 63 .... 32 | 31 .... 0 |
    values:   | vcpu_index |  irq_num  |
    The irq_num describes the PMU overflow interrupt number for the
specified
    vcpu_index vcpu. This interrupt could be a PPI or SPI, but for one
VM the
    interrupt type must be same for each vcpu.

  Errors:
    -ENXIO: Getting or setting this attribute is not yet supported
    -ENODEV: Getting the PMU overflow interrupt number while it's not set
    -EBUSY: The PMU overflow interrupt is already set
    -EINVAL: Invalid vcpu_index or irq_num supplied
Marc Zyngier Dec. 16, 2015, 9:04 a.m. UTC | #8
On 16/12/15 08:06, Shannon Zhao wrote:
> Hi,
> 
> On 2015/12/16 15:31, Shannon Zhao wrote:
>>>>>>>> But in this case, you're returning an error if it is *not* initialized.
>>>>>>>> I understand that in that case you cannot return an interrupt number (-1
>>>>>>>> would be weird), but returning -EBUSY feels even more weird.
>>>>>>>>
>>>>>>>> I'd settle for -ENOXIO, or something similar. Anyone having a better idea?
>>>>>>>>
>>>> ENXIO or ENODEV would be my choice too, and add that to the
>>>> Documentation clearly describing when this error code is used.
>>>>
>>>> By the way, why do you loop over all VCPUS to set the same value when
>>>> you can't do anything per VCPU anyway?  It seems to me it's either a
>>>> per-VM property (that you can store on the VM data structure) or it's a
>>>> true per-VCPU property?
>> This is a per-VCPU property. PMU interrupt could be PPI or SPI. For PPI
>> the interrupt numbers are same for each vcpu, while for SPI they are
>> different, so it needs to set them separately. I planned to support both
>> PPI and SPI. I think I should add support for SPI at this moment and let
>> users (QEMU) to set these interrupts for each one.
> 
> How about below vPMU Documentation?
> 
> ARM Virtual Performance Monitor Unit (vPMU)
> ===========================================
> 
> Device types supported:
>   KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
> 
> Instantiate one PMU instance for per VCPU through this API.
> 
> Groups:
>   KVM_DEV_ARM_PMU_GRP_IRQ
>   Attributes:
>     The attr field of kvm_device_attr encodes two values:
>     bits:     | 63 .... 32 | 31 .... 0 |
>     values:   | vcpu_index |  irq_num  |
>     The irq_num describes the PMU overflow interrupt number for the
> specified
>     vcpu_index vcpu. This interrupt could be a PPI or SPI, but for one
> VM the
>     interrupt type must be same for each vcpu.
> 
>   Errors:
>     -ENXIO: Getting or setting this attribute is not yet supported
>     -ENODEV: Getting the PMU overflow interrupt number while it's not set
>     -EBUSY: The PMU overflow interrupt is already set
>     -EINVAL: Invalid vcpu_index or irq_num supplied
> 
> 

Let's add at least one comment that forbids two vcpus from getting the
same SPI. This is too common a mistake that we see in actual SoCs, and I
don't want to see it replicated in VMs...

Thanks,

	M.
Shannon Zhao Dec. 16, 2015, 9:29 a.m. UTC | #9
On 2015/12/16 17:04, Marc Zyngier wrote:
> On 16/12/15 08:06, Shannon Zhao wrote:
>> > Hi,
>> > 
>> > On 2015/12/16 15:31, Shannon Zhao wrote:
>>>>>>>>> >>>>>>>> But in this case, you're returning an error if it is *not* initialized.
>>>>>>>>> >>>>>>>> I understand that in that case you cannot return an interrupt number (-1
>>>>>>>>> >>>>>>>> would be weird), but returning -EBUSY feels even more weird.
>>>>>>>>> >>>>>>>>
>>>>>>>>> >>>>>>>> I'd settle for -ENOXIO, or something similar. Anyone having a better idea?
>>>>>>>>> >>>>>>>>
>>>>> >>>> ENXIO or ENODEV would be my choice too, and add that to the
>>>>> >>>> Documentation clearly describing when this error code is used.
>>>>> >>>>
>>>>> >>>> By the way, why do you loop over all VCPUS to set the same value when
>>>>> >>>> you can't do anything per VCPU anyway?  It seems to me it's either a
>>>>> >>>> per-VM property (that you can store on the VM data structure) or it's a
>>>>> >>>> true per-VCPU property?
>>> >> This is a per-VCPU property. PMU interrupt could be PPI or SPI. For PPI
>>> >> the interrupt numbers are same for each vcpu, while for SPI they are
>>> >> different, so it needs to set them separately. I planned to support both
>>> >> PPI and SPI. I think I should add support for SPI at this moment and let
>>> >> users (QEMU) to set these interrupts for each one.
>> > 
>> > How about below vPMU Documentation?
>> > 
>> > ARM Virtual Performance Monitor Unit (vPMU)
>> > ===========================================
>> > 
>> > Device types supported:
>> >   KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
>> > 
>> > Instantiate one PMU instance for per VCPU through this API.
>> > 
>> > Groups:
>> >   KVM_DEV_ARM_PMU_GRP_IRQ
>> >   Attributes:
>> >     The attr field of kvm_device_attr encodes two values:
>> >     bits:     | 63 .... 32 | 31 .... 0 |
>> >     values:   | vcpu_index |  irq_num  |
>> >     The irq_num describes the PMU overflow interrupt number for the
>> > specified
>> >     vcpu_index vcpu. This interrupt could be a PPI or SPI, but for one
>> > VM the
>> >     interrupt type must be same for each vcpu.
>> > 
>> >   Errors:
>> >     -ENXIO: Getting or setting this attribute is not yet supported
>> >     -ENODEV: Getting the PMU overflow interrupt number while it's not set
>> >     -EBUSY: The PMU overflow interrupt is already set
>> >     -EINVAL: Invalid vcpu_index or irq_num supplied
>> > 
>> > 
> Let's add at least one comment that forbids two vcpus from getting the
> same SPI. This is too common a mistake that we see in actual SoCs, and I
> don't want to see it replicated in VMs...

Ok, will add.

Thanks,
Christoffer Dall Dec. 16, 2015, 8:33 p.m. UTC | #10
On Wed, Dec 16, 2015 at 04:06:49PM +0800, Shannon Zhao wrote:
> Hi,
> 
> On 2015/12/16 15:31, Shannon Zhao wrote:
> >>>> >> > But in this case, you're returning an error if it is *not* initialized.
> >>>> >> > I understand that in that case you cannot return an interrupt number (-1
> >>>> >> > would be weird), but returning -EBUSY feels even more weird.
> >>>> >> > 
> >>>> >> > I'd settle for -ENOXIO, or something similar. Anyone having a better idea?
> >>>> >> > 
> >> > ENXIO or ENODEV would be my choice too, and add that to the
> >> > Documentation clearly describing when this error code is used.
> >> > 
> >> > By the way, why do you loop over all VCPUS to set the same value when
> >> > you can't do anything per VCPU anyway?  It seems to me it's either a
> >> > per-VM property (that you can store on the VM data structure) or it's a
> >> > true per-VCPU property?
> > This is a per-VCPU property. PMU interrupt could be PPI or SPI. For PPI
> > the interrupt numbers are same for each vcpu, while for SPI they are
> > different, so it needs to set them separately. I planned to support both
> > PPI and SPI. I think I should add support for SPI at this moment and let
> > users (QEMU) to set these interrupts for each one.
> 
> How about below vPMU Documentation?
> 
> ARM Virtual Performance Monitor Unit (vPMU)
> ===========================================
> 
> Device types supported:
>   KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
> 
> Instantiate one PMU instance for per VCPU through this API.
> 
> Groups:
>   KVM_DEV_ARM_PMU_GRP_IRQ
>   Attributes:
>     The attr field of kvm_device_attr encodes two values:
>     bits:     | 63 .... 32 | 31 .... 0 |
>     values:   | vcpu_index |  irq_num  |
>     The irq_num describes the PMU overflow interrupt number for the
> specified
>     vcpu_index vcpu. This interrupt could be a PPI or SPI, but for one
> VM the
>     interrupt type must be same for each vcpu.

some formatting snafus that I expect come from pasting the text in an
e-mail client.

> 
>   Errors:
>     -ENXIO: Getting or setting this attribute is not yet supported

'not yet supported' as in something we'll implement later, or as in you
need to call this other function before you can access this state?

>     -ENODEV: Getting the PMU overflow interrupt number while it's not set
>     -EBUSY: The PMU overflow interrupt is already set
>     -EINVAL: Invalid vcpu_index or irq_num supplied
> 
> 
Otherwise looks good.

Thanks,
-Christoffer
Shannon Zhao Dec. 17, 2015, 7:22 a.m. UTC | #11
On 2015/12/17 4:33, Christoffer Dall wrote:
> On Wed, Dec 16, 2015 at 04:06:49PM +0800, Shannon Zhao wrote:
>> Hi,
>>
>> On 2015/12/16 15:31, Shannon Zhao wrote:
>>>>>>>>> But in this case, you're returning an error if it is *not* initialized.
>>>>>>>>> I understand that in that case you cannot return an interrupt number (-1
>>>>>>>>> would be weird), but returning -EBUSY feels even more weird.
>>>>>>>>>
>>>>>>>>> I'd settle for -ENOXIO, or something similar. Anyone having a better idea?
>>>>>>>>>
>>>>> ENXIO or ENODEV would be my choice too, and add that to the
>>>>> Documentation clearly describing when this error code is used.
>>>>>
>>>>> By the way, why do you loop over all VCPUS to set the same value when
>>>>> you can't do anything per VCPU anyway?  It seems to me it's either a
>>>>> per-VM property (that you can store on the VM data structure) or it's a
>>>>> true per-VCPU property?
>>> This is a per-VCPU property. PMU interrupt could be PPI or SPI. For PPI
>>> the interrupt numbers are same for each vcpu, while for SPI they are
>>> different, so it needs to set them separately. I planned to support both
>>> PPI and SPI. I think I should add support for SPI at this moment and let
>>> users (QEMU) to set these interrupts for each one.
>>
>> How about below vPMU Documentation?
>>
>> ARM Virtual Performance Monitor Unit (vPMU)
>> ===========================================
>>
>> Device types supported:
>>   KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
>>
>> Instantiate one PMU instance for per VCPU through this API.
>>
>> Groups:
>>   KVM_DEV_ARM_PMU_GRP_IRQ
>>   Attributes:
>>     The attr field of kvm_device_attr encodes two values:
>>     bits:     | 63 .... 32 | 31 .... 0 |
>>     values:   | vcpu_index |  irq_num  |
BTW, I change this Attribute to below format and pass vcpu_index through
this Attribute while pass irq_num through kvm_device_attr.addr.
Is it fine?

    bits:     | 63 .... 32 | 31 ....  0 |
    values:   |  reserved  | vcpu_index |

>>     The irq_num describes the PMU overflow interrupt number for the
>> specified
>>     vcpu_index vcpu. This interrupt could be a PPI or SPI, but for one
>> VM the
>>     interrupt type must be same for each vcpu.
> 
> some formatting snafus that I expect come from pasting the text in an
> e-mail client.
> 
>>
>>   Errors:
>>     -ENXIO: Getting or setting this attribute is not yet supported
> 
> 'not yet supported' as in something we'll implement later, or as in you
> need to call this other function before you can access this state?
> 
Since only when the group is not KVM_DEV_ARM_PMU_GRP_IRQ, it will return
-ENXIO. So what about this?

"-ENXIO: Unsupported attribute group"

Thanks,
Marc Zyngier Dec. 17, 2015, 8:33 a.m. UTC | #12
On Thu, 17 Dec 2015 15:22:50 +0800
Shannon Zhao <zhaoshenglong@huawei.com> wrote:

> 
> 
> On 2015/12/17 4:33, Christoffer Dall wrote:
> > On Wed, Dec 16, 2015 at 04:06:49PM +0800, Shannon Zhao wrote:
> >> Hi,
> >>
> >> On 2015/12/16 15:31, Shannon Zhao wrote:
> >>>>>>>>> But in this case, you're returning an error if it is *not* initialized.
> >>>>>>>>> I understand that in that case you cannot return an interrupt number (-1
> >>>>>>>>> would be weird), but returning -EBUSY feels even more weird.
> >>>>>>>>>
> >>>>>>>>> I'd settle for -ENOXIO, or something similar. Anyone having a better idea?
> >>>>>>>>>
> >>>>> ENXIO or ENODEV would be my choice too, and add that to the
> >>>>> Documentation clearly describing when this error code is used.
> >>>>>
> >>>>> By the way, why do you loop over all VCPUS to set the same value when
> >>>>> you can't do anything per VCPU anyway?  It seems to me it's either a
> >>>>> per-VM property (that you can store on the VM data structure) or it's a
> >>>>> true per-VCPU property?
> >>> This is a per-VCPU property. PMU interrupt could be PPI or SPI. For PPI
> >>> the interrupt numbers are same for each vcpu, while for SPI they are
> >>> different, so it needs to set them separately. I planned to support both
> >>> PPI and SPI. I think I should add support for SPI at this moment and let
> >>> users (QEMU) to set these interrupts for each one.
> >>
> >> How about below vPMU Documentation?
> >>
> >> ARM Virtual Performance Monitor Unit (vPMU)
> >> ===========================================
> >>
> >> Device types supported:
> >>   KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
> >>
> >> Instantiate one PMU instance for per VCPU through this API.
> >>
> >> Groups:
> >>   KVM_DEV_ARM_PMU_GRP_IRQ
> >>   Attributes:
> >>     The attr field of kvm_device_attr encodes two values:
> >>     bits:     | 63 .... 32 | 31 .... 0 |
> >>     values:   | vcpu_index |  irq_num  |
> BTW, I change this Attribute to below format and pass vcpu_index through
> this Attribute while pass irq_num through kvm_device_attr.addr.
> Is it fine?
> 
>     bits:     | 63 .... 32 | 31 ....  0 |
>     values:   |  reserved  | vcpu_index |

Using the .addr field for something that is clearly not an address is
rather odd. Is there any prior usage of that field for something that
is not an address?

Thanks,

	M.
Shannon Zhao Dec. 17, 2015, 8:41 a.m. UTC | #13
On 2015/12/17 16:33, Marc Zyngier wrote:
> On Thu, 17 Dec 2015 15:22:50 +0800
> Shannon Zhao <zhaoshenglong@huawei.com> wrote:
> 
>> > 
>> > 
>> > On 2015/12/17 4:33, Christoffer Dall wrote:
>>> > > On Wed, Dec 16, 2015 at 04:06:49PM +0800, Shannon Zhao wrote:
>>>> > >> Hi,
>>>> > >>
>>>> > >> On 2015/12/16 15:31, Shannon Zhao wrote:
>>>>>>>>>>> > >>>>>>>>> But in this case, you're returning an error if it is *not* initialized.
>>>>>>>>>>> > >>>>>>>>> I understand that in that case you cannot return an interrupt number (-1
>>>>>>>>>>> > >>>>>>>>> would be weird), but returning -EBUSY feels even more weird.
>>>>>>>>>>> > >>>>>>>>>
>>>>>>>>>>> > >>>>>>>>> I'd settle for -ENOXIO, or something similar. Anyone having a better idea?
>>>>>>>>>>> > >>>>>>>>>
>>>>>>> > >>>>> ENXIO or ENODEV would be my choice too, and add that to the
>>>>>>> > >>>>> Documentation clearly describing when this error code is used.
>>>>>>> > >>>>>
>>>>>>> > >>>>> By the way, why do you loop over all VCPUS to set the same value when
>>>>>>> > >>>>> you can't do anything per VCPU anyway?  It seems to me it's either a
>>>>>>> > >>>>> per-VM property (that you can store on the VM data structure) or it's a
>>>>>>> > >>>>> true per-VCPU property?
>>>>> > >>> This is a per-VCPU property. PMU interrupt could be PPI or SPI. For PPI
>>>>> > >>> the interrupt numbers are same for each vcpu, while for SPI they are
>>>>> > >>> different, so it needs to set them separately. I planned to support both
>>>>> > >>> PPI and SPI. I think I should add support for SPI at this moment and let
>>>>> > >>> users (QEMU) to set these interrupts for each one.
>>>> > >>
>>>> > >> How about below vPMU Documentation?
>>>> > >>
>>>> > >> ARM Virtual Performance Monitor Unit (vPMU)
>>>> > >> ===========================================
>>>> > >>
>>>> > >> Device types supported:
>>>> > >>   KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
>>>> > >>
>>>> > >> Instantiate one PMU instance for per VCPU through this API.
>>>> > >>
>>>> > >> Groups:
>>>> > >>   KVM_DEV_ARM_PMU_GRP_IRQ
>>>> > >>   Attributes:
>>>> > >>     The attr field of kvm_device_attr encodes two values:
>>>> > >>     bits:     | 63 .... 32 | 31 .... 0 |
>>>> > >>     values:   | vcpu_index |  irq_num  |
>> > BTW, I change this Attribute to below format and pass vcpu_index through
>> > this Attribute while pass irq_num through kvm_device_attr.addr.
>> > Is it fine?
>> > 
>> >     bits:     | 63 .... 32 | 31 ....  0 |
>> >     values:   |  reserved  | vcpu_index |
> Using the .addr field for something that is clearly not an address is
> rather odd. Is there any prior usage of that field for something that
> is not an address?

I see this usage in vgic_attr_regs_access(). But if you prefer previous
one, I'll use that.
Marc Zyngier Dec. 17, 2015, 9:38 a.m. UTC | #14
On 17/12/15 08:41, Shannon Zhao wrote:
> 
> 
> On 2015/12/17 16:33, Marc Zyngier wrote:
>> On Thu, 17 Dec 2015 15:22:50 +0800
>> Shannon Zhao <zhaoshenglong@huawei.com> wrote:
>>
>>>>
>>>>
>>>> On 2015/12/17 4:33, Christoffer Dall wrote:
>>>>>> On Wed, Dec 16, 2015 at 04:06:49PM +0800, Shannon Zhao wrote:
>>>>>>>> Hi,
>>>>>>>>
>>>>>>>> On 2015/12/16 15:31, Shannon Zhao wrote:
>>>>>>>>>>>>>>>>>>>>>> But in this case, you're returning an error if it is *not* initialized.
>>>>>>>>>>>>>>>>>>>>>> I understand that in that case you cannot return an interrupt number (-1
>>>>>>>>>>>>>>>>>>>>>> would be weird), but returning -EBUSY feels even more weird.
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> I'd settle for -ENOXIO, or something similar. Anyone having a better idea?
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>> ENXIO or ENODEV would be my choice too, and add that to the
>>>>>>>>>>>>>> Documentation clearly describing when this error code is used.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> By the way, why do you loop over all VCPUS to set the same value when
>>>>>>>>>>>>>> you can't do anything per VCPU anyway?  It seems to me it's either a
>>>>>>>>>>>>>> per-VM property (that you can store on the VM data structure) or it's a
>>>>>>>>>>>>>> true per-VCPU property?
>>>>>>>>>> This is a per-VCPU property. PMU interrupt could be PPI or SPI. For PPI
>>>>>>>>>> the interrupt numbers are same for each vcpu, while for SPI they are
>>>>>>>>>> different, so it needs to set them separately. I planned to support both
>>>>>>>>>> PPI and SPI. I think I should add support for SPI at this moment and let
>>>>>>>>>> users (QEMU) to set these interrupts for each one.
>>>>>>>>
>>>>>>>> How about below vPMU Documentation?
>>>>>>>>
>>>>>>>> ARM Virtual Performance Monitor Unit (vPMU)
>>>>>>>> ===========================================
>>>>>>>>
>>>>>>>> Device types supported:
>>>>>>>>   KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
>>>>>>>>
>>>>>>>> Instantiate one PMU instance for per VCPU through this API.
>>>>>>>>
>>>>>>>> Groups:
>>>>>>>>   KVM_DEV_ARM_PMU_GRP_IRQ
>>>>>>>>   Attributes:
>>>>>>>>     The attr field of kvm_device_attr encodes two values:
>>>>>>>>     bits:     | 63 .... 32 | 31 .... 0 |
>>>>>>>>     values:   | vcpu_index |  irq_num  |
>>>> BTW, I change this Attribute to below format and pass vcpu_index through
>>>> this Attribute while pass irq_num through kvm_device_attr.addr.
>>>> Is it fine?
>>>>
>>>>     bits:     | 63 .... 32 | 31 ....  0 |
>>>>     values:   |  reserved  | vcpu_index |
>> Using the .addr field for something that is clearly not an address is
>> rather odd. Is there any prior usage of that field for something that
>> is not an address?
> 
> I see this usage in vgic_attr_regs_access(). But if you prefer previous
> one, I'll use that.

Ah, you're using the .addr field to point to a userspace value, not to
carry the value itself! That'd be fine by me (even if I still prefer the
original one), but I'd like others to chime in (I'm quite bad at
defining userspace stuff...).

Thanks,

	M.
Shannon Zhao Dec. 17, 2015, 10:10 a.m. UTC | #15
On 2015/12/17 17:38, Marc Zyngier wrote:
> On 17/12/15 08:41, Shannon Zhao wrote:
>> > 
>> > 
>> > On 2015/12/17 16:33, Marc Zyngier wrote:
>>> >> On Thu, 17 Dec 2015 15:22:50 +0800
>>> >> Shannon Zhao <zhaoshenglong@huawei.com> wrote:
>>> >>
>>>>> >>>>
>>>>> >>>>
>>>>> >>>> On 2015/12/17 4:33, Christoffer Dall wrote:
>>>>>>> >>>>>> On Wed, Dec 16, 2015 at 04:06:49PM +0800, Shannon Zhao wrote:
>>>>>>>>> >>>>>>>> Hi,
>>>>>>>>> >>>>>>>>
>>>>>>>>> >>>>>>>> On 2015/12/16 15:31, Shannon Zhao wrote:
>>>>>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>>>> But in this case, you're returning an error if it is *not* initialized.
>>>>>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>>>> I understand that in that case you cannot return an interrupt number (-1
>>>>>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>>>> would be weird), but returning -EBUSY feels even more weird.
>>>>>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>>>> I'd settle for -ENOXIO, or something similar. Anyone having a better idea?
>>>>>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> >>>>>>>>>>>>>> ENXIO or ENODEV would be my choice too, and add that to the
>>>>>>>>>>>>>>> >>>>>>>>>>>>>> Documentation clearly describing when this error code is used.
>>>>>>>>>>>>>>> >>>>>>>>>>>>>>
>>>>>>>>>>>>>>> >>>>>>>>>>>>>> By the way, why do you loop over all VCPUS to set the same value when
>>>>>>>>>>>>>>> >>>>>>>>>>>>>> you can't do anything per VCPU anyway?  It seems to me it's either a
>>>>>>>>>>>>>>> >>>>>>>>>>>>>> per-VM property (that you can store on the VM data structure) or it's a
>>>>>>>>>>>>>>> >>>>>>>>>>>>>> true per-VCPU property?
>>>>>>>>>>> >>>>>>>>>> This is a per-VCPU property. PMU interrupt could be PPI or SPI. For PPI
>>>>>>>>>>> >>>>>>>>>> the interrupt numbers are same for each vcpu, while for SPI they are
>>>>>>>>>>> >>>>>>>>>> different, so it needs to set them separately. I planned to support both
>>>>>>>>>>> >>>>>>>>>> PPI and SPI. I think I should add support for SPI at this moment and let
>>>>>>>>>>> >>>>>>>>>> users (QEMU) to set these interrupts for each one.
>>>>>>>>> >>>>>>>>
>>>>>>>>> >>>>>>>> How about below vPMU Documentation?
>>>>>>>>> >>>>>>>>
>>>>>>>>> >>>>>>>> ARM Virtual Performance Monitor Unit (vPMU)
>>>>>>>>> >>>>>>>> ===========================================
>>>>>>>>> >>>>>>>>
>>>>>>>>> >>>>>>>> Device types supported:
>>>>>>>>> >>>>>>>>   KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
>>>>>>>>> >>>>>>>>
>>>>>>>>> >>>>>>>> Instantiate one PMU instance for per VCPU through this API.
>>>>>>>>> >>>>>>>>
>>>>>>>>> >>>>>>>> Groups:
>>>>>>>>> >>>>>>>>   KVM_DEV_ARM_PMU_GRP_IRQ
>>>>>>>>> >>>>>>>>   Attributes:
>>>>>>>>> >>>>>>>>     The attr field of kvm_device_attr encodes two values:
>>>>>>>>> >>>>>>>>     bits:     | 63 .... 32 | 31 .... 0 |
>>>>>>>>> >>>>>>>>     values:   | vcpu_index |  irq_num  |
>>>>> >>>> BTW, I change this Attribute to below format and pass vcpu_index through
>>>>> >>>> this Attribute while pass irq_num through kvm_device_attr.addr.
>>>>> >>>> Is it fine?
>>>>> >>>>
>>>>> >>>>     bits:     | 63 .... 32 | 31 ....  0 |
>>>>> >>>>     values:   |  reserved  | vcpu_index |
>>> >> Using the .addr field for something that is clearly not an address is
>>> >> rather odd. Is there any prior usage of that field for something that
>>> >> is not an address?
>> > 
>> > I see this usage in vgic_attr_regs_access(). But if you prefer previous
>> > one, I'll use that.
> Ah, you're using the .addr field to point to a userspace value, not to
> carry the value itself! That'd be fine by me (even if I still prefer the
> original one), but I'd like others to chime in (I'm quite bad at
> defining userspace stuff...).

Another reason I think is that it needs to pass the irq_num to user
space when calling get_attr. It could be passed via kvm_device_attr.addr
while can't be passed via kvm_device_attr.attr within current framework.

Thanks,
Marc Zyngier Dec. 17, 2015, 10:38 a.m. UTC | #16
On 17/12/15 10:10, Shannon Zhao wrote:
> 
> 
> On 2015/12/17 17:38, Marc Zyngier wrote:
>> On 17/12/15 08:41, Shannon Zhao wrote:
>>>>
>>>>
>>>> On 2015/12/17 16:33, Marc Zyngier wrote:
>>>>>> On Thu, 17 Dec 2015 15:22:50 +0800
>>>>>> Shannon Zhao <zhaoshenglong@huawei.com> wrote:
>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> On 2015/12/17 4:33, Christoffer Dall wrote:
>>>>>>>>>>>>>> On Wed, Dec 16, 2015 at 04:06:49PM +0800, Shannon Zhao wrote:
>>>>>>>>>>>>>>>>>> Hi,
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> On 2015/12/16 15:31, Shannon Zhao wrote:
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> But in this case, you're returning an error if it is *not* initialized.
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> I understand that in that case you cannot return an interrupt number (-1
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> would be weird), but returning -EBUSY feels even more weird.
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> I'd settle for -ENOXIO, or something similar. Anyone having a better idea?
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ENXIO or ENODEV would be my choice too, and add that to the
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Documentation clearly describing when this error code is used.
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> By the way, why do you loop over all VCPUS to set the same value when
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> you can't do anything per VCPU anyway?  It seems to me it's either a
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> per-VM property (that you can store on the VM data structure) or it's a
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> true per-VCPU property?
>>>>>>>>>>>>>>>>>>>>>> This is a per-VCPU property. PMU interrupt could be PPI or SPI. For PPI
>>>>>>>>>>>>>>>>>>>>>> the interrupt numbers are same for each vcpu, while for SPI they are
>>>>>>>>>>>>>>>>>>>>>> different, so it needs to set them separately. I planned to support both
>>>>>>>>>>>>>>>>>>>>>> PPI and SPI. I think I should add support for SPI at this moment and let
>>>>>>>>>>>>>>>>>>>>>> users (QEMU) to set these interrupts for each one.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> How about below vPMU Documentation?
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> ARM Virtual Performance Monitor Unit (vPMU)
>>>>>>>>>>>>>>>>>> ===========================================
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Device types supported:
>>>>>>>>>>>>>>>>>>   KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Instantiate one PMU instance for per VCPU through this API.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Groups:
>>>>>>>>>>>>>>>>>>   KVM_DEV_ARM_PMU_GRP_IRQ
>>>>>>>>>>>>>>>>>>   Attributes:
>>>>>>>>>>>>>>>>>>     The attr field of kvm_device_attr encodes two values:
>>>>>>>>>>>>>>>>>>     bits:     | 63 .... 32 | 31 .... 0 |
>>>>>>>>>>>>>>>>>>     values:   | vcpu_index |  irq_num  |
>>>>>>>>>> BTW, I change this Attribute to below format and pass vcpu_index through
>>>>>>>>>> this Attribute while pass irq_num through kvm_device_attr.addr.
>>>>>>>>>> Is it fine?
>>>>>>>>>>
>>>>>>>>>>     bits:     | 63 .... 32 | 31 ....  0 |
>>>>>>>>>>     values:   |  reserved  | vcpu_index |
>>>>>> Using the .addr field for something that is clearly not an address is
>>>>>> rather odd. Is there any prior usage of that field for something that
>>>>>> is not an address?
>>>>
>>>> I see this usage in vgic_attr_regs_access(). But if you prefer previous
>>>> one, I'll use that.
>> Ah, you're using the .addr field to point to a userspace value, not to
>> carry the value itself! That'd be fine by me (even if I still prefer the
>> original one), but I'd like others to chime in (I'm quite bad at
>> defining userspace stuff...).
> 
> Another reason I think is that it needs to pass the irq_num to user
> space when calling get_attr. It could be passed via kvm_device_attr.addr
> while can't be passed via kvm_device_attr.attr within current framework.

That's a very good reason. Go for it.

Thanks,

	M.
Christoffer Dall Dec. 18, 2015, 10 a.m. UTC | #17
On Thu, Dec 17, 2015 at 03:22:50PM +0800, Shannon Zhao wrote:
> 
> 
> On 2015/12/17 4:33, Christoffer Dall wrote:
> > On Wed, Dec 16, 2015 at 04:06:49PM +0800, Shannon Zhao wrote:
> >> Hi,
> >>
> >> On 2015/12/16 15:31, Shannon Zhao wrote:
> >>>>>>>>> But in this case, you're returning an error if it is *not* initialized.
> >>>>>>>>> I understand that in that case you cannot return an interrupt number (-1
> >>>>>>>>> would be weird), but returning -EBUSY feels even more weird.
> >>>>>>>>>
> >>>>>>>>> I'd settle for -ENOXIO, or something similar. Anyone having a better idea?
> >>>>>>>>>
> >>>>> ENXIO or ENODEV would be my choice too, and add that to the
> >>>>> Documentation clearly describing when this error code is used.
> >>>>>
> >>>>> By the way, why do you loop over all VCPUS to set the same value when
> >>>>> you can't do anything per VCPU anyway?  It seems to me it's either a
> >>>>> per-VM property (that you can store on the VM data structure) or it's a
> >>>>> true per-VCPU property?
> >>> This is a per-VCPU property. PMU interrupt could be PPI or SPI. For PPI
> >>> the interrupt numbers are same for each vcpu, while for SPI they are
> >>> different, so it needs to set them separately. I planned to support both
> >>> PPI and SPI. I think I should add support for SPI at this moment and let
> >>> users (QEMU) to set these interrupts for each one.
> >>
> >> How about below vPMU Documentation?
> >>
> >> ARM Virtual Performance Monitor Unit (vPMU)
> >> ===========================================
> >>
> >> Device types supported:
> >>   KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
> >>
> >> Instantiate one PMU instance for per VCPU through this API.
> >>
> >> Groups:
> >>   KVM_DEV_ARM_PMU_GRP_IRQ
> >>   Attributes:
> >>     The attr field of kvm_device_attr encodes two values:
> >>     bits:     | 63 .... 32 | 31 .... 0 |
> >>     values:   | vcpu_index |  irq_num  |
> BTW, I change this Attribute to below format and pass vcpu_index through
> this Attribute while pass irq_num through kvm_device_attr.addr.
> Is it fine?
> 
>     bits:     | 63 .... 32 | 31 ....  0 |
>     values:   |  reserved  | vcpu_index |
> 
> >>     The irq_num describes the PMU overflow interrupt number for the
> >> specified
> >>     vcpu_index vcpu. This interrupt could be a PPI or SPI, but for one
> >> VM the
> >>     interrupt type must be same for each vcpu.
> > 
> > some formatting snafus that I expect come from pasting the text in an
> > e-mail client.
> > 
> >>
> >>   Errors:
> >>     -ENXIO: Getting or setting this attribute is not yet supported
> > 
> > 'not yet supported' as in something we'll implement later, or as in you
> > need to call this other function before you can access this state?
> > 
> Since only when the group is not KVM_DEV_ARM_PMU_GRP_IRQ, it will return
> -ENXIO. So what about this?
> 
> "-ENXIO: Unsupported attribute group"
> 
better,

-Christoffer
diff mbox

Patch

diff --git a/Documentation/virtual/kvm/devices/arm-pmu.txt b/Documentation/virtual/kvm/devices/arm-pmu.txt
new file mode 100644
index 0000000..5121f1f
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/arm-pmu.txt
@@ -0,0 +1,16 @@ 
+ARM Virtual Performance Monitor Unit (vPMU)
+===========================================
+
+Device types supported:
+  KVM_DEV_TYPE_ARM_PMU_V3         ARM Performance Monitor Unit v3
+
+Instantiate one PMU instance for per VCPU through this API.
+
+Groups:
+  KVM_DEV_ARM_PMU_GRP_IRQ
+  Attributes:
+    A value describing the interrupt number of PMU overflow interrupt. This
+    interrupt should be a PPI.
+
+  Errors:
+    -EINVAL: Value set is out of the expected range (from 16 to 31)
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 2d4ca4b..568afa2 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -204,6 +204,9 @@  struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CTRL	4
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT	0
 
+/* Device Control API: ARM PMU */
+#define KVM_DEV_ARM_PMU_GRP_IRQ		0
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
 #define KVM_ARM_IRQ_TYPE_MASK		0xff
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c923350..608dea6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1161,6 +1161,7 @@  extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
+extern struct kvm_device_ops kvm_arm_pmu_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 03f3618..4ba6fdd 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1032,6 +1032,8 @@  enum kvm_device_type {
 #define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
 	KVM_DEV_TYPE_ARM_VGIC_V3,
 #define KVM_DEV_TYPE_ARM_VGIC_V3	KVM_DEV_TYPE_ARM_VGIC_V3
+	KVM_DEV_TYPE_ARM_PMU_V3,
+#define	KVM_DEV_TYPE_ARM_PMU_V3		KVM_DEV_TYPE_ARM_PMU_V3
 	KVM_DEV_TYPE_MAX,
 };
 
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index d113ee4..1965d0d 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -19,6 +19,7 @@ 
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/perf_event.h>
+#include <linux/uaccess.h>
 #include <asm/kvm_emulate.h>
 #include <kvm/arm_pmu.h>
 #include <kvm/arm_vgic.h>
@@ -357,3 +358,117 @@  void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
 
 	pmc->perf_event = event;
 }
+
+static inline bool kvm_arm_pmu_initialized(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.pmu.irq_num != -1;
+}
+
+static int kvm_arm_pmu_irq_access(struct kvm *kvm, int *irq, bool is_set)
+{
+	int j;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(j, vcpu, kvm) {
+		struct kvm_pmu *pmu = &vcpu->arch.pmu;
+
+		if (!is_set) {
+			if (!kvm_arm_pmu_initialized(vcpu))
+				return -EBUSY;
+
+			*irq = pmu->irq_num;
+			break;
+		}
+
+		if (kvm_arm_pmu_initialized(vcpu))
+			return -EBUSY;
+
+		kvm_debug("Set kvm ARM PMU irq: %d\n", *irq);
+		pmu->irq_num = *irq;
+	}
+
+	return 0;
+}
+
+static int kvm_arm_pmu_create(struct kvm_device *dev, u32 type)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+	struct kvm *kvm = dev->kvm;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvm_pmu *pmu = &vcpu->arch.pmu;
+
+		memset(pmu, 0, sizeof(*pmu));
+		kvm_pmu_vcpu_reset(vcpu);
+		pmu->irq_num = -1;
+	}
+
+	return 0;
+}
+
+static void kvm_arm_pmu_destroy(struct kvm_device *dev)
+{
+	kfree(dev);
+}
+
+static int kvm_arm_pmu_set_attr(struct kvm_device *dev,
+				struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_ARM_PMU_GRP_IRQ: {
+		int __user *uaddr = (int __user *)(long)attr->addr;
+		int reg;
+
+		if (get_user(reg, uaddr))
+			return -EFAULT;
+
+		if (reg < VGIC_NR_SGIS || reg >= VGIC_NR_PRIVATE_IRQS)
+			return -EINVAL;
+
+		return kvm_arm_pmu_irq_access(dev->kvm, &reg, true);
+	}
+	}
+
+	return -ENXIO;
+}
+
+static int kvm_arm_pmu_get_attr(struct kvm_device *dev,
+				struct kvm_device_attr *attr)
+{
+	int ret;
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_PMU_GRP_IRQ: {
+		int __user *uaddr = (int __user *)(long)attr->addr;
+		int reg = -1;
+
+		ret = kvm_arm_pmu_irq_access(dev->kvm, &reg, false);
+		if (ret)
+			return ret;
+		return put_user(reg, uaddr);
+	}
+	}
+
+	return -ENXIO;
+}
+
+static int kvm_arm_pmu_has_attr(struct kvm_device *dev,
+				struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_ARM_PMU_GRP_IRQ:
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+struct kvm_device_ops kvm_arm_pmu_ops = {
+	.name = "kvm-arm-pmu",
+	.create = kvm_arm_pmu_create,
+	.destroy = kvm_arm_pmu_destroy,
+	.set_attr = kvm_arm_pmu_set_attr,
+	.get_attr = kvm_arm_pmu_get_attr,
+	.has_attr = kvm_arm_pmu_has_attr,
+};
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 484079e..81a42cc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2647,6 +2647,10 @@  static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 #ifdef CONFIG_KVM_XICS
 	[KVM_DEV_TYPE_XICS]		= &kvm_xics_ops,
 #endif
+
+#ifdef CONFIG_KVM_ARM_PMU
+	[KVM_DEV_TYPE_ARM_PMU_V3]	= &kvm_arm_pmu_ops,
+#endif
 };
 
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)