diff mbox series

[v2,1/2] KVM: arm64: Add PMU event filtering infrastructure

Message ID 20200309124837.19908-2-maz@kernel.org (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: Filtering PMU events | expand

Commit Message

Marc Zyngier March 9, 2020, 12:48 p.m. UTC
It can be desirable to expose a PMU to a guest, and yet not want the
guest to be able to count some of the implemented events (because this
would give information on shared resources, for example).

For this, let's extend the PMUv3 device API, and offer a way to setup a
bitmap of the allowed events (the default being no bitmap, and thus no
filtering).

Userspace can thus allow/deny ranges of events. The default policy
depends on the "polarity" of the first filter setup (default deny if the
filter allows events, and default allow if the filter denies events).
This allows to setup exactly what is allowed for a given guest.

Note that although the ioctl is per-vcpu, the map of allowed events is
global to the VM (it can be setup from any vcpu until the vcpu PMU is
initialized).

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/kvm_host.h |  6 +++
 arch/arm64/include/uapi/asm/kvm.h | 16 ++++++
 virt/kvm/arm/arm.c                |  2 +
 virt/kvm/arm/pmu.c                | 84 +++++++++++++++++++++++++------
 4 files changed, 92 insertions(+), 16 deletions(-)

Comments

Eric Auger March 9, 2020, 6:05 p.m. UTC | #1
Hi Marc,

On 3/9/20 1:48 PM, Marc Zyngier wrote:
> It can be desirable to expose a PMU to a guest, and yet not want the
> guest to be able to count some of the implemented events (because this
> would give information on shared resources, for example.
> 
> For this, let's extend the PMUv3 device API, and offer a way to setup a
> bitmap of the allowed events (the default being no bitmap, and thus no
> filtering).
> 
> Userspace can thus allow/deny ranges of event. The default policy
> depends on the "polarity" of the first filter setup (default deny if the
> filter allows events, and default allow if the filter denies events).
> This allows to setup exactly what is allowed for a given guest.
> 
> Note that although the ioctl is per-vcpu, the map of allowed events is
> global to the VM (it can be setup from any vcpu until the vcpu PMU is
> initialized).
> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>  arch/arm64/include/asm/kvm_host.h |  6 +++
>  arch/arm64/include/uapi/asm/kvm.h | 16 ++++++
>  virt/kvm/arm/arm.c                |  2 +
>  virt/kvm/arm/pmu.c                | 84 +++++++++++++++++++++++++------
>  4 files changed, 92 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 57fd46acd058..8e63c618688d 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -91,6 +91,12 @@ struct kvm_arch {
>  	 * supported.
>  	 */
>  	bool return_nisv_io_abort_to_user;
> +
> +	/*
> +	 * VM-wide PMU filter, implemented as a bitmap and big enough
> +	 * for up to 65536 events
> +	 */
> +	unsigned long *pmu_filter;
>  };
>  
>  #define KVM_NR_MEM_OBJS     40
> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> index ba85bb23f060..7b1511d6ce44 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -159,6 +159,21 @@ struct kvm_sync_regs {
>  struct kvm_arch_memory_slot {
>  };
>  
> +/*
> + * PMU filter structure. Describe a range of events with a particular
> + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
> + */
> +struct kvm_pmu_event_filter {
> +	__u16	base_event;
> +	__u16	nevents;
> +
> +#define KVM_PMU_EVENT_ALLOW	0
> +#define KVM_PMU_EVENT_DENY	1
> +
> +	__u8	action;
> +	__u8	pad[3];
> +};
> +
>  /* for KVM_GET/SET_VCPU_EVENTS */
>  struct kvm_vcpu_events {
>  	struct {
> @@ -329,6 +344,7 @@ struct kvm_vcpu_events {
>  #define KVM_ARM_VCPU_PMU_V3_CTRL	0
>  #define   KVM_ARM_VCPU_PMU_V3_IRQ	0
>  #define   KVM_ARM_VCPU_PMU_V3_INIT	1
> +#define   KVM_ARM_VCPU_PMU_V3_FILTER	2
>  #define KVM_ARM_VCPU_TIMER_CTRL		1
>  #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER		0
>  #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER		1
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index eda7b624eab8..8d849ac88a44 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -164,6 +164,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>  	free_percpu(kvm->arch.last_vcpu_ran);
>  	kvm->arch.last_vcpu_ran = NULL;
>  
> +	bitmap_free(kvm->arch.pmu_filter);
> +
>  	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
>  		if (kvm->vcpus[i]) {
>  			kvm_vcpu_destroy(kvm->vcpus[i]);
> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> index f0d0312c0a55..9f0fd0224d5b 100644
> --- a/virt/kvm/arm/pmu.c
> +++ b/virt/kvm/arm/pmu.c
> @@ -579,10 +579,19 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
>  
>  	kvm_pmu_stop_counter(vcpu, pmc);
>  	eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
> +	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
> +		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
nit:
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & ARMV8_PMU_EVTYPE_EVENT;

>  
>  	/* Software increment event does't need to be backed by a perf event */
nit: while we are at it, fix the "does't" typo
> -	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
> -	    pmc->idx != ARMV8_PMU_CYCLE_IDX)
> +	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
> +		return;
> +
> +	/*
> +	 * If we have a filter in place and that the event isn't allowed, do
> +	 * not install a perf event either.
> +	 */
> +	if (vcpu->kvm->arch.pmu_filter &&
> +	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
>  		return;
>  
>  	memset(&attr, 0, sizeof(struct perf_event_attr));
> @@ -594,8 +603,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
>  	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
>  	attr.exclude_hv = 1; /* Don't count EL2 events */
>  	attr.exclude_host = 1; /* Don't count host events */
> -	attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
> -		ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
> +	attr.config = eventsel;
So in that case the guest counter will not increment but the guest does
not know the counter is not implemented. Can't this lead to a bad user
experience? Shouldn't this disablement be reflected in PMCEID0/1 regs?
>  
>  	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
>  
> @@ -735,15 +743,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
>  
>  static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
>  {
> -	if (!kvm_arm_support_pmu_v3())
> -		return -ENODEV;
> -
> -	if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
> -		return -ENXIO;
> -
> -	if (vcpu->arch.pmu.created)
> -		return -EBUSY;
> -
>  	if (irqchip_in_kernel(vcpu->kvm)) {
>  		int ret;
>  
> @@ -794,8 +793,19 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
>  	return true;
>  }
>  
> +#define NR_EVENTS	(ARMV8_PMU_EVTYPE_EVENT + 1)
> +
>  int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
>  {
> +	if (!kvm_arm_support_pmu_v3())
> +		return -ENODEV;
> +
> +	if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
> +		return -ENODEV;
I see you changed -ENXIO into -ENODEV. wanted?
> +
> +	if (vcpu->arch.pmu.created)
> +		return -EBUSY;
> +
>  	switch (attr->attr) {
>  	case KVM_ARM_VCPU_PMU_V3_IRQ: {
>  		int __user *uaddr = (int __user *)(long)attr->addr;
> @@ -804,9 +814,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
>  		if (!irqchip_in_kernel(vcpu->kvm))
>  			return -EINVAL;
>  
> -		if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
> -			return -ENODEV;
> -
>  		if (get_user(irq, uaddr))
>  			return -EFAULT;
>  
> @@ -824,6 +831,50 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
>  		vcpu->arch.pmu.irq_num = irq;
>  		return 0;
>  	}
> +	case KVM_ARM_VCPU_PMU_V3_FILTER: {
> +		struct kvm_pmu_event_filter __user *uaddr;
> +		struct kvm_pmu_event_filter filter;
> +
> +		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
> +
> +		if (copy_from_user(&filter, uaddr, sizeof(filter)))
> +			return -EFAULT;
> +
> +		if (((u32)filter.base_event + filter.nevents) > NR_EVENTS ||
isn't it >= ?
> +		    (filter.action != KVM_PMU_EVENT_ALLOW &&
> +		     filter.action != KVM_PMU_EVENT_DENY))
> +			return -EINVAL;
-EINVAL is not documented in the API doc.
> +
> +		mutex_lock(&vcpu->kvm->lock);
> +
> +		if (!vcpu->kvm->arch.pmu_filter) {
> +			vcpu->kvm->arch.pmu_filter = bitmap_alloc(NR_EVENTS, GFP_KERNEL);
> +			if (!vcpu->kvm->arch.pmu_filter) {
> +				mutex_unlock(&vcpu->kvm->lock);
> +				return -ENOMEM;
> +			}
> +
> +			/*
> +			 * The default depends on the first applied filter.
> +			 * If it allows events, the default is to deny.
> +			 * Conversely, if the first filter denies a set of
> +			 * events, the default is to allow.
> +			 */
> +			if (filter.action == KVM_PMU_EVENT_ALLOW)
> +				bitmap_zero(vcpu->kvm->arch.pmu_filter, NR_EVENTS);
> +			else
> +				bitmap_fill(vcpu->kvm->arch.pmu_filter, NR_EVENTS);
> +		}
> +
> +		if (filter.action == KVM_PMU_EVENT_ALLOW)
> +			bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
> +		else
> +			bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
> +
> +		mutex_unlock(&vcpu->kvm->lock);
> +
> +		return 0;
> +	}
>  	case KVM_ARM_VCPU_PMU_V3_INIT:
>  		return kvm_arm_pmu_v3_init(vcpu);
>  	}
> @@ -860,6 +911,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
>  	switch (attr->attr) {
>  	case KVM_ARM_VCPU_PMU_V3_IRQ:
not related to this patch but shouldn't we advertise this only with
in-kernel irqchip?
>  	case KVM_ARM_VCPU_PMU_V3_INIT:
> +	case KVM_ARM_VCPU_PMU_V3_FILTER:
>  		if (kvm_arm_support_pmu_v3() &&
>  		    test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>  			return 0;
> 
Thanks

Eric
Marc Zyngier March 10, 2020, 11:03 a.m. UTC | #2
Hi Eric,

On 2020-03-09 18:05, Auger Eric wrote:
> Hi Marc,
> 
> On 3/9/20 1:48 PM, Marc Zyngier wrote:
>> It can be desirable to expose a PMU to a guest, and yet not want the
>> guest to be able to count some of the implemented events (because this
>> would give information on shared resources, for example.
>> 
>> For this, let's extend the PMUv3 device API, and offer a way to setup 
>> a
>> bitmap of the allowed events (the default being no bitmap, and thus no
>> filtering).
>> 
>> Userspace can thus allow/deny ranges of event. The default policy
>> depends on the "polarity" of the first filter setup (default deny if 
>> the
>> filter allows events, and default allow if the filter denies events).
>> This allows to setup exactly what is allowed for a given guest.
>> 
>> Note that although the ioctl is per-vcpu, the map of allowed events is
>> global to the VM (it can be setup from any vcpu until the vcpu PMU is
>> initialized).
>> 
>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>> ---
>>  arch/arm64/include/asm/kvm_host.h |  6 +++
>>  arch/arm64/include/uapi/asm/kvm.h | 16 ++++++
>>  virt/kvm/arm/arm.c                |  2 +
>>  virt/kvm/arm/pmu.c                | 84 
>> +++++++++++++++++++++++++------
>>  4 files changed, 92 insertions(+), 16 deletions(-)
>> 
>> diff --git a/arch/arm64/include/asm/kvm_host.h 
>> b/arch/arm64/include/asm/kvm_host.h
>> index 57fd46acd058..8e63c618688d 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -91,6 +91,12 @@ struct kvm_arch {
>>  	 * supported.
>>  	 */
>>  	bool return_nisv_io_abort_to_user;
>> +
>> +	/*
>> +	 * VM-wide PMU filter, implemented as a bitmap and big enough
>> +	 * for up to 65536 events
>> +	 */
>> +	unsigned long *pmu_filter;
>>  };
>> 
>>  #define KVM_NR_MEM_OBJS     40
>> diff --git a/arch/arm64/include/uapi/asm/kvm.h 
>> b/arch/arm64/include/uapi/asm/kvm.h
>> index ba85bb23f060..7b1511d6ce44 100644
>> --- a/arch/arm64/include/uapi/asm/kvm.h
>> +++ b/arch/arm64/include/uapi/asm/kvm.h
>> @@ -159,6 +159,21 @@ struct kvm_sync_regs {
>>  struct kvm_arch_memory_slot {
>>  };
>> 
>> +/*
>> + * PMU filter structure. Describe a range of events with a particular
>> + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
>> + */
>> +struct kvm_pmu_event_filter {
>> +	__u16	base_event;
>> +	__u16	nevents;
>> +
>> +#define KVM_PMU_EVENT_ALLOW	0
>> +#define KVM_PMU_EVENT_DENY	1
>> +
>> +	__u8	action;
>> +	__u8	pad[3];
>> +};
>> +
>>  /* for KVM_GET/SET_VCPU_EVENTS */
>>  struct kvm_vcpu_events {
>>  	struct {
>> @@ -329,6 +344,7 @@ struct kvm_vcpu_events {
>>  #define KVM_ARM_VCPU_PMU_V3_CTRL	0
>>  #define   KVM_ARM_VCPU_PMU_V3_IRQ	0
>>  #define   KVM_ARM_VCPU_PMU_V3_INIT	1
>> +#define   KVM_ARM_VCPU_PMU_V3_FILTER	2
>>  #define KVM_ARM_VCPU_TIMER_CTRL		1
>>  #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER		0
>>  #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER		1
>> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
>> index eda7b624eab8..8d849ac88a44 100644
>> --- a/virt/kvm/arm/arm.c
>> +++ b/virt/kvm/arm/arm.c
>> @@ -164,6 +164,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>>  	free_percpu(kvm->arch.last_vcpu_ran);
>>  	kvm->arch.last_vcpu_ran = NULL;
>> 
>> +	bitmap_free(kvm->arch.pmu_filter);
>> +
>>  	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
>>  		if (kvm->vcpus[i]) {
>>  			kvm_vcpu_destroy(kvm->vcpus[i]);
>> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
>> index f0d0312c0a55..9f0fd0224d5b 100644
>> --- a/virt/kvm/arm/pmu.c
>> +++ b/virt/kvm/arm/pmu.c
>> @@ -579,10 +579,19 @@ static void kvm_pmu_create_perf_event(struct 
>> kvm_vcpu *vcpu, u64 select_idx)
>> 
>>  	kvm_pmu_stop_counter(vcpu, pmc);
>>  	eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
>> +	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
>> +		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
> nit:
> 	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
> 		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
> 	else
> 		eventsel = data & ARMV8_PMU_EVTYPE_EVENT;

You don't like it? ;-)

>> 
>>  	/* Software increment event does't need to be backed by a perf event 
>> */
> nit: while wer are at it fix the does't typo
>> -	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
>> -	    pmc->idx != ARMV8_PMU_CYCLE_IDX)
>> +	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
>> +		return;
>> +
>> +	/*
>> +	 * If we have a filter in place and that the event isn't allowed, do
>> +	 * not install a perf event either.
>> +	 */
>> +	if (vcpu->kvm->arch.pmu_filter &&
>> +	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
>>  		return;
>> 
>>  	memset(&attr, 0, sizeof(struct perf_event_attr));
>> @@ -594,8 +603,7 @@ static void kvm_pmu_create_perf_event(struct 
>> kvm_vcpu *vcpu, u64 select_idx)
>>  	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
>>  	attr.exclude_hv = 1; /* Don't count EL2 events */
>>  	attr.exclude_host = 1; /* Don't count host events */
>> -	attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
>> -		ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
>> +	attr.config = eventsel;
> So in that case the guest counter will not increment but the guest does
> not know the counter is not implemented. Can't this lead to bad user
> experience. Shouldn't this disablement be reflected in PMCEID0/1 regs?

The whole point is that we want to keep things hidden from the guest.
Also, PMCEID{0,1} only describe a small set of events (the architected
common events), and not the whole range of microarchitectural events
that the CPU implements.

>> 
>>  	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
>> 
>> @@ -735,15 +743,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
>> 
>>  static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
>>  {
>> -	if (!kvm_arm_support_pmu_v3())
>> -		return -ENODEV;
>> -
>> -	if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>> -		return -ENXIO;
>> -
>> -	if (vcpu->arch.pmu.created)
>> -		return -EBUSY;
>> -
>>  	if (irqchip_in_kernel(vcpu->kvm)) {
>>  		int ret;
>> 
>> @@ -794,8 +793,19 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int 
>> irq)
>>  	return true;
>>  }
>> 
>> +#define NR_EVENTS	(ARMV8_PMU_EVTYPE_EVENT + 1)
>> +
>>  int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct 
>> kvm_device_attr *attr)
>>  {
>> +	if (!kvm_arm_support_pmu_v3())
>> +		return -ENODEV;
>> +
>> +	if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>> +		return -ENODEV;
> I see you changed -ENXIO into -ENODEV. wanted?

Probably not... but see below.

>> +
>> +	if (vcpu->arch.pmu.created)
>> +		return -EBUSY;
>> +
>>  	switch (attr->attr) {
>>  	case KVM_ARM_VCPU_PMU_V3_IRQ: {
>>  		int __user *uaddr = (int __user *)(long)attr->addr;
>> @@ -804,9 +814,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, 
>> struct kvm_device_attr *attr)
>>  		if (!irqchip_in_kernel(vcpu->kvm))
>>  			return -EINVAL;
>> 
>> -		if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>> -			return -ENODEV;
>> -

Here's why. I wonder if we already have a problem with the consistency 
of the
error codes returned to userspace.

>>  		if (get_user(irq, uaddr))
>>  			return -EFAULT;
>> 
>> @@ -824,6 +831,50 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu 
>> *vcpu, struct kvm_device_attr *attr)
>>  		vcpu->arch.pmu.irq_num = irq;
>>  		return 0;
>>  	}
>> +	case KVM_ARM_VCPU_PMU_V3_FILTER: {
>> +		struct kvm_pmu_event_filter __user *uaddr;
>> +		struct kvm_pmu_event_filter filter;
>> +
>> +		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
>> +
>> +		if (copy_from_user(&filter, uaddr, sizeof(filter)))
>> +			return -EFAULT;
>> +
>> +		if (((u32)filter.base_event + filter.nevents) > NR_EVENTS ||
> isnt't it >= ?

No, I think this is correct. I want to be able to filter event 0xFFFF, 
for example,
so I have base_event=0xFFFF and nevents=1.

>> +		    (filter.action != KVM_PMU_EVENT_ALLOW &&
>> +		     filter.action != KVM_PMU_EVENT_DENY))
>> +			return -EINVAL;
> -EINVAL is not documented in the API doc.

Good point.

>> +
>> +		mutex_lock(&vcpu->kvm->lock);
>> +
>> +		if (!vcpu->kvm->arch.pmu_filter) {
>> +			vcpu->kvm->arch.pmu_filter = bitmap_alloc(NR_EVENTS, GFP_KERNEL);
>> +			if (!vcpu->kvm->arch.pmu_filter) {
>> +				mutex_unlock(&vcpu->kvm->lock);
>> +				return -ENOMEM;
>> +			}
>> +
>> +			/*
>> +			 * The default depends on the first applied filter.
>> +			 * If it allows events, the default is to deny.
>> +			 * Conversely, if the first filter denies a set of
>> +			 * events, the default is to allow.
>> +			 */
>> +			if (filter.action == KVM_PMU_EVENT_ALLOW)
>> +				bitmap_zero(vcpu->kvm->arch.pmu_filter, NR_EVENTS);
>> +			else
>> +				bitmap_fill(vcpu->kvm->arch.pmu_filter, NR_EVENTS);
>> +		}
>> +
>> +		if (filter.action == KVM_PMU_EVENT_ALLOW)
>> +			bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, 
>> filter.nevents);
>> +		else
>> +			bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, 
>> filter.nevents);
>> +
>> +		mutex_unlock(&vcpu->kvm->lock);
>> +
>> +		return 0;
>> +	}
>>  	case KVM_ARM_VCPU_PMU_V3_INIT:
>>  		return kvm_arm_pmu_v3_init(vcpu);
>>  	}
>> @@ -860,6 +911,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, 
>> struct kvm_device_attr *attr)
>>  	switch (attr->attr) {
>>  	case KVM_ARM_VCPU_PMU_V3_IRQ:
> not related to this patch but shouldn't we advertise this only with
> in-kernel irqchip?

We do support the PMU without the in-kernel chip, unfortunately... Yes,
supporting this feature was a big mistake.

>>  	case KVM_ARM_VCPU_PMU_V3_INIT:
>> +	case KVM_ARM_VCPU_PMU_V3_FILTER:
>>  		if (kvm_arm_support_pmu_v3() &&
>>  		    test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>>  			return 0;

Thanks for the review,

         M.
Eric Auger March 10, 2020, 5:40 p.m. UTC | #3
Hi Marc,

On 3/10/20 12:03 PM, Marc Zyngier wrote:
> Hi Eric,
> 
> On 2020-03-09 18:05, Auger Eric wrote:
>> Hi Marc,
>>
>> On 3/9/20 1:48 PM, Marc Zyngier wrote:
>>> It can be desirable to expose a PMU to a guest, and yet not want the
>>> guest to be able to count some of the implemented events (because this
>>> would give information on shared resources, for example.
>>>
>>> For this, let's extend the PMUv3 device API, and offer a way to setup a
>>> bitmap of the allowed events (the default being no bitmap, and thus no
>>> filtering).
>>>
>>> Userspace can thus allow/deny ranges of event. The default policy
>>> depends on the "polarity" of the first filter setup (default deny if the
>>> filter allows events, and default allow if the filter denies events).
>>> This allows to setup exactly what is allowed for a given guest.
>>>
>>> Note that although the ioctl is per-vcpu, the map of allowed events is
>>> global to the VM (it can be setup from any vcpu until the vcpu PMU is
>>> initialized).
>>>
>>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>>> ---
>>>  arch/arm64/include/asm/kvm_host.h |  6 +++
>>>  arch/arm64/include/uapi/asm/kvm.h | 16 ++++++
>>>  virt/kvm/arm/arm.c                |  2 +
>>>  virt/kvm/arm/pmu.c                | 84 +++++++++++++++++++++++++------
>>>  4 files changed, 92 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/kvm_host.h
>>> b/arch/arm64/include/asm/kvm_host.h
>>> index 57fd46acd058..8e63c618688d 100644
>>> --- a/arch/arm64/include/asm/kvm_host.h
>>> +++ b/arch/arm64/include/asm/kvm_host.h
>>> @@ -91,6 +91,12 @@ struct kvm_arch {
>>>       * supported.
>>>       */
>>>      bool return_nisv_io_abort_to_user;
>>> +
>>> +    /*
>>> +     * VM-wide PMU filter, implemented as a bitmap and big enough
>>> +     * for up to 65536 events
>>> +     */
>>> +    unsigned long *pmu_filter;
>>>  };
>>>
>>>  #define KVM_NR_MEM_OBJS     40
>>> diff --git a/arch/arm64/include/uapi/asm/kvm.h
>>> b/arch/arm64/include/uapi/asm/kvm.h
>>> index ba85bb23f060..7b1511d6ce44 100644
>>> --- a/arch/arm64/include/uapi/asm/kvm.h
>>> +++ b/arch/arm64/include/uapi/asm/kvm.h
>>> @@ -159,6 +159,21 @@ struct kvm_sync_regs {
>>>  struct kvm_arch_memory_slot {
>>>  };
>>>
>>> +/*
>>> + * PMU filter structure. Describe a range of events with a particular
>>> + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
>>> + */
>>> +struct kvm_pmu_event_filter {
>>> +    __u16    base_event;
>>> +    __u16    nevents;
>>> +
>>> +#define KVM_PMU_EVENT_ALLOW    0
>>> +#define KVM_PMU_EVENT_DENY    1
>>> +
>>> +    __u8    action;
>>> +    __u8    pad[3];
>>> +};
>>> +
>>>  /* for KVM_GET/SET_VCPU_EVENTS */
>>>  struct kvm_vcpu_events {
>>>      struct {
>>> @@ -329,6 +344,7 @@ struct kvm_vcpu_events {
>>>  #define KVM_ARM_VCPU_PMU_V3_CTRL    0
>>>  #define   KVM_ARM_VCPU_PMU_V3_IRQ    0
>>>  #define   KVM_ARM_VCPU_PMU_V3_INIT    1
>>> +#define   KVM_ARM_VCPU_PMU_V3_FILTER    2
>>>  #define KVM_ARM_VCPU_TIMER_CTRL        1
>>>  #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER        0
>>>  #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER        1
>>> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
>>> index eda7b624eab8..8d849ac88a44 100644
>>> --- a/virt/kvm/arm/arm.c
>>> +++ b/virt/kvm/arm/arm.c
>>> @@ -164,6 +164,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>>>      free_percpu(kvm->arch.last_vcpu_ran);
>>>      kvm->arch.last_vcpu_ran = NULL;
>>>
>>> +    bitmap_free(kvm->arch.pmu_filter);
>>> +
>>>      for (i = 0; i < KVM_MAX_VCPUS; ++i) {
>>>          if (kvm->vcpus[i]) {
>>>              kvm_vcpu_destroy(kvm->vcpus[i]);
>>> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
>>> index f0d0312c0a55..9f0fd0224d5b 100644
>>> --- a/virt/kvm/arm/pmu.c
>>> +++ b/virt/kvm/arm/pmu.c
>>> @@ -579,10 +579,19 @@ static void kvm_pmu_create_perf_event(struct
>>> kvm_vcpu *vcpu, u64 select_idx)
>>>
>>>      kvm_pmu_stop_counter(vcpu, pmc);
>>>      eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
>>> +    if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
>>> +        eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
>> nit:
>>     if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
>>         eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
>>     else
>>         eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
> 
> You don't like it? ;-)
? eventsel is set only once instead of 2 times
> 
>>>
>>>      /* Software increment event does't need to be backed by a perf
>>> event */
>> nit: while wer are at it fix the does't typo
>>> -    if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
>>> -        pmc->idx != ARMV8_PMU_CYCLE_IDX)
>>> +    if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
>>> +        return;
>>> +
>>> +    /*
>>> +     * If we have a filter in place and that the event isn't
>>> allowed, do
>>> +     * not install a perf event either.
>>> +     */
>>> +    if (vcpu->kvm->arch.pmu_filter &&
>>> +        !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
>>>          return;
>>>
>>>      memset(&attr, 0, sizeof(struct perf_event_attr));
>>> @@ -594,8 +603,7 @@ static void kvm_pmu_create_perf_event(struct
>>> kvm_vcpu *vcpu, u64 select_idx)
>>>      attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
>>>      attr.exclude_hv = 1; /* Don't count EL2 events */
>>>      attr.exclude_host = 1; /* Don't count host events */
>>> -    attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
>>> -        ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
>>> +    attr.config = eventsel;
>> So in that case the guest counter will not increment but the guest does
>> not know the counter is not implemented. Can't this lead to bad user
>> experience. Shouldn't this disablement be reflected in PMCEID0/1 regs?
> 
> The whole point is that we want to keep things hidden from the guest.
> Also, PMCEID{0,1} only describe a small set of events (the architected
> common events), and not the whole range of microarchitectural events
> that the CPU implements.

I am still not totally convinced. Things are not totally hidden to the
guest as the counter does not increment, right? So a guest may try to
use as it is advertised in PMCEID0/1 but not get the expected results
leading to potential support request. I agree not all the events are
described there but your API also allows to filter out some of the ones
that are advertised.
> 
>>>
>>>      counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
>>>
>>> @@ -735,15 +743,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
>>>
>>>  static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
>>>  {
>>> -    if (!kvm_arm_support_pmu_v3())
>>> -        return -ENODEV;
>>> -
>>> -    if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>>> -        return -ENXIO;
>>> -
>>> -    if (vcpu->arch.pmu.created)
>>> -        return -EBUSY;
>>> -
>>>      if (irqchip_in_kernel(vcpu->kvm)) {
>>>          int ret;
>>>
>>> @@ -794,8 +793,19 @@ static bool pmu_irq_is_valid(struct kvm *kvm,
>>> int irq)
>>>      return true;
>>>  }
>>>
>>> +#define NR_EVENTS    (ARMV8_PMU_EVTYPE_EVENT + 1)
>>> +
>>>  int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct
>>> kvm_device_attr *attr)
>>>  {
>>> +    if (!kvm_arm_support_pmu_v3())
>>> +        return -ENODEV;
>>> +
>>> +    if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>>> +        return -ENODEV;
>> I see you changed -ENXIO into -ENODEV. wanted?
> 
> Probably not... but see below.
> 
>>> +
>>> +    if (vcpu->arch.pmu.created)
>>> +        return -EBUSY;
>>> +
>>>      switch (attr->attr) {
>>>      case KVM_ARM_VCPU_PMU_V3_IRQ: {
>>>          int __user *uaddr = (int __user *)(long)attr->addr;
>>> @@ -804,9 +814,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu
>>> *vcpu, struct kvm_device_attr *attr)
>>>          if (!irqchip_in_kernel(vcpu->kvm))
>>>              return -EINVAL;
>>>
>>> -        if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>>> -            return -ENODEV;
>>> -
> 
> Here's why. I wonder if we already have a problem with the consistency
> of the
> error codes returned to userspace.
OK. Then you may document it in the commit message?
> 
>>>          if (get_user(irq, uaddr))
>>>              return -EFAULT;
>>>
>>> @@ -824,6 +831,50 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu
>>> *vcpu, struct kvm_device_attr *attr)
>>>          vcpu->arch.pmu.irq_num = irq;
>>>          return 0;
>>>      }
>>> +    case KVM_ARM_VCPU_PMU_V3_FILTER: {
>>> +        struct kvm_pmu_event_filter __user *uaddr;
>>> +        struct kvm_pmu_event_filter filter;
>>> +
>>> +        uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
>>> +
>>> +        if (copy_from_user(&filter, uaddr, sizeof(filter)))
>>> +            return -EFAULT;
>>> +
>>> +        if (((u32)filter.base_event + filter.nevents) > NR_EVENTS ||
>> isnt't it >= ?
> 
> No, I think this is correct. I want to be able to filter event 0xFFFF,
> for example,
> so I have base_event=0xFFFF and nevents=1.
hum my mistake. Sorry
> 
>>> +            (filter.action != KVM_PMU_EVENT_ALLOW &&
>>> +             filter.action != KVM_PMU_EVENT_DENY))
>>> +            return -EINVAL;
>> -EINVAL is not documented in the API doc.
> 
> Good point.
> 
>>> +
>>> +        mutex_lock(&vcpu->kvm->lock);
>>> +
>>> +        if (!vcpu->kvm->arch.pmu_filter) {
>>> +            vcpu->kvm->arch.pmu_filter = bitmap_alloc(NR_EVENTS,
>>> GFP_KERNEL);
>>> +            if (!vcpu->kvm->arch.pmu_filter) {
>>> +                mutex_unlock(&vcpu->kvm->lock);
>>> +                return -ENOMEM;
>>> +            }
>>> +
>>> +            /*
>>> +             * The default depends on the first applied filter.
>>> +             * If it allows events, the default is to deny.
>>> +             * Conversely, if the first filter denies a set of
>>> +             * events, the default is to allow.
>>> +             */
>>> +            if (filter.action == KVM_PMU_EVENT_ALLOW)
>>> +                bitmap_zero(vcpu->kvm->arch.pmu_filter, NR_EVENTS);
>>> +            else
>>> +                bitmap_fill(vcpu->kvm->arch.pmu_filter, NR_EVENTS);
>>> +        }
>>> +
>>> +        if (filter.action == KVM_PMU_EVENT_ALLOW)
>>> +            bitmap_set(vcpu->kvm->arch.pmu_filter,
>>> filter.base_event, filter.nevents);
>>> +        else
>>> +            bitmap_clear(vcpu->kvm->arch.pmu_filter,
>>> filter.base_event, filter.nevents);
>>> +
>>> +        mutex_unlock(&vcpu->kvm->lock);
>>> +
>>> +        return 0;
>>> +    }
>>>      case KVM_ARM_VCPU_PMU_V3_INIT:
>>>          return kvm_arm_pmu_v3_init(vcpu);
>>>      }
>>> @@ -860,6 +911,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu
>>> *vcpu, struct kvm_device_attr *attr)
>>>      switch (attr->attr) {
>>>      case KVM_ARM_VCPU_PMU_V3_IRQ:
>> not related to this patch but shouldn't we advertise this only with
>> in-kernel irqchip?
> 
> We do support the PMU without the in-kernel chip, unfortunately... Yes,
> supporting this feature was a big mistake.
But I see in kvm_arm_pmu_v3_set_attr:
case KVM_ARM_VCPU_PMU_V3_IRQ:
../..
                if (!irqchip_in_kernel(vcpu->kvm))
                        return -EINVAL;

Thanks

Eric



> 
>>>      case KVM_ARM_VCPU_PMU_V3_INIT:
>>> +    case KVM_ARM_VCPU_PMU_V3_FILTER:
>>>          if (kvm_arm_support_pmu_v3() &&
>>>              test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>>>              return 0;
> 
> Thanks for the review,
> 
>         M.
Marc Zyngier March 10, 2020, 6 p.m. UTC | #4
On 2020-03-10 17:40, Auger Eric wrote:
> Hi Marc,
> 
> On 3/10/20 12:03 PM, Marc Zyngier wrote:
>> Hi Eric,
>> 
>> On 2020-03-09 18:05, Auger Eric wrote:
>>> Hi Marc,
>>> 
>>> On 3/9/20 1:48 PM, Marc Zyngier wrote:
>>>> It can be desirable to expose a PMU to a guest, and yet not want the
>>>> guest to be able to count some of the implemented events (because 
>>>> this
>>>> would give information on shared resources, for example.
>>>> 
>>>> For this, let's extend the PMUv3 device API, and offer a way to 
>>>> setup a
>>>> bitmap of the allowed events (the default being no bitmap, and thus 
>>>> no
>>>> filtering).
>>>> 
>>>> Userspace can thus allow/deny ranges of event. The default policy
>>>> depends on the "polarity" of the first filter setup (default deny if 
>>>> the
>>>> filter allows events, and default allow if the filter denies 
>>>> events).
>>>> This allows to setup exactly what is allowed for a given guest.
>>>> 
>>>> Note that although the ioctl is per-vcpu, the map of allowed events 
>>>> is
>>>> global to the VM (it can be setup from any vcpu until the vcpu PMU 
>>>> is
>>>> initialized).
>>>> 
>>>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>>>> ---
>>>>  arch/arm64/include/asm/kvm_host.h |  6 +++
>>>>  arch/arm64/include/uapi/asm/kvm.h | 16 ++++++
>>>>  virt/kvm/arm/arm.c                |  2 +
>>>>  virt/kvm/arm/pmu.c                | 84 
>>>> +++++++++++++++++++++++++------
>>>>  4 files changed, 92 insertions(+), 16 deletions(-)
>>>> 
>>>> diff --git a/arch/arm64/include/asm/kvm_host.h
>>>> b/arch/arm64/include/asm/kvm_host.h
>>>> index 57fd46acd058..8e63c618688d 100644
>>>> --- a/arch/arm64/include/asm/kvm_host.h
>>>> +++ b/arch/arm64/include/asm/kvm_host.h
>>>> @@ -91,6 +91,12 @@ struct kvm_arch {
>>>>       * supported.
>>>>       */
>>>>      bool return_nisv_io_abort_to_user;
>>>> +
>>>> +    /*
>>>> +     * VM-wide PMU filter, implemented as a bitmap and big enough
>>>> +     * for up to 65536 events
>>>> +     */
>>>> +    unsigned long *pmu_filter;
>>>>  };
>>>> 
>>>>  #define KVM_NR_MEM_OBJS     40
>>>> diff --git a/arch/arm64/include/uapi/asm/kvm.h
>>>> b/arch/arm64/include/uapi/asm/kvm.h
>>>> index ba85bb23f060..7b1511d6ce44 100644
>>>> --- a/arch/arm64/include/uapi/asm/kvm.h
>>>> +++ b/arch/arm64/include/uapi/asm/kvm.h
>>>> @@ -159,6 +159,21 @@ struct kvm_sync_regs {
>>>>  struct kvm_arch_memory_slot {
>>>>  };
>>>> 
>>>> +/*
>>>> + * PMU filter structure. Describe a range of events with a 
>>>> particular
>>>> + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
>>>> + */
>>>> +struct kvm_pmu_event_filter {
>>>> +    __u16    base_event;
>>>> +    __u16    nevents;
>>>> +
>>>> +#define KVM_PMU_EVENT_ALLOW    0
>>>> +#define KVM_PMU_EVENT_DENY    1
>>>> +
>>>> +    __u8    action;
>>>> +    __u8    pad[3];
>>>> +};
>>>> +
>>>>  /* for KVM_GET/SET_VCPU_EVENTS */
>>>>  struct kvm_vcpu_events {
>>>>      struct {
>>>> @@ -329,6 +344,7 @@ struct kvm_vcpu_events {
>>>>  #define KVM_ARM_VCPU_PMU_V3_CTRL    0
>>>>  #define   KVM_ARM_VCPU_PMU_V3_IRQ    0
>>>>  #define   KVM_ARM_VCPU_PMU_V3_INIT    1
>>>> +#define   KVM_ARM_VCPU_PMU_V3_FILTER    2
>>>>  #define KVM_ARM_VCPU_TIMER_CTRL        1
>>>>  #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER        0
>>>>  #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER        1
>>>> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
>>>> index eda7b624eab8..8d849ac88a44 100644
>>>> --- a/virt/kvm/arm/arm.c
>>>> +++ b/virt/kvm/arm/arm.c
>>>> @@ -164,6 +164,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>>>>      free_percpu(kvm->arch.last_vcpu_ran);
>>>>      kvm->arch.last_vcpu_ran = NULL;
>>>> 
>>>> +    bitmap_free(kvm->arch.pmu_filter);
>>>> +
>>>>      for (i = 0; i < KVM_MAX_VCPUS; ++i) {
>>>>          if (kvm->vcpus[i]) {
>>>>              kvm_vcpu_destroy(kvm->vcpus[i]);
>>>> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
>>>> index f0d0312c0a55..9f0fd0224d5b 100644
>>>> --- a/virt/kvm/arm/pmu.c
>>>> +++ b/virt/kvm/arm/pmu.c
>>>> @@ -579,10 +579,19 @@ static void kvm_pmu_create_perf_event(struct
>>>> kvm_vcpu *vcpu, u64 select_idx)
>>>> 
>>>>      kvm_pmu_stop_counter(vcpu, pmc);
>>>>      eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
>>>> +    if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
>>>> +        eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
>>> nit:
>>>     if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
>>>         eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
>>>     else
>>>         eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
>> 
>> You don't like it? ;-)
> ? eventset set only once instead of 2 times

The compiler does the right thing, but sure, I'll change it.

>> 
>>>> 
>>>>      /* Software increment event does't need to be backed by a perf
>>>> event */
>>> nit: while wer are at it fix the does't typo
>>>> -    if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
>>>> -        pmc->idx != ARMV8_PMU_CYCLE_IDX)
>>>> +    if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
>>>> +        return;
>>>> +
>>>> +    /*
>>>> +     * If we have a filter in place and that the event isn't
>>>> allowed, do
>>>> +     * not install a perf event either.
>>>> +     */
>>>> +    if (vcpu->kvm->arch.pmu_filter &&
>>>> +        !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
>>>>          return;
>>>> 
>>>>      memset(&attr, 0, sizeof(struct perf_event_attr));
>>>> @@ -594,8 +603,7 @@ static void kvm_pmu_create_perf_event(struct
>>>> kvm_vcpu *vcpu, u64 select_idx)
>>>>      attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
>>>>      attr.exclude_hv = 1; /* Don't count EL2 events */
>>>>      attr.exclude_host = 1; /* Don't count host events */
>>>> -    attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
>>>> -        ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
>>>> +    attr.config = eventsel;
>>> So in that case the guest counter will not increment but the guest 
>>> does
>>> not know the counter is not implemented. Can't this lead to bad user
>>> experience. Shouldn't this disablement be reflected in PMCEID0/1 
>>> regs?
>> 
>> The whole point is that we want to keep things hidden from the guest.
>> Also, PMCEID{0,1} only describe a small set of events (the architected
>> common events), and not the whole range of microarchitectural events
>> that the CPU implements.
> 
> I am still not totally convinced. Things are not totally hidden to the
> guest as the counter does not increment, right? So a guest may try to
> use as it is advertised in PMCEID0/1 but not get the expected results
> leading to potential support request. I agree not all the events are
> described there but your API also allows to filter out some of the ones
> that are advertised.

I think we're at odds when it comes to the goal of this series. If you
read the CPU TRM, you will find that event X is implemented. You look
at PMCEIDx, and you find it is not. You still get a support request! ;-)

Dropping events from these registers is totally trivial, but I'm not
sure this will reduce the surprise effect. It doesn't hurt anyway, so
I'll implement that.

>> 
>>>> 
>>>>      counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
>>>> 
>>>> @@ -735,15 +743,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu 
>>>> *vcpu)
>>>> 
>>>>  static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
>>>>  {
>>>> -    if (!kvm_arm_support_pmu_v3())
>>>> -        return -ENODEV;
>>>> -
>>>> -    if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>>>> -        return -ENXIO;
>>>> -
>>>> -    if (vcpu->arch.pmu.created)
>>>> -        return -EBUSY;
>>>> -
>>>>      if (irqchip_in_kernel(vcpu->kvm)) {
>>>>          int ret;
>>>> 
>>>> @@ -794,8 +793,19 @@ static bool pmu_irq_is_valid(struct kvm *kvm,
>>>> int irq)
>>>>      return true;
>>>>  }
>>>> 
>>>> +#define NR_EVENTS    (ARMV8_PMU_EVTYPE_EVENT + 1)
>>>> +
>>>>  int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct
>>>> kvm_device_attr *attr)
>>>>  {
>>>> +    if (!kvm_arm_support_pmu_v3())
>>>> +        return -ENODEV;
>>>> +
>>>> +    if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>>>> +        return -ENODEV;
>>> I see you changed -ENXIO into -ENODEV. wanted?
>> 
>> Probably not... but see below.
>> 
>>>> +
>>>> +    if (vcpu->arch.pmu.created)
>>>> +        return -EBUSY;
>>>> +
>>>>      switch (attr->attr) {
>>>>      case KVM_ARM_VCPU_PMU_V3_IRQ: {
>>>>          int __user *uaddr = (int __user *)(long)attr->addr;
>>>> @@ -804,9 +814,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu
>>>> *vcpu, struct kvm_device_attr *attr)
>>>>          if (!irqchip_in_kernel(vcpu->kvm))
>>>>              return -EINVAL;
>>>> 
>>>> -        if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>>>> -            return -ENODEV;
>>>> -
>> 
>> Here's why. I wonder if we already have a problem with the consistency
>> of the
>> error codes returned to userspace.
> OK. Then you may document it in the commit message?

I still need to work out whether we actually have an issue on that.

[...]

>>> not related to this patch but shouldn't we advertise this only with
>>> in-kernel irqchip?
>> 
>> We do support the PMU without the in-kernel chip, unfortunately... 
>> Yes,
>> supporting this feature was a big mistake.
> But I see in kvm_arm_pmu_v3_set_attr:
> case KVM_ARM_VCPU_PMU_V3_IRQ:
> ../..
>                 if (!irqchip_in_kernel(vcpu->kvm))
>                         return -EINVAL;

Ah, I see what you mean. Yes, we probably shouldn't report that the PMU
IRQ attribute is supported when we don't have an in-kernel irqchip.

Thanks,

         M.
Eric Auger March 10, 2020, 6:26 p.m. UTC | #5
Hi Marc,

On 3/10/20 7:00 PM, Marc Zyngier wrote:
> On 2020-03-10 17:40, Auger Eric wrote:
>> Hi Marc,
>>
>> On 3/10/20 12:03 PM, Marc Zyngier wrote:
>>> Hi Eric,
>>>
>>> On 2020-03-09 18:05, Auger Eric wrote:
>>>> Hi Marc,
>>>>
>>>> On 3/9/20 1:48 PM, Marc Zyngier wrote:
>>>>> It can be desirable to expose a PMU to a guest, and yet not want the
>>>>> guest to be able to count some of the implemented events (because this
>>>>> would give information on shared resources, for example.
>>>>>
>>>>> For this, let's extend the PMUv3 device API, and offer a way to
>>>>> setup a
>>>>> bitmap of the allowed events (the default being no bitmap, and thus no
>>>>> filtering).
>>>>>
>>>>> Userspace can thus allow/deny ranges of event. The default policy
>>>>> depends on the "polarity" of the first filter setup (default deny
>>>>> if the
>>>>> filter allows events, and default allow if the filter denies events).
>>>>> This allows to setup exactly what is allowed for a given guest.
>>>>>
>>>>> Note that although the ioctl is per-vcpu, the map of allowed events is
>>>>> global to the VM (it can be setup from any vcpu until the vcpu PMU is
>>>>> initialized).
>>>>>
>>>>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>>>>> ---
>>>>>  arch/arm64/include/asm/kvm_host.h |  6 +++
>>>>>  arch/arm64/include/uapi/asm/kvm.h | 16 ++++++
>>>>>  virt/kvm/arm/arm.c                |  2 +
>>>>>  virt/kvm/arm/pmu.c                | 84
>>>>> +++++++++++++++++++++++++------
>>>>>  4 files changed, 92 insertions(+), 16 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/include/asm/kvm_host.h
>>>>> b/arch/arm64/include/asm/kvm_host.h
>>>>> index 57fd46acd058..8e63c618688d 100644
>>>>> --- a/arch/arm64/include/asm/kvm_host.h
>>>>> +++ b/arch/arm64/include/asm/kvm_host.h
>>>>> @@ -91,6 +91,12 @@ struct kvm_arch {
>>>>>       * supported.
>>>>>       */
>>>>>      bool return_nisv_io_abort_to_user;
>>>>> +
>>>>> +    /*
>>>>> +     * VM-wide PMU filter, implemented as a bitmap and big enough
>>>>> +     * for up to 65536 events
>>>>> +     */
>>>>> +    unsigned long *pmu_filter;
>>>>>  };
>>>>>
>>>>>  #define KVM_NR_MEM_OBJS     40
>>>>> diff --git a/arch/arm64/include/uapi/asm/kvm.h
>>>>> b/arch/arm64/include/uapi/asm/kvm.h
>>>>> index ba85bb23f060..7b1511d6ce44 100644
>>>>> --- a/arch/arm64/include/uapi/asm/kvm.h
>>>>> +++ b/arch/arm64/include/uapi/asm/kvm.h
>>>>> @@ -159,6 +159,21 @@ struct kvm_sync_regs {
>>>>>  struct kvm_arch_memory_slot {
>>>>>  };
>>>>>
>>>>> +/*
>>>>> + * PMU filter structure. Describe a range of events with a particular
>>>>> + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
>>>>> + */
>>>>> +struct kvm_pmu_event_filter {
>>>>> +    __u16    base_event;
>>>>> +    __u16    nevents;
>>>>> +
>>>>> +#define KVM_PMU_EVENT_ALLOW    0
>>>>> +#define KVM_PMU_EVENT_DENY    1
>>>>> +
>>>>> +    __u8    action;
>>>>> +    __u8    pad[3];
>>>>> +};
>>>>> +
>>>>>  /* for KVM_GET/SET_VCPU_EVENTS */
>>>>>  struct kvm_vcpu_events {
>>>>>      struct {
>>>>> @@ -329,6 +344,7 @@ struct kvm_vcpu_events {
>>>>>  #define KVM_ARM_VCPU_PMU_V3_CTRL    0
>>>>>  #define   KVM_ARM_VCPU_PMU_V3_IRQ    0
>>>>>  #define   KVM_ARM_VCPU_PMU_V3_INIT    1
>>>>> +#define   KVM_ARM_VCPU_PMU_V3_FILTER    2
>>>>>  #define KVM_ARM_VCPU_TIMER_CTRL        1
>>>>>  #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER        0
>>>>>  #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER        1
>>>>> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
>>>>> index eda7b624eab8..8d849ac88a44 100644
>>>>> --- a/virt/kvm/arm/arm.c
>>>>> +++ b/virt/kvm/arm/arm.c
>>>>> @@ -164,6 +164,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>>>>>      free_percpu(kvm->arch.last_vcpu_ran);
>>>>>      kvm->arch.last_vcpu_ran = NULL;
>>>>>
>>>>> +    bitmap_free(kvm->arch.pmu_filter);
>>>>> +
>>>>>      for (i = 0; i < KVM_MAX_VCPUS; ++i) {
>>>>>          if (kvm->vcpus[i]) {
>>>>>              kvm_vcpu_destroy(kvm->vcpus[i]);
>>>>> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
>>>>> index f0d0312c0a55..9f0fd0224d5b 100644
>>>>> --- a/virt/kvm/arm/pmu.c
>>>>> +++ b/virt/kvm/arm/pmu.c
>>>>> @@ -579,10 +579,19 @@ static void kvm_pmu_create_perf_event(struct
>>>>> kvm_vcpu *vcpu, u64 select_idx)
>>>>>
>>>>>      kvm_pmu_stop_counter(vcpu, pmc);
>>>>>      eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
>>>>> +    if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
>>>>> +        eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
>>>> nit:
>>>>     if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
>>>>         eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
>>>>     else
>>>>         eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
>>>
>>> You don't like it? ;-)
>> ? eventset set only once instead of 2 times
> 
> The compiler does the right thing, but sure, I'll change it.
> 
>>>
>>>>>
>>>>>      /* Software increment event does't need to be backed by a perf
>>>>> event */
>>>> nit: while wer are at it fix the does't typo
>>>>> -    if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
>>>>> -        pmc->idx != ARMV8_PMU_CYCLE_IDX)
>>>>> +    if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
>>>>> +        return;
>>>>> +
>>>>> +    /*
>>>>> +     * If we have a filter in place and that the event isn't
>>>>> allowed, do
>>>>> +     * not install a perf event either.
>>>>> +     */
>>>>> +    if (vcpu->kvm->arch.pmu_filter &&
>>>>> +        !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
>>>>>          return;
>>>>>
>>>>>      memset(&attr, 0, sizeof(struct perf_event_attr));
>>>>> @@ -594,8 +603,7 @@ static void kvm_pmu_create_perf_event(struct
>>>>> kvm_vcpu *vcpu, u64 select_idx)
>>>>>      attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
>>>>>      attr.exclude_hv = 1; /* Don't count EL2 events */
>>>>>      attr.exclude_host = 1; /* Don't count host events */
>>>>> -    attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
>>>>> -        ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
>>>>> +    attr.config = eventsel;
>>>> So in that case the guest counter will not increment but the guest does
>>>> not know the counter is not implemented. Can't this lead to bad user
>>>> experience. Shouldn't this disablement be reflected in PMCEID0/1 regs?
>>>
>>> The whole point is that we want to keep things hidden from the guest.
>>> Also, PMCEID{0,1} only describe a small set of events (the architected
>>> common events), and not the whole range of microarchitectural events
>>> that the CPU implements.
>>
>> I am still not totally convinced. Things are not totally hidden to the
>> guest as the counter does not increment, right? So a guest may try to
>> use as it is advertised in PMCEID0/1 but not get the expected results
>> leading to potential support request. I agree not all the events are
>> described there but your API also allows to filter out some of the ones
>> that are advertised.
> 
> I think we're at odds when it comes to the goal of this series. If you
> read the CPU TRM, you will find that event X is implemented. You look
> at PMCEIDx, and you find it is not. You still get a support request! ;-)
Yep that's a weird situation indeed, I haven't thought about the TRM.
> 
> Dropping events from these registers is totally trivial, but I'm not
> sure this will reduce the surprise effect. It doesn't hurt anyway, so
> I'll implement that.
Up to you. Or at least you can document it in the commit msg.

Thanks

Eric

> 
>>>
>>>>>
>>>>>      counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
>>>>>
>>>>> @@ -735,15 +743,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
>>>>>
>>>>>  static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
>>>>>  {
>>>>> -    if (!kvm_arm_support_pmu_v3())
>>>>> -        return -ENODEV;
>>>>> -
>>>>> -    if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>>>>> -        return -ENXIO;
>>>>> -
>>>>> -    if (vcpu->arch.pmu.created)
>>>>> -        return -EBUSY;
>>>>> -
>>>>>      if (irqchip_in_kernel(vcpu->kvm)) {
>>>>>          int ret;
>>>>>
>>>>> @@ -794,8 +793,19 @@ static bool pmu_irq_is_valid(struct kvm *kvm,
>>>>> int irq)
>>>>>      return true;
>>>>>  }
>>>>>
>>>>> +#define NR_EVENTS    (ARMV8_PMU_EVTYPE_EVENT + 1)
>>>>> +
>>>>>  int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct
>>>>> kvm_device_attr *attr)
>>>>>  {
>>>>> +    if (!kvm_arm_support_pmu_v3())
>>>>> +        return -ENODEV;
>>>>> +
>>>>> +    if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>>>>> +        return -ENODEV;
>>>> I see you changed -ENXIO into -ENODEV. wanted?
>>>
>>> Probably not... but see below.
>>>
>>>>> +
>>>>> +    if (vcpu->arch.pmu.created)
>>>>> +        return -EBUSY;
>>>>> +
>>>>>      switch (attr->attr) {
>>>>>      case KVM_ARM_VCPU_PMU_V3_IRQ: {
>>>>>          int __user *uaddr = (int __user *)(long)attr->addr;
>>>>> @@ -804,9 +814,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu
>>>>> *vcpu, struct kvm_device_attr *attr)
>>>>>          if (!irqchip_in_kernel(vcpu->kvm))
>>>>>              return -EINVAL;
>>>>>
>>>>> -        if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>>>>> -            return -ENODEV;
>>>>> -
>>>
>>> Here's why. I wonder if we already have a problem with the consistency
>>> of the
>>> error codes returned to userspace.
>> OK. Then you may document it in the commit message?
> 
> I still need to work out whether we actually have an issue on that.
> 
> [...]
> 
>>>> not related to this patch but shouldn't we advertise this only with
>>>> in-kernel irqchip?
>>>
>>> We do support the PMU without the in-kernel chip, unfortunately... Yes,
>>> supporting this feature was a big mistake.
>> But I see in kvm_arm_pmu_v3_set_attr:
>> case KVM_ARM_VCPU_PMU_V3_IRQ:
>> ../..
>>                 if (!irqchip_in_kernel(vcpu->kvm))
>>                         return -EINVAL;
> 
> Ah, I see what you mean. Yes, we probably shouldn't report that the PMU
> IRQ attribute is supported when we don't have an in-kernel irqchip.
> 
> Thanks,
> 
>         M.
Alexander Graf Aug. 18, 2020, 11:24 p.m. UTC | #6
Hi Marc,

On 10.03.20 19:00, Marc Zyngier wrote:
> On 2020-03-10 17:40, Auger Eric wrote:
>> Hi Marc,
>>
>> On 3/10/20 12:03 PM, Marc Zyngier wrote:
>>> Hi Eric,
>>>
>>> On 2020-03-09 18:05, Auger Eric wrote:
>>>> Hi Marc,
>>>>
>>>> On 3/9/20 1:48 PM, Marc Zyngier wrote:
>>>>> It can be desirable to expose a PMU to a guest, and yet not want the
>>>>> guest to be able to count some of the implemented events (because this
>>>>> would give information on shared resources, for example.
>>>>>
>>>>> For this, let's extend the PMUv3 device API, and offer a way to 
>>>>> setup a
>>>>> bitmap of the allowed events (the default being no bitmap, and thus no
>>>>> filtering).
>>>>>
>>>>> Userspace can thus allow/deny ranges of event. The default policy
>>>>> depends on the "polarity" of the first filter setup (default deny 
>>>>> if the
>>>>> filter allows events, and default allow if the filter denies events).
>>>>> This allows to setup exactly what is allowed for a given guest.
>>>>>
>>>>> Note that although the ioctl is per-vcpu, the map of allowed events is
>>>>> global to the VM (it can be setup from any vcpu until the vcpu PMU is
>>>>> initialized).
>>>>>
>>>>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>>>>> ---
>>>>>  arch/arm64/include/asm/kvm_host.h |  6 +++
>>>>>  arch/arm64/include/uapi/asm/kvm.h | 16 ++++++
>>>>>  virt/kvm/arm/arm.c                |  2 +
>>>>>  virt/kvm/arm/pmu.c                | 84 
>>>>> +++++++++++++++++++++++++------
>>>>>  4 files changed, 92 insertions(+), 16 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/include/asm/kvm_host.h
>>>>> b/arch/arm64/include/asm/kvm_host.h
>>>>> index 57fd46acd058..8e63c618688d 100644
>>>>> --- a/arch/arm64/include/asm/kvm_host.h
>>>>> +++ b/arch/arm64/include/asm/kvm_host.h
>>>>> @@ -91,6 +91,12 @@ struct kvm_arch {
>>>>>       * supported.
>>>>>       */
>>>>>      bool return_nisv_io_abort_to_user;
>>>>> +
>>>>> +    /*
>>>>> +     * VM-wide PMU filter, implemented as a bitmap and big enough
>>>>> +     * for up to 65536 events
>>>>> +     */
>>>>> +    unsigned long *pmu_filter;
>>>>>  };
>>>>>
>>>>>  #define KVM_NR_MEM_OBJS     40
>>>>> diff --git a/arch/arm64/include/uapi/asm/kvm.h
>>>>> b/arch/arm64/include/uapi/asm/kvm.h
>>>>> index ba85bb23f060..7b1511d6ce44 100644
>>>>> --- a/arch/arm64/include/uapi/asm/kvm.h
>>>>> +++ b/arch/arm64/include/uapi/asm/kvm.h
>>>>> @@ -159,6 +159,21 @@ struct kvm_sync_regs {
>>>>>  struct kvm_arch_memory_slot {
>>>>>  };
>>>>>
>>>>> +/*
>>>>> + * PMU filter structure. Describe a range of events with a particular
>>>>> + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
>>>>> + */
>>>>> +struct kvm_pmu_event_filter {
>>>>> +    __u16    base_event;
>>>>> +    __u16    nevents;
>>>>> +
>>>>> +#define KVM_PMU_EVENT_ALLOW    0
>>>>> +#define KVM_PMU_EVENT_DENY    1
>>>>> +
>>>>> +    __u8    action;
>>>>> +    __u8    pad[3];
>>>>> +};
>>>>> +
>>>>>  /* for KVM_GET/SET_VCPU_EVENTS */
>>>>>  struct kvm_vcpu_events {
>>>>>      struct {
>>>>> @@ -329,6 +344,7 @@ struct kvm_vcpu_events {
>>>>>  #define KVM_ARM_VCPU_PMU_V3_CTRL    0
>>>>>  #define   KVM_ARM_VCPU_PMU_V3_IRQ    0
>>>>>  #define   KVM_ARM_VCPU_PMU_V3_INIT    1
>>>>> +#define   KVM_ARM_VCPU_PMU_V3_FILTER    2
>>>>>  #define KVM_ARM_VCPU_TIMER_CTRL        1
>>>>>  #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER        0
>>>>>  #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER        1
>>>>> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
>>>>> index eda7b624eab8..8d849ac88a44 100644
>>>>> --- a/virt/kvm/arm/arm.c
>>>>> +++ b/virt/kvm/arm/arm.c
>>>>> @@ -164,6 +164,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>>>>>      free_percpu(kvm->arch.last_vcpu_ran);
>>>>>      kvm->arch.last_vcpu_ran = NULL;
>>>>>
>>>>> +    bitmap_free(kvm->arch.pmu_filter);
>>>>> +
>>>>>      for (i = 0; i < KVM_MAX_VCPUS; ++i) {
>>>>>          if (kvm->vcpus[i]) {
>>>>>              kvm_vcpu_destroy(kvm->vcpus[i]);
>>>>> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
>>>>> index f0d0312c0a55..9f0fd0224d5b 100644
>>>>> --- a/virt/kvm/arm/pmu.c
>>>>> +++ b/virt/kvm/arm/pmu.c
>>>>> @@ -579,10 +579,19 @@ static void kvm_pmu_create_perf_event(struct
>>>>> kvm_vcpu *vcpu, u64 select_idx)
>>>>>
>>>>>      kvm_pmu_stop_counter(vcpu, pmc);
>>>>>      eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
>>>>> +    if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
>>>>> +        eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
>>>> nit:
>>>>     if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
>>>>         eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
>>>>     else
>>>>         eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
>>>
>>> You don't like it? ;-)
>> ? eventset set only once instead of 2 times
> 
> The compiler does the right thing, but sure, I'll change it.

I haven't seen a v3 follow-up after this. Do you happen to have that 
somewhere in a local branch and just need to send it out or would you 
prefer if I pick up v2 and address the comments?


Thanks,

Alex



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
Marc Zyngier Aug. 20, 2020, 7:37 a.m. UTC | #7
On 2020-08-19 00:24, Alexander Graf wrote:
> Hi Marc,

[...]

> I haven't seen a v3 follow-up after this. Do you happen to have that
> somewhere in a local branch and just need to send it out or would you
> prefer if I pick up v2 and address the comments?

I'll look into it.

         M.
Alexander Graf Sept. 2, 2020, 12:23 p.m. UTC | #8
On 20.08.20 09:37, Marc Zyngier wrote:
> 
> On 2020-08-19 00:24, Alexander Graf wrote:
>> Hi Marc,
> 
> [...]
> 
>> I haven't seen a v3 follow-up after this. Do you happen to have that
>> somewhere in a local branch and just need to send it out or would you
>> prefer if I pick up v2 and address the comments?
> 
> I'll look into it.

Thank you :)


Alex



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 57fd46acd058..8e63c618688d 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -91,6 +91,12 @@  struct kvm_arch {
 	 * supported.
 	 */
 	bool return_nisv_io_abort_to_user;
+
+	/*
+	 * VM-wide PMU filter, implemented as a bitmap and big enough
+	 * for up to 65536 events
+	 */
+	unsigned long *pmu_filter;
 };
 
 #define KVM_NR_MEM_OBJS     40
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index ba85bb23f060..7b1511d6ce44 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -159,6 +159,21 @@  struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 };
 
+/*
+ * PMU filter structure. Describe a range of events with a particular
+ * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
+ */
+struct kvm_pmu_event_filter {
+	__u16	base_event;
+	__u16	nevents;
+
+#define KVM_PMU_EVENT_ALLOW	0
+#define KVM_PMU_EVENT_DENY	1
+
+	__u8	action;
+	__u8	pad[3];
+};
+
 /* for KVM_GET/SET_VCPU_EVENTS */
 struct kvm_vcpu_events {
 	struct {
@@ -329,6 +344,7 @@  struct kvm_vcpu_events {
 #define KVM_ARM_VCPU_PMU_V3_CTRL	0
 #define   KVM_ARM_VCPU_PMU_V3_IRQ	0
 #define   KVM_ARM_VCPU_PMU_V3_INIT	1
+#define   KVM_ARM_VCPU_PMU_V3_FILTER	2
 #define KVM_ARM_VCPU_TIMER_CTRL		1
 #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER		0
 #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER		1
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index eda7b624eab8..8d849ac88a44 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -164,6 +164,8 @@  void kvm_arch_destroy_vm(struct kvm *kvm)
 	free_percpu(kvm->arch.last_vcpu_ran);
 	kvm->arch.last_vcpu_ran = NULL;
 
+	bitmap_free(kvm->arch.pmu_filter);
+
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		if (kvm->vcpus[i]) {
 			kvm_vcpu_destroy(kvm->vcpus[i]);
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index f0d0312c0a55..9f0fd0224d5b 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -579,10 +579,19 @@  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
 
 	kvm_pmu_stop_counter(vcpu, pmc);
 	eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
+	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
+		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
 
 	/* Software increment event does't need to be backed by a perf event */
-	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
-	    pmc->idx != ARMV8_PMU_CYCLE_IDX)
+	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
+		return;
+
+	/*
+	 * If we have a filter in place and that the event isn't allowed, do
+	 * not install a perf event either.
+	 */
+	if (vcpu->kvm->arch.pmu_filter &&
+	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
 		return;
 
 	memset(&attr, 0, sizeof(struct perf_event_attr));
@@ -594,8 +603,7 @@  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
 	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
 	attr.exclude_hv = 1; /* Don't count EL2 events */
 	attr.exclude_host = 1; /* Don't count host events */
-	attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
-		ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
+	attr.config = eventsel;
 
 	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
 
@@ -735,15 +743,6 @@  int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
 
 static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
 {
-	if (!kvm_arm_support_pmu_v3())
-		return -ENODEV;
-
-	if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
-		return -ENXIO;
-
-	if (vcpu->arch.pmu.created)
-		return -EBUSY;
-
 	if (irqchip_in_kernel(vcpu->kvm)) {
 		int ret;
 
@@ -794,8 +793,19 @@  static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
 	return true;
 }
 
+#define NR_EVENTS	(ARMV8_PMU_EVTYPE_EVENT + 1)
+
 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 {
+	if (!kvm_arm_support_pmu_v3())
+		return -ENODEV;
+
+	if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
+		return -ENODEV;
+
+	if (vcpu->arch.pmu.created)
+		return -EBUSY;
+
 	switch (attr->attr) {
 	case KVM_ARM_VCPU_PMU_V3_IRQ: {
 		int __user *uaddr = (int __user *)(long)attr->addr;
@@ -804,9 +814,6 @@  int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 		if (!irqchip_in_kernel(vcpu->kvm))
 			return -EINVAL;
 
-		if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
-			return -ENODEV;
-
 		if (get_user(irq, uaddr))
 			return -EFAULT;
 
@@ -824,6 +831,50 @@  int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 		vcpu->arch.pmu.irq_num = irq;
 		return 0;
 	}
+	case KVM_ARM_VCPU_PMU_V3_FILTER: {
+		struct kvm_pmu_event_filter __user *uaddr;
+		struct kvm_pmu_event_filter filter;
+
+		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
+
+		if (copy_from_user(&filter, uaddr, sizeof(filter)))
+			return -EFAULT;
+
+		if (((u32)filter.base_event + filter.nevents) > NR_EVENTS ||
+		    (filter.action != KVM_PMU_EVENT_ALLOW &&
+		     filter.action != KVM_PMU_EVENT_DENY))
+			return -EINVAL;
+
+		mutex_lock(&vcpu->kvm->lock);
+
+		if (!vcpu->kvm->arch.pmu_filter) {
+			vcpu->kvm->arch.pmu_filter = bitmap_alloc(NR_EVENTS, GFP_KERNEL);
+			if (!vcpu->kvm->arch.pmu_filter) {
+				mutex_unlock(&vcpu->kvm->lock);
+				return -ENOMEM;
+			}
+
+			/*
+			 * The default depends on the first applied filter.
+			 * If it allows events, the default is to deny.
+			 * Conversely, if the first filter denies a set of
+			 * events, the default is to allow.
+			 */
+			if (filter.action == KVM_PMU_EVENT_ALLOW)
+				bitmap_zero(vcpu->kvm->arch.pmu_filter, NR_EVENTS);
+			else
+				bitmap_fill(vcpu->kvm->arch.pmu_filter, NR_EVENTS);
+		}
+
+		if (filter.action == KVM_PMU_EVENT_ALLOW)
+			bitmap_set(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
+		else
+			bitmap_clear(vcpu->kvm->arch.pmu_filter, filter.base_event, filter.nevents);
+
+		mutex_unlock(&vcpu->kvm->lock);
+
+		return 0;
+	}
 	case KVM_ARM_VCPU_PMU_V3_INIT:
 		return kvm_arm_pmu_v3_init(vcpu);
 	}
@@ -860,6 +911,7 @@  int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 	switch (attr->attr) {
 	case KVM_ARM_VCPU_PMU_V3_IRQ:
 	case KVM_ARM_VCPU_PMU_V3_INIT:
+	case KVM_ARM_VCPU_PMU_V3_FILTER:
 		if (kvm_arm_support_pmu_v3() &&
 		    test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
 			return 0;