diff mbox series

[v2,3/4] KVM: arm64: Add KVM_ARM_VCPU_PMU_V3_SET_PMU attribute

Message ID 20211206170223.309789-4-alexandru.elisei@arm.com (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: Improve PMU support on heterogeneous systems | expand

Commit Message

Alexandru Elisei Dec. 6, 2021, 5:02 p.m. UTC
When KVM creates an event and there is more than one PMU present on the
system, perf_init_event() will go through the list of available PMUs and
will choose the first one that can create the event. The order of the PMUs
in the PMU list depends on the probe order, which can change under various
circumstances, for example if the order of the PMU nodes changes in the DTB
or if asynchronous driver probing is enabled on the kernel command line
(with the driver_async_probe=armv8-pmu option).

Another consequence of this approach is that, on heterogeneous systems,
all virtual machines that KVM creates will use the same PMU. This might
cause unexpected behaviour for userspace: when a VCPU is executing on
the physical CPU that uses this PMU, PMU events in the guest work
correctly; but when the same VCPU executes on another CPU, PMU events in
the guest will suddenly stop counting.

Fortunately, perf core allows the user to specify on which PMU to create an
event by using the perf_event_attr->type field, which is used by
perf_init_event() as an index in the radix tree of available PMUs.

Add the KVM_ARM_VCPU_PMU_V3_CTRL(KVM_ARM_VCPU_PMU_V3_SET_PMU) VCPU
attribute to allow userspace to specify the arm_pmu that KVM will use when
creating events for that VCPU. KVM will make no attempt to run the VCPU on
the physical CPUs that share this PMU, leaving it up to userspace to
manage the VCPU threads' affinity accordingly.

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
---
 Documentation/virt/kvm/devices/vcpu.rst | 25 +++++++++++++++++
 arch/arm64/include/uapi/asm/kvm.h       |  1 +
 arch/arm64/kvm/pmu-emul.c               | 37 +++++++++++++++++++++++--
 include/kvm/arm_pmu.h                   |  1 +
 tools/arch/arm64/include/uapi/asm/kvm.h |  1 +
 5 files changed, 63 insertions(+), 2 deletions(-)

Comments

Reiji Watanabe Dec. 8, 2021, 3:13 a.m. UTC | #1
Hi Alex,

On Mon, Dec 6, 2021 at 9:02 AM Alexandru Elisei
<alexandru.elisei@arm.com> wrote:
>
> When KVM creates an event and there are more than one PMUs present on the
> system, perf_init_event() will go through the list of available PMUs and
> will choose the first one that can create the event. The order of the PMUs
> in the PMU list depends on the probe order, which can change under various
> circumstances, for example if the order of the PMU nodes change in the DTB
> or if asynchronous driver probing is enabled on the kernel command line
> (with the driver_async_probe=armv8-pmu option).
>
> Another consequence of this approach is that, on heteregeneous systems,
> all virtual machines that KVM creates will use the same PMU. This might
> cause unexpected behaviour for userspace: when a VCPU is executing on
> the physical CPU that uses this PMU, PMU events in the guest work
> correctly; but when the same VCPU executes on another CPU, PMU events in
> the guest will suddenly stop counting.
>
> Fortunately, perf core allows user to specify on which PMU to create an
> event by using the perf_event_attr->type field, which is used by
> perf_init_event() as an index in the radix tree of available PMUs.
>
> Add the KVM_ARM_VCPU_PMU_V3_CTRL(KVM_ARM_VCPU_PMU_V3_SET_PMU) VCPU
> attribute to allow userspace to specify the arm_pmu that KVM will use when
> creating events for that VCPU. KVM will make no attempt to run the VCPU on
> the physical CPUs that share this PMU, leaving it up to userspace to
> manage the VCPU threads' affinity accordingly.
>
> Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
> ---
>  Documentation/virt/kvm/devices/vcpu.rst | 25 +++++++++++++++++
>  arch/arm64/include/uapi/asm/kvm.h       |  1 +
>  arch/arm64/kvm/pmu-emul.c               | 37 +++++++++++++++++++++++--
>  include/kvm/arm_pmu.h                   |  1 +
>  tools/arch/arm64/include/uapi/asm/kvm.h |  1 +
>  5 files changed, 63 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
> index 60a29972d3f1..c82be5cbc268 100644
> --- a/Documentation/virt/kvm/devices/vcpu.rst
> +++ b/Documentation/virt/kvm/devices/vcpu.rst
> @@ -104,6 +104,31 @@ hardware event. Filtering event 0x1E (CHAIN) has no effect either, as it
>  isn't strictly speaking an event. Filtering the cycle counter is possible
>  using event 0x11 (CPU_CYCLES).
>
> +1.4 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_SET_PMU
> +------------------------------------------
> +
> +:Parameters: in kvm_device_attr.addr the address to an int representing the PMU
> +             identifier.
> +
> +:Returns:
> +
> +        =======  ===============================================
> +        -EBUSY   PMUv3 already initialized
> +        -EFAULT  Error accessing the PMU identifier
> +        -ENXIO   PMU not found
> +        -ENODEV  PMUv3 not supported or GIC not initialized
> +        -ENOMEM  Could not allocate memory
> +        =======  ===============================================
> +
> +Request that the VCPU uses the specified hardware PMU when creating guest events
> +for the purpose of PMU emulation. The PMU identifier can be read from the "type"
> +file for the desired PMU instance under /sys/devices (or, equivalently,
> +/sys/bus/event_source/devices). This attribute is particularly useful on heterogeneous
> +systems where there are at least two CPU PMUs on the system.
> +
> +Note that KVM will not make any attempts to run the VCPU on the physical CPUs
> +associated with the PMU specified by this attribute. This is entirely left to
> +userspace.
>
>  2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
>  =================================
> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> index b3edde68bc3e..1d0a0a2a9711 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -362,6 +362,7 @@ struct kvm_arm_copy_mte_tags {
>  #define   KVM_ARM_VCPU_PMU_V3_IRQ      0
>  #define   KVM_ARM_VCPU_PMU_V3_INIT     1
>  #define   KVM_ARM_VCPU_PMU_V3_FILTER   2
> +#define   KVM_ARM_VCPU_PMU_V3_SET_PMU  3
>  #define KVM_ARM_VCPU_TIMER_CTRL                1
>  #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER                0
>  #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER                1
> diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> index eaaad4c06561..618138c5f792 100644
> --- a/arch/arm64/kvm/pmu-emul.c
> +++ b/arch/arm64/kvm/pmu-emul.c
> @@ -603,6 +603,7 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
>  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
>  {
>         struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +       struct arm_pmu *arm_pmu = pmu->arm_pmu;
>         struct kvm_pmc *pmc;
>         struct perf_event *event;
>         struct perf_event_attr attr;
> @@ -638,8 +639,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
>                 return;
>
>         memset(&attr, 0, sizeof(struct perf_event_attr));
> -       attr.type = PERF_TYPE_RAW;
> -       attr.size = sizeof(attr);
> +       attr.type = arm_pmu ? arm_pmu->pmu.type : PERF_TYPE_RAW;
>         attr.pinned = 1;
>         attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
>         attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
> @@ -941,6 +941,29 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
>         return true;
>  }
>
> +static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
> +{
> +       struct kvm_pmu *kvm_pmu = &vcpu->arch.pmu;
> +       struct arm_pmu_entry *entry;
> +       struct arm_pmu *arm_pmu;
> +       int ret = -ENXIO;
> +
> +       mutex_lock(&arm_pmus_lock);
> +
> +       list_for_each_entry(entry, &arm_pmus, entry) {
> +               arm_pmu = entry->arm_pmu;
> +               if (arm_pmu->pmu.type == pmu_id) {
> +                       kvm_pmu->arm_pmu = arm_pmu;

Shouldn't kvm->arch.pmuver be updated based on the pmu that
is used for the guest ?

Thanks,
Reiji


> +                       ret = 0;
> +                       goto out_unlock;
> +               }
> +       }
> +
> +out_unlock:
> +       mutex_unlock(&arm_pmus_lock);
> +       return ret;
> +}
> +
>  int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
>  {
>         if (!kvm_vcpu_has_pmu(vcpu))
> @@ -1027,6 +1050,15 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
>
>                 return 0;
>         }
> +       case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
> +               int __user *uaddr = (int __user *)(long)attr->addr;
> +               int pmu_id;
> +
> +               if (get_user(pmu_id, uaddr))
> +                       return -EFAULT;
> +
> +               return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
> +       }
>         case KVM_ARM_VCPU_PMU_V3_INIT:
>                 return kvm_arm_pmu_v3_init(vcpu);
>         }
> @@ -1064,6 +1096,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
>         case KVM_ARM_VCPU_PMU_V3_IRQ:
>         case KVM_ARM_VCPU_PMU_V3_INIT:
>         case KVM_ARM_VCPU_PMU_V3_FILTER:
> +       case KVM_ARM_VCPU_PMU_V3_SET_PMU:
>                 if (kvm_vcpu_has_pmu(vcpu))
>                         return 0;
>         }
> diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> index e249c5f172aa..ab3046a8f9bb 100644
> --- a/include/kvm/arm_pmu.h
> +++ b/include/kvm/arm_pmu.h
> @@ -34,6 +34,7 @@ struct kvm_pmu {
>         bool created;
>         bool irq_level;
>         struct irq_work overflow_work;
> +       struct arm_pmu *arm_pmu;
>  };
>
>  struct arm_pmu_entry {
> diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
> index b3edde68bc3e..1d0a0a2a9711 100644
> --- a/tools/arch/arm64/include/uapi/asm/kvm.h
> +++ b/tools/arch/arm64/include/uapi/asm/kvm.h
> @@ -362,6 +362,7 @@ struct kvm_arm_copy_mte_tags {
>  #define   KVM_ARM_VCPU_PMU_V3_IRQ      0
>  #define   KVM_ARM_VCPU_PMU_V3_INIT     1
>  #define   KVM_ARM_VCPU_PMU_V3_FILTER   2
> +#define   KVM_ARM_VCPU_PMU_V3_SET_PMU  3
>  #define KVM_ARM_VCPU_TIMER_CTRL                1
>  #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER                0
>  #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER                1
> --
> 2.34.1
>
> _______________________________________________
> kvmarm mailing list
> kvmarm@lists.cs.columbia.edu
> https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Alexandru Elisei Dec. 8, 2021, 12:23 p.m. UTC | #2
Hi Reiji,

On Tue, Dec 07, 2021 at 07:13:17PM -0800, Reiji Watanabe wrote:
> Hi Alex,
> 
> On Mon, Dec 6, 2021 at 9:02 AM Alexandru Elisei
> <alexandru.elisei@arm.com> wrote:
> >
> > When KVM creates an event and there are more than one PMUs present on the
> > system, perf_init_event() will go through the list of available PMUs and
> > will choose the first one that can create the event. The order of the PMUs
> > in the PMU list depends on the probe order, which can change under various
> > circumstances, for example if the order of the PMU nodes change in the DTB
> > or if asynchronous driver probing is enabled on the kernel command line
> > (with the driver_async_probe=armv8-pmu option).
> >
> > Another consequence of this approach is that, on heteregeneous systems,
> > all virtual machines that KVM creates will use the same PMU. This might
> > cause unexpected behaviour for userspace: when a VCPU is executing on
> > the physical CPU that uses this PMU, PMU events in the guest work
> > correctly; but when the same VCPU executes on another CPU, PMU events in
> > the guest will suddenly stop counting.
> >
> > Fortunately, perf core allows user to specify on which PMU to create an
> > event by using the perf_event_attr->type field, which is used by
> > perf_init_event() as an index in the radix tree of available PMUs.
> >
> > Add the KVM_ARM_VCPU_PMU_V3_CTRL(KVM_ARM_VCPU_PMU_V3_SET_PMU) VCPU
> > attribute to allow userspace to specify the arm_pmu that KVM will use when
> > creating events for that VCPU. KVM will make no attempt to run the VCPU on
> > the physical CPUs that share this PMU, leaving it up to userspace to
> > manage the VCPU threads' affinity accordingly.
> >
> > Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
> > ---
> >  Documentation/virt/kvm/devices/vcpu.rst | 25 +++++++++++++++++
> >  arch/arm64/include/uapi/asm/kvm.h       |  1 +
> >  arch/arm64/kvm/pmu-emul.c               | 37 +++++++++++++++++++++++--
> >  include/kvm/arm_pmu.h                   |  1 +
> >  tools/arch/arm64/include/uapi/asm/kvm.h |  1 +
> >  5 files changed, 63 insertions(+), 2 deletions(-)
> >
> > diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
> > index 60a29972d3f1..c82be5cbc268 100644
> > --- a/Documentation/virt/kvm/devices/vcpu.rst
> > +++ b/Documentation/virt/kvm/devices/vcpu.rst
> > @@ -104,6 +104,31 @@ hardware event. Filtering event 0x1E (CHAIN) has no effect either, as it
> >  isn't strictly speaking an event. Filtering the cycle counter is possible
> >  using event 0x11 (CPU_CYCLES).
> >
> > +1.4 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_SET_PMU
> > +------------------------------------------
> > +
> > +:Parameters: in kvm_device_attr.addr the address to an int representing the PMU
> > +             identifier.
> > +
> > +:Returns:
> > +
> > +        =======  ===============================================
> > +        -EBUSY   PMUv3 already initialized
> > +        -EFAULT  Error accessing the PMU identifier
> > +        -ENXIO   PMU not found
> > +        -ENODEV  PMUv3 not supported or GIC not initialized
> > +        -ENOMEM  Could not allocate memory
> > +        =======  ===============================================
> > +
> > +Request that the VCPU uses the specified hardware PMU when creating guest events
> > +for the purpose of PMU emulation. The PMU identifier can be read from the "type"
> > +file for the desired PMU instance under /sys/devices (or, equivalent,
> > +/sys/bus/even_source). This attribute is particularly useful on heterogeneous
> > +systems where there are at least two CPU PMUs on the system.
> > +
> > +Note that KVM will not make any attempts to run the VCPU on the physical CPUs
> > +associated with the PMU specified by this attribute. This is entirely left to
> > +userspace.
> >
> >  2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
> >  =================================
> > diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> > index b3edde68bc3e..1d0a0a2a9711 100644
> > --- a/arch/arm64/include/uapi/asm/kvm.h
> > +++ b/arch/arm64/include/uapi/asm/kvm.h
> > @@ -362,6 +362,7 @@ struct kvm_arm_copy_mte_tags {
> >  #define   KVM_ARM_VCPU_PMU_V3_IRQ      0
> >  #define   KVM_ARM_VCPU_PMU_V3_INIT     1
> >  #define   KVM_ARM_VCPU_PMU_V3_FILTER   2
> > +#define   KVM_ARM_VCPU_PMU_V3_SET_PMU  3
> >  #define KVM_ARM_VCPU_TIMER_CTRL                1
> >  #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER                0
> >  #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER                1
> > diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> > index eaaad4c06561..618138c5f792 100644
> > --- a/arch/arm64/kvm/pmu-emul.c
> > +++ b/arch/arm64/kvm/pmu-emul.c
> > @@ -603,6 +603,7 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
> >  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
> >  {
> >         struct kvm_pmu *pmu = &vcpu->arch.pmu;
> > +       struct arm_pmu *arm_pmu = pmu->arm_pmu;
> >         struct kvm_pmc *pmc;
> >         struct perf_event *event;
> >         struct perf_event_attr attr;
> > @@ -638,8 +639,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
> >                 return;
> >
> >         memset(&attr, 0, sizeof(struct perf_event_attr));
> > -       attr.type = PERF_TYPE_RAW;
> > -       attr.size = sizeof(attr);
> > +       attr.type = arm_pmu ? arm_pmu->pmu.type : PERF_TYPE_RAW;
> >         attr.pinned = 1;
> >         attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
> >         attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
> > @@ -941,6 +941,29 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
> >         return true;
> >  }
> >
> > +static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
> > +{
> > +       struct kvm_pmu *kvm_pmu = &vcpu->arch.pmu;
> > +       struct arm_pmu_entry *entry;
> > +       struct arm_pmu *arm_pmu;
> > +       int ret = -ENXIO;
> > +
> > +       mutex_lock(&arm_pmus_lock);
> > +
> > +       list_for_each_entry(entry, &arm_pmus, entry) {
> > +               arm_pmu = entry->arm_pmu;
> > +               if (arm_pmu->pmu.type == pmu_id) {
> > +                       kvm_pmu->arm_pmu = arm_pmu;
> 
> Shouldn't kvm->arch.pmuver be updated based on the pmu that
> is used for the guest ?

As far as I can tell, kvm->arch.pmuver is used in kvm_pmu_event_mask() to
get the number of available perf events, which is then used for configuring
events (via the PMEVTYPER_EL0 register) or for masking out events when the
guest reads PMCEID{0,1}_EL0; the events that are masked out are the events
that are unsupported by the PMU that perf will choose for creating events.

This series doesn't forbid userspace from setting the PMU for only a subset
of VCPUs, leaving the other VCPUs with the default PMU, so setting
kvm->arch.pmuver to a particular VCPU's PMU is not correct.

I think the correct fix here would be to have kvm_pmu_event_mask() use the
VCPU's PMU PMUVer, and fall back to kvm->arch.pmuver if that isn't set.

This makes me wonder. Should KVM enforce having userspace either not
setting the PMU for any VCPU, or setting it for all VCPUs? I think this
would be a good idea and will reduce complexity in the long run. I also
don't see a use case for userspace choosing to set the PMU for a subset of
VCPUs, leaving the other VCPUs with the default behaviour.

Thanks,
Alex

> 
> Thanks,
> Reiji
> 
> 
> > +                       ret = 0;
> > +                       goto out_unlock;
> > +               }
> > +       }
> > +
> > +out_unlock:
> > +       mutex_unlock(&arm_pmus_lock);
> > +       return ret;
> > +}
> > +
> >  int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> >  {
> >         if (!kvm_vcpu_has_pmu(vcpu))
> > @@ -1027,6 +1050,15 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> >
> >                 return 0;
> >         }
> > +       case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
> > +               int __user *uaddr = (int __user *)(long)attr->addr;
> > +               int pmu_id;
> > +
> > +               if (get_user(pmu_id, uaddr))
> > +                       return -EFAULT;
> > +
> > +               return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
> > +       }
> >         case KVM_ARM_VCPU_PMU_V3_INIT:
> >                 return kvm_arm_pmu_v3_init(vcpu);
> >         }
> > @@ -1064,6 +1096,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> >         case KVM_ARM_VCPU_PMU_V3_IRQ:
> >         case KVM_ARM_VCPU_PMU_V3_INIT:
> >         case KVM_ARM_VCPU_PMU_V3_FILTER:
> > +       case KVM_ARM_VCPU_PMU_V3_SET_PMU:
> >                 if (kvm_vcpu_has_pmu(vcpu))
> >                         return 0;
> >         }
> > diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> > index e249c5f172aa..ab3046a8f9bb 100644
> > --- a/include/kvm/arm_pmu.h
> > +++ b/include/kvm/arm_pmu.h
> > @@ -34,6 +34,7 @@ struct kvm_pmu {
> >         bool created;
> >         bool irq_level;
> >         struct irq_work overflow_work;
> > +       struct arm_pmu *arm_pmu;
> >  };
> >
> >  struct arm_pmu_entry {
> > diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
> > index b3edde68bc3e..1d0a0a2a9711 100644
> > --- a/tools/arch/arm64/include/uapi/asm/kvm.h
> > +++ b/tools/arch/arm64/include/uapi/asm/kvm.h
> > @@ -362,6 +362,7 @@ struct kvm_arm_copy_mte_tags {
> >  #define   KVM_ARM_VCPU_PMU_V3_IRQ      0
> >  #define   KVM_ARM_VCPU_PMU_V3_INIT     1
> >  #define   KVM_ARM_VCPU_PMU_V3_FILTER   2
> > +#define   KVM_ARM_VCPU_PMU_V3_SET_PMU  3
> >  #define KVM_ARM_VCPU_TIMER_CTRL                1
> >  #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER                0
> >  #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER                1
> > --
> > 2.34.1
> >
> > _______________________________________________
> > kvmarm mailing list
> > kvmarm@lists.cs.columbia.edu
> > https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Alexandru Elisei Dec. 8, 2021, 12:43 p.m. UTC | #3
Hi,

On Wed, Dec 08, 2021 at 12:23:44PM +0000, Alexandru Elisei wrote:
> Hi Reiji,
> 
> On Tue, Dec 07, 2021 at 07:13:17PM -0800, Reiji Watanabe wrote:
> > Hi Alex,
> > 
> > On Mon, Dec 6, 2021 at 9:02 AM Alexandru Elisei
> > <alexandru.elisei@arm.com> wrote:
> > >
> > > When KVM creates an event and there are more than one PMUs present on the
> > > system, perf_init_event() will go through the list of available PMUs and
> > > will choose the first one that can create the event. The order of the PMUs
> > > in the PMU list depends on the probe order, which can change under various
> > > circumstances, for example if the order of the PMU nodes change in the DTB
> > > or if asynchronous driver probing is enabled on the kernel command line
> > > (with the driver_async_probe=armv8-pmu option).
> > >
> > > Another consequence of this approach is that, on heteregeneous systems,
> > > all virtual machines that KVM creates will use the same PMU. This might
> > > cause unexpected behaviour for userspace: when a VCPU is executing on
> > > the physical CPU that uses this PMU, PMU events in the guest work
> > > correctly; but when the same VCPU executes on another CPU, PMU events in
> > > the guest will suddenly stop counting.
> > >
> > > Fortunately, perf core allows user to specify on which PMU to create an
> > > event by using the perf_event_attr->type field, which is used by
> > > perf_init_event() as an index in the radix tree of available PMUs.
> > >
> > > Add the KVM_ARM_VCPU_PMU_V3_CTRL(KVM_ARM_VCPU_PMU_V3_SET_PMU) VCPU
> > > attribute to allow userspace to specify the arm_pmu that KVM will use when
> > > creating events for that VCPU. KVM will make no attempt to run the VCPU on
> > > the physical CPUs that share this PMU, leaving it up to userspace to
> > > manage the VCPU threads' affinity accordingly.
> > >
> > > Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
> > > ---
> > >  Documentation/virt/kvm/devices/vcpu.rst | 25 +++++++++++++++++
> > >  arch/arm64/include/uapi/asm/kvm.h       |  1 +
> > >  arch/arm64/kvm/pmu-emul.c               | 37 +++++++++++++++++++++++--
> > >  include/kvm/arm_pmu.h                   |  1 +
> > >  tools/arch/arm64/include/uapi/asm/kvm.h |  1 +
> > >  5 files changed, 63 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
> > > index 60a29972d3f1..c82be5cbc268 100644
> > > --- a/Documentation/virt/kvm/devices/vcpu.rst
> > > +++ b/Documentation/virt/kvm/devices/vcpu.rst
> > > @@ -104,6 +104,31 @@ hardware event. Filtering event 0x1E (CHAIN) has no effect either, as it
> > >  isn't strictly speaking an event. Filtering the cycle counter is possible
> > >  using event 0x11 (CPU_CYCLES).
> > >
> > > +1.4 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_SET_PMU
> > > +------------------------------------------
> > > +
> > > +:Parameters: in kvm_device_attr.addr the address to an int representing the PMU
> > > +             identifier.
> > > +
> > > +:Returns:
> > > +
> > > +        =======  ===============================================
> > > +        -EBUSY   PMUv3 already initialized
> > > +        -EFAULT  Error accessing the PMU identifier
> > > +        -ENXIO   PMU not found
> > > +        -ENODEV  PMUv3 not supported or GIC not initialized
> > > +        -ENOMEM  Could not allocate memory
> > > +        =======  ===============================================
> > > +
> > > +Request that the VCPU uses the specified hardware PMU when creating guest events
> > > +for the purpose of PMU emulation. The PMU identifier can be read from the "type"
> > > +file for the desired PMU instance under /sys/devices (or, equivalent,
> > > +/sys/bus/even_source). This attribute is particularly useful on heterogeneous
> > > +systems where there are at least two CPU PMUs on the system.
> > > +
> > > +Note that KVM will not make any attempts to run the VCPU on the physical CPUs
> > > +associated with the PMU specified by this attribute. This is entirely left to
> > > +userspace.
> > >
> > >  2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
> > >  =================================
> > > diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> > > index b3edde68bc3e..1d0a0a2a9711 100644
> > > --- a/arch/arm64/include/uapi/asm/kvm.h
> > > +++ b/arch/arm64/include/uapi/asm/kvm.h
> > > @@ -362,6 +362,7 @@ struct kvm_arm_copy_mte_tags {
> > >  #define   KVM_ARM_VCPU_PMU_V3_IRQ      0
> > >  #define   KVM_ARM_VCPU_PMU_V3_INIT     1
> > >  #define   KVM_ARM_VCPU_PMU_V3_FILTER   2
> > > +#define   KVM_ARM_VCPU_PMU_V3_SET_PMU  3
> > >  #define KVM_ARM_VCPU_TIMER_CTRL                1
> > >  #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER                0
> > >  #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER                1
> > > diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> > > index eaaad4c06561..618138c5f792 100644
> > > --- a/arch/arm64/kvm/pmu-emul.c
> > > +++ b/arch/arm64/kvm/pmu-emul.c
> > > @@ -603,6 +603,7 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
> > >  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
> > >  {
> > >         struct kvm_pmu *pmu = &vcpu->arch.pmu;
> > > +       struct arm_pmu *arm_pmu = pmu->arm_pmu;
> > >         struct kvm_pmc *pmc;
> > >         struct perf_event *event;
> > >         struct perf_event_attr attr;
> > > @@ -638,8 +639,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
> > >                 return;
> > >
> > >         memset(&attr, 0, sizeof(struct perf_event_attr));
> > > -       attr.type = PERF_TYPE_RAW;
> > > -       attr.size = sizeof(attr);
> > > +       attr.type = arm_pmu ? arm_pmu->pmu.type : PERF_TYPE_RAW;
> > >         attr.pinned = 1;
> > >         attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
> > >         attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
> > > @@ -941,6 +941,29 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
> > >         return true;
> > >  }
> > >
> > > +static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
> > > +{
> > > +       struct kvm_pmu *kvm_pmu = &vcpu->arch.pmu;
> > > +       struct arm_pmu_entry *entry;
> > > +       struct arm_pmu *arm_pmu;
> > > +       int ret = -ENXIO;
> > > +
> > > +       mutex_lock(&arm_pmus_lock);
> > > +
> > > +       list_for_each_entry(entry, &arm_pmus, entry) {
> > > +               arm_pmu = entry->arm_pmu;
> > > +               if (arm_pmu->pmu.type == pmu_id) {
> > > +                       kvm_pmu->arm_pmu = arm_pmu;
> > 
> > Shouldn't kvm->arch.pmuver be updated based on the pmu that
> > is used for the guest ?
> 
> As far as I can tell, kvm->arch.pmuver is used in kvm_pmu_event_mask() to
> get the number of available perf events, which is then used for configuring
> events (via the PMEVTYPER_EL0) register or for masking out events when the
> guest reads PMCEID{0,1}_EL0; the events that are masked out are the events
> that are unsupported by the PMU that perf will choose for creating events.
> 
> This series doesn't forbid userspace from setting the PMU for only a subset
> of VCPUs, leaving the other VCPUs with the default PMU, so setting
> kvm->arch.pmuver to a particular VCPU's PMU is not correct.
> 
> I think the correct fix here would be to have kvm_pmu_event_mask() use the
> VCPU's PMU PMUVer, and fallback to kvm->arch.pmuver if that isn't set.
> 
> This makes me wonder. Should KVM enforce having userspace either not
> setting the PMU for any VCPU, either setting it for all VCPUs? I think this
> would be a good idea and will reduce complexity in the long run. I also
> don't see a use case for userspace choosing to set the PMU for a subset of
> VCPUs, leaving the other VCPUs with the default behaviour.

I had a look and I don't think there's a way to enforce this, as there are
no restrictions on when a VCPU can be created. KVM must support only a
subset of VCPUs having a PMU set.

Thanks,
Alex

> 
> Thanks,
> Alex
> 
> > 
> > Thanks,
> > Reiji
> > 
> > 
> > > +                       ret = 0;
> > > +                       goto out_unlock;
> > > +               }
> > > +       }
> > > +
> > > +out_unlock:
> > > +       mutex_unlock(&arm_pmus_lock);
> > > +       return ret;
> > > +}
> > > +
> > >  int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> > >  {
> > >         if (!kvm_vcpu_has_pmu(vcpu))
> > > @@ -1027,6 +1050,15 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> > >
> > >                 return 0;
> > >         }
> > > +       case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
> > > +               int __user *uaddr = (int __user *)(long)attr->addr;
> > > +               int pmu_id;
> > > +
> > > +               if (get_user(pmu_id, uaddr))
> > > +                       return -EFAULT;
> > > +
> > > +               return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
> > > +       }
> > >         case KVM_ARM_VCPU_PMU_V3_INIT:
> > >                 return kvm_arm_pmu_v3_init(vcpu);
> > >         }
> > > @@ -1064,6 +1096,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> > >         case KVM_ARM_VCPU_PMU_V3_IRQ:
> > >         case KVM_ARM_VCPU_PMU_V3_INIT:
> > >         case KVM_ARM_VCPU_PMU_V3_FILTER:
> > > +       case KVM_ARM_VCPU_PMU_V3_SET_PMU:
> > >                 if (kvm_vcpu_has_pmu(vcpu))
> > >                         return 0;
> > >         }
> > > diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> > > index e249c5f172aa..ab3046a8f9bb 100644
> > > --- a/include/kvm/arm_pmu.h
> > > +++ b/include/kvm/arm_pmu.h
> > > @@ -34,6 +34,7 @@ struct kvm_pmu {
> > >         bool created;
> > >         bool irq_level;
> > >         struct irq_work overflow_work;
> > > +       struct arm_pmu *arm_pmu;
> > >  };
> > >
> > >  struct arm_pmu_entry {
> > > diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
> > > index b3edde68bc3e..1d0a0a2a9711 100644
> > > --- a/tools/arch/arm64/include/uapi/asm/kvm.h
> > > +++ b/tools/arch/arm64/include/uapi/asm/kvm.h
> > > @@ -362,6 +362,7 @@ struct kvm_arm_copy_mte_tags {
> > >  #define   KVM_ARM_VCPU_PMU_V3_IRQ      0
> > >  #define   KVM_ARM_VCPU_PMU_V3_INIT     1
> > >  #define   KVM_ARM_VCPU_PMU_V3_FILTER   2
> > > +#define   KVM_ARM_VCPU_PMU_V3_SET_PMU  3
> > >  #define KVM_ARM_VCPU_TIMER_CTRL                1
> > >  #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER                0
> > >  #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER                1
> > > --
> > > 2.34.1
> > >
> > > _______________________________________________
> > > kvmarm mailing list
> > > kvmarm@lists.cs.columbia.edu
> > > https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
> _______________________________________________
> kvmarm mailing list
> kvmarm@lists.cs.columbia.edu
> https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Marc Zyngier Dec. 8, 2021, 2:25 p.m. UTC | #4
On Wed, 08 Dec 2021 12:23:44 +0000,
Alexandru Elisei <alexandru.elisei@arm.com> wrote:
> 
> This makes me wonder. Should KVM enforce having userspace either not
> setting the PMU for any VCPU, either setting it for all VCPUs? I think this
> would be a good idea and will reduce complexity in the long run. I also
> don't see a use case for userspace choosing to set the PMU for a subset of
> VCPUs, leaving the other VCPUs with the default behaviour.

Indeed. As much as I'm happy to expose a PMU to a guest on an
asymmetric system, I really do not want the asymmetry in the guest
itself. So this should be an all or nothing behaviour.

Thanks,

	M.
Alexandru Elisei Dec. 8, 2021, 3:20 p.m. UTC | #5
Hi Marc,

On Wed, Dec 08, 2021 at 02:25:58PM +0000, Marc Zyngier wrote:
> On Wed, 08 Dec 2021 12:23:44 +0000,
> Alexandru Elisei <alexandru.elisei@arm.com> wrote:
> > 
> > This makes me wonder. Should KVM enforce having userspace either not
> > setting the PMU for any VCPU, or setting it for all VCPUs? I think this
> > would be a good idea and will reduce complexity in the long run. I also
> > don't see a use case for userspace choosing to set the PMU for a subset of
> > VCPUs, leaving the other VCPUs with the default behaviour.
> 
> Indeed. As much as I'm happy to expose a PMU to a guest on an
> asymmetric system, I really do not want the asymmetry in the guest
> itself. So this should be an all or nothing behaviour.

From what I can tell, the only asymmetry that can be exposed to a guest as
a result of the series is the number of events supported on a VCPU.

I don't like the idea of forcing userspace to set the *same* PMU for all
VCPUs, as that would severely limit running VMs with PMU on asymmetric
systems.

Even if KVM forces userspace to set a PMU (it does not have to be the same PMU)
for all VCPUs, that still does not look like the correct solution to me,
because userspace can set PMUs with different numbers of events.

What I can try is to make kvm->arch.pmuver the minimum version of all the
VCPU PMUs and the implicit PMU. I'll give that a go in the next iteration.

Thanks,
Alex

> 
> Thanks,
> 
> 	M.
> 
> -- 
> Without deviation from the norm, progress is not possible.
Marc Zyngier Dec. 8, 2021, 3:44 p.m. UTC | #6
On Wed, 08 Dec 2021 15:20:30 +0000,
Alexandru Elisei <alexandru.elisei@arm.com> wrote:
> 
> Hi Marc,
> 
> On Wed, Dec 08, 2021 at 02:25:58PM +0000, Marc Zyngier wrote:
> > On Wed, 08 Dec 2021 12:23:44 +0000,
> > Alexandru Elisei <alexandru.elisei@arm.com> wrote:
> > > 
> > > This makes me wonder. Should KVM enforce having userspace either not
> > > setting the PMU for any VCPU, or setting it for all VCPUs? I think this
> > > would be a good idea and will reduce complexity in the long run. I also
> > > don't see a use case for userspace choosing to set the PMU for a subset of
> > > VCPUs, leaving the other VCPUs with the default behaviour.
> > 
> > Indeed. As much as I'm happy to expose a PMU to a guest on an
> > asymmetric system, I really do not want the asymmetry in the guest
> > itself. So this should be an all or nothing behaviour.
> 
> From what I can tell, the only asymmetry that can be exposed to a guest as
> a result of the series is the number of events supported on a VCPU.

Not only. It means that the events are counting different things. It
isn't only about pmuver, which is only about the architectural
revision implemented by the PMU. If you start assigning two different
PMUs (in the perf sense) to a guest, you open the Pandora box of
having to deal with all the subtle nonsense that asymmetric systems
bring. What about event filtering, for example?

> I don't like the idea of forcing userspace to set the *same* PMU for all
> VCPUs, as that would severely limit running VMs with PMU on asymmetric
> systems.

On the contrary, I am *very* happy to limit a VM to a single PMU (and
thus CPU) type on these systems. Really.

> Even if KVM forces to set a PMU (does not have to be the same PMU) for all
> VCPUs, that still does not look like the correct solution for me, because
> userspace can set PMUs with different number of events.

I don't understand what you mean. If you associate a single PMU type
to the guest, that's all the guest sees.

> What I can try is to make kvm->arch.pmuver the minimum version of all the
> VCPU PMUs and the implicit PMU. I'll give that a go in the next iteration.

I really don't think we need any of this.

	M.
Alexandru Elisei Dec. 8, 2021, 4:11 p.m. UTC | #7
Hi Marc,

On Wed, Dec 08, 2021 at 03:44:35PM +0000, Marc Zyngier wrote:
> On Wed, 08 Dec 2021 15:20:30 +0000,
> Alexandru Elisei <alexandru.elisei@arm.com> wrote:
> > 
> > Hi Marc,
> > 
> > On Wed, Dec 08, 2021 at 02:25:58PM +0000, Marc Zyngier wrote:
> > > On Wed, 08 Dec 2021 12:23:44 +0000,
> > > Alexandru Elisei <alexandru.elisei@arm.com> wrote:
> > > > 
> > > > This makes me wonder. Should KVM enforce having userspace either not
> > > > setting the PMU for any VCPU, or setting it for all VCPUs? I think this
> > > > would be a good idea and will reduce complexity in the long run. I also
> > > > don't see a use case for userspace choosing to set the PMU for a subset of
> > > > VCPUs, leaving the other VCPUs with the default behaviour.
> > > 
> > > Indeed. As much as I'm happy to expose a PMU to a guest on an
> > > asymmetric system, I really do not want the asymmetry in the guest
> > > itself. So this should be an all or nothing behaviour.
> > 
> > From what I can tell, the only asymmetry that can be exposed to a guest as
> > a result of the series is the number of events supported on a VCPU.
> 
> Not only. It means that the events are counting different things. It
> isn't only about pmuver, which is only about the architectural
> revision implemented by the PMU. If you start assigning two different
> PMUs (in the perf sense) to a guest, you open the Pandora box of
> having to deal with all the subtle nonsense that asymmetric systems
> bring. What about event filtering, for example?

kvm_pmu_set_counter_event_type() uses the number of events to mask out the
unsupported events, so still depends on pmuver.

But I understand what you are saying, there might be differences between what
exactly an event is counting, how it increments and how the counter value should
be interpreted based on the microarchitecture.

> 
> > I don't like the idea of forcing userspace to set the *same* PMU for all
> > VCPUs, as that would severely limit running VMs with PMU on asymmetric
> > systems.
> 
> On the contrary, I am *very* happy to limit a VM to a single PMU (and
> thus CPU) type on these systems. Really.

Ok, so any kind of asymmetry is unacceptable.

Accepted behaviour:

1. If userspace sets PMU for one VCPU, then *all* other VCPUs must have a PMU
set, and furthermore, it must be the same PMU as the first VCPU,

or

2. If userspace has initialized a PMU (via
KVM_ARM_VCPU_PMU_V3_CTRL(KVM_ARM_VCPU_PMU_V3_INIT)) without setting a PMU, then
it is forbidden for userspace to set a PMU for the other VCPUs.

Is that what you had in mind?

> 
> > Even if KVM forces to set a PMU (does not have to be the same PMU) for all
> > VCPUs, that still does not look like the correct solution for me, because
> > userspace can set PMUs with different number of events.
> 
> I don't understand what you mean. If you associate a single PMU type
> to the guest, that's all the guest sees.

I was talking in the context of allowing userspace to associate different PMUs
to different VCPUs.

Thanks,
Alex

> 
> > What I can try is to make kvm->arch.pmuver the minimum version of all the
> > VCPU PMUs and the implicit PMU. I'll give that a go in the next iteration.
> 
> I really don't think we need any of this.
> 
> 	M.
> 
> -- 
> Without deviation from the norm, progress is not possible.
Marc Zyngier Dec. 8, 2021, 4:21 p.m. UTC | #8
On Wed, 08 Dec 2021 16:11:13 +0000,
Alexandru Elisei <alexandru.elisei@arm.com> wrote:
> 
> > On the contrary, I am *very* happy to limit a VM to a single PMU (and
> > thus CPU) type on these systems. Really.
> 
> Ok, so any kind of asymmetry is unacceptable.
> 
> Accepted behaviour:
> 
> 1. If userspace sets PMU for one VCPU, then *all* other VCPUs must
> have a PMU set, and furthermore, it must be the same PMU as the
> first VCPU,
> 
> or
> 
> 2. If userspace has initialized a PMU (via
> KVM_ARM_VCPU_PMU_V3_CTRL(KVM_ARM_VCPU_PMU_V3_INIT)) without setting
> a PMU, then it is forbidden for userspace to set a PMU for the other
> VCPUs.
> 
> Is that what you had in mind?

Exactly. This sidesteps any sort of odd behaviour by forcing userspace
to pick a side.

Thanks,

	M.
diff mbox series

Patch

diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
index 60a29972d3f1..c82be5cbc268 100644
--- a/Documentation/virt/kvm/devices/vcpu.rst
+++ b/Documentation/virt/kvm/devices/vcpu.rst
@@ -104,6 +104,31 @@  hardware event. Filtering event 0x1E (CHAIN) has no effect either, as it
 isn't strictly speaking an event. Filtering the cycle counter is possible
 using event 0x11 (CPU_CYCLES).
 
+1.4 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_SET_PMU
+------------------------------------------
+
+:Parameters: in kvm_device_attr.addr the address to an int representing the PMU
+             identifier.
+
+:Returns:
+
+	 =======  ===============================================
+	 -EBUSY   PMUv3 already initialized
+	 -EFAULT  Error accessing the PMU identifier
+	 -ENXIO   PMU not found
+	 -ENODEV  PMUv3 not supported or GIC not initialized
+	 -ENOMEM  Could not allocate memory
+	 =======  ===============================================
+
+Request that the VCPU uses the specified hardware PMU when creating guest events
+for the purpose of PMU emulation. The PMU identifier can be read from the "type"
+file for the desired PMU instance under /sys/devices (or, equivalently,
+/sys/bus/event_source). This attribute is particularly useful on heterogeneous
+systems where there are at least two CPU PMUs on the system.
+
+Note that KVM will not make any attempts to run the VCPU on the physical CPUs
+associated with the PMU specified by this attribute. This is entirely left to
+userspace.
 
 2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
 =================================
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index b3edde68bc3e..1d0a0a2a9711 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -362,6 +362,7 @@  struct kvm_arm_copy_mte_tags {
 #define   KVM_ARM_VCPU_PMU_V3_IRQ	0
 #define   KVM_ARM_VCPU_PMU_V3_INIT	1
 #define   KVM_ARM_VCPU_PMU_V3_FILTER	2
+#define   KVM_ARM_VCPU_PMU_V3_SET_PMU	3
 #define KVM_ARM_VCPU_TIMER_CTRL		1
 #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER		0
 #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER		1
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index eaaad4c06561..618138c5f792 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -603,6 +603,7 @@  static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
 {
 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct arm_pmu *arm_pmu = pmu->arm_pmu;
 	struct kvm_pmc *pmc;
 	struct perf_event *event;
 	struct perf_event_attr attr;
@@ -638,8 +639,7 @@  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
 		return;
 
 	memset(&attr, 0, sizeof(struct perf_event_attr));
-	attr.type = PERF_TYPE_RAW;
-	attr.size = sizeof(attr);
+	attr.type = arm_pmu ? arm_pmu->pmu.type : PERF_TYPE_RAW;
 	attr.pinned = 1;
 	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
 	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
@@ -941,6 +941,29 @@  static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
 	return true;
 }
 
+static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
+{
+	struct kvm_pmu *kvm_pmu = &vcpu->arch.pmu;
+	struct arm_pmu_entry *entry;
+	struct arm_pmu *arm_pmu;
+	int ret = -ENXIO;
+
+	mutex_lock(&arm_pmus_lock);
+
+	list_for_each_entry(entry, &arm_pmus, entry) {
+		arm_pmu = entry->arm_pmu;
+		if (arm_pmu->pmu.type == pmu_id) {
+			kvm_pmu->arm_pmu = arm_pmu;
+			ret = 0;
+			goto out_unlock;
+		}
+	}
+
+out_unlock:
+	mutex_unlock(&arm_pmus_lock);
+	return ret;
+}
+
 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 {
 	if (!kvm_vcpu_has_pmu(vcpu))
@@ -1027,6 +1050,15 @@  int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 
 		return 0;
 	}
+	case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
+		int __user *uaddr = (int __user *)(long)attr->addr;
+		int pmu_id;
+
+		if (get_user(pmu_id, uaddr))
+			return -EFAULT;
+
+		return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
+	}
 	case KVM_ARM_VCPU_PMU_V3_INIT:
 		return kvm_arm_pmu_v3_init(vcpu);
 	}
@@ -1064,6 +1096,7 @@  int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 	case KVM_ARM_VCPU_PMU_V3_IRQ:
 	case KVM_ARM_VCPU_PMU_V3_INIT:
 	case KVM_ARM_VCPU_PMU_V3_FILTER:
+	case KVM_ARM_VCPU_PMU_V3_SET_PMU:
 		if (kvm_vcpu_has_pmu(vcpu))
 			return 0;
 	}
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index e249c5f172aa..ab3046a8f9bb 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -34,6 +34,7 @@  struct kvm_pmu {
 	bool created;
 	bool irq_level;
 	struct irq_work overflow_work;
+	struct arm_pmu *arm_pmu;
 };
 
 struct arm_pmu_entry {
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index b3edde68bc3e..1d0a0a2a9711 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -362,6 +362,7 @@  struct kvm_arm_copy_mte_tags {
 #define   KVM_ARM_VCPU_PMU_V3_IRQ	0
 #define   KVM_ARM_VCPU_PMU_V3_INIT	1
 #define   KVM_ARM_VCPU_PMU_V3_FILTER	2
+#define   KVM_ARM_VCPU_PMU_V3_SET_PMU	3
 #define KVM_ARM_VCPU_TIMER_CTRL		1
 #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER		0
 #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER		1