diff mbox series

[v5,08/12] KVM: arm64: PMU: Allow userspace to limit PMCR_EL0.N for the guest

Message ID 20230817003029.3073210-9-rananta@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: PMU: Allow userspace to limit the number of PMCs on vCPU | expand

Commit Message

Raghavendra Rao Ananta Aug. 17, 2023, 12:30 a.m. UTC
From: Reiji Watanabe <reijiw@google.com>

KVM does not yet support userspace modifying PMCR_EL0.N (With
the previous patch, KVM ignores what is written by upserspace).
Add support userspace limiting PMCR_EL0.N.

Disallow userspace to set PMCR_EL0.N to a value that is greater
than the host value (KVM_SET_ONE_REG will fail), as KVM doesn't
support more event counters than the host HW implements.
Although this is an ABI change, this change only affects
userspace setting PMCR_EL0.N to a larger value than the host.
As accesses to unadvertised event counters indices is CONSTRAINED
UNPREDICTABLE behavior, and PMCR_EL0.N was reset to the host value
on every vCPU reset before this series, I can't think of any
use case where a user space would do that.

Also, ignore writes to read-only bits that are cleared on vCPU reset,
and RES{0,1} bits (including writable bits that KVM doesn't support
yet), as those bits shouldn't be modified (at least with
the current KVM).

Signed-off-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
---
 arch/arm64/include/asm/kvm_host.h |  3 ++
 arch/arm64/kvm/pmu-emul.c         |  1 +
 arch/arm64/kvm/sys_regs.c         | 49 +++++++++++++++++++++++++++++--
 3 files changed, 51 insertions(+), 2 deletions(-)

Comments

Shaoqin Huang Aug. 21, 2023, 12:12 p.m. UTC | #1
Hi Raghavendra,

On 8/17/23 08:30, Raghavendra Rao Ananta wrote:
> From: Reiji Watanabe <reijiw@google.com>
> 
> KVM does not yet support userspace modifying PMCR_EL0.N (With
> the previous patch, KVM ignores what is written by upserspace).
> Add support userspace limiting PMCR_EL0.N.
> 
> Disallow userspace to set PMCR_EL0.N to a value that is greater
> than the host value (KVM_SET_ONE_REG will fail), as KVM doesn't
> support more event counters than the host HW implements.
> Although this is an ABI change, this change only affects
> userspace setting PMCR_EL0.N to a larger value than the host.
> As accesses to unadvertised event counters indices is CONSTRAINED
> UNPREDICTABLE behavior, and PMCR_EL0.N was reset to the host value
> on every vCPU reset before this series, I can't think of any
> use case where a user space would do that.
> 
> Also, ignore writes to read-only bits that are cleared on vCPU reset,
> and RES{0,1} bits (including writable bits that KVM doesn't support
> yet), as those bits shouldn't be modified (at least with
> the current KVM).
> 
> Signed-off-by: Reiji Watanabe <reijiw@google.com>
> Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
> ---
>   arch/arm64/include/asm/kvm_host.h |  3 ++
>   arch/arm64/kvm/pmu-emul.c         |  1 +
>   arch/arm64/kvm/sys_regs.c         | 49 +++++++++++++++++++++++++++++--
>   3 files changed, 51 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 0f2dbbe8f6a7e..c15ec365283d1 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -259,6 +259,9 @@ struct kvm_arch {
>   	/* PMCR_EL0.N value for the guest */
>   	u8 pmcr_n;
>   
> +	/* Limit value of PMCR_EL0.N for the guest */
> +	u8 pmcr_n_limit;
> +
>   	/* Hypercall features firmware registers' descriptor */
>   	struct kvm_smccc_features smccc_feat;
>   	struct maple_tree smccc_filter;
> diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> index ce7de6bbdc967..39ad56a71ad20 100644
> --- a/arch/arm64/kvm/pmu-emul.c
> +++ b/arch/arm64/kvm/pmu-emul.c
> @@ -896,6 +896,7 @@ int kvm_arm_set_vm_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
>   	 * while the latter does not.
>   	 */
>   	kvm->arch.pmcr_n = arm_pmu->num_events - 1;
> +	kvm->arch.pmcr_n_limit = arm_pmu->num_events - 1;
>   
>   	return 0;
>   }
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 2075901356c5b..c01d62afa7db4 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -1086,6 +1086,51 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
>   	return 0;
>   }
>   
> +static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> +		    u64 val)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	u64 new_n, mutable_mask;
> +	int ret = 0;
> +
> +	new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
> +
> +	mutex_lock(&kvm->arch.config_lock);
> +	if (unlikely(new_n != kvm->arch.pmcr_n)) {
> +		/*
> +		 * The vCPU can't have more counters than the PMU
> +		 * hardware implements.
> +		 */
> +		if (new_n <= kvm->arch.pmcr_n_limit)
> +			kvm->arch.pmcr_n = new_n;
> +		else
> +			ret = -EINVAL;
> +	}

Since we have set the default value of pmcr_n, if we want to set a new 
pmcr_n, shouldn't it be a different value?

So how about change the checking to:

if (likely(new_n <= kvm->arch.pmcr_n_limit)
	kvm->arch.pmcr_n = new_n;
else
	ret = -EINVAL;

what do you think?

> +	mutex_unlock(&kvm->arch.config_lock);
> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * Ignore writes to RES0 bits, read only bits that are cleared on
> +	 * vCPU reset, and writable bits that KVM doesn't support yet.
> +	 * (i.e. only PMCR.N and bits [7:0] are mutable from userspace)
> +	 * The LP bit is RES0 when FEAT_PMUv3p5 is not supported on the vCPU.
> +	 * But, we leave the bit as it is here, as the vCPU's PMUver might
> +	 * be changed later (NOTE: the bit will be cleared on first vCPU run
> +	 * if necessary).
> +	 */
> +	mutable_mask = (ARMV8_PMU_PMCR_MASK | ARMV8_PMU_PMCR_N);
> +	val &= mutable_mask;
> +	val |= (__vcpu_sys_reg(vcpu, r->reg) & ~mutable_mask);
> +
> +	/* The LC bit is RES1 when AArch32 is not supported */
> +	if (!kvm_supports_32bit_el0())
> +		val |= ARMV8_PMU_PMCR_LC;
> +
> +	__vcpu_sys_reg(vcpu, r->reg) = val;
> +	return 0;
> +}
> +
>   /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
>   #define DBG_BCR_BVR_WCR_WVR_EL1(n)					\
>   	{ SYS_DESC(SYS_DBGBVRn_EL1(n)),					\
> @@ -2147,8 +2192,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>   	{ SYS_DESC(SYS_CTR_EL0), access_ctr },
>   	{ SYS_DESC(SYS_SVCR), undef_access },
>   
> -	{ PMU_SYS_REG(PMCR_EL0), .access = access_pmcr,
> -	  .reset = reset_pmcr, .reg = PMCR_EL0, .get_user = get_pmcr },
> +	{ PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
> +	  .reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr },

A little confusing, since the PMU_SYS_REG() defines the default 
visibility which is pmu_visibility can return REG_HIDDEN, the set_user 
to pmcr will be blocked, how can it being set?

Maybe I lose some details.

Thanks,
Shaoqin

>   	{ PMU_SYS_REG(PMCNTENSET_EL0),
>   	  .access = access_pmcnten, .reg = PMCNTENSET_EL0 },
>   	{ PMU_SYS_REG(PMCNTENCLR_EL0),
Raghavendra Rao Ananta Aug. 21, 2023, 11:28 p.m. UTC | #2
Hi Shaoqin,

On Mon, Aug 21, 2023 at 5:12 AM Shaoqin Huang <shahuang@redhat.com> wrote:
>
> Hi Raghavendra,
>
> On 8/17/23 08:30, Raghavendra Rao Ananta wrote:
> > From: Reiji Watanabe <reijiw@google.com>
> >
> > KVM does not yet support userspace modifying PMCR_EL0.N (With
> > the previous patch, KVM ignores what is written by upserspace).
> > Add support userspace limiting PMCR_EL0.N.
> >
> > Disallow userspace to set PMCR_EL0.N to a value that is greater
> > than the host value (KVM_SET_ONE_REG will fail), as KVM doesn't
> > support more event counters than the host HW implements.
> > Although this is an ABI change, this change only affects
> > userspace setting PMCR_EL0.N to a larger value than the host.
> > As accesses to unadvertised event counters indices is CONSTRAINED
> > UNPREDICTABLE behavior, and PMCR_EL0.N was reset to the host value
> > on every vCPU reset before this series, I can't think of any
> > use case where a user space would do that.
> >
> > Also, ignore writes to read-only bits that are cleared on vCPU reset,
> > and RES{0,1} bits (including writable bits that KVM doesn't support
> > yet), as those bits shouldn't be modified (at least with
> > the current KVM).
> >
> > Signed-off-by: Reiji Watanabe <reijiw@google.com>
> > Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
> > ---
> >   arch/arm64/include/asm/kvm_host.h |  3 ++
> >   arch/arm64/kvm/pmu-emul.c         |  1 +
> >   arch/arm64/kvm/sys_regs.c         | 49 +++++++++++++++++++++++++++++--
> >   3 files changed, 51 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index 0f2dbbe8f6a7e..c15ec365283d1 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -259,6 +259,9 @@ struct kvm_arch {
> >       /* PMCR_EL0.N value for the guest */
> >       u8 pmcr_n;
> >
> > +     /* Limit value of PMCR_EL0.N for the guest */
> > +     u8 pmcr_n_limit;
> > +
> >       /* Hypercall features firmware registers' descriptor */
> >       struct kvm_smccc_features smccc_feat;
> >       struct maple_tree smccc_filter;
> > diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> > index ce7de6bbdc967..39ad56a71ad20 100644
> > --- a/arch/arm64/kvm/pmu-emul.c
> > +++ b/arch/arm64/kvm/pmu-emul.c
> > @@ -896,6 +896,7 @@ int kvm_arm_set_vm_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
> >        * while the latter does not.
> >        */
> >       kvm->arch.pmcr_n = arm_pmu->num_events - 1;
> > +     kvm->arch.pmcr_n_limit = arm_pmu->num_events - 1;
> >
> >       return 0;
> >   }
> > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > index 2075901356c5b..c01d62afa7db4 100644
> > --- a/arch/arm64/kvm/sys_regs.c
> > +++ b/arch/arm64/kvm/sys_regs.c
> > @@ -1086,6 +1086,51 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> >       return 0;
> >   }
> >
> > +static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> > +                 u64 val)
> > +{
> > +     struct kvm *kvm = vcpu->kvm;
> > +     u64 new_n, mutable_mask;
> > +     int ret = 0;
> > +
> > +     new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
> > +
> > +     mutex_lock(&kvm->arch.config_lock);
> > +     if (unlikely(new_n != kvm->arch.pmcr_n)) {
> > +             /*
> > +              * The vCPU can't have more counters than the PMU
> > +              * hardware implements.
> > +              */
> > +             if (new_n <= kvm->arch.pmcr_n_limit)
> > +                     kvm->arch.pmcr_n = new_n;
> > +             else
> > +                     ret = -EINVAL;
> > +     }
>
> Since we have set the default value of pmcr_n, if we want to set a new
> pmcr_n, shouldn't it be a different value?
>
> So how about change the checking to:
>
> if (likely(new_n <= kvm->arch.pmcr_n_limit)
>         kvm->arch.pmcr_n = new_n;
> else
>         ret = -EINVAL;
>
> what do you think?
>
Sorry, I guess I didn't fully understand your suggestion. Are you
saying that it's 'likely' that userspace would configure the correct
value?

> > +     mutex_unlock(&kvm->arch.config_lock);
> > +     if (ret)
> > +             return ret;
> > +
> > +     /*
> > +      * Ignore writes to RES0 bits, read only bits that are cleared on
> > +      * vCPU reset, and writable bits that KVM doesn't support yet.
> > +      * (i.e. only PMCR.N and bits [7:0] are mutable from userspace)
> > +      * The LP bit is RES0 when FEAT_PMUv3p5 is not supported on the vCPU.
> > +      * But, we leave the bit as it is here, as the vCPU's PMUver might
> > +      * be changed later (NOTE: the bit will be cleared on first vCPU run
> > +      * if necessary).
> > +      */
> > +     mutable_mask = (ARMV8_PMU_PMCR_MASK | ARMV8_PMU_PMCR_N);
> > +     val &= mutable_mask;
> > +     val |= (__vcpu_sys_reg(vcpu, r->reg) & ~mutable_mask);
> > +
> > +     /* The LC bit is RES1 when AArch32 is not supported */
> > +     if (!kvm_supports_32bit_el0())
> > +             val |= ARMV8_PMU_PMCR_LC;
> > +
> > +     __vcpu_sys_reg(vcpu, r->reg) = val;
> > +     return 0;
> > +}
> > +
> >   /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
> >   #define DBG_BCR_BVR_WCR_WVR_EL1(n)                                  \
> >       { SYS_DESC(SYS_DBGBVRn_EL1(n)),                                 \
> > @@ -2147,8 +2192,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
> >       { SYS_DESC(SYS_CTR_EL0), access_ctr },
> >       { SYS_DESC(SYS_SVCR), undef_access },
> >
> > -     { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr,
> > -       .reset = reset_pmcr, .reg = PMCR_EL0, .get_user = get_pmcr },
> > +     { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
> > +       .reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr },
>
> A little confusing, since the PMU_SYS_REG() defines the default
> visibility which is pmu_visibility can return REG_HIDDEN, the set_user
> to pmcr will be blocked, how can it being set?
>
> Maybe I lose some details.
>
pmu_visibility() returns REG_HIDDEN only if userspace has not added
support for PMUv3 via KVM_ARM_PREFERRED_TARGET ioctl. Else, it should
return 0, and give access.

Thank you.
Raghavendra

> Thanks,
> Shaoqin
>
> >       { PMU_SYS_REG(PMCNTENSET_EL0),
> >         .access = access_pmcnten, .reg = PMCNTENSET_EL0 },
> >       { PMU_SYS_REG(PMCNTENCLR_EL0),
>
> --
> Shaoqin
>
Shaoqin Huang Aug. 22, 2023, 3:26 a.m. UTC | #3
Hi Raghavendra,

On 8/22/23 07:28, Raghavendra Rao Ananta wrote:
> Hi Shaoqin,
> 
> On Mon, Aug 21, 2023 at 5:12 AM Shaoqin Huang <shahuang@redhat.com> wrote:
>>
>> Hi Raghavendra,
>>
>> On 8/17/23 08:30, Raghavendra Rao Ananta wrote:
>>> From: Reiji Watanabe <reijiw@google.com>
>>>
>>> KVM does not yet support userspace modifying PMCR_EL0.N (With
>>> the previous patch, KVM ignores what is written by upserspace).
>>> Add support userspace limiting PMCR_EL0.N.
>>>
>>> Disallow userspace to set PMCR_EL0.N to a value that is greater
>>> than the host value (KVM_SET_ONE_REG will fail), as KVM doesn't
>>> support more event counters than the host HW implements.
>>> Although this is an ABI change, this change only affects
>>> userspace setting PMCR_EL0.N to a larger value than the host.
>>> As accesses to unadvertised event counters indices is CONSTRAINED
>>> UNPREDICTABLE behavior, and PMCR_EL0.N was reset to the host value
>>> on every vCPU reset before this series, I can't think of any
>>> use case where a user space would do that.
>>>
>>> Also, ignore writes to read-only bits that are cleared on vCPU reset,
>>> and RES{0,1} bits (including writable bits that KVM doesn't support
>>> yet), as those bits shouldn't be modified (at least with
>>> the current KVM).
>>>
>>> Signed-off-by: Reiji Watanabe <reijiw@google.com>
>>> Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
>>> ---
>>>    arch/arm64/include/asm/kvm_host.h |  3 ++
>>>    arch/arm64/kvm/pmu-emul.c         |  1 +
>>>    arch/arm64/kvm/sys_regs.c         | 49 +++++++++++++++++++++++++++++--
>>>    3 files changed, 51 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>>> index 0f2dbbe8f6a7e..c15ec365283d1 100644
>>> --- a/arch/arm64/include/asm/kvm_host.h
>>> +++ b/arch/arm64/include/asm/kvm_host.h
>>> @@ -259,6 +259,9 @@ struct kvm_arch {
>>>        /* PMCR_EL0.N value for the guest */
>>>        u8 pmcr_n;
>>>
>>> +     /* Limit value of PMCR_EL0.N for the guest */
>>> +     u8 pmcr_n_limit;
>>> +
>>>        /* Hypercall features firmware registers' descriptor */
>>>        struct kvm_smccc_features smccc_feat;
>>>        struct maple_tree smccc_filter;
>>> diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
>>> index ce7de6bbdc967..39ad56a71ad20 100644
>>> --- a/arch/arm64/kvm/pmu-emul.c
>>> +++ b/arch/arm64/kvm/pmu-emul.c
>>> @@ -896,6 +896,7 @@ int kvm_arm_set_vm_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
>>>         * while the latter does not.
>>>         */
>>>        kvm->arch.pmcr_n = arm_pmu->num_events - 1;
>>> +     kvm->arch.pmcr_n_limit = arm_pmu->num_events - 1;
>>>
>>>        return 0;
>>>    }
>>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>>> index 2075901356c5b..c01d62afa7db4 100644
>>> --- a/arch/arm64/kvm/sys_regs.c
>>> +++ b/arch/arm64/kvm/sys_regs.c
>>> @@ -1086,6 +1086,51 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
>>>        return 0;
>>>    }
>>>
>>> +static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
>>> +                 u64 val)
>>> +{
>>> +     struct kvm *kvm = vcpu->kvm;
>>> +     u64 new_n, mutable_mask;
>>> +     int ret = 0;
>>> +
>>> +     new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
>>> +
>>> +     mutex_lock(&kvm->arch.config_lock);
>>> +     if (unlikely(new_n != kvm->arch.pmcr_n)) {
>>> +             /*
>>> +              * The vCPU can't have more counters than the PMU
>>> +              * hardware implements.
>>> +              */
>>> +             if (new_n <= kvm->arch.pmcr_n_limit)
>>> +                     kvm->arch.pmcr_n = new_n;
>>> +             else
>>> +                     ret = -EINVAL;
>>> +     }
>>
>> Since we have set the default value of pmcr_n, if we want to set a new
>> pmcr_n, shouldn't it be a different value?
>>
>> So how about change the checking to:
>>
>> if (likely(new_n <= kvm->arch.pmcr_n_limit)
>>          kvm->arch.pmcr_n = new_n;
>> else
>>          ret = -EINVAL;
>>
>> what do you think?
>>
> Sorry, I guess I didn't fully understand your suggestion. Are you
> saying that it's 'likely' that userspace would configure the correct
> value?
>
It depends on how userspace use this api to limit the number of pmcr. I 
think what you mean in the code is that userspace need to set every 
vcpu's pmcr to the same value, so the `unlikely` here is right, only one 
vcpu can change the kvm->arch.pmcr.n, it saves the cpu cycles.

What suggest above might be wrong. Since I think when userspace want to 
limit the number of pmcr, it may just set the new_n on one vcpu, since 
the kvm->arch.pmcr_n is a VM-local value, every vcpu can see it, so it's 
`likely` the (new_n <= kvm->arch.pmcr_n_limit), it can decrease one 
checking statement.

Thanks,
Shaoqin

>>> +     mutex_unlock(&kvm->arch.config_lock);
>>> +     if (ret)
>>> +             return ret;
>>> +
>>> +     /*
>>> +      * Ignore writes to RES0 bits, read only bits that are cleared on
>>> +      * vCPU reset, and writable bits that KVM doesn't support yet.
>>> +      * (i.e. only PMCR.N and bits [7:0] are mutable from userspace)
>>> +      * The LP bit is RES0 when FEAT_PMUv3p5 is not supported on the vCPU.
>>> +      * But, we leave the bit as it is here, as the vCPU's PMUver might
>>> +      * be changed later (NOTE: the bit will be cleared on first vCPU run
>>> +      * if necessary).
>>> +      */
>>> +     mutable_mask = (ARMV8_PMU_PMCR_MASK | ARMV8_PMU_PMCR_N);
>>> +     val &= mutable_mask;
>>> +     val |= (__vcpu_sys_reg(vcpu, r->reg) & ~mutable_mask);
>>> +
>>> +     /* The LC bit is RES1 when AArch32 is not supported */
>>> +     if (!kvm_supports_32bit_el0())
>>> +             val |= ARMV8_PMU_PMCR_LC;
>>> +
>>> +     __vcpu_sys_reg(vcpu, r->reg) = val;
>>> +     return 0;
>>> +}
>>> +
>>>    /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
>>>    #define DBG_BCR_BVR_WCR_WVR_EL1(n)                                  \
>>>        { SYS_DESC(SYS_DBGBVRn_EL1(n)),                                 \
>>> @@ -2147,8 +2192,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>>>        { SYS_DESC(SYS_CTR_EL0), access_ctr },
>>>        { SYS_DESC(SYS_SVCR), undef_access },
>>>
>>> -     { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr,
>>> -       .reset = reset_pmcr, .reg = PMCR_EL0, .get_user = get_pmcr },
>>> +     { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
>>> +       .reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr },
>>
>> A little confusing, since the PMU_SYS_REG() defines the default
>> visibility which is pmu_visibility can return REG_HIDDEN, the set_user
>> to pmcr will be blocked, how can it being set?
>>
>> Maybe I lose some details.
>>
> pmu_visibility() returns REG_HIDDEN only if userspace has not added
> support for PMUv3 via KVM_ARM_PREFERRED_TARGET ioctl. Else, it should
> return 0, and give access.
> 

Got it. Thanks.

> Thank you.
> Raghavendra
> 
>> Thanks,
>> Shaoqin
>>
>>>        { PMU_SYS_REG(PMCNTENSET_EL0),
>>>          .access = access_pmcnten, .reg = PMCNTENSET_EL0 },
>>>        { PMU_SYS_REG(PMCNTENCLR_EL0),
>>
>> --
>> Shaoqin
>>
>
Shaoqin Huang Aug. 22, 2023, 10:05 a.m. UTC | #4
Hi Raghavendra,

On 8/17/23 08:30, Raghavendra Rao Ananta wrote:
> From: Reiji Watanabe <reijiw@google.com>
> 
> KVM does not yet support userspace modifying PMCR_EL0.N (With
> the previous patch, KVM ignores what is written by upserspace).
> Add support userspace limiting PMCR_EL0.N.
> 
> Disallow userspace to set PMCR_EL0.N to a value that is greater
> than the host value (KVM_SET_ONE_REG will fail), as KVM doesn't
> support more event counters than the host HW implements.
> Although this is an ABI change, this change only affects
> userspace setting PMCR_EL0.N to a larger value than the host.
> As accesses to unadvertised event counters indices is CONSTRAINED
> UNPREDICTABLE behavior, and PMCR_EL0.N was reset to the host value
> on every vCPU reset before this series, I can't think of any
> use case where a user space would do that.
> 
> Also, ignore writes to read-only bits that are cleared on vCPU reset,
> and RES{0,1} bits (including writable bits that KVM doesn't support
> yet), as those bits shouldn't be modified (at least with
> the current KVM).
> 
> Signed-off-by: Reiji Watanabe <reijiw@google.com>
> Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
> ---
>   arch/arm64/include/asm/kvm_host.h |  3 ++
>   arch/arm64/kvm/pmu-emul.c         |  1 +
>   arch/arm64/kvm/sys_regs.c         | 49 +++++++++++++++++++++++++++++--
>   3 files changed, 51 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 0f2dbbe8f6a7e..c15ec365283d1 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -259,6 +259,9 @@ struct kvm_arch {
>   	/* PMCR_EL0.N value for the guest */
>   	u8 pmcr_n;
>   
> +	/* Limit value of PMCR_EL0.N for the guest */
> +	u8 pmcr_n_limit;
> +
>   	/* Hypercall features firmware registers' descriptor */
>   	struct kvm_smccc_features smccc_feat;
>   	struct maple_tree smccc_filter;
> diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> index ce7de6bbdc967..39ad56a71ad20 100644
> --- a/arch/arm64/kvm/pmu-emul.c
> +++ b/arch/arm64/kvm/pmu-emul.c
> @@ -896,6 +896,7 @@ int kvm_arm_set_vm_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
>   	 * while the latter does not.
>   	 */
>   	kvm->arch.pmcr_n = arm_pmu->num_events - 1;
> +	kvm->arch.pmcr_n_limit = arm_pmu->num_events - 1;
>   
>   	return 0;
>   }
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 2075901356c5b..c01d62afa7db4 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -1086,6 +1086,51 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
>   	return 0;
>   }
>   
> +static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> +		    u64 val)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	u64 new_n, mutable_mask;
> +	int ret = 0;
> +
> +	new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
> +
> +	mutex_lock(&kvm->arch.config_lock);
> +	if (unlikely(new_n != kvm->arch.pmcr_n)) {
> +		/*
> +		 * The vCPU can't have more counters than the PMU
> +		 * hardware implements.
> +		 */
> +		if (new_n <= kvm->arch.pmcr_n_limit)
> +			kvm->arch.pmcr_n = new_n;
> +		else
> +			ret = -EINVAL;
> +	}
> +	mutex_unlock(&kvm->arch.config_lock);

Another thing I am just wonder is that should we block any modification 
to the pmcr_n after vm start to run? Like add one more checking 
kvm_vm_has_ran_once() at the beginning of the set_pmcr() function.

Thanks,
Shaoqin

> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * Ignore writes to RES0 bits, read only bits that are cleared on
> +	 * vCPU reset, and writable bits that KVM doesn't support yet.
> +	 * (i.e. only PMCR.N and bits [7:0] are mutable from userspace)
> +	 * The LP bit is RES0 when FEAT_PMUv3p5 is not supported on the vCPU.
> +	 * But, we leave the bit as it is here, as the vCPU's PMUver might
> +	 * be changed later (NOTE: the bit will be cleared on first vCPU run
> +	 * if necessary).
> +	 */
> +	mutable_mask = (ARMV8_PMU_PMCR_MASK | ARMV8_PMU_PMCR_N);
> +	val &= mutable_mask;
> +	val |= (__vcpu_sys_reg(vcpu, r->reg) & ~mutable_mask);
> +
> +	/* The LC bit is RES1 when AArch32 is not supported */
> +	if (!kvm_supports_32bit_el0())
> +		val |= ARMV8_PMU_PMCR_LC;
> +
> +	__vcpu_sys_reg(vcpu, r->reg) = val;
> +	return 0;
> +}
> +
>   /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
>   #define DBG_BCR_BVR_WCR_WVR_EL1(n)					\
>   	{ SYS_DESC(SYS_DBGBVRn_EL1(n)),					\
> @@ -2147,8 +2192,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>   	{ SYS_DESC(SYS_CTR_EL0), access_ctr },
>   	{ SYS_DESC(SYS_SVCR), undef_access },
>   
> -	{ PMU_SYS_REG(PMCR_EL0), .access = access_pmcr,
> -	  .reset = reset_pmcr, .reg = PMCR_EL0, .get_user = get_pmcr },
> +	{ PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
> +	  .reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr },
>   	{ PMU_SYS_REG(PMCNTENSET_EL0),
>   	  .access = access_pmcnten, .reg = PMCNTENSET_EL0 },
>   	{ PMU_SYS_REG(PMCNTENCLR_EL0),
Raghavendra Rao Ananta Aug. 23, 2023, 4:06 p.m. UTC | #5
On Tue, Aug 22, 2023 at 3:06 AM Shaoqin Huang <shahuang@redhat.com> wrote:
>
> Hi Raghavendra,
>
> On 8/17/23 08:30, Raghavendra Rao Ananta wrote:
> > From: Reiji Watanabe <reijiw@google.com>
> >
> > KVM does not yet support userspace modifying PMCR_EL0.N (With
> > the previous patch, KVM ignores what is written by upserspace).
> > Add support userspace limiting PMCR_EL0.N.
> >
> > Disallow userspace to set PMCR_EL0.N to a value that is greater
> > than the host value (KVM_SET_ONE_REG will fail), as KVM doesn't
> > support more event counters than the host HW implements.
> > Although this is an ABI change, this change only affects
> > userspace setting PMCR_EL0.N to a larger value than the host.
> > As accesses to unadvertised event counters indices is CONSTRAINED
> > UNPREDICTABLE behavior, and PMCR_EL0.N was reset to the host value
> > on every vCPU reset before this series, I can't think of any
> > use case where a user space would do that.
> >
> > Also, ignore writes to read-only bits that are cleared on vCPU reset,
> > and RES{0,1} bits (including writable bits that KVM doesn't support
> > yet), as those bits shouldn't be modified (at least with
> > the current KVM).
> >
> > Signed-off-by: Reiji Watanabe <reijiw@google.com>
> > Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
> > ---
> >   arch/arm64/include/asm/kvm_host.h |  3 ++
> >   arch/arm64/kvm/pmu-emul.c         |  1 +
> >   arch/arm64/kvm/sys_regs.c         | 49 +++++++++++++++++++++++++++++--
> >   3 files changed, 51 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index 0f2dbbe8f6a7e..c15ec365283d1 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -259,6 +259,9 @@ struct kvm_arch {
> >       /* PMCR_EL0.N value for the guest */
> >       u8 pmcr_n;
> >
> > +     /* Limit value of PMCR_EL0.N for the guest */
> > +     u8 pmcr_n_limit;
> > +
> >       /* Hypercall features firmware registers' descriptor */
> >       struct kvm_smccc_features smccc_feat;
> >       struct maple_tree smccc_filter;
> > diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> > index ce7de6bbdc967..39ad56a71ad20 100644
> > --- a/arch/arm64/kvm/pmu-emul.c
> > +++ b/arch/arm64/kvm/pmu-emul.c
> > @@ -896,6 +896,7 @@ int kvm_arm_set_vm_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
> >        * while the latter does not.
> >        */
> >       kvm->arch.pmcr_n = arm_pmu->num_events - 1;
> > +     kvm->arch.pmcr_n_limit = arm_pmu->num_events - 1;
> >
> >       return 0;
> >   }
> > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > index 2075901356c5b..c01d62afa7db4 100644
> > --- a/arch/arm64/kvm/sys_regs.c
> > +++ b/arch/arm64/kvm/sys_regs.c
> > @@ -1086,6 +1086,51 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> >       return 0;
> >   }
> >
> > +static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> > +                 u64 val)
> > +{
> > +     struct kvm *kvm = vcpu->kvm;
> > +     u64 new_n, mutable_mask;
> > +     int ret = 0;
> > +
> > +     new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
> > +
> > +     mutex_lock(&kvm->arch.config_lock);
> > +     if (unlikely(new_n != kvm->arch.pmcr_n)) {
> > +             /*
> > +              * The vCPU can't have more counters than the PMU
> > +              * hardware implements.
> > +              */
> > +             if (new_n <= kvm->arch.pmcr_n_limit)
> > +                     kvm->arch.pmcr_n = new_n;
> > +             else
> > +                     ret = -EINVAL;
> > +     }
> > +     mutex_unlock(&kvm->arch.config_lock);
>
> Another thing I am just wonder is that should we block any modification
> to the pmcr_n after vm start to run? Like add one more checking
> kvm_vm_has_ran_once() at the beginning of the set_pmcr() function.
>
Thanks for bringing it up. Reiji and I discussed about this. Checking
for kvm_vm_has_ran_once() will be a good move, however, it will go
against the ABI expectations of setting the PMCR. I'd like others to
weigh in on this as well. What do you think?

Thank you.
Raghavendra
> Thanks,
> Shaoqin
>
> > +     if (ret)
> > +             return ret;
> > +
> > +     /*
> > +      * Ignore writes to RES0 bits, read only bits that are cleared on
> > +      * vCPU reset, and writable bits that KVM doesn't support yet.
> > +      * (i.e. only PMCR.N and bits [7:0] are mutable from userspace)
> > +      * The LP bit is RES0 when FEAT_PMUv3p5 is not supported on the vCPU.
> > +      * But, we leave the bit as it is here, as the vCPU's PMUver might
> > +      * be changed later (NOTE: the bit will be cleared on first vCPU run
> > +      * if necessary).
> > +      */
> > +     mutable_mask = (ARMV8_PMU_PMCR_MASK | ARMV8_PMU_PMCR_N);
> > +     val &= mutable_mask;
> > +     val |= (__vcpu_sys_reg(vcpu, r->reg) & ~mutable_mask);
> > +
> > +     /* The LC bit is RES1 when AArch32 is not supported */
> > +     if (!kvm_supports_32bit_el0())
> > +             val |= ARMV8_PMU_PMCR_LC;
> > +
> > +     __vcpu_sys_reg(vcpu, r->reg) = val;
> > +     return 0;
> > +}
> > +
> >   /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
> >   #define DBG_BCR_BVR_WCR_WVR_EL1(n)                                  \
> >       { SYS_DESC(SYS_DBGBVRn_EL1(n)),                                 \
> > @@ -2147,8 +2192,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
> >       { SYS_DESC(SYS_CTR_EL0), access_ctr },
> >       { SYS_DESC(SYS_SVCR), undef_access },
> >
> > -     { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr,
> > -       .reset = reset_pmcr, .reg = PMCR_EL0, .get_user = get_pmcr },
> > +     { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
> > +       .reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr },
> >       { PMU_SYS_REG(PMCNTENSET_EL0),
> >         .access = access_pmcnten, .reg = PMCNTENSET_EL0 },
> >       { PMU_SYS_REG(PMCNTENCLR_EL0),
>
> --
> Shaoqin
>
Shaoqin Huang Aug. 24, 2023, 8:50 a.m. UTC | #6
On 8/24/23 00:06, Raghavendra Rao Ananta wrote:
> On Tue, Aug 22, 2023 at 3:06 AM Shaoqin Huang <shahuang@redhat.com> wrote:
>>
>> Hi Raghavendra,
>>
>> On 8/17/23 08:30, Raghavendra Rao Ananta wrote:
>>> From: Reiji Watanabe <reijiw@google.com>
>>>
>>> KVM does not yet support userspace modifying PMCR_EL0.N (With
>>> the previous patch, KVM ignores what is written by upserspace).
>>> Add support userspace limiting PMCR_EL0.N.
>>>
>>> Disallow userspace to set PMCR_EL0.N to a value that is greater
>>> than the host value (KVM_SET_ONE_REG will fail), as KVM doesn't
>>> support more event counters than the host HW implements.
>>> Although this is an ABI change, this change only affects
>>> userspace setting PMCR_EL0.N to a larger value than the host.
>>> As accesses to unadvertised event counters indices is CONSTRAINED
>>> UNPREDICTABLE behavior, and PMCR_EL0.N was reset to the host value
>>> on every vCPU reset before this series, I can't think of any
>>> use case where a user space would do that.
>>>
>>> Also, ignore writes to read-only bits that are cleared on vCPU reset,
>>> and RES{0,1} bits (including writable bits that KVM doesn't support
>>> yet), as those bits shouldn't be modified (at least with
>>> the current KVM).
>>>
>>> Signed-off-by: Reiji Watanabe <reijiw@google.com>
>>> Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
>>> ---
>>>    arch/arm64/include/asm/kvm_host.h |  3 ++
>>>    arch/arm64/kvm/pmu-emul.c         |  1 +
>>>    arch/arm64/kvm/sys_regs.c         | 49 +++++++++++++++++++++++++++++--
>>>    3 files changed, 51 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>>> index 0f2dbbe8f6a7e..c15ec365283d1 100644
>>> --- a/arch/arm64/include/asm/kvm_host.h
>>> +++ b/arch/arm64/include/asm/kvm_host.h
>>> @@ -259,6 +259,9 @@ struct kvm_arch {
>>>        /* PMCR_EL0.N value for the guest */
>>>        u8 pmcr_n;
>>>
>>> +     /* Limit value of PMCR_EL0.N for the guest */
>>> +     u8 pmcr_n_limit;
>>> +
>>>        /* Hypercall features firmware registers' descriptor */
>>>        struct kvm_smccc_features smccc_feat;
>>>        struct maple_tree smccc_filter;
>>> diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
>>> index ce7de6bbdc967..39ad56a71ad20 100644
>>> --- a/arch/arm64/kvm/pmu-emul.c
>>> +++ b/arch/arm64/kvm/pmu-emul.c
>>> @@ -896,6 +896,7 @@ int kvm_arm_set_vm_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
>>>         * while the latter does not.
>>>         */
>>>        kvm->arch.pmcr_n = arm_pmu->num_events - 1;
>>> +     kvm->arch.pmcr_n_limit = arm_pmu->num_events - 1;
>>>
>>>        return 0;
>>>    }
>>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>>> index 2075901356c5b..c01d62afa7db4 100644
>>> --- a/arch/arm64/kvm/sys_regs.c
>>> +++ b/arch/arm64/kvm/sys_regs.c
>>> @@ -1086,6 +1086,51 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
>>>        return 0;
>>>    }
>>>
>>> +static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
>>> +                 u64 val)
>>> +{
>>> +     struct kvm *kvm = vcpu->kvm;
>>> +     u64 new_n, mutable_mask;
>>> +     int ret = 0;
>>> +
>>> +     new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
>>> +
>>> +     mutex_lock(&kvm->arch.config_lock);
>>> +     if (unlikely(new_n != kvm->arch.pmcr_n)) {
>>> +             /*
>>> +              * The vCPU can't have more counters than the PMU
>>> +              * hardware implements.
>>> +              */
>>> +             if (new_n <= kvm->arch.pmcr_n_limit)
>>> +                     kvm->arch.pmcr_n = new_n;
>>> +             else
>>> +                     ret = -EINVAL;
>>> +     }
>>> +     mutex_unlock(&kvm->arch.config_lock);
>>
>> Another thing I am just wonder is that should we block any modification
>> to the pmcr_n after vm start to run? Like add one more checking
>> kvm_vm_has_ran_once() at the beginning of the set_pmcr() function.
>>
> Thanks for bringing it up. Reiji and I discussed about this. Checking
> for kvm_vm_has_ran_once() will be a good move, however, it will go
> against the ABI expectations of setting the PMCR. I'd like others to
> weigh in on this as well. What do you think?
> 
> Thank you.
> Raghavendra

Before this change, kvm not allowed userspace to change the pmcr_n, but 
allowed to change the lower ARMV8_PMU_PMCR_MASK bits. With this change, 
we now allow to change the pmcr_n, we should not block the change to 
ARMV8_PMU_PMCR_MASK after vm start to run, but how about we just block 
the change to ARMV8_PMU_PMCR_N after vm start to run?

Thanks,
Shaoqin

>> Thanks,
>> Shaoqin
>>
>>> +     if (ret)
>>> +             return ret;
>>> +
>>> +     /*
>>> +      * Ignore writes to RES0 bits, read only bits that are cleared on
>>> +      * vCPU reset, and writable bits that KVM doesn't support yet.
>>> +      * (i.e. only PMCR.N and bits [7:0] are mutable from userspace)
>>> +      * The LP bit is RES0 when FEAT_PMUv3p5 is not supported on the vCPU.
>>> +      * But, we leave the bit as it is here, as the vCPU's PMUver might
>>> +      * be changed later (NOTE: the bit will be cleared on first vCPU run
>>> +      * if necessary).
>>> +      */
>>> +     mutable_mask = (ARMV8_PMU_PMCR_MASK | ARMV8_PMU_PMCR_N);
>>> +     val &= mutable_mask;
>>> +     val |= (__vcpu_sys_reg(vcpu, r->reg) & ~mutable_mask);
>>> +
>>> +     /* The LC bit is RES1 when AArch32 is not supported */
>>> +     if (!kvm_supports_32bit_el0())
>>> +             val |= ARMV8_PMU_PMCR_LC;
>>> +
>>> +     __vcpu_sys_reg(vcpu, r->reg) = val;
>>> +     return 0;
>>> +}
>>> +
>>>    /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
>>>    #define DBG_BCR_BVR_WCR_WVR_EL1(n)                                  \
>>>        { SYS_DESC(SYS_DBGBVRn_EL1(n)),                                 \
>>> @@ -2147,8 +2192,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>>>        { SYS_DESC(SYS_CTR_EL0), access_ctr },
>>>        { SYS_DESC(SYS_SVCR), undef_access },
>>>
>>> -     { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr,
>>> -       .reset = reset_pmcr, .reg = PMCR_EL0, .get_user = get_pmcr },
>>> +     { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
>>> +       .reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr },
>>>        { PMU_SYS_REG(PMCNTENSET_EL0),
>>>          .access = access_pmcnten, .reg = PMCNTENSET_EL0 },
>>>        { PMU_SYS_REG(PMCNTENCLR_EL0),
>>
>> --
>> Shaoqin
>>
>
Raghavendra Rao Ananta Aug. 25, 2023, 10:34 p.m. UTC | #7
On Thu, Aug 24, 2023 at 1:50 AM Shaoqin Huang <shahuang@redhat.com> wrote:
>
>
>
> On 8/24/23 00:06, Raghavendra Rao Ananta wrote:
> > On Tue, Aug 22, 2023 at 3:06 AM Shaoqin Huang <shahuang@redhat.com> wrote:
> >>
> >> Hi Raghavendra,
> >>
> >> On 8/17/23 08:30, Raghavendra Rao Ananta wrote:
> >>> From: Reiji Watanabe <reijiw@google.com>
> >>>
> >>> KVM does not yet support userspace modifying PMCR_EL0.N (With
> >>> the previous patch, KVM ignores what is written by upserspace).
> >>> Add support userspace limiting PMCR_EL0.N.
> >>>
> >>> Disallow userspace to set PMCR_EL0.N to a value that is greater
> >>> than the host value (KVM_SET_ONE_REG will fail), as KVM doesn't
> >>> support more event counters than the host HW implements.
> >>> Although this is an ABI change, this change only affects
> >>> userspace setting PMCR_EL0.N to a larger value than the host.
> >>> As accesses to unadvertised event counters indices is CONSTRAINED
> >>> UNPREDICTABLE behavior, and PMCR_EL0.N was reset to the host value
> >>> on every vCPU reset before this series, I can't think of any
> >>> use case where a user space would do that.
> >>>
> >>> Also, ignore writes to read-only bits that are cleared on vCPU reset,
> >>> and RES{0,1} bits (including writable bits that KVM doesn't support
> >>> yet), as those bits shouldn't be modified (at least with
> >>> the current KVM).
> >>>
> >>> Signed-off-by: Reiji Watanabe <reijiw@google.com>
> >>> Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
> >>> ---
> >>>    arch/arm64/include/asm/kvm_host.h |  3 ++
> >>>    arch/arm64/kvm/pmu-emul.c         |  1 +
> >>>    arch/arm64/kvm/sys_regs.c         | 49 +++++++++++++++++++++++++++++--
> >>>    3 files changed, 51 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> >>> index 0f2dbbe8f6a7e..c15ec365283d1 100644
> >>> --- a/arch/arm64/include/asm/kvm_host.h
> >>> +++ b/arch/arm64/include/asm/kvm_host.h
> >>> @@ -259,6 +259,9 @@ struct kvm_arch {
> >>>        /* PMCR_EL0.N value for the guest */
> >>>        u8 pmcr_n;
> >>>
> >>> +     /* Limit value of PMCR_EL0.N for the guest */
> >>> +     u8 pmcr_n_limit;
> >>> +
> >>>        /* Hypercall features firmware registers' descriptor */
> >>>        struct kvm_smccc_features smccc_feat;
> >>>        struct maple_tree smccc_filter;
> >>> diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> >>> index ce7de6bbdc967..39ad56a71ad20 100644
> >>> --- a/arch/arm64/kvm/pmu-emul.c
> >>> +++ b/arch/arm64/kvm/pmu-emul.c
> >>> @@ -896,6 +896,7 @@ int kvm_arm_set_vm_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
> >>>         * while the latter does not.
> >>>         */
> >>>        kvm->arch.pmcr_n = arm_pmu->num_events - 1;
> >>> +     kvm->arch.pmcr_n_limit = arm_pmu->num_events - 1;
> >>>
> >>>        return 0;
> >>>    }
> >>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> >>> index 2075901356c5b..c01d62afa7db4 100644
> >>> --- a/arch/arm64/kvm/sys_regs.c
> >>> +++ b/arch/arm64/kvm/sys_regs.c
> >>> @@ -1086,6 +1086,51 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> >>>        return 0;
> >>>    }
> >>>
> >>> +static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> >>> +                 u64 val)
> >>> +{
> >>> +     struct kvm *kvm = vcpu->kvm;
> >>> +     u64 new_n, mutable_mask;
> >>> +     int ret = 0;
> >>> +
> >>> +     new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
> >>> +
> >>> +     mutex_lock(&kvm->arch.config_lock);
> >>> +     if (unlikely(new_n != kvm->arch.pmcr_n)) {
> >>> +             /*
> >>> +              * The vCPU can't have more counters than the PMU
> >>> +              * hardware implements.
> >>> +              */
> >>> +             if (new_n <= kvm->arch.pmcr_n_limit)
> >>> +                     kvm->arch.pmcr_n = new_n;
> >>> +             else
> >>> +                     ret = -EINVAL;
> >>> +     }
> >>> +     mutex_unlock(&kvm->arch.config_lock);
> >>
> >> Another thing I am just wonder is that should we block any modification
> >> to the pmcr_n after vm start to run? Like add one more checking
> >> kvm_vm_has_ran_once() at the beginning of the set_pmcr() function.
> >>
> > Thanks for bringing it up. Reiji and I discussed about this. Checking
> > for kvm_vm_has_ran_once() will be a good move, however, it will go
> > against the ABI expectations of setting the PMCR. I'd like others to
> > weigh in on this as well. What do you think?
> >
> > Thank you.
> > Raghavendra
>
> Before this change, kvm not allowed userspace to change the pmcr_n, but
> allowed to change the lower ARMV8_PMU_PMCR_MASK bits. With this change,
> we now allow to change the pmcr_n, we should not block the change to
> ARMV8_PMU_PMCR_MASK after vm start to run, but how about we just block
> the change to ARMV8_PMU_PMCR_N after vm start to run?
>
I believe you are referring to the guest trap access part of it
(access_pmcr()). This patch focuses on the userspace access of PMCR
via the KVM_SET_ONE_REG ioctl. Before this patch, KVM did not control
the writes to the reg and userspace was free to write to any bits at
any time.

Thank you.
Raghavendra
> Thanks,
> Shaoqin
>
> >> Thanks,
> >> Shaoqin
> >>
> >>> +     if (ret)
> >>> +             return ret;
> >>> +
> >>> +     /*
> >>> +      * Ignore writes to RES0 bits, read only bits that are cleared on
> >>> +      * vCPU reset, and writable bits that KVM doesn't support yet.
> >>> +      * (i.e. only PMCR.N and bits [7:0] are mutable from userspace)
> >>> +      * The LP bit is RES0 when FEAT_PMUv3p5 is not supported on the vCPU.
> >>> +      * But, we leave the bit as it is here, as the vCPU's PMUver might
> >>> +      * be changed later (NOTE: the bit will be cleared on first vCPU run
> >>> +      * if necessary).
> >>> +      */
> >>> +     mutable_mask = (ARMV8_PMU_PMCR_MASK | ARMV8_PMU_PMCR_N);
> >>> +     val &= mutable_mask;
> >>> +     val |= (__vcpu_sys_reg(vcpu, r->reg) & ~mutable_mask);
> >>> +
> >>> +     /* The LC bit is RES1 when AArch32 is not supported */
> >>> +     if (!kvm_supports_32bit_el0())
> >>> +             val |= ARMV8_PMU_PMCR_LC;
> >>> +
> >>> +     __vcpu_sys_reg(vcpu, r->reg) = val;
> >>> +     return 0;
> >>> +}
> >>> +
> >>>    /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
> >>>    #define DBG_BCR_BVR_WCR_WVR_EL1(n)                                  \
> >>>        { SYS_DESC(SYS_DBGBVRn_EL1(n)),                                 \
> >>> @@ -2147,8 +2192,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
> >>>        { SYS_DESC(SYS_CTR_EL0), access_ctr },
> >>>        { SYS_DESC(SYS_SVCR), undef_access },
> >>>
> >>> -     { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr,
> >>> -       .reset = reset_pmcr, .reg = PMCR_EL0, .get_user = get_pmcr },
> >>> +     { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
> >>> +       .reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr },
> >>>        { PMU_SYS_REG(PMCNTENSET_EL0),
> >>>          .access = access_pmcnten, .reg = PMCNTENSET_EL0 },
> >>>        { PMU_SYS_REG(PMCNTENCLR_EL0),
> >>
> >> --
> >> Shaoqin
> >>
> >
>
> --
> Shaoqin
>
Shaoqin Huang Aug. 26, 2023, 2:40 a.m. UTC | #8
On 8/26/23 06:34, Raghavendra Rao Ananta wrote:
> On Thu, Aug 24, 2023 at 1:50 AM Shaoqin Huang <shahuang@redhat.com> wrote:
>>
>>
>>
>> On 8/24/23 00:06, Raghavendra Rao Ananta wrote:
>>> On Tue, Aug 22, 2023 at 3:06 AM Shaoqin Huang <shahuang@redhat.com> wrote:
>>>>
>>>> Hi Raghavendra,
>>>>
>>>> On 8/17/23 08:30, Raghavendra Rao Ananta wrote:
>>>>> From: Reiji Watanabe <reijiw@google.com>
>>>>>
>>>>> KVM does not yet support userspace modifying PMCR_EL0.N (With
>>>>> the previous patch, KVM ignores what is written by upserspace).
>>>>> Add support userspace limiting PMCR_EL0.N.
>>>>>
>>>>> Disallow userspace to set PMCR_EL0.N to a value that is greater
>>>>> than the host value (KVM_SET_ONE_REG will fail), as KVM doesn't
>>>>> support more event counters than the host HW implements.
>>>>> Although this is an ABI change, this change only affects
>>>>> userspace setting PMCR_EL0.N to a larger value than the host.
>>>>> As accesses to unadvertised event counters indices is CONSTRAINED
>>>>> UNPREDICTABLE behavior, and PMCR_EL0.N was reset to the host value
>>>>> on every vCPU reset before this series, I can't think of any
>>>>> use case where a user space would do that.
>>>>>
>>>>> Also, ignore writes to read-only bits that are cleared on vCPU reset,
>>>>> and RES{0,1} bits (including writable bits that KVM doesn't support
>>>>> yet), as those bits shouldn't be modified (at least with
>>>>> the current KVM).
>>>>>
>>>>> Signed-off-by: Reiji Watanabe <reijiw@google.com>
>>>>> Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
>>>>> ---
>>>>>     arch/arm64/include/asm/kvm_host.h |  3 ++
>>>>>     arch/arm64/kvm/pmu-emul.c         |  1 +
>>>>>     arch/arm64/kvm/sys_regs.c         | 49 +++++++++++++++++++++++++++++--
>>>>>     3 files changed, 51 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>>>>> index 0f2dbbe8f6a7e..c15ec365283d1 100644
>>>>> --- a/arch/arm64/include/asm/kvm_host.h
>>>>> +++ b/arch/arm64/include/asm/kvm_host.h
>>>>> @@ -259,6 +259,9 @@ struct kvm_arch {
>>>>>         /* PMCR_EL0.N value for the guest */
>>>>>         u8 pmcr_n;
>>>>>
>>>>> +     /* Limit value of PMCR_EL0.N for the guest */
>>>>> +     u8 pmcr_n_limit;
>>>>> +
>>>>>         /* Hypercall features firmware registers' descriptor */
>>>>>         struct kvm_smccc_features smccc_feat;
>>>>>         struct maple_tree smccc_filter;
>>>>> diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
>>>>> index ce7de6bbdc967..39ad56a71ad20 100644
>>>>> --- a/arch/arm64/kvm/pmu-emul.c
>>>>> +++ b/arch/arm64/kvm/pmu-emul.c
>>>>> @@ -896,6 +896,7 @@ int kvm_arm_set_vm_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
>>>>>          * while the latter does not.
>>>>>          */
>>>>>         kvm->arch.pmcr_n = arm_pmu->num_events - 1;
>>>>> +     kvm->arch.pmcr_n_limit = arm_pmu->num_events - 1;
>>>>>
>>>>>         return 0;
>>>>>     }
>>>>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>>>>> index 2075901356c5b..c01d62afa7db4 100644
>>>>> --- a/arch/arm64/kvm/sys_regs.c
>>>>> +++ b/arch/arm64/kvm/sys_regs.c
>>>>> @@ -1086,6 +1086,51 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
>>>>>         return 0;
>>>>>     }
>>>>>
>>>>> +static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
>>>>> +                 u64 val)
>>>>> +{
>>>>> +     struct kvm *kvm = vcpu->kvm;
>>>>> +     u64 new_n, mutable_mask;
>>>>> +     int ret = 0;
>>>>> +
>>>>> +     new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
>>>>> +
>>>>> +     mutex_lock(&kvm->arch.config_lock);
>>>>> +     if (unlikely(new_n != kvm->arch.pmcr_n)) {
>>>>> +             /*
>>>>> +              * The vCPU can't have more counters than the PMU
>>>>> +              * hardware implements.
>>>>> +              */
>>>>> +             if (new_n <= kvm->arch.pmcr_n_limit)
>>>>> +                     kvm->arch.pmcr_n = new_n;
>>>>> +             else
>>>>> +                     ret = -EINVAL;
>>>>> +     }
>>>>> +     mutex_unlock(&kvm->arch.config_lock);
>>>>
>>>> Another thing I am just wonder is that should we block any modification
>>>> to the pmcr_n after vm start to run? Like add one more checking
>>>> kvm_vm_has_ran_once() at the beginning of the set_pmcr() function.
>>>>
>>> Thanks for bringing it up. Reiji and I discussed about this. Checking
>>> for kvm_vm_has_ran_once() will be a good move, however, it will go
>>> against the ABI expectations of setting the PMCR. I'd like others to
>>> weigh in on this as well. What do you think?
>>>
>>> Thank you.
>>> Raghavendra
>>
>> Before this change, kvm not allowed userspace to change the pmcr_n, but
>> allowed to change the lower ARMV8_PMU_PMCR_MASK bits. With this change,
>> we now allow to change the pmcr_n, we should not block the change to
>> ARMV8_PMU_PMCR_MASK after vm start to run, but how about we just block
>> the change to ARMV8_PMU_PMCR_N after vm start to run?
>>
> I believe you are referring to the guest trap access part of it
> (access_pmcr()). This patch focuses on the userspace access of PMCR
> via the KVM_SET_ONE_REG ioctl. Before this patch, KVM did not control
> the writes to the reg and userspace was free to write to any bits at
> any time.
> 

Oh yeah. Thanks for your explanation. My head sometimes broken down.

Thanks,
Shaoqin

> Thank you.
> Raghavendra
>> Thanks,
>> Shaoqin
>>
>>>> Thanks,
>>>> Shaoqin
>>>>
>>>>> +     if (ret)
>>>>> +             return ret;
>>>>> +
>>>>> +     /*
>>>>> +      * Ignore writes to RES0 bits, read only bits that are cleared on
>>>>> +      * vCPU reset, and writable bits that KVM doesn't support yet.
>>>>> +      * (i.e. only PMCR.N and bits [7:0] are mutable from userspace)
>>>>> +      * The LP bit is RES0 when FEAT_PMUv3p5 is not supported on the vCPU.
>>>>> +      * But, we leave the bit as it is here, as the vCPU's PMUver might
>>>>> +      * be changed later (NOTE: the bit will be cleared on first vCPU run
>>>>> +      * if necessary).
>>>>> +      */
>>>>> +     mutable_mask = (ARMV8_PMU_PMCR_MASK | ARMV8_PMU_PMCR_N);
>>>>> +     val &= mutable_mask;
>>>>> +     val |= (__vcpu_sys_reg(vcpu, r->reg) & ~mutable_mask);
>>>>> +
>>>>> +     /* The LC bit is RES1 when AArch32 is not supported */
>>>>> +     if (!kvm_supports_32bit_el0())
>>>>> +             val |= ARMV8_PMU_PMCR_LC;
>>>>> +
>>>>> +     __vcpu_sys_reg(vcpu, r->reg) = val;
>>>>> +     return 0;
>>>>> +}
>>>>> +
>>>>>     /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
>>>>>     #define DBG_BCR_BVR_WCR_WVR_EL1(n)                                  \
>>>>>         { SYS_DESC(SYS_DBGBVRn_EL1(n)),                                 \
>>>>> @@ -2147,8 +2192,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>>>>>         { SYS_DESC(SYS_CTR_EL0), access_ctr },
>>>>>         { SYS_DESC(SYS_SVCR), undef_access },
>>>>>
>>>>> -     { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr,
>>>>> -       .reset = reset_pmcr, .reg = PMCR_EL0, .get_user = get_pmcr },
>>>>> +     { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
>>>>> +       .reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr },
>>>>>         { PMU_SYS_REG(PMCNTENSET_EL0),
>>>>>           .access = access_pmcnten, .reg = PMCNTENSET_EL0 },
>>>>>         { PMU_SYS_REG(PMCNTENCLR_EL0),
>>>>
>>>> --
>>>> Shaoqin
>>>>
>>>
>>
>> --
>> Shaoqin
>>
>
Oliver Upton Sept. 15, 2023, 8:36 p.m. UTC | #9
On Tue, Aug 22, 2023 at 11:26:23AM +0800, Shaoqin Huang wrote:

[...]

> > > > +static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> > > > +                 u64 val)
> > > > +{
> > > > +     struct kvm *kvm = vcpu->kvm;
> > > > +     u64 new_n, mutable_mask;
> > > > +     int ret = 0;
> > > > +
> > > > +     new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
> > > > +
> > > > +     mutex_lock(&kvm->arch.config_lock);
> > > > +     if (unlikely(new_n != kvm->arch.pmcr_n)) {
> > > > +             /*
> > > > +              * The vCPU can't have more counters than the PMU
> > > > +              * hardware implements.
> > > > +              */
> > > > +             if (new_n <= kvm->arch.pmcr_n_limit)
> > > > +                     kvm->arch.pmcr_n = new_n;
> > > > +             else
> > > > +                     ret = -EINVAL;
> > > > +     }
> > > 
> > > Since we have set the default value of pmcr_n, if we want to set a new
> > > pmcr_n, shouldn't it be a different value?
> > > 
> > > So how about change the checking to:
> > > 
> > > if (likely(new_n <= kvm->arch.pmcr_n_limit)
> > >          kvm->arch.pmcr_n = new_n;
> > > else
> > >          ret = -EINVAL;
> > > 
> > > what do you think?
> > > 
> > Sorry, I guess I didn't fully understand your suggestion. Are you
> > saying that it's 'likely' that userspace would configure the correct
> > value?
> > 
> It depends on how userspace use this api to limit the number of pmcr. I
> think what you mean in the code is that userspace need to set every vcpu's
> pmcr to the same value, so the `unlikely` here is right, only one vcpu can
> change the kvm->arch.pmcr.n, it saves the cpu cycles.
> 
> What suggest above might be wrong. Since I think when userspace want to
> limit the number of pmcr, it may just set the new_n on one vcpu, since the
> kvm->arch.pmcr_n is a VM-local value, every vcpu can see it, so it's
> `likely` the (new_n <= kvm->arch.pmcr_n_limit), it can decrease one checking
> statement.

How about we just do away with branch hints in the first place? This is
_not_ a hot path.
Oliver Upton Sept. 15, 2023, 8:53 p.m. UTC | #10
Hi Raghu,

On Thu, Aug 17, 2023 at 12:30:25AM +0000, Raghavendra Rao Ananta wrote:
> From: Reiji Watanabe <reijiw@google.com>
> 
> KVM does not yet support userspace modifying PMCR_EL0.N (With
> the previous patch, KVM ignores what is written by upserspace).

typo: userspace

> diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> index ce7de6bbdc967..39ad56a71ad20 100644
> --- a/arch/arm64/kvm/pmu-emul.c
> +++ b/arch/arm64/kvm/pmu-emul.c
> @@ -896,6 +896,7 @@ int kvm_arm_set_vm_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
>  	 * while the latter does not.
>  	 */
>  	kvm->arch.pmcr_n = arm_pmu->num_events - 1;
> +	kvm->arch.pmcr_n_limit = arm_pmu->num_events - 1;

Can't we just get at this through the arm_pmu instance rather than
copying it into kvm_arch?

>  	return 0;
>  }
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 2075901356c5b..c01d62afa7db4 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -1086,6 +1086,51 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
>  	return 0;
>  }
>  
> +static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> +		    u64 val)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	u64 new_n, mutable_mask;
> +	int ret = 0;
> +
> +	new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
> +
> +	mutex_lock(&kvm->arch.config_lock);
> +	if (unlikely(new_n != kvm->arch.pmcr_n)) {
> +		/*
> +		 * The vCPU can't have more counters than the PMU
> +		 * hardware implements.
> +		 */
> +		if (new_n <= kvm->arch.pmcr_n_limit)
> +			kvm->arch.pmcr_n = new_n;
> +		else
> +			ret = -EINVAL;
> +	}

Hmm, I'm not so sure about returning an error here. ABI has it that
userspace can write any value to PMCR_EL0 successfully. Can we just
ignore writes that attempt to set PMCR_EL0.N to something higher than
supported by hardware? Our general stance should be that system register
fields responsible for feature identification are immutable after the VM
has started.
Oliver Upton Sept. 15, 2023, 9:54 p.m. UTC | #11
On Fri, Sep 15, 2023 at 08:53:16PM +0000, Oliver Upton wrote:
> Hi Raghu,
> 
> On Thu, Aug 17, 2023 at 12:30:25AM +0000, Raghavendra Rao Ananta wrote:
> > From: Reiji Watanabe <reijiw@google.com>
> > 
> > KVM does not yet support userspace modifying PMCR_EL0.N (With
> > the previous patch, KVM ignores what is written by upserspace).
> 
> typo: userspace
> 
> > diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> > index ce7de6bbdc967..39ad56a71ad20 100644
> > --- a/arch/arm64/kvm/pmu-emul.c
> > +++ b/arch/arm64/kvm/pmu-emul.c
> > @@ -896,6 +896,7 @@ int kvm_arm_set_vm_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
> >  	 * while the latter does not.
> >  	 */
> >  	kvm->arch.pmcr_n = arm_pmu->num_events - 1;
> > +	kvm->arch.pmcr_n_limit = arm_pmu->num_events - 1;
> 
> Can't we just get at this through the arm_pmu instance rather than
> copying it into kvm_arch?
> 
> >  	return 0;
> >  }
> > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > index 2075901356c5b..c01d62afa7db4 100644
> > --- a/arch/arm64/kvm/sys_regs.c
> > +++ b/arch/arm64/kvm/sys_regs.c
> > @@ -1086,6 +1086,51 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> >  	return 0;
> >  }
> >  
> > +static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> > +		    u64 val)
> > +{
> > +	struct kvm *kvm = vcpu->kvm;
> > +	u64 new_n, mutable_mask;
> > +	int ret = 0;
> > +
> > +	new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
> > +
> > +	mutex_lock(&kvm->arch.config_lock);
> > +	if (unlikely(new_n != kvm->arch.pmcr_n)) {
> > +		/*
> > +		 * The vCPU can't have more counters than the PMU
> > +		 * hardware implements.
> > +		 */
> > +		if (new_n <= kvm->arch.pmcr_n_limit)
> > +			kvm->arch.pmcr_n = new_n;
> > +		else
> > +			ret = -EINVAL;
> > +	}
> 
> Hmm, I'm not so sure about returning an error here. ABI has it that
> userspace can write any value to PMCR_EL0 successfully. Can we just
> ignore writes that attempt to set PMCR_EL0.N to something higher than
> supported by hardware? Our general stance should be that system register
> fields responsible for feature identification are immutable after the VM
> has started.

I hacked up my reply and dropped some context; this doesn't read right.
Shaoqin made the point about preventing changes to PMCR_EL0.N after the
VM has started and I firmly agree. The behavior should be:

 - Writes to PMCR always succeed

 - PMCR_EL0.N values greater than what's supported by hardware are
   ignored

 - Changes to N after the VM has started are ignored.
Raghavendra Rao Ananta Sept. 18, 2023, 5:02 p.m. UTC | #12
On Fri, Sep 15, 2023 at 1:36 PM Oliver Upton <oliver.upton@linux.dev> wrote:
>
> On Tue, Aug 22, 2023 at 11:26:23AM +0800, Shaoqin Huang wrote:
>
> [...]
>
> > > > > +static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> > > > > +                 u64 val)
> > > > > +{
> > > > > +     struct kvm *kvm = vcpu->kvm;
> > > > > +     u64 new_n, mutable_mask;
> > > > > +     int ret = 0;
> > > > > +
> > > > > +     new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
> > > > > +
> > > > > +     mutex_lock(&kvm->arch.config_lock);
> > > > > +     if (unlikely(new_n != kvm->arch.pmcr_n)) {
> > > > > +             /*
> > > > > +              * The vCPU can't have more counters than the PMU
> > > > > +              * hardware implements.
> > > > > +              */
> > > > > +             if (new_n <= kvm->arch.pmcr_n_limit)
> > > > > +                     kvm->arch.pmcr_n = new_n;
> > > > > +             else
> > > > > +                     ret = -EINVAL;
> > > > > +     }
> > > >
> > > > Since we have set the default value of pmcr_n, if we want to set a new
> > > > pmcr_n, shouldn't it be a different value?
> > > >
> > > > So how about change the checking to:
> > > >
> > > > if (likely(new_n <= kvm->arch.pmcr_n_limit)
> > > >          kvm->arch.pmcr_n = new_n;
> > > > else
> > > >          ret = -EINVAL;
> > > >
> > > > what do you think?
> > > >
> > > Sorry, I guess I didn't fully understand your suggestion. Are you
> > > saying that it's 'likely' that userspace would configure the correct
> > > value?
> > >
> > It depends on how userspace use this api to limit the number of pmcr. I
> > think what you mean in the code is that userspace need to set every vcpu's
> > pmcr to the same value, so the `unlikely` here is right, only one vcpu can
> > change the kvm->arch.pmcr.n, it saves the cpu cycles.
> >
> > What suggest above might be wrong. Since I think when userspace want to
> > limit the number of pmcr, it may just set the new_n on one vcpu, since the
> > kvm->arch.pmcr_n is a VM-local value, every vcpu can see it, so it's
> > `likely` the (new_n <= kvm->arch.pmcr_n_limit), it can decrease one checking
> > statement.
>
> How about we just do away with branch hints in the first place? This is
> _not_ a hot path.
>
Sounds good to me.

Thank you.
Raghavendra
> --
> Thanks,
> Oliver
Raghavendra Rao Ananta Sept. 18, 2023, 5:07 p.m. UTC | #13
On Fri, Sep 15, 2023 at 1:53 PM Oliver Upton <oliver.upton@linux.dev> wrote:
>
> Hi Raghu,
>
> On Thu, Aug 17, 2023 at 12:30:25AM +0000, Raghavendra Rao Ananta wrote:
> > From: Reiji Watanabe <reijiw@google.com>
> >
> > KVM does not yet support userspace modifying PMCR_EL0.N (With
> > the previous patch, KVM ignores what is written by upserspace).
>
> typo: userspace
>
Noted.
> > diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> > index ce7de6bbdc967..39ad56a71ad20 100644
> > --- a/arch/arm64/kvm/pmu-emul.c
> > +++ b/arch/arm64/kvm/pmu-emul.c
> > @@ -896,6 +896,7 @@ int kvm_arm_set_vm_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
> >        * while the latter does not.
> >        */
> >       kvm->arch.pmcr_n = arm_pmu->num_events - 1;
> > +     kvm->arch.pmcr_n_limit = arm_pmu->num_events - 1;
>
> Can't we just get at this through the arm_pmu instance rather than
> copying it into kvm_arch?
>
Yeah, I suppose we can directly access it in set_pmcr().

Thank you.
Raghavendra
> >       return 0;
> >  }
> > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > index 2075901356c5b..c01d62afa7db4 100644
> > --- a/arch/arm64/kvm/sys_regs.c
> > +++ b/arch/arm64/kvm/sys_regs.c
> > @@ -1086,6 +1086,51 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> >       return 0;
> >  }
> >
> > +static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> > +                 u64 val)
> > +{
> > +     struct kvm *kvm = vcpu->kvm;
> > +     u64 new_n, mutable_mask;
> > +     int ret = 0;
> > +
> > +     new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
> > +
> > +     mutex_lock(&kvm->arch.config_lock);
> > +     if (unlikely(new_n != kvm->arch.pmcr_n)) {
> > +             /*
> > +              * The vCPU can't have more counters than the PMU
> > +              * hardware implements.
> > +              */
> > +             if (new_n <= kvm->arch.pmcr_n_limit)
> > +                     kvm->arch.pmcr_n = new_n;
> > +             else
> > +                     ret = -EINVAL;
> > +     }
>
> Hmm, I'm not so sure about returning an error here. ABI has it that
> userspace can write any value to PMCR_EL0 successfully. Can we just
> ignore writes that attempt to set PMCR_EL0.N to something higher than
> supported by hardware? Our general stance should be that system register
> fields responsible for feature identification are immutable after the VM
> has started.
>
> --
> Thanks,
> Oliver
Raghavendra Rao Ananta Sept. 18, 2023, 5:11 p.m. UTC | #14
On Fri, Sep 15, 2023 at 2:54 PM Oliver Upton <oliver.upton@linux.dev> wrote:
>
> On Fri, Sep 15, 2023 at 08:53:16PM +0000, Oliver Upton wrote:
> > Hi Raghu,
> >
> > On Thu, Aug 17, 2023 at 12:30:25AM +0000, Raghavendra Rao Ananta wrote:
> > > From: Reiji Watanabe <reijiw@google.com>
> > >
> > > KVM does not yet support userspace modifying PMCR_EL0.N (With
> > > the previous patch, KVM ignores what is written by upserspace).
> >
> > typo: userspace
> >
> > > diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> > > index ce7de6bbdc967..39ad56a71ad20 100644
> > > --- a/arch/arm64/kvm/pmu-emul.c
> > > +++ b/arch/arm64/kvm/pmu-emul.c
> > > @@ -896,6 +896,7 @@ int kvm_arm_set_vm_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
> > >      * while the latter does not.
> > >      */
> > >     kvm->arch.pmcr_n = arm_pmu->num_events - 1;
> > > +   kvm->arch.pmcr_n_limit = arm_pmu->num_events - 1;
> >
> > Can't we just get at this through the arm_pmu instance rather than
> > copying it into kvm_arch?
> >
> > >     return 0;
> > >  }
> > > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > > index 2075901356c5b..c01d62afa7db4 100644
> > > --- a/arch/arm64/kvm/sys_regs.c
> > > +++ b/arch/arm64/kvm/sys_regs.c
> > > @@ -1086,6 +1086,51 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> > >     return 0;
> > >  }
> > >
> > > +static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> > > +               u64 val)
> > > +{
> > > +   struct kvm *kvm = vcpu->kvm;
> > > +   u64 new_n, mutable_mask;
> > > +   int ret = 0;
> > > +
> > > +   new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
> > > +
> > > +   mutex_lock(&kvm->arch.config_lock);
> > > +   if (unlikely(new_n != kvm->arch.pmcr_n)) {
> > > +           /*
> > > +            * The vCPU can't have more counters than the PMU
> > > +            * hardware implements.
> > > +            */
> > > +           if (new_n <= kvm->arch.pmcr_n_limit)
> > > +                   kvm->arch.pmcr_n = new_n;
> > > +           else
> > > +                   ret = -EINVAL;
> > > +   }
> >
> > Hmm, I'm not so sure about returning an error here. ABI has it that
> > userspace can write any value to PMCR_EL0 successfully. Can we just
> > ignore writes that attempt to set PMCR_EL0.N to something higher than
> > supported by hardware? Our general stance should be that system register
> > fields responsible for feature identification are immutable after the VM
> > has started.
>
> I hacked up my reply and dropped some context; this doesn't read right.
> Shaoqin made the point about preventing changes to PMCR_EL0.N after the
> VM has started and I firmly agree. The behavior should be:
>
>  - Writes to PMCR always succeed
>
>  - PMCR_EL0.N values greater than what's supported by hardware are
>    ignored
>
>  - Changes to N after the VM has started are ignored.
>
Reiji and I were wondering if we should proceed with this as this
would change userspace expectation. BTW, when you said "ignored", does
that mean we silently return to userspace with a success or with EBUSY
(changing the expectations)?

Thank you.
Raghavendra
> --
> Thanks,
> Oliver
Raghavendra Rao Ananta Sept. 18, 2023, 5:22 p.m. UTC | #15
Hi Oliver,

On Mon, Sep 18, 2023 at 10:11 AM Raghavendra Rao Ananta
<rananta@google.com> wrote:
>
> On Fri, Sep 15, 2023 at 2:54 PM Oliver Upton <oliver.upton@linux.dev> wrote:
> >
> > On Fri, Sep 15, 2023 at 08:53:16PM +0000, Oliver Upton wrote:
> > > Hi Raghu,
> > >
> > > On Thu, Aug 17, 2023 at 12:30:25AM +0000, Raghavendra Rao Ananta wrote:
> > > > From: Reiji Watanabe <reijiw@google.com>
> > > >
> > > > KVM does not yet support userspace modifying PMCR_EL0.N (With
> > > > the previous patch, KVM ignores what is written by upserspace).
> > >
> > > typo: userspace
> > >
> > > > diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> > > > index ce7de6bbdc967..39ad56a71ad20 100644
> > > > --- a/arch/arm64/kvm/pmu-emul.c
> > > > +++ b/arch/arm64/kvm/pmu-emul.c
> > > > @@ -896,6 +896,7 @@ int kvm_arm_set_vm_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
> > > >      * while the latter does not.
> > > >      */
> > > >     kvm->arch.pmcr_n = arm_pmu->num_events - 1;
> > > > +   kvm->arch.pmcr_n_limit = arm_pmu->num_events - 1;
> > >
> > > Can't we just get at this through the arm_pmu instance rather than
> > > copying it into kvm_arch?
> > >
> > > >     return 0;
> > > >  }
> > > > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > > > index 2075901356c5b..c01d62afa7db4 100644
> > > > --- a/arch/arm64/kvm/sys_regs.c
> > > > +++ b/arch/arm64/kvm/sys_regs.c
> > > > @@ -1086,6 +1086,51 @@ static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> > > >     return 0;
> > > >  }
> > > >
> > > > +static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
> > > > +               u64 val)
> > > > +{
> > > > +   struct kvm *kvm = vcpu->kvm;
> > > > +   u64 new_n, mutable_mask;
> > > > +   int ret = 0;
> > > > +
> > > > +   new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
> > > > +
> > > > +   mutex_lock(&kvm->arch.config_lock);
> > > > +   if (unlikely(new_n != kvm->arch.pmcr_n)) {
> > > > +           /*
> > > > +            * The vCPU can't have more counters than the PMU
> > > > +            * hardware implements.
> > > > +            */
> > > > +           if (new_n <= kvm->arch.pmcr_n_limit)
> > > > +                   kvm->arch.pmcr_n = new_n;
> > > > +           else
> > > > +                   ret = -EINVAL;
> > > > +   }
> > >
> > > Hmm, I'm not so sure about returning an error here. ABI has it that
> > > userspace can write any value to PMCR_EL0 successfully. Can we just
> > > ignore writes that attempt to set PMCR_EL0.N to something higher than
> > > supported by hardware? Our general stance should be that system register
> > > fields responsible for feature identification are immutable after the VM
> > > has started.
> >
> > I hacked up my reply and dropped some context; this doesn't read right.
> > Shaoqin made the point about preventing changes to PMCR_EL0.N after the
> > VM has started and I firmly agree. The behavior should be:
> >
> >  - Writes to PMCR always succeed
> >
> >  - PMCR_EL0.N values greater than what's supported by hardware are
> >    ignored
> >
> >  - Changes to N after the VM has started are ignored.
> >
> Reiji and I were wondering if we should proceed with this as this
> would change userspace expectation. BTW, when you said "ignored", does
> that mean we silently return to userspace with a success or with EBUSY
> (changing the expectations)?
>
Sorry, I just read your earlier comment (one before you detailed the
behavior), from which I'm guessing "ignore" means simply disregard the
change and return success to userspace. But wouldn't that cause issues
in debugging?

Thank you.
Raghavendra
> Thank you.
> Raghavendra
> > --
> > Thanks,
> > Oliver
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0f2dbbe8f6a7e..c15ec365283d1 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -259,6 +259,9 @@  struct kvm_arch {
 	/* PMCR_EL0.N value for the guest */
 	u8 pmcr_n;
 
+	/* Limit value of PMCR_EL0.N for the guest */
+	u8 pmcr_n_limit;
+
 	/* Hypercall features firmware registers' descriptor */
 	struct kvm_smccc_features smccc_feat;
 	struct maple_tree smccc_filter;
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index ce7de6bbdc967..39ad56a71ad20 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -896,6 +896,7 @@  int kvm_arm_set_vm_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
 	 * while the latter does not.
 	 */
 	kvm->arch.pmcr_n = arm_pmu->num_events - 1;
+	kvm->arch.pmcr_n_limit = arm_pmu->num_events - 1;
 
 	return 0;
 }
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 2075901356c5b..c01d62afa7db4 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1086,6 +1086,51 @@  static int get_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
 	return 0;
 }
 
+static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+		    u64 val)
+{
+	struct kvm *kvm = vcpu->kvm;
+	u64 new_n, mutable_mask;
+	int ret = 0;
+
+	new_n = FIELD_GET(ARMV8_PMU_PMCR_N, val);
+
+	mutex_lock(&kvm->arch.config_lock);
+	if (unlikely(new_n != kvm->arch.pmcr_n)) {
+		/*
+		 * The vCPU can't have more counters than the PMU
+		 * hardware implements.
+		 */
+		if (new_n <= kvm->arch.pmcr_n_limit)
+			kvm->arch.pmcr_n = new_n;
+		else
+			ret = -EINVAL;
+	}
+	mutex_unlock(&kvm->arch.config_lock);
+	if (ret)
+		return ret;
+
+	/*
+	 * Ignore writes to RES0 bits, read only bits that are cleared on
+	 * vCPU reset, and writable bits that KVM doesn't support yet.
+	 * (i.e. only PMCR.N and bits [7:0] are mutable from userspace)
+	 * The LP bit is RES0 when FEAT_PMUv3p5 is not supported on the vCPU.
+	 * But, we leave the bit as it is here, as the vCPU's PMUver might
+	 * be changed later (NOTE: the bit will be cleared on first vCPU run
+	 * if necessary).
+	 */
+	mutable_mask = (ARMV8_PMU_PMCR_MASK | ARMV8_PMU_PMCR_N);
+	val &= mutable_mask;
+	val |= (__vcpu_sys_reg(vcpu, r->reg) & ~mutable_mask);
+
+	/* The LC bit is RES1 when AArch32 is not supported */
+	if (!kvm_supports_32bit_el0())
+		val |= ARMV8_PMU_PMCR_LC;
+
+	__vcpu_sys_reg(vcpu, r->reg) = val;
+	return 0;
+}
+
 /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
 #define DBG_BCR_BVR_WCR_WVR_EL1(n)					\
 	{ SYS_DESC(SYS_DBGBVRn_EL1(n)),					\
@@ -2147,8 +2192,8 @@  static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_CTR_EL0), access_ctr },
 	{ SYS_DESC(SYS_SVCR), undef_access },
 
-	{ PMU_SYS_REG(PMCR_EL0), .access = access_pmcr,
-	  .reset = reset_pmcr, .reg = PMCR_EL0, .get_user = get_pmcr },
+	{ PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
+	  .reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr },
 	{ PMU_SYS_REG(PMCNTENSET_EL0),
 	  .access = access_pmcnten, .reg = PMCNTENSET_EL0 },
 	{ PMU_SYS_REG(PMCNTENCLR_EL0),