diff mbox series

[RFC] KVM: arm64: Support FEAT_CCIDX

Message ID 1618042597-59294-1-git-send-email-zhangshaokun@hisilicon.com (mailing list archive)
State New, archived
Headers show
Series [RFC] KVM: arm64: Support FEAT_CCIDX | expand

Commit Message

Shaokun Zhang April 10, 2021, 8:16 a.m. UTC
CCSIDR_EL1 can be implemented as 64-bit format inferred by CCIDX field
in ID_AA64MMFR2_EL1. The bits of Numsets and Associativity are different
from the 32-bit format. Let's support this feature.

Cc: Marc Zyngier <maz@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Alexandru Elisei <alexandru.elisei@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
---
 arch/arm64/kvm/sys_regs.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

Comments

Marc Zyngier April 10, 2021, 9:54 a.m. UTC | #1
On Sat, 10 Apr 2021 09:16:37 +0100,
Shaokun Zhang <zhangshaokun@hisilicon.com> wrote:
> 
> CCSIDR_EL1 can be implemented as 64-bit format inferred by CCIDX field
> in ID_AA64MMFR2_EL1. The bits of Numsets and Associativity are different
> from the 32-bit format. Let's support this feature.
> 
> Cc: Marc Zyngier <maz@kernel.org>
> Cc: James Morse <james.morse@arm.com>
> Cc: Alexandru Elisei <alexandru.elisei@arm.com>
> Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
> Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
> ---
>  arch/arm64/kvm/sys_regs.c | 27 +++++++++++++++++++--------
>  1 file changed, 19 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 52fdb9a015a4..0dc822cef20b 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -18,6 +18,7 @@
>  
>  #include <asm/cacheflush.h>
>  #include <asm/cputype.h>
> +#include <asm/cpufeature.h>

If you are going to add this (why?), at least add it in alphabetic order.

>  #include <asm/debug-monitors.h>
>  #include <asm/esr.h>
>  #include <asm/kvm_arm.h>
> @@ -95,9 +96,9 @@ static u32 cache_levels;
>  #define CSSELR_MAX 14
>  
>  /* Which cache CCSIDR represents depends on CSSELR value. */
> -static u32 get_ccsidr(u32 csselr)
> +static u64 get_ccsidr(u32 csselr)
>  {
> -	u32 ccsidr;
> +	u64 ccsidr;
>  
>  	/* Make sure noone else changes CSSELR during this! */
>  	local_irq_disable();
> @@ -1275,12 +1276,16 @@ static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
>  static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
>  			  const struct sys_reg_desc *r)
>  {
> -	u32 csselr;
> +	u32 csselr, ccidx;
> +	u64 mmfr2;
>  
>  	if (p->is_write)
>  		return write_to_read_only(vcpu, p, r);
>  
>  	csselr = vcpu_read_sys_reg(vcpu, CSSELR_EL1);
> +	mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
> +	ccidx = cpuid_feature_extract_unsigned_field(mmfr2,
> +						     ID_AA64MMFR2_CCIDX_SHIFT);

What happens on an asymmetric system where only some of the CPUs have
FEAT_CCIDX?

>  	p->regval = get_ccsidr(csselr);
>  
>  	/*
> @@ -1295,8 +1300,13 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
>  	 * geometry (which is not permitted by the architecture), they would
>  	 * only do so for virtually indexed caches.]
>  	 */
> -	if (!(csselr & 1)) // data or unified cache
> -		p->regval &= ~GENMASK(27, 3);
> +	if (!(csselr & 1)) { // data or unified cache
> +		if (ccidx)
> +			p->regval &= ~(GENMASK(23, 3) | GENMASK(55, 32));
> +		else
> +			p->regval &= ~GENMASK(27, 3);
> +	}
> +
>  	return true;
>  }
>  
> @@ -2521,7 +2531,7 @@ static bool is_valid_cache(u32 val)
>  static int demux_c15_get(u64 id, void __user *uaddr)
>  {
>  	u32 val;
> -	u32 __user *uval = uaddr;
> +	u64 __user *uval = uaddr;

What? Has the user API changed while I wasn't looking? Getting CCSIDR
can only return a 32bit quantity on AArch32. In fact, CCSIDR is
*always* 32bit, CCIDX or not. I have no idea what you are trying to do
here, but at best you are now corrupting 32bit of userspace.

>  
>  	/* Fail if we have unknown bits set. */
>  	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
> @@ -2545,8 +2555,9 @@ static int demux_c15_get(u64 id, void __user *uaddr)
>  
>  static int demux_c15_set(u64 id, void __user *uaddr)
>  {
> -	u32 val, newval;
> -	u32 __user *uval = uaddr;
> +	u32 val;
> +	u64 newval;
> +	u64 __user *uval = uaddr;

Same brokenness.

>  
>  	/* Fail if we have unknown bits set. */
>  	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK

What about CCSIDR2_EL1, which is mandatory when FEAT_CCIDX is
present? How about the AArch32 handling of that register? I don't
think you have thought this one through.

Another question is: why should we care at all? Why don't we drop all
that and only implement a virtual cache topology? A VM shouldn't care
at all about this, and we are already lying about the topology anyway.
We could even get the VMM to set whatever stupid topology it wants for
the sake of it (and to restore previously created VMs).

	M.
Shaokun Zhang April 12, 2021, 2:22 a.m. UTC | #2
Hi Marc,

On 2021/4/10 17:54, Marc Zyngier wrote:
> On Sat, 10 Apr 2021 09:16:37 +0100,
> Shaokun Zhang <zhangshaokun@hisilicon.com> wrote:
>>
>> CCSIDR_EL1 can be implemented as 64-bit format inferred by CCIDX field
>> in ID_AA64MMFR2_EL1. The bits of Numsets and Associativity are different
>> from the 32-bit format. Let's support this feature.
>>
>> Cc: Marc Zyngier <maz@kernel.org>
>> Cc: James Morse <james.morse@arm.com>
>> Cc: Alexandru Elisei <alexandru.elisei@arm.com>
>> Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
>> Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
>> ---
>>  arch/arm64/kvm/sys_regs.c | 27 +++++++++++++++++++--------
>>  1 file changed, 19 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index 52fdb9a015a4..0dc822cef20b 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -18,6 +18,7 @@
>>  
>>  #include <asm/cacheflush.h>
>>  #include <asm/cputype.h>
>> +#include <asm/cpufeature.h>
> 
> If you are going to add this (why?), at least add it in alphabetic order.

cpuid_feature_extract_unsigned_field will be used later.
I shall do it in alphabetic order.

> 
>>  #include <asm/debug-monitors.h>
>>  #include <asm/esr.h>
>>  #include <asm/kvm_arm.h>
>> @@ -95,9 +96,9 @@ static u32 cache_levels;
>>  #define CSSELR_MAX 14
>>  
>>  /* Which cache CCSIDR represents depends on CSSELR value. */
>> -static u32 get_ccsidr(u32 csselr)
>> +static u64 get_ccsidr(u32 csselr)
>>  {
>> -	u32 ccsidr;
>> +	u64 ccsidr;
>>  
>>  	/* Make sure noone else changes CSSELR during this! */
>>  	local_irq_disable();
>> @@ -1275,12 +1276,16 @@ static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
>>  static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
>>  			  const struct sys_reg_desc *r)
>>  {
>> -	u32 csselr;
>> +	u32 csselr, ccidx;
>> +	u64 mmfr2;
>>  
>>  	if (p->is_write)
>>  		return write_to_read_only(vcpu, p, r);
>>  
>>  	csselr = vcpu_read_sys_reg(vcpu, CSSELR_EL1);
>> +	mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
>> +	ccidx = cpuid_feature_extract_unsigned_field(mmfr2,
>> +						     ID_AA64MMFR2_CCIDX_SHIFT);
> 
> What happens on an asymmetric system where only some of the CPUs have
> FEAT_CCIDX?

If other CPUs don't have FEAT_CCIDX, its value is 0b0000 while CCSIDR_EL1 is 32-bit format.

> 
>>  	p->regval = get_ccsidr(csselr);
>>  
>>  	/*
>> @@ -1295,8 +1300,13 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
>>  	 * geometry (which is not permitted by the architecture), they would
>>  	 * only do so for virtually indexed caches.]
>>  	 */
>> -	if (!(csselr & 1)) // data or unified cache
>> -		p->regval &= ~GENMASK(27, 3);
>> +	if (!(csselr & 1)) { // data or unified cache
>> +		if (ccidx)
>> +			p->regval &= ~(GENMASK(23, 3) | GENMASK(55, 32));
>> +		else
>> +			p->regval &= ~GENMASK(27, 3);
>> +	}
>> +
>>  	return true;
>>  }
>>  
>> @@ -2521,7 +2531,7 @@ static bool is_valid_cache(u32 val)
>>  static int demux_c15_get(u64 id, void __user *uaddr)
>>  {
>>  	u32 val;
>> -	u32 __user *uval = uaddr;
>> +	u64 __user *uval = uaddr;
> 
> What? Has the user API changed while I wasn't looking? Getting CCSIDR
> can only return a 32bit quantity on AArch32. In fact, CCSIDR is
> *always* 32bit, CCIDX or not. I have no idea what you are trying to do
> here, but at best you are now corrupting 32bit of userspace.
> 

Oops, I really missed this.

>>  
>>  	/* Fail if we have unknown bits set. */
>>  	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
>> @@ -2545,8 +2555,9 @@ static int demux_c15_get(u64 id, void __user *uaddr)
>>  
>>  static int demux_c15_set(u64 id, void __user *uaddr)
>>  {
>> -	u32 val, newval;
>> -	u32 __user *uval = uaddr;
>> +	u32 val;
>> +	u64 newval;
>> +	u64 __user *uval = uaddr;
> 
> Same brokenness.
> 
>>  
>>  	/* Fail if we have unknown bits set. */
>>  	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
> 
> What about CCSIDR2_EL1, which is mandatory when FEAT_CCIDX is
> present? How about the AArch32 handling of that register? I don't
> think you have thought this one through.

To be honest, AArch32 is not considered directly and I sent this only
as RFC. When I wrote some flush cache code using by set/way mode and
noticed that CCSIDR_EL1 is used here.

> 
> Another question is: why should we care at all? Why don't we drop all
> that and only implement a virtual cache topology? A VM shouldn't care
> at all about this, and we are already lying about the topology anyway.
> We could even get the VMM to set whatever stupid topology it wants for
> the sake of it (and to restore previously created VMs).

Got it,

Thanks for your detailed comments.
Shaokun

> 
> 	M.
>
Marc Zyngier April 12, 2021, 7:49 a.m. UTC | #3
Hi Shaokun,

On Mon, 12 Apr 2021 03:22:03 +0100,
Shaokun Zhang <zhangshaokun@hisilicon.com> wrote:
> 
> Hi Marc,
> 
> On 2021/4/10 17:54, Marc Zyngier wrote:
> > On Sat, 10 Apr 2021 09:16:37 +0100,
> > Shaokun Zhang <zhangshaokun@hisilicon.com> wrote:
> >>
> >> CCSIDR_EL1 can be implemented as 64-bit format inferred by CCIDX field
> >> in ID_AA64MMFR2_EL1. The bits of Numsets and Associativity are different
> >> from the 32-bit format. Let's support this feature.
> >>
> >> Cc: Marc Zyngier <maz@kernel.org>
> >> Cc: James Morse <james.morse@arm.com>
> >> Cc: Alexandru Elisei <alexandru.elisei@arm.com>
> >> Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
> >> Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
> >> ---
> >>  arch/arm64/kvm/sys_regs.c | 27 +++++++++++++++++++--------
> >>  1 file changed, 19 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> >> index 52fdb9a015a4..0dc822cef20b 100644
> >> --- a/arch/arm64/kvm/sys_regs.c
> >> +++ b/arch/arm64/kvm/sys_regs.c
> >> @@ -18,6 +18,7 @@
> >>  
> >>  #include <asm/cacheflush.h>
> >>  #include <asm/cputype.h>
> >> +#include <asm/cpufeature.h>
> > 
> > If you are going to add this (why?), at least add it in alphabetic order.
> 
> cpuid_feature_extract_unsigned_field will be used later.
> I shall do it in alphabetic order.

We already use this function all over the place, and it is already
dragged in via many other paths. Anyway, that's not the biggest
problem.

> 
> > 
> >>  #include <asm/debug-monitors.h>
> >>  #include <asm/esr.h>
> >>  #include <asm/kvm_arm.h>
> >> @@ -95,9 +96,9 @@ static u32 cache_levels;
> >>  #define CSSELR_MAX 14
> >>  
> >>  /* Which cache CCSIDR represents depends on CSSELR value. */
> >> -static u32 get_ccsidr(u32 csselr)
> >> +static u64 get_ccsidr(u32 csselr)
> >>  {
> >> -	u32 ccsidr;
> >> +	u64 ccsidr;
> >>  
> >>  	/* Make sure noone else changes CSSELR during this! */
> >>  	local_irq_disable();
> >> @@ -1275,12 +1276,16 @@ static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
> >>  static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
> >>  			  const struct sys_reg_desc *r)
> >>  {
> >> -	u32 csselr;
> >> +	u32 csselr, ccidx;
> >> +	u64 mmfr2;
> >>  
> >>  	if (p->is_write)
> >>  		return write_to_read_only(vcpu, p, r);
> >>  
> >>  	csselr = vcpu_read_sys_reg(vcpu, CSSELR_EL1);
> >> +	mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
> >> +	ccidx = cpuid_feature_extract_unsigned_field(mmfr2,
> >> +						     ID_AA64MMFR2_CCIDX_SHIFT);
> > 
> > What happens on an asymmetric system where only some of the CPUs have
> > FEAT_CCIDX?
> 
> If other CPUs don't have FEAT_CCIDX, its value is 0b0000 while
> CCSIDR_EL1 is 32-bit format.
>

You are missing the point: CPU-A has CCIDX, CPU-B doesn't. My vcpu
runs on CPU-A, gets preempted right after the read of
ID_AA64MMFR2_EL1, and then scheduled on CPU-B. You will now compute
the guest view of CCSIDR_EL1 with CPU-B's value, but interpreting it
with CPU-A's configuration. That's totally broken.

> > 
> >>  	p->regval = get_ccsidr(csselr);
> >>  
> >>  	/*
> >> @@ -1295,8 +1300,13 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
> >>  	 * geometry (which is not permitted by the architecture), they would
> >>  	 * only do so for virtually indexed caches.]
> >>  	 */
> >> -	if (!(csselr & 1)) // data or unified cache
> >> -		p->regval &= ~GENMASK(27, 3);
> >> +	if (!(csselr & 1)) { // data or unified cache
> >> +		if (ccidx)
> >> +			p->regval &= ~(GENMASK(23, 3) | GENMASK(55, 32));
> >> +		else
> >> +			p->regval &= ~GENMASK(27, 3);
> >> +	}
> >> +
> >>  	return true;
> >>  }
> >>  
> >> @@ -2521,7 +2531,7 @@ static bool is_valid_cache(u32 val)
> >>  static int demux_c15_get(u64 id, void __user *uaddr)
> >>  {
> >>  	u32 val;
> >> -	u32 __user *uval = uaddr;
> >> +	u64 __user *uval = uaddr;
> > 
> > What? Has the user API changed while I wasn't looking? Getting CCSIDR
> > can only return a 32bit quantity on AArch32. In fact, CCSIDR is
> > *always* 32bit, CCIDX or not. I have no idea what you are trying to do
> > here, but at best you are now corrupting 32bit of userspace.
> > 
> 
> Oops, I really missed this.
> 
> >>  
> >>  	/* Fail if we have unknown bits set. */
> >>  	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
> >> @@ -2545,8 +2555,9 @@ static int demux_c15_get(u64 id, void __user *uaddr)
> >>  
> >>  static int demux_c15_set(u64 id, void __user *uaddr)
> >>  {
> >> -	u32 val, newval;
> >> -	u32 __user *uval = uaddr;
> >> +	u32 val;
> >> +	u64 newval;
> >> +	u64 __user *uval = uaddr;
> > 
> > Same brokenness.
> > 
> >>  
> >>  	/* Fail if we have unknown bits set. */
> >>  	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
> > 
> > What about CCSIDR2_EL1, which is mandatory when FEAT_CCIDX is
> > present? How about the AArch32 handling of that register? I don't
> > think you have thought this one through.
> 
> To be honest, AArch32 is not considered directly and I sent this only
> as RFC.

If you don't consider AArch32 when writing a patch, please don't send
it. AArch32 guests are fully supported, and a patch that breaks them
isn't acceptable.

Also, an RFC patch doesn't mean things are allowed to break. We use RFC
as a way to ask for people's feedback on a design, but the proposed
implementation has to be a valid one. That's not a license to send
broken stuff.

> When I wrote some flush cache code using by set/way mode and
> noticed that CCSIDR_EL1 is used here.
>
> > 
> > Another question is: why should we care at all? Why don't we drop all
> > that and only implement a virtual cache topology? A VM shouldn't care
> > at all about this, and we are already lying about the topology anyway.
> > We could even get the VMM to set whatever stupid topology it wants for
> > the sake of it (and to restore previously created VMs).
> 
> Got it,
> 
> Thanks for your detailed comments.

Thanks,

	M.
diff mbox series

Patch

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 52fdb9a015a4..0dc822cef20b 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -18,6 +18,7 @@ 
 
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
+#include <asm/cpufeature.h>
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
 #include <asm/kvm_arm.h>
@@ -95,9 +96,9 @@  static u32 cache_levels;
 #define CSSELR_MAX 14
 
 /* Which cache CCSIDR represents depends on CSSELR value. */
-static u32 get_ccsidr(u32 csselr)
+static u64 get_ccsidr(u32 csselr)
 {
-	u32 ccsidr;
+	u64 ccsidr;
 
 	/* Make sure noone else changes CSSELR during this! */
 	local_irq_disable();
@@ -1275,12 +1276,16 @@  static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 			  const struct sys_reg_desc *r)
 {
-	u32 csselr;
+	u32 csselr, ccidx;
+	u64 mmfr2;
 
 	if (p->is_write)
 		return write_to_read_only(vcpu, p, r);
 
 	csselr = vcpu_read_sys_reg(vcpu, CSSELR_EL1);
+	mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+	ccidx = cpuid_feature_extract_unsigned_field(mmfr2,
+						     ID_AA64MMFR2_CCIDX_SHIFT);
 	p->regval = get_ccsidr(csselr);
 
 	/*
@@ -1295,8 +1300,13 @@  static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	 * geometry (which is not permitted by the architecture), they would
 	 * only do so for virtually indexed caches.]
 	 */
-	if (!(csselr & 1)) // data or unified cache
-		p->regval &= ~GENMASK(27, 3);
+	if (!(csselr & 1)) { // data or unified cache
+		if (ccidx)
+			p->regval &= ~(GENMASK(23, 3) | GENMASK(55, 32));
+		else
+			p->regval &= ~GENMASK(27, 3);
+	}
+
 	return true;
 }
 
@@ -2521,7 +2531,7 @@  static bool is_valid_cache(u32 val)
 static int demux_c15_get(u64 id, void __user *uaddr)
 {
 	u32 val;
-	u32 __user *uval = uaddr;
+	u64 __user *uval = uaddr;
 
 	/* Fail if we have unknown bits set. */
 	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
@@ -2545,8 +2555,9 @@  static int demux_c15_get(u64 id, void __user *uaddr)
 
 static int demux_c15_set(u64 id, void __user *uaddr)
 {
-	u32 val, newval;
-	u32 __user *uval = uaddr;
+	u32 val;
+	u64 newval;
+	u64 __user *uval = uaddr;
 
 	/* Fail if we have unknown bits set. */
 	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK