[RFC,1/7] KVM: arm64: Detect and enable PBHA for stage2

Message ID 20211015161416.2196-2-james.morse@arm.com (mailing list archive)
State New, archived
Series arm64: mm: Prototype to allow drivers to request PBHA values

Commit Message

James Morse Oct. 15, 2021, 4:14 p.m. UTC
Page Based Hardware Attributes (PBHA, aka HPDS2) allow a page table entry
to specify up to four bits that can be used by the hardware for some
implementation defined purpose.
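
For illustration only (not part of this patch): PBHA occupies bits [62:59] of
a block or page descriptor, matching the KVM_PGTABLE_PROT_PBHA* bits added
below. A minimal sketch of a stage-1 helper carrying such a value, where
PTE_PBHA_MASK and pte_set_pbha() are made-up names, not existing kernel API:

	/* needs <linux/bitfield.h> for FIELD_PREP() */
	#define PTE_PBHA_MASK	GENMASK_ULL(62, 59)	/* IMP DEF attribute bits */

	static inline pte_t pte_set_pbha(pte_t pte, u64 val)
	{
		/* val is the 4-bit IMPLEMENTATION DEFINED value for this page */
		pte_val(pte) &= ~PTE_PBHA_MASK;
		pte_val(pte) |= FIELD_PREP(PTE_PBHA_MASK, val & 0xf);
		return pte;
	}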

This is a problem for KVM guests as the host may swap guest memory using
a different combination of PBHA bits than the guest used when writing the
data. Without knowing what the PBHA bits do, it's not possible to know if
this will corrupt the guest's data.

The Arm ARM doesn't describe how the PBHA bits are combined between stage1
and stage2. Arm's Cortex CPUs appear to all do the same thing: stage2 wins.

Enable PBHA for stage2, where the configured value is zero. This has no
effect if PBHA isn't in use. On Cortex cores that have the 'stage2 wins'
behaviour, this disables whatever the guest may be doing. For any other
core with a sensible combination policy, it should be harmless.

Signed-off-by: James Morse <james.morse@arm.com>
---
I've checked the TRMs for Neoverse-N1 and for Cortex A76, A77, A78 and X1.
They all have this 'stage2 wins' behaviour. The behaviour isn't documented
in the A510 or A710 TRMs.
---
 arch/arm64/include/asm/kvm_arm.h     | 1 +
 arch/arm64/include/asm/kvm_pgtable.h | 9 +++++++++
 arch/arm64/kernel/cpufeature.c       | 9 +++++++++
 arch/arm64/kvm/hyp/pgtable.c         | 9 +++++++++
 arch/arm64/tools/cpucaps             | 1 +
 5 files changed, 29 insertions(+)

Comments

Marc Zyngier Oct. 16, 2021, 1:27 p.m. UTC | #1
Hi James,

On Fri, 15 Oct 2021 17:14:10 +0100,
James Morse <james.morse@arm.com> wrote:
> 
> Page Based Hardware Attributes (PBHA, aka HPDS2) allow a page table entry
> to specify up to four bits that can be used by the hardware for some
> implementation defined purpose.
> 
> This is a problem for KVM guests as the host may swap guest memory using
> a different combination of PBHA bits than the guest used when writing the
> data. Without knowing what the PBHA bits do, it's not possible to know if
> this will corrupt the guest's data.
> 
> The Arm ARM doesn't describe how the PBHA bits are combined between stage1
> and stage2. Arm's Cortex CPUs appear to all do the same thing: stage2 wins.
> 
> Enable PBHA for stage2, where the configured value is zero. This has no
> effect if PBHA isn't in use. On Cortex cores that have the 'stage2 wins'
> behaviour, this disables whatever the guest may be doing. For any other
> core with a sensible combination policy, it should be harmless.

So the other side of the above is whether it has the potential to be
harmful on systems that combine PBHA bits differently. Especially if
they use the VTCR_EL2.PBHA bits as an indication that they can OR S1
and S2 instead of applying a direct S2 override, thus letting out S1
bits that would otherwise not be conveyed outside of the core.

I guess we have no way to know until someone reports really bad memory
corruption and loss of data. The architecture is totally broken here.

> 
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> I've checked the TRMs for Neoverse-N1 and for Cortex A76, A77, A78 and X1.
> They all have this 'stage2 wins' behaviour. The behaviour isn't documented
> in the A510 or A710 TRMs.
> ---
>  arch/arm64/include/asm/kvm_arm.h     | 1 +
>  arch/arm64/include/asm/kvm_pgtable.h | 9 +++++++++
>  arch/arm64/kernel/cpufeature.c       | 9 +++++++++
>  arch/arm64/kvm/hyp/pgtable.c         | 9 +++++++++
>  arch/arm64/tools/cpucaps             | 1 +
>  5 files changed, 29 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index 327120c0089f..bab7f0ad3724 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -126,6 +126,7 @@
>  #define VTCR_EL2_VS_SHIFT	19
>  #define VTCR_EL2_VS_8BIT	(0 << VTCR_EL2_VS_SHIFT)
>  #define VTCR_EL2_VS_16BIT	(1 << VTCR_EL2_VS_SHIFT)
> +#define VTCR_EL2_PBHA_MASK	GENMASK(28, 25)
>  
>  #define VTCR_EL2_T0SZ(x)	TCR_T0SZ(x)
>  
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index 027783829584..678bff4bfd7f 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -125,6 +125,10 @@ enum kvm_pgtable_stage2_flags {
>   * @KVM_PGTABLE_PROT_SW1:	Software bit 1.
>   * @KVM_PGTABLE_PROT_SW2:	Software bit 2.
>   * @KVM_PGTABLE_PROT_SW3:	Software bit 3.
> + * @KVM_PGTABLE_PROT_PBHA0:	Page-Based Hardware Attribute 0.
> + * @KVM_PGTABLE_PROT_PBHA1:	Page-Based Hardware Attribute 1.
> + * @KVM_PGTABLE_PROT_PBHA2:	Page-Based Hardware Attribute 2.
> + * @KVM_PGTABLE_PROT_PBHA3:	Page-Based Hardware Attribute 3.
>   */
>  enum kvm_pgtable_prot {
>  	KVM_PGTABLE_PROT_X			= BIT(0),
> @@ -137,6 +141,11 @@ enum kvm_pgtable_prot {
>  	KVM_PGTABLE_PROT_SW1			= BIT(56),
>  	KVM_PGTABLE_PROT_SW2			= BIT(57),
>  	KVM_PGTABLE_PROT_SW3			= BIT(58),
> +
> +	KVM_PGTABLE_PROT_PBHA0			= BIT(59),
> +	KVM_PGTABLE_PROT_PBHA1			= BIT(60),
> +	KVM_PGTABLE_PROT_PBHA2			= BIT(61),
> +	KVM_PGTABLE_PROT_PBHA3			= BIT(62),
>  };
>  
>  #define KVM_PGTABLE_PROT_RW	(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index f8a3067d10c6..8694f9dec5e5 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -2328,6 +2328,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>  		.matches = has_cpuid_feature,
>  		.min_field_value = 1,
>  	},
> +	{
> +		.capability = ARM64_HAS_PBHA,
> +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
> +		.sys_reg = SYS_ID_AA64MMFR1_EL1,
> +		.sign = FTR_UNSIGNED,
> +		.field_pos = ID_AA64MMFR1_HPD_SHIFT,
> +		.matches = has_cpuid_feature,
> +		.min_field_value = 2,
> +	},
>  	{},
>  };
>  
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index f8ceebe4982e..7bd90ea1c61f 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -540,6 +540,15 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
>  	 */
>  	vtcr |= VTCR_EL2_HA;
>  
> +	/*
> +	 * Enable PBHA for stage2 on systems that support it. The configured
> +	 * value will always be 0, which is defined as the safe default
> +	 * setting. On Cortex cores, enabling PBHA for stage2 effectively
> +	 * disables it for stage1.
> +	 */
> +	if (cpus_have_final_cap(ARM64_HAS_PBHA))
> +		vtcr = FIELD_PREP(VTCR_EL2_PBHA_MASK, 0xf);

Err... Surely you mean 'vtcr |= FIELD_PREP(...)' here, right?
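
That is, preserving the bits already accumulated in vtcr, along the lines of:

	if (cpus_have_final_cap(ARM64_HAS_PBHA))
		vtcr |= FIELD_PREP(VTCR_EL2_PBHA_MASK, 0xf);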

> +
>  	/* Set the vmid bits */
>  	vtcr |= (get_vmid_bits(mmfr1) == 16) ?
>  		VTCR_EL2_VS_16BIT :
> diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> index 49305c2e6dfd..132596d8b518 100644
> --- a/arch/arm64/tools/cpucaps
> +++ b/arch/arm64/tools/cpucaps
> @@ -28,6 +28,7 @@ HAS_LSE_ATOMICS
>  HAS_NO_FPSIMD
>  HAS_NO_HW_PREFETCH
>  HAS_PAN
> +HAS_PBHA
>  HAS_RAS_EXTN
>  HAS_RNG
>  HAS_SB

Thanks,

	M.
James Morse Oct. 18, 2021, 5:26 p.m. UTC | #2
Hi Marc,

On 16/10/2021 14:27, Marc Zyngier wrote:
> On Fri, 15 Oct 2021 17:14:10 +0100,
> James Morse <james.morse@arm.com> wrote:
>>
>> Page Based Hardware Attributes (PBHA, aka HPDS2) allow a page table entry
>> to specify up to four bits that can be used by the hardware for some
>> implementation defined purpose.
>>
>> This is a problem for KVM guests as the host may swap guest memory using
>> a different combination of PBHA bits than the guest used when writing the
>> data. Without knowing what the PBHA bits do, it's not possible to know if
>> this will corrupt the guest's data.
>>
>> The Arm ARM doesn't describe how the PBHA bits are combined between stage1
>> and stage2. Arm's Cortex CPUs appear to all do the same thing: stage2 wins.
>>
>> Enable PBHA for stage2, where the configured value is zero. This has no
>> effect if PBHA isn't in use. On Cortex cores that have the 'stage2 wins'
>> behaviour, this disables whatever the guest may be doing. For any other
>> core with a sensible combination policy, it should be harmless.

> So the other side of the above is whether it has the potential to be
> harmful on systems that combine PBHA bits differently. Especially if
> they use the VTCR_EL2.PBHA bits as an indication that they can OR S1
> and S2 instead of applying a direct S2 override, thus letting out S1
> bits that would otherwise not be conveyed outside of the core.

xor-ing them together would be more fun - and equally valid in this imp-def world!


> I guess we have no way to know until someone reports really bad memory
> corruption and loss of data. The architecture is totally broken here.

The alternative is to make this depend on the list of CPUs where we know the
combining behaviour. That isn't great either, as it's an unmaintainable and
outdated list of all Cortex cores.
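
An untested sketch of what that could look like, reusing has_cpuid_feature()
and the MIDR helpers from cputype.h (the pbha_stage2_wins list and the
has_pbha_stage2_wins() callback are hypothetical names):

	static const struct midr_range pbha_stage2_wins[] = {
		MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
		MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
		MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
		MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
		MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
		{},
	};

	static bool has_pbha_stage2_wins(const struct arm64_cpu_capabilities *entry,
					 int scope)
	{
		/* HPDS2 advertised, and a core whose S1/S2 combining we know */
		return has_cpuid_feature(entry, scope) &&
		       is_midr_in_range_list(read_cpuid_id(), pbha_stage2_wins);
	}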


>> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
>> index f8ceebe4982e..7bd90ea1c61f 100644
>> --- a/arch/arm64/kvm/hyp/pgtable.c
>> +++ b/arch/arm64/kvm/hyp/pgtable.c
>> @@ -540,6 +540,15 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
>>  	 */
>>  	vtcr |= VTCR_EL2_HA;
>>  
>> +	/*
>> +	 * Enable PBHA for stage2 on systems that support it. The configured
>> +	 * value will always be 0, which is defined as the safe default
>> +	 * setting. On Cortex cores, enabling PBHA for stage2 effectively
>> +	 * disables it for stage1.
>> +	 */
>> +	if (cpus_have_final_cap(ARM64_HAS_PBHA))
>> +		vtcr = FIELD_PREP(VTCR_EL2_PBHA_MASK, 0xf);

> Err... Surely you mean 'vtcr |= FIELD_PREP(...)' here, right?

Gah! I'm off to the stationery cupboard for a brown paper bag...



Thanks,

James

Patch

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 327120c0089f..bab7f0ad3724 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -126,6 +126,7 @@ 
 #define VTCR_EL2_VS_SHIFT	19
 #define VTCR_EL2_VS_8BIT	(0 << VTCR_EL2_VS_SHIFT)
 #define VTCR_EL2_VS_16BIT	(1 << VTCR_EL2_VS_SHIFT)
+#define VTCR_EL2_PBHA_MASK	GENMASK(28, 25)
 
 #define VTCR_EL2_T0SZ(x)	TCR_T0SZ(x)
 
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 027783829584..678bff4bfd7f 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -125,6 +125,10 @@  enum kvm_pgtable_stage2_flags {
  * @KVM_PGTABLE_PROT_SW1:	Software bit 1.
  * @KVM_PGTABLE_PROT_SW2:	Software bit 2.
  * @KVM_PGTABLE_PROT_SW3:	Software bit 3.
+ * @KVM_PGTABLE_PROT_PBHA0:	Page-Based Hardware Attribute 0.
+ * @KVM_PGTABLE_PROT_PBHA1:	Page-Based Hardware Attribute 1.
+ * @KVM_PGTABLE_PROT_PBHA2:	Page-Based Hardware Attribute 2.
+ * @KVM_PGTABLE_PROT_PBHA3:	Page-Based Hardware Attribute 3.
  */
 enum kvm_pgtable_prot {
 	KVM_PGTABLE_PROT_X			= BIT(0),
@@ -137,6 +141,11 @@  enum kvm_pgtable_prot {
 	KVM_PGTABLE_PROT_SW1			= BIT(56),
 	KVM_PGTABLE_PROT_SW2			= BIT(57),
 	KVM_PGTABLE_PROT_SW3			= BIT(58),
+
+	KVM_PGTABLE_PROT_PBHA0			= BIT(59),
+	KVM_PGTABLE_PROT_PBHA1			= BIT(60),
+	KVM_PGTABLE_PROT_PBHA2			= BIT(61),
+	KVM_PGTABLE_PROT_PBHA3			= BIT(62),
 };
 
 #define KVM_PGTABLE_PROT_RW	(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index f8a3067d10c6..8694f9dec5e5 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2328,6 +2328,15 @@  static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.min_field_value = 1,
 	},
+	{
+		.capability = ARM64_HAS_PBHA,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.sys_reg = SYS_ID_AA64MMFR1_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64MMFR1_HPD_SHIFT,
+		.matches = has_cpuid_feature,
+		.min_field_value = 2,
+	},
 	{},
 };
 
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f8ceebe4982e..7bd90ea1c61f 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -540,6 +540,15 @@  u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
 	 */
 	vtcr |= VTCR_EL2_HA;
 
+	/*
+	 * Enable PBHA for stage2 on systems that support it. The configured
+	 * value will always be 0, which is defined as the safe default
+	 * setting. On Cortex cores, enabling PBHA for stage2 effectively
+	 * disables it for stage1.
+	 */
+	if (cpus_have_final_cap(ARM64_HAS_PBHA))
+		vtcr = FIELD_PREP(VTCR_EL2_PBHA_MASK, 0xf);
+
 	/* Set the vmid bits */
 	vtcr |= (get_vmid_bits(mmfr1) == 16) ?
 		VTCR_EL2_VS_16BIT :
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 49305c2e6dfd..132596d8b518 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -28,6 +28,7 @@  HAS_LSE_ATOMICS
 HAS_NO_FPSIMD
 HAS_NO_HW_PREFETCH
 HAS_PAN
+HAS_PBHA
 HAS_RAS_EXTN
 HAS_RNG
 HAS_SB