diff mbox series

[3/3] arm64: errata: remove BF16 HWCAP due to incorrect result on Cortex-A510

Message ID 20220909165938.3931307-4-james.morse@arm.com (mailing list archive)
State New, archived
Headers show
Series arm64: errata: remove BF16 HWCAP due to incorrect result on Cortex-A510 | expand

Commit Message

James Morse Sept. 9, 2022, 4:59 p.m. UTC
Cortex-A510's erratum #2658417 causes two BF16 instructions to return the
wrong result in rare circumstances when a pair of A510 CPUs are using
shared neon hardware.

The two instructions affected are BFMMLA and VMMLA, support for these is
indicated by the BF16 HWCAP. Remove it on affected platforms.

Signed-off-by: James Morse <james.morse@arm.com>
---
 Documentation/arm64/silicon-errata.rst |  2 ++
 arch/arm64/Kconfig                     | 14 +++++++++++++
 arch/arm64/kernel/cpu_errata.c         | 27 ++++++++++++++++++++++++++
 arch/arm64/tools/cpucaps               |  1 +
 4 files changed, 44 insertions(+)

Comments

Suzuki K Poulose Sept. 12, 2022, 3:30 p.m. UTC | #1
On 09/09/2022 17:59, James Morse wrote:
> Cortex-A510's erratum #2658417 causes two BF16 instructions to return the
> wrong result in rare circumstances when a pair of A510 CPUs are using
> shared neon hardware.
> 
> The two instructions affected are BFMMLA and VMMLA, support for these is
> indicated by the BF16 HWCAP. Remove it on affected platforms.
> 
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
>   Documentation/arm64/silicon-errata.rst |  2 ++
>   arch/arm64/Kconfig                     | 14 +++++++++++++
>   arch/arm64/kernel/cpu_errata.c         | 27 ++++++++++++++++++++++++++
>   arch/arm64/tools/cpucaps               |  1 +
>   4 files changed, 44 insertions(+)
> 
> diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
> index fda97b3fcf01..17d9fc5d14fb 100644
> --- a/Documentation/arm64/silicon-errata.rst
> +++ b/Documentation/arm64/silicon-errata.rst
> @@ -110,6 +110,8 @@ stable kernels.
>   +----------------+-----------------+-----------------+-----------------------------+
>   | ARM            | Cortex-A510     | #2441009        | ARM64_ERRATUM_2441009       |
>   +----------------+-----------------+-----------------+-----------------------------+
> +| ARM            | Cortex-A510     | #2658417        | ARM64_ERRATUM_2658417       |
> ++----------------+-----------------+-----------------+-----------------------------+
>   | ARM            | Cortex-A710     | #2119858        | ARM64_ERRATUM_2119858       |
>   +----------------+-----------------+-----------------+-----------------------------+
>   | ARM            | Cortex-A710     | #2054223        | ARM64_ERRATUM_2054223       |
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 9fb9fff08c94..06eedcc8a2c6 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -733,6 +733,20 @@ config ARM64_ERRATUM_2077057
>   
>   	  If unsure, say Y.
>   
> +config ARM64_ERRATUM_2658417
> +	bool "Cortex-A510: 2658417: remove BF16 support due to incorrect result"
> +	default y
> +	help
> +	  This option adds the workaround for ARM Cortex-A510 erratum 2658417.
> +	  Affected Cortex-A510 may produce the wrong result for BFMMLA or VMMLA
> +	  instructions in rare circumstances when a pair of A510 CPUs are using
> +	  shared neon hardware.
> +	  As the sharing is not discoverable by the kernel, hide the BF16
> +	  HWCAP to indicate that user-space should not be using these
> +	  instructions.
> +
> +	  If unsure, say Y.
> +
>   config ARM64_ERRATUM_2119858
>   	bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode"
>   	default y
> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
> index 53b973b6059f..aef3245a8597 100644
> --- a/arch/arm64/kernel/cpu_errata.c
> +++ b/arch/arm64/kernel/cpu_errata.c
> @@ -121,6 +121,22 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
>   	sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0);
>   }
>   
> +static DEFINE_RAW_SPINLOCK(reg_user_mask_modification);
> +static void __maybe_unused
> +cpu_clear_bf16_from_user_emulation(const struct arm64_cpu_capabilities *__unused)
> +{
> +	struct arm64_ftr_reg *regp;
> +
> +	regp = get_arm64_ftr_reg(SYS_ID_AA64ISAR1_EL1);
> +	if (!regp)
> +		return;
> +
> +	raw_spin_lock(&reg_user_mask_modification);
> +	if (regp->user_mask & ID_AA64ISAR1_EL1_BF16_MASK)
> +		regp->user_mask &= ~ID_AA64ISAR1_EL1_BF16_MASK;
> +	raw_spin_unlock(&reg_user_mask_modification);
> +}
> +
>   #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max)	\
>   	.matches = is_affected_midr_range,			\
>   	.midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max)
> @@ -691,6 +707,17 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
>   		CAP_MIDR_RANGE_LIST(broken_aarch32_aes),
>   		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
>   	},
> +#endif
> +#ifdef CONFIG_ARM64_ERRATUM_2658417
> +	{
> +		.desc = "ARM erratum 2658417",
> +		.capability = ARM64_WORKAROUND_2658417,
> +		/* Cortex-A510 r0p0 - r1p1 */
> +		ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1),

> +		MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)),

nit: Do we need to document this in the Kconfig help text ?

> +		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,

nit: type is populated with ERRATA_MIDR_RANGE macro. So this may be
skipped.

> +		.cpu_enable = cpu_clear_bf16_from_user_emulation,
> +	},
>   #endif
>   	{
>   	}
> diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> index 63b2484ce6c3..f553a7cb1b07 100644
> --- a/arch/arm64/tools/cpucaps
> +++ b/arch/arm64/tools/cpucaps
> @@ -68,6 +68,7 @@ WORKAROUND_2038923
>   WORKAROUND_2064142
>   WORKAROUND_2077057
>   WORKAROUND_2457168
> +WORKAROUND_2658417
>   WORKAROUND_TRBE_OVERWRITE_FILL_MODE
>   WORKAROUND_TSB_FLUSH_FAILURE
>   WORKAROUND_TRBE_WRITE_OUT_OF_RANGE


Suzuki
James Morse Sept. 15, 2022, 1:39 p.m. UTC | #2
Hi Suzuki,

On 12/09/2022 16:30, Suzuki K Poulose wrote:
> On 09/09/2022 17:59, James Morse wrote:
>> Cortex-A510's erratum #2658417 causes two BF16 instructions to return the
>> wrong result in rare circumstances when a pair of A510 CPUs are using
>> shared neon hardware.
>>
>> The two instructions affected are BFMMLA and VMMLA, support for these is
>> indicated by the BF16 HWCAP. Remove it on affected platforms.

>> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
>> index 53b973b6059f..aef3245a8597 100644
>> --- a/arch/arm64/kernel/cpu_errata.c
>> +++ b/arch/arm64/kernel/cpu_errata.c
>> @@ -691,6 +707,17 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
>>           CAP_MIDR_RANGE_LIST(broken_aarch32_aes),
>>           .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
>>       },
>> +#endif
>> +#ifdef CONFIG_ARM64_ERRATUM_2658417
>> +    {
>> +        .desc = "ARM erratum 2658417",
>> +        .capability = ARM64_WORKAROUND_2658417,
>> +        /* Cortex-A510 r0p0 - r1p1 */
>> +        ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1),
> 
>> +        MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)),

> nit: Do we need to document this in the Kconfig help text ?

I agree its unusual, but isn't it a special case of "if your hardware isn't broken the
workaround isn't applied"?

Do you think a section on revisions affected in Kconfig really benefits anyone?
Can't they look at the errata document instead? (that is why the erratum number appears in
so many places)


>> +        .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,

> nit: type is populated with ERRATA_MIDR_RANGE macro. So this may be
> skipped.

Those macro's are just a jungle!


Thanks,

James
diff mbox series

Patch

diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index fda97b3fcf01..17d9fc5d14fb 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -110,6 +110,8 @@  stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A510     | #2441009        | ARM64_ERRATUM_2441009       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A510     | #2658417        | ARM64_ERRATUM_2658417       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2119858        | ARM64_ERRATUM_2119858       |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2054223        | ARM64_ERRATUM_2054223       |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9fb9fff08c94..06eedcc8a2c6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -733,6 +733,20 @@  config ARM64_ERRATUM_2077057
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_2658417
+	bool "Cortex-A510: 2658417: remove BF16 support due to incorrect result"
+	default y
+	help
+	  This option adds the workaround for ARM Cortex-A510 erratum 2658417.
+	  Affected Cortex-A510 may produce the wrong result for BFMMLA or VMMLA
+	  instructions in rare circumstances when a pair of A510 CPUs are using
+	  shared neon hardware.
+	  As the sharing is not discoverable by the kernel, hide the BF16
+	  HWCAP to indicate that user-space should not be using these
+	  instructions.
+
+	  If unsure, say Y.
+
 config ARM64_ERRATUM_2119858
 	bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode"
 	default y
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 53b973b6059f..aef3245a8597 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -121,6 +121,22 @@  cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
 	sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0);
 }
 
+static DEFINE_RAW_SPINLOCK(reg_user_mask_modification);
+static void __maybe_unused
+cpu_clear_bf16_from_user_emulation(const struct arm64_cpu_capabilities *__unused)
+{
+	struct arm64_ftr_reg *regp;
+
+	regp = get_arm64_ftr_reg(SYS_ID_AA64ISAR1_EL1);
+	if (!regp)
+		return;
+
+	raw_spin_lock(&reg_user_mask_modification);
+	if (regp->user_mask & ID_AA64ISAR1_EL1_BF16_MASK)
+		regp->user_mask &= ~ID_AA64ISAR1_EL1_BF16_MASK;
+	raw_spin_unlock(&reg_user_mask_modification);
+}
+
 #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max)	\
 	.matches = is_affected_midr_range,			\
 	.midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max)
@@ -691,6 +707,17 @@  const struct arm64_cpu_capabilities arm64_errata[] = {
 		CAP_MIDR_RANGE_LIST(broken_aarch32_aes),
 		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
 	},
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_2658417
+	{
+		.desc = "ARM erratum 2658417",
+		.capability = ARM64_WORKAROUND_2658417,
+		/* Cortex-A510 r0p0 - r1p1 */
+		ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1),
+		MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)),
+		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+		.cpu_enable = cpu_clear_bf16_from_user_emulation,
+	},
 #endif
 	{
 	}
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 63b2484ce6c3..f553a7cb1b07 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -68,6 +68,7 @@  WORKAROUND_2038923
 WORKAROUND_2064142
 WORKAROUND_2077057
 WORKAROUND_2457168
+WORKAROUND_2658417
 WORKAROUND_TRBE_OVERWRITE_FILL_MODE
 WORKAROUND_TSB_FLUSH_FAILURE
 WORKAROUND_TRBE_WRITE_OUT_OF_RANGE