diff mbox series

[v2,1/2] arm64: errata: Remove AES hwcap for COMPAT tasks

Message ID 20220413170545.3042558-2-james.morse@arm.com (mailing list archive)
State New, archived
Headers show
Series ARM/arm64: errata: Remove AES hwcap for 32bit tasks on A57/A72 | expand

Commit Message

James Morse April 13, 2022, 5:05 p.m. UTC
Cortex-A57 and Cortex-A72 have an erratum where an interrupt that
occurs between a pair of AES instructions in aarch32 mode may corrupt
the ELR. The task will subsequently produce the wrong AES result.

The AES instructions are part of the cryptographic extensions, which are
optional. User-space software will detect the support for these
instructions from the hwcaps. If the platform doesn't support these
instructions a software implementation should be used.

Remove the hwcap bits on affected parts to indicate user-space should
not use the AES instructions.

Signed-off-by: James Morse <james.morse@arm.com>
---
 Documentation/arm64/silicon-errata.rst |  4 ++++
 arch/arm64/Kconfig                     | 16 ++++++++++++++++
 arch/arm64/kernel/cpu_errata.c         | 16 ++++++++++++++++
 arch/arm64/kernel/cpufeature.c         | 11 ++++++++++-
 arch/arm64/tools/cpucaps               |  1 +
 5 files changed, 47 insertions(+), 1 deletion(-)

Comments

Ard Biesheuvel April 13, 2022, 5:33 p.m. UTC | #1
Hi James,

On Wed, 13 Apr 2022 at 19:06, James Morse <james.morse@arm.com> wrote:
>
> Cortex-A57 and Cortex-A72 have an erratum where an interrupt that
> occurs between a pair of AES instructions in aarch32 mode may corrupt
> the ELR. The task will subsequently produce the wrong AES result.
>
> The AES instructions are part of the cryptographic extensions, which are
> optional. User-space software will detect the support for these
> instructions from the hwcaps. If the platform doesn't support these
> instructions a software implementation should be used.
>
> Remove the hwcap bits on affected parts to indicate user-space should
> not use the AES instructions.
>
> Signed-off-by: James Morse <james.morse@arm.com>

Acked-by: Ard Biesheuvel <ardb@kernel.org>

One nit/question below,

> ---
>  Documentation/arm64/silicon-errata.rst |  4 ++++
>  arch/arm64/Kconfig                     | 16 ++++++++++++++++
>  arch/arm64/kernel/cpu_errata.c         | 16 ++++++++++++++++
>  arch/arm64/kernel/cpufeature.c         | 11 ++++++++++-
>  arch/arm64/tools/cpucaps               |  1 +
>  5 files changed, 47 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
> index 466cb9e89047..053dc12696b5 100644
> --- a/Documentation/arm64/silicon-errata.rst
> +++ b/Documentation/arm64/silicon-errata.rst
> @@ -82,10 +82,14 @@ stable kernels.
>  +----------------+-----------------+-----------------+-----------------------------+
>  | ARM            | Cortex-A57      | #1319537        | ARM64_ERRATUM_1319367       |
>  +----------------+-----------------+-----------------+-----------------------------+
> +| ARM            | Cortex-A57      | #1742098        | ARM64_ERRATUM_1742098       |
> ++----------------+-----------------+-----------------+-----------------------------+
>  | ARM            | Cortex-A72      | #853709         | N/A                         |
>  +----------------+-----------------+-----------------+-----------------------------+
>  | ARM            | Cortex-A72      | #1319367        | ARM64_ERRATUM_1319367       |
>  +----------------+-----------------+-----------------+-----------------------------+
> +| ARM            | Cortex-A72      | #1655431        | ARM64_ERRATUM_1742098       |
> ++----------------+-----------------+-----------------+-----------------------------+
>  | ARM            | Cortex-A73      | #858921         | ARM64_ERRATUM_858921        |
>  +----------------+-----------------+-----------------+-----------------------------+
>  | ARM            | Cortex-A76      | #1188873,1418040| ARM64_ERRATUM_1418040       |
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 57c4c995965f..df19e60c4c46 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -491,6 +491,22 @@ config ARM64_ERRATUM_834220
>
>           If unsure, say Y.
>
> +config ARM64_ERRATUM_1742098
> +       bool "Cortex-A57/A72: 1742098: ELR recorded incorrectly on interrupt taken between cryptographic instructions in a sequence"
> +       depends on COMPAT
> +       default y
> +       help
> +         This option removes the AES hwcap for aarch32 user-space to
> +         workaround erratum 1742098 on Cortex-A57 and Cortex-A72.
> +
> +         Affected parts may corrupt the AES state if an interrupt is
> +         taken between a pair of AES instructions. These instructions
> +         are only present if the cryptography extensions are present.
> +         All software should have a fallback implementation for CPUs
> +         that don't implement the cryptography extensions.
> +
> +         If unsure, say Y.
> +
>  config ARM64_ERRATUM_845719
>         bool "Cortex-A53: 845719: a load might read incorrect data"
>         depends on COMPAT
> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
> index 4c9b5b4b7a0b..8f85dac4cd79 100644
> --- a/arch/arm64/kernel/cpu_errata.c
> +++ b/arch/arm64/kernel/cpu_errata.c
> @@ -393,6 +393,14 @@ static struct midr_range trbe_write_out_of_range_cpus[] = {
>  };
>  #endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
>
> +#ifdef CONFIG_ARM64_ERRATUM_1742098
> +static struct midr_range broken_aarch32_aes[] = {
> +       MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),

Not sure it matters, but are you sure early A57 is affected as well?

> +       MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
> +       {},
> +};
> +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
> +
>  const struct arm64_cpu_capabilities arm64_errata[] = {
>  #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
>         {
> @@ -655,6 +663,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
>                 /* Cortex-A510 r0p0 - r0p1 */
>                 ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1)
>         },
> +#endif
> +#ifdef CONFIG_ARM64_ERRATUM_1742098
> +       {
> +               .desc = "ARM erratum 1742098",
> +               .capability = ARM64_WORKAROUND_1742098,
> +               CAP_MIDR_RANGE_LIST(broken_aarch32_aes),
> +               .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
> +       },
>  #endif
>         {
>         }
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index d72c4b4d389c..3faf16f1c040 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -1922,6 +1922,12 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
>  }
>  #endif /* CONFIG_ARM64_MTE */
>
> +static void elf_hwcap_fixup(void)
> +{
> +       if (cpus_have_const_cap(ARM64_WORKAROUND_1742098))
> +               compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES;
> +}
> +
>  #ifdef CONFIG_KVM
>  static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
>  {
> @@ -3034,8 +3040,10 @@ void __init setup_cpu_features(void)
>         setup_system_capabilities();
>         setup_elf_hwcaps(arm64_elf_hwcaps);
>
> -       if (system_supports_32bit_el0())
> +       if (system_supports_32bit_el0()) {
>                 setup_elf_hwcaps(compat_elf_hwcaps);
> +               elf_hwcap_fixup();
> +       }
>
>         if (system_uses_ttbr0_pan())
>                 pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
> @@ -3087,6 +3095,7 @@ static int enable_mismatched_32bit_el0(unsigned int cpu)
>                                                          cpu_active_mask);
>         get_cpu_device(lucky_winner)->offline_disabled = true;
>         setup_elf_hwcaps(compat_elf_hwcaps);
> +       elf_hwcap_fixup();
>         pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n",
>                 cpu, lucky_winner);
>         return 0;
> diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> index 3ed418f70e3b..8cd6088f8875 100644
> --- a/arch/arm64/tools/cpucaps
> +++ b/arch/arm64/tools/cpucaps
> @@ -58,6 +58,7 @@ WORKAROUND_1418040
>  WORKAROUND_1463225
>  WORKAROUND_1508412
>  WORKAROUND_1542419
> +WORKAROUND_1742098
>  WORKAROUND_1902691
>  WORKAROUND_2038923
>  WORKAROUND_2064142
> --
> 2.30.2
>
Will Deacon April 14, 2022, 10:03 a.m. UTC | #2
On Wed, Apr 13, 2022 at 06:05:44PM +0100, James Morse wrote:
> Cortex-A57 and Cortex-A72 have an erratum where an interrupt that
> occurs between a pair of AES instructions in aarch32 mode may corrupt
> the ELR. The task will subsequently produce the wrong AES result.
> 
> The AES instructions are part of the cryptographic extensions, which are
> optional. User-space software will detect the support for these
> instructions from the hwcaps. If the platform doesn't support these
> instructions a software implementation should be used.
> 
> Remove the hwcap bits on affected parts to indicate user-space should
> not use the AES instructions.
> 
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
>  Documentation/arm64/silicon-errata.rst |  4 ++++
>  arch/arm64/Kconfig                     | 16 ++++++++++++++++
>  arch/arm64/kernel/cpu_errata.c         | 16 ++++++++++++++++
>  arch/arm64/kernel/cpufeature.c         | 11 ++++++++++-
>  arch/arm64/tools/cpucaps               |  1 +
>  5 files changed, 47 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
> index 466cb9e89047..053dc12696b5 100644
> --- a/Documentation/arm64/silicon-errata.rst
> +++ b/Documentation/arm64/silicon-errata.rst
> @@ -82,10 +82,14 @@ stable kernels.
>  +----------------+-----------------+-----------------+-----------------------------+
>  | ARM            | Cortex-A57      | #1319537        | ARM64_ERRATUM_1319367       |
>  +----------------+-----------------+-----------------+-----------------------------+
> +| ARM            | Cortex-A57      | #1742098        | ARM64_ERRATUM_1742098       |
> ++----------------+-----------------+-----------------+-----------------------------+
>  | ARM            | Cortex-A72      | #853709         | N/A                         |
>  +----------------+-----------------+-----------------+-----------------------------+
>  | ARM            | Cortex-A72      | #1319367        | ARM64_ERRATUM_1319367       |
>  +----------------+-----------------+-----------------+-----------------------------+
> +| ARM            | Cortex-A72      | #1655431        | ARM64_ERRATUM_1742098       |
> ++----------------+-----------------+-----------------+-----------------------------+
>  | ARM            | Cortex-A73      | #858921         | ARM64_ERRATUM_858921        |
>  +----------------+-----------------+-----------------+-----------------------------+
>  | ARM            | Cortex-A76      | #1188873,1418040| ARM64_ERRATUM_1418040       |
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 57c4c995965f..df19e60c4c46 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -491,6 +491,22 @@ config ARM64_ERRATUM_834220
>  
>  	  If unsure, say Y.
>  
> +config ARM64_ERRATUM_1742098
> +	bool "Cortex-A57/A72: 1742098: ELR recorded incorrectly on interrupt taken between cryptographic instructions in a sequence"
> +	depends on COMPAT
> +	default y
> +	help
> +	  This option removes the AES hwcap for aarch32 user-space to
> +	  workaround erratum 1742098 on Cortex-A57 and Cortex-A72.
> +
> +	  Affected parts may corrupt the AES state if an interrupt is
> +	  taken between a pair of AES instructions. These instructions
> +	  are only present if the cryptography extensions are present.
> +	  All software should have a fallback implementation for CPUs
> +	  that don't implement the cryptography extensions.
> +
> +	  If unsure, say Y.
> +
>  config ARM64_ERRATUM_845719
>  	bool "Cortex-A53: 845719: a load might read incorrect data"
>  	depends on COMPAT
> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
> index 4c9b5b4b7a0b..8f85dac4cd79 100644
> --- a/arch/arm64/kernel/cpu_errata.c
> +++ b/arch/arm64/kernel/cpu_errata.c
> @@ -393,6 +393,14 @@ static struct midr_range trbe_write_out_of_range_cpus[] = {
>  };
>  #endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
>  
> +#ifdef CONFIG_ARM64_ERRATUM_1742098
> +static struct midr_range broken_aarch32_aes[] = {
> +	MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
> +	MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
> +	{},
> +};
> +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */

Comment here is wrong ^^^

> +
>  const struct arm64_cpu_capabilities arm64_errata[] = {
>  #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
>  	{
> @@ -655,6 +663,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
>  		/* Cortex-A510 r0p0 - r0p1 */
>  		ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1)
>  	},
> +#endif
> +#ifdef CONFIG_ARM64_ERRATUM_1742098
> +	{
> +		.desc = "ARM erratum 1742098",
> +		.capability = ARM64_WORKAROUND_1742098,
> +		CAP_MIDR_RANGE_LIST(broken_aarch32_aes),
> +		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
> +	},
>  #endif
>  	{
>  	}
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index d72c4b4d389c..3faf16f1c040 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -1922,6 +1922,12 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
>  }
>  #endif /* CONFIG_ARM64_MTE */
>  
> +static void elf_hwcap_fixup(void)
> +{
> +	if (cpus_have_const_cap(ARM64_WORKAROUND_1742098))
> +		compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES;
> +}

How does this deal with big/little if we late online an affected CPU?  It
would probably be easier if we treated these CPUs as not having the 32-bit
AES instructions at all (rather than removing the hwcap later), then the
early cap check would prevent late onlining.

Will
James Morse April 14, 2022, 5:43 p.m. UTC | #3
Hi Will,

(CC: +Suzuki)

On 14/04/2022 11:03, Will Deacon wrote:
> On Wed, Apr 13, 2022 at 06:05:44PM +0100, James Morse wrote:
>> Cortex-A57 and Cortex-A72 have an erratum where an interrupt that
>> occurs between a pair of AES instructions in aarch32 mode may corrupt
>> the ELR. The task will subsequently produce the wrong AES result.
>>
>> The AES instructions are part of the cryptographic extensions, which are
>> optional. User-space software will detect the support for these
>> instructions from the hwcaps. If the platform doesn't support these
>> instructions a software implementation should be used.
>>
>> Remove the hwcap bits on affected parts to indicate user-space should
>> not use the AES instructions.

>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>> index d72c4b4d389c..3faf16f1c040 100644
>> --- a/arch/arm64/kernel/cpufeature.c
>> +++ b/arch/arm64/kernel/cpufeature.c
>> @@ -1922,6 +1922,12 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
>>  }
>>  #endif /* CONFIG_ARM64_MTE */
>>  
>> +static void elf_hwcap_fixup(void)
>> +{
>> +	if (cpus_have_const_cap(ARM64_WORKAROUND_1742098))
>> +		compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES;
>> +}
> 
> How does this deal with big/little if we late online an affected CPU?  It
> would probably be easier if we treated these CPUs as not having the 32-bit
> AES instructions at all (rather than removing the hwcap later), then the
> early cap check would prevent late onlining.

I thought any new CPU to online late with a new errata was rejected by the
type == ARM64_CPUCAP_LOCAL_CPU_ERRATUM. Suzuki's documentation in cpufeature.h has:
| However, it is not safe if a "late" CPU requires a workaround and the system hasn't
| enabled it already.

In this case verify_local_cpu_caps() would take the else for 'system_has_cap', and because
the cpu matches, but ARM64_CPUCAP_LOCAL_CPU_ERRATUM doesn't have the 'late cpu permitted'
bit set, it should call cpu_die_early().

That said - I haven't tested this configuration. (I'll give it a go with the model)


v1 did as you suggest - but the HWCAPs are built from the id registers, and touching the
id registers will regress KVM guest migration as the id registers are both visible to
Qemu, and invariant.


Thanks,

James
James Morse April 14, 2022, 5:45 p.m. UTC | #4
Hi Ard,

On 13/04/2022 18:33, Ard Biesheuvel wrote:
> On Wed, 13 Apr 2022 at 19:06, James Morse <james.morse@arm.com> wrote:
>>
>> Cortex-A57 and Cortex-A72 have an erratum where an interrupt that
>> occurs between a pair of AES instructions in aarch32 mode may corrupt
>> the ELR. The task will subsequently produce the wrong AES result.
>>
>> The AES instructions are part of the cryptographic extensions, which are
>> optional. User-space software will detect the support for these
>> instructions from the hwcaps. If the platform doesn't support these
>> instructions a software implementation should be used.
>>
>> Remove the hwcap bits on affected parts to indicate user-space should
>> not use the AES instructions.
>>
>> Signed-off-by: James Morse <james.morse@arm.com>
> 
> Acked-by: Ard Biesheuvel <ardb@kernel.org>

Thanks!

> One nit/question below,


>> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
>> index 4c9b5b4b7a0b..8f85dac4cd79 100644
>> --- a/arch/arm64/kernel/cpu_errata.c
>> +++ b/arch/arm64/kernel/cpu_errata.c
>> @@ -393,6 +393,14 @@ static struct midr_range trbe_write_out_of_range_cpus[] = {
>>  };
>>  #endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
>>
>> +#ifdef CONFIG_ARM64_ERRATUM_1742098
>> +static struct midr_range broken_aarch32_aes[] = {
>> +       MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),

> Not sure it matters, but are you sure early A57 is affected as well?

That's what I remember reading last time too - but this is what the errata document on
developer.arm.com says. It's something I'm chasing up...


Thanks,

James
Will Deacon April 20, 2022, 10:17 a.m. UTC | #5
On Thu, Apr 14, 2022 at 06:43:32PM +0100, James Morse wrote:
> On 14/04/2022 11:03, Will Deacon wrote:
> > On Wed, Apr 13, 2022 at 06:05:44PM +0100, James Morse wrote:
> >> Cortex-A57 and Cortex-A72 have an erratum where an interrupt that
> >> occurs between a pair of AES instructions in aarch32 mode may corrupt
> >> the ELR. The task will subsequently produce the wrong AES result.
> >>
> >> The AES instructions are part of the cryptographic extensions, which are
> >> optional. User-space software will detect the support for these
> >> instructions from the hwcaps. If the platform doesn't support these
> >> instructions a software implementation should be used.
> >>
> >> Remove the hwcap bits on affected parts to indicate user-space should
> >> not use the AES instructions.
> 
> >> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> >> index d72c4b4d389c..3faf16f1c040 100644
> >> --- a/arch/arm64/kernel/cpufeature.c
> >> +++ b/arch/arm64/kernel/cpufeature.c
> >> @@ -1922,6 +1922,12 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
> >>  }
> >>  #endif /* CONFIG_ARM64_MTE */
> >>  
> >> +static void elf_hwcap_fixup(void)
> >> +{
> >> +	if (cpus_have_const_cap(ARM64_WORKAROUND_1742098))
> >> +		compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES;
> >> +}
> > 
> > How does this deal with big/little if we late online an affected CPU?  It
> > would probably be easier if we treated these CPUs as not having the 32-bit
> > AES instructions at all (rather than removing the hwcap later), then the
> > early cap check would prevent late onlining.
> 
> I thought any new CPU to online late with a new errata was rejected by the
> type == ARM64_CPUCAP_LOCAL_CPU_ERRATUM. Suzuki's documentation in cpufeature.h has:
> | However, it is not safe if a "late" CPU requires a workaround and the system hasn't
> | enabled it already.
> 
> In this case verify_local_cpu_caps() would take the else for 'system_has_cap', and because
> the cpu matches, but ARM64_CPUCAP_LOCAL_CPU_ERRATUM doesn't have the 'late cpu permitted'
> bit set, it should call cpu_die_early().
> 
> That said - I haven't tested this configuration. (I'll give it a go with the model)

Ah yes, that probably works, but please do test it to confirm.

> v1 did as you suggest - but the HWCAPs are built from the id registers, and touching the
> id registers will regress KVM guest migration as the id registers are both visible to
> Qemu, and invariant.

Hmm, is that really something we expect to work in general? It seems to me
that any erratum workaround which effectively removes functionality is going
to be a blocker for migration.

Will
James Morse July 14, 2022, 4:05 p.m. UTC | #6
Hi Will,

On 20/04/2022 11:17, Will Deacon wrote:
> On Thu, Apr 14, 2022 at 06:43:32PM +0100, James Morse wrote:
>> On 14/04/2022 11:03, Will Deacon wrote:
>>> On Wed, Apr 13, 2022 at 06:05:44PM +0100, James Morse wrote:
>>>> Cortex-A57 and Cortex-A72 have an erratum where an interrupt that
>>>> occurs between a pair of AES instructions in aarch32 mode may corrupt
>>>> the ELR. The task will subsequently produce the wrong AES result.
>>>>
>>>> The AES instructions are part of the cryptographic extensions, which are
>>>> optional. User-space software will detect the support for these
>>>> instructions from the hwcaps. If the platform doesn't support these
>>>> instructions a software implementation should be used.
>>>>
>>>> Remove the hwcap bits on affected parts to indicate user-space should
>>>> not use the AES instructions.
>>
>>>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>>>> index d72c4b4d389c..3faf16f1c040 100644
>>>> --- a/arch/arm64/kernel/cpufeature.c
>>>> +++ b/arch/arm64/kernel/cpufeature.c
>>>> @@ -1922,6 +1922,12 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
>>>>  }
>>>>  #endif /* CONFIG_ARM64_MTE */
>>>>  
>>>> +static void elf_hwcap_fixup(void)
>>>> +{
>>>> +	if (cpus_have_const_cap(ARM64_WORKAROUND_1742098))
>>>> +		compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES;
>>>> +}
>>>
>>> How does this deal with big/little if we late online an affected CPU?  It
>>> would probably be easier if we treated these CPUs as not having the 32-bit
>>> AES instructions at all (rather than removing the hwcap later), then the
>>> early cap check would prevent late onlining.
>>
>> I thought any new CPU to online late with a new errata was rejected by the
>> type == ARM64_CPUCAP_LOCAL_CPU_ERRATUM. Suzuki's documentation in cpufeature.h has:
>> | However, it is not safe if a "late" CPU requires a workaround and the system hasn't
>> | enabled it already.
>>
>> In this case verify_local_cpu_caps() would take the else for 'system_has_cap', and because
>> the cpu matches, but ARM64_CPUCAP_LOCAL_CPU_ERRATUM doesn't have the 'late cpu permitted'
>> bit set, it should call cpu_die_early().
>>
>> That said - I haven't tested this configuration. (I'll give it a go with the model)
> 
> Ah yes, that probably works, but please do test it to confirm.
> 
>> v1 did as you suggest - but the HWCAPs are built from the id registers, and touching the
>> id registers will regress KVM guest migration as the id registers are both visible to
>> Qemu, and invariant.

> Hmm, is that really something we expect to work in general? It seems to me
> that any erratum workaround which effectively removes functionality is going
> to be a blocker for migration.

The workaround is only for the host. The guest may already have the workaround. I think
these things should be kept separate unless the guest would be broken by the workaround.

We don't normally apply a workaround for EL1 from EL2 unless it's needed by the host. It's up
to the guest to have its own workaround.


Thanks,

James
diff mbox series

Patch

diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index 466cb9e89047..053dc12696b5 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -82,10 +82,14 @@  stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A57      | #1319537        | ARM64_ERRATUM_1319367       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A57      | #1742098        | ARM64_ERRATUM_1742098       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A72      | #853709         | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A72      | #1319367        | ARM64_ERRATUM_1319367       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A72      | #1655431        | ARM64_ERRATUM_1742098       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A73      | #858921         | ARM64_ERRATUM_858921        |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A76      | #1188873,1418040| ARM64_ERRATUM_1418040       |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 57c4c995965f..df19e60c4c46 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -491,6 +491,22 @@  config ARM64_ERRATUM_834220
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_1742098
+	bool "Cortex-A57/A72: 1742098: ELR recorded incorrectly on interrupt taken between cryptographic instructions in a sequence"
+	depends on COMPAT
+	default y
+	help
+	  This option removes the AES hwcap for aarch32 user-space to
+	  work around erratum 1742098 on Cortex-A57 and Cortex-A72.
+
+	  Affected parts may corrupt the AES state if an interrupt is
+	  taken between a pair of AES instructions. These instructions
+	  are only present if the cryptography extensions are present.
+	  All software should have a fallback implementation for CPUs
+	  that don't implement the cryptography extensions.
+
+	  If unsure, say Y.
+
 config ARM64_ERRATUM_845719
 	bool "Cortex-A53: 845719: a load might read incorrect data"
 	depends on COMPAT
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 4c9b5b4b7a0b..8f85dac4cd79 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -393,6 +393,14 @@  static struct midr_range trbe_write_out_of_range_cpus[] = {
 };
 #endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */
 
+#ifdef CONFIG_ARM64_ERRATUM_1742098
+static struct midr_range broken_aarch32_aes[] = {
+	MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+	MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+	{},
+};
+#endif /* CONFIG_ARM64_ERRATUM_1742098 */
+
 const struct arm64_cpu_capabilities arm64_errata[] = {
 #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
 	{
@@ -655,6 +663,14 @@  const struct arm64_cpu_capabilities arm64_errata[] = {
 		/* Cortex-A510 r0p0 - r0p1 */
 		ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1)
 	},
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1742098
+	{
+		.desc = "ARM erratum 1742098",
+		.capability = ARM64_WORKAROUND_1742098,
+		CAP_MIDR_RANGE_LIST(broken_aarch32_aes),
+		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+	},
 #endif
 	{
 	}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index d72c4b4d389c..3faf16f1c040 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1922,6 +1922,12 @@  static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
 }
 #endif /* CONFIG_ARM64_MTE */
 
+static void elf_hwcap_fixup(void)
+{
+	if (cpus_have_const_cap(ARM64_WORKAROUND_1742098))
+		compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES;
+}
+
 #ifdef CONFIG_KVM
 static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
 {
@@ -3034,8 +3040,10 @@  void __init setup_cpu_features(void)
 	setup_system_capabilities();
 	setup_elf_hwcaps(arm64_elf_hwcaps);
 
-	if (system_supports_32bit_el0())
+	if (system_supports_32bit_el0()) {
 		setup_elf_hwcaps(compat_elf_hwcaps);
+		elf_hwcap_fixup();
+	}
 
 	if (system_uses_ttbr0_pan())
 		pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
@@ -3087,6 +3095,7 @@  static int enable_mismatched_32bit_el0(unsigned int cpu)
 							 cpu_active_mask);
 	get_cpu_device(lucky_winner)->offline_disabled = true;
 	setup_elf_hwcaps(compat_elf_hwcaps);
+	elf_hwcap_fixup();
 	pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n",
 		cpu, lucky_winner);
 	return 0;
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 3ed418f70e3b..8cd6088f8875 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -58,6 +58,7 @@  WORKAROUND_1418040
 WORKAROUND_1463225
 WORKAROUND_1508412
 WORKAROUND_1542419
+WORKAROUND_1742098
 WORKAROUND_1902691
 WORKAROUND_2038923
 WORKAROUND_2064142