diff mbox series

[1/2] arm64: errata: Work around AmpereOne's erratum AC03_CPU_36

Message ID 20250415154711.1698544-1-scott@os.amperecomputing.com (mailing list archive)
State New
Headers show
Series [1/2] arm64: errata: Work around AmpereOne's erratum AC03_CPU_36 | expand

Commit Message

D Scott Phillips April 15, 2025, 3:47 p.m. UTC
AC03_CPU_36 can cause asynchronous exceptions to be routed to the wrong
exception level if an async exception coincides with an update to the
controls for the target exception level in HCR_EL2. On affected
machines, always do writes to HCR_EL2 with async exceptions blocked.

Signed-off-by: D Scott Phillips <scott@os.amperecomputing.com>
---
 arch/arm64/Kconfig              | 17 +++++++++++++++++
 arch/arm64/include/asm/sysreg.h | 18 ++++++++++++++++--
 arch/arm64/kernel/cpu_errata.c  | 14 ++++++++++++++
 arch/arm64/tools/cpucaps        |  1 +
 4 files changed, 48 insertions(+), 2 deletions(-)

Comments

Oliver Upton April 15, 2025, 5:12 p.m. UTC | #1
On Tue, Apr 15, 2025 at 08:47:10AM -0700, D Scott Phillips wrote:
> AC03_CPU_36 can cause asynchronous exceptions to be routed to the wrong
> exception level if an async exception coincides with an update to the
> controls for the target exception level in HCR_EL2. On affected
> machines, always do writes to HCR_EL2 with async exceptions blocked.
> 
> Signed-off-by: D Scott Phillips <scott@os.amperecomputing.com>
> ---
>  arch/arm64/Kconfig              | 17 +++++++++++++++++
>  arch/arm64/include/asm/sysreg.h | 18 ++++++++++++++++--
>  arch/arm64/kernel/cpu_errata.c  | 14 ++++++++++++++
>  arch/arm64/tools/cpucaps        |  1 +
>  4 files changed, 48 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index a182295e6f08b..e5fd87446a3b8 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -445,6 +445,23 @@ menu "Kernel Features"
>  
>  menu "ARM errata workarounds via the alternatives framework"
>  
> +config AMPERE_ERRATUM_AC03_CPU_36
> +        bool "AmpereOne: AC03_CPU_36: CPU can take an invalid exception, if an asynchronous exception to EL2 occurs while EL2 software is changing the EL2 exception controls."
> +	default y
> +	help
> +	  This option adds an alternative code sequence to work around Ampere
> +	  errata AC03_CPU_36 on AmpereOne.
> +
> +	  If an async exception happens at the same time as an update to the
> +	  controls for the target EL for async exceptions, an exception can be
> +	  delivered to the wrong EL. For example, an EL may be routed from EL2
> +	  to EL1.
> +
> +	  The workaround masks all asynchronous exception types when writing
> +	  to HCR_EL2.
> +
> +	  If unsure, say Y.
> +
>  config AMPERE_ERRATUM_AC03_CPU_38
>          bool "AmpereOne: AC03_CPU_38: Certain bits in the Virtualization Translation Control Register and Translation Control Registers do not follow RES0 semantics"
>  	default y
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 2639d3633073d..e7781f7e7f7a7 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -1136,14 +1136,28 @@
>  	__val;							\
>  })
>  
> +#define __sysreg_is_hcr_el2(r)					\
> +	(__builtin_strcmp("hcr_el2", __stringify(r)) == 0)

This looks fragile. What about:

	write_sysreg(hcr, HCR_EL2);

or:

	write_sysreg_s(hcr, SYS_HCR_EL2);


Thanks,
Oliver
D Scott Phillips April 15, 2025, 5:30 p.m. UTC | #2
Oliver Upton <oliver.upton@linux.dev> writes:

> On Tue, Apr 15, 2025 at 08:47:10AM -0700, D Scott Phillips wrote:
>> AC03_CPU_36 can cause asynchronous exceptions to be routed to the wrong
>> exception level if an async exception coincides with an update to the
>> controls for the target exception level in HCR_EL2. On affected
>> machines, always do writes to HCR_EL2 with async exceptions blocked.
>> 
>> Signed-off-by: D Scott Phillips <scott@os.amperecomputing.com>
>> ---
>>  arch/arm64/Kconfig              | 17 +++++++++++++++++
>>  arch/arm64/include/asm/sysreg.h | 18 ++++++++++++++++--
>>  arch/arm64/kernel/cpu_errata.c  | 14 ++++++++++++++
>>  arch/arm64/tools/cpucaps        |  1 +
>>  4 files changed, 48 insertions(+), 2 deletions(-)
>> 
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index a182295e6f08b..e5fd87446a3b8 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -445,6 +445,23 @@ menu "Kernel Features"
>>  
>>  menu "ARM errata workarounds via the alternatives framework"
>>  
>> +config AMPERE_ERRATUM_AC03_CPU_36
>> +        bool "AmpereOne: AC03_CPU_36: CPU can take an invalid exception, if an asynchronous exception to EL2 occurs while EL2 software is changing the EL2 exception controls."
>> +	default y
>> +	help
>> +	  This option adds an alternative code sequence to work around Ampere
>> +	  errata AC03_CPU_36 on AmpereOne.
>> +
>> +	  If an async exception happens at the same time as an update to the
>> +	  controls for the target EL for async exceptions, an exception can be
>> +	  delivered to the wrong EL. For example, an EL may be routed from EL2
>> +	  to EL1.
>> +
>> +	  The workaround masks all asynchronous exception types when writing
>> +	  to HCR_EL2.
>> +
>> +	  If unsure, say Y.
>> +
>>  config AMPERE_ERRATUM_AC03_CPU_38
>>          bool "AmpereOne: AC03_CPU_38: Certain bits in the Virtualization Translation Control Register and Translation Control Registers do not follow RES0 semantics"
>>  	default y
>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>> index 2639d3633073d..e7781f7e7f7a7 100644
>> --- a/arch/arm64/include/asm/sysreg.h
>> +++ b/arch/arm64/include/asm/sysreg.h
>> @@ -1136,14 +1136,28 @@
>>  	__val;							\
>>  })
>>  
>> +#define __sysreg_is_hcr_el2(r)					\
>> +	(__builtin_strcmp("hcr_el2", __stringify(r)) == 0)
>
> This looks fragile. What about:
>
> 	write_sysreg(hcr, HCR_EL2);
>
> or:
>
> 	write_sysreg_s(hcr, SYS_HCR_EL2);

I had also thought about changing the users of write_sysreg(..hcr_el2)
to some new function write_hcr_el2() or something, but I guess that
would have the same fragility. Any suggestions on a better way? Should I
try harder with the string stuff, or do something else entirely?
Oliver Upton April 15, 2025, 6:12 p.m. UTC | #3
On Tue, Apr 15, 2025 at 10:30:36AM -0700, D Scott Phillips wrote:
> Oliver Upton <oliver.upton@linux.dev> writes:
> > On Tue, Apr 15, 2025 at 08:47:10AM -0700, D Scott Phillips wrote:
> >> AC03_CPU_36 can cause asynchronous exceptions to be routed to the wrong
> >> exception level if an async exception coincides with an update to the
> >> controls for the target exception level in HCR_EL2. On affected
> >> machines, always do writes to HCR_EL2 with async exceptions blocked.
> >> 
> >> Signed-off-by: D Scott Phillips <scott@os.amperecomputing.com>
> >> ---
> >>  arch/arm64/Kconfig              | 17 +++++++++++++++++
> >>  arch/arm64/include/asm/sysreg.h | 18 ++++++++++++++++--
> >>  arch/arm64/kernel/cpu_errata.c  | 14 ++++++++++++++
> >>  arch/arm64/tools/cpucaps        |  1 +
> >>  4 files changed, 48 insertions(+), 2 deletions(-)
> >> 
> >> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> >> index a182295e6f08b..e5fd87446a3b8 100644
> >> --- a/arch/arm64/Kconfig
> >> +++ b/arch/arm64/Kconfig
> >> @@ -445,6 +445,23 @@ menu "Kernel Features"
> >>  
> >>  menu "ARM errata workarounds via the alternatives framework"
> >>  
> >> +config AMPERE_ERRATUM_AC03_CPU_36
> >> +        bool "AmpereOne: AC03_CPU_36: CPU can take an invalid exception, if an asynchronous exception to EL2 occurs while EL2 software is changing the EL2 exception controls."
> >> +	default y
> >> +	help
> >> +	  This option adds an alternative code sequence to work around Ampere
> >> +	  errata AC03_CPU_36 on AmpereOne.
> >> +
> >> +	  If an async exception happens at the same time as an update to the
> >> +	  controls for the target EL for async exceptions, an exception can be
> >> +	  delivered to the wrong EL. For example, an EL may be routed from EL2
> >> +	  to EL1.
> >> +
> >> +	  The workaround masks all asynchronous exception types when writing
> >> +	  to HCR_EL2.
> >> +
> >> +	  If unsure, say Y.
> >> +
> >>  config AMPERE_ERRATUM_AC03_CPU_38
> >>          bool "AmpereOne: AC03_CPU_38: Certain bits in the Virtualization Translation Control Register and Translation Control Registers do not follow RES0 semantics"
> >>  	default y
> >> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> >> index 2639d3633073d..e7781f7e7f7a7 100644
> >> --- a/arch/arm64/include/asm/sysreg.h
> >> +++ b/arch/arm64/include/asm/sysreg.h
> >> @@ -1136,14 +1136,28 @@
> >>  	__val;							\
> >>  })
> >>  
> >> +#define __sysreg_is_hcr_el2(r)					\
> >> +	(__builtin_strcmp("hcr_el2", __stringify(r)) == 0)
> >
> > This looks fragile. What about:
> >
> > 	write_sysreg(hcr, HCR_EL2);
> >
> > or:
> >
> > 	write_sysreg_s(hcr, SYS_HCR_EL2);
> 
> I had also thought about changing the users of write_sysreg(..hcr_el2)
> to some new function write_hcr_el2() or something, but I guess that
> would have the same fragility. Any suggestions on a better way? Trying
> harder with the string stuff, or do something totally else?

I think the least bad approach would be to convert to HCR-specific
accessors. It's the most likely to encourage folks to respect the erratum
mitigation, and it keeps the ugliness out of unrelated common helpers.

Thanks,
Oliver
D Scott Phillips April 15, 2025, 6:17 p.m. UTC | #4
Oliver Upton <oliver.upton@linux.dev> writes:

> On Tue, Apr 15, 2025 at 10:30:36AM -0700, D Scott Phillips wrote:
>> Oliver Upton <oliver.upton@linux.dev> writes:
>> > On Tue, Apr 15, 2025 at 08:47:10AM -0700, D Scott Phillips wrote:
>> >> AC03_CPU_36 can cause asynchronous exceptions to be routed to the wrong
>> >> exception level if an async exception coincides with an update to the
>> >> controls for the target exception level in HCR_EL2. On affected
>> >> machines, always do writes to HCR_EL2 with async exceptions blocked.
>> >> 
>> >> Signed-off-by: D Scott Phillips <scott@os.amperecomputing.com>
>> >> ---
>> >>  arch/arm64/Kconfig              | 17 +++++++++++++++++
>> >>  arch/arm64/include/asm/sysreg.h | 18 ++++++++++++++++--
>> >>  arch/arm64/kernel/cpu_errata.c  | 14 ++++++++++++++
>> >>  arch/arm64/tools/cpucaps        |  1 +
>> >>  4 files changed, 48 insertions(+), 2 deletions(-)
>> >> 
>> >> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> >> index a182295e6f08b..e5fd87446a3b8 100644
>> >> --- a/arch/arm64/Kconfig
>> >> +++ b/arch/arm64/Kconfig
>> >> @@ -445,6 +445,23 @@ menu "Kernel Features"
>> >>  
>> >>  menu "ARM errata workarounds via the alternatives framework"
>> >>  
>> >> +config AMPERE_ERRATUM_AC03_CPU_36
>> >> +        bool "AmpereOne: AC03_CPU_36: CPU can take an invalid exception, if an asynchronous exception to EL2 occurs while EL2 software is changing the EL2 exception controls."
>> >> +	default y
>> >> +	help
>> >> +	  This option adds an alternative code sequence to work around Ampere
>> >> +	  errata AC03_CPU_36 on AmpereOne.
>> >> +
>> >> +	  If an async exception happens at the same time as an update to the
>> >> +	  controls for the target EL for async exceptions, an exception can be
>> >> +	  delivered to the wrong EL. For example, an EL may be routed from EL2
>> >> +	  to EL1.
>> >> +
>> >> +	  The workaround masks all asynchronous exception types when writing
>> >> +	  to HCR_EL2.
>> >> +
>> >> +	  If unsure, say Y.
>> >> +
>> >>  config AMPERE_ERRATUM_AC03_CPU_38
>> >>          bool "AmpereOne: AC03_CPU_38: Certain bits in the Virtualization Translation Control Register and Translation Control Registers do not follow RES0 semantics"
>> >>  	default y
>> >> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>> >> index 2639d3633073d..e7781f7e7f7a7 100644
>> >> --- a/arch/arm64/include/asm/sysreg.h
>> >> +++ b/arch/arm64/include/asm/sysreg.h
>> >> @@ -1136,14 +1136,28 @@
>> >>  	__val;							\
>> >>  })
>> >>  
>> >> +#define __sysreg_is_hcr_el2(r)					\
>> >> +	(__builtin_strcmp("hcr_el2", __stringify(r)) == 0)
>> >
>> > This looks fragile. What about:
>> >
>> > 	write_sysreg(hcr, HCR_EL2);
>> >
>> > or:
>> >
>> > 	write_sysreg_s(hcr, SYS_HCR_EL2);
>> 
>> I had also thought about changing the users of write_sysreg(..hcr_el2)
>> to some new function write_hcr_el2() or something, but I guess that
>> would have the same fragility. Any suggestions on a better way? Trying
>> harder with the string stuff, or do something totally else?
>
> I think the least bad approach would be to convert to HCR-specific
> accessors. It's the most likely to encourage folks to respect the errata
> mitigation + keeps the ugliness out of unrelated common helpers.

OK, will do
diff mbox series

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a182295e6f08b..e5fd87446a3b8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -445,6 +445,23 @@  menu "Kernel Features"
 
 menu "ARM errata workarounds via the alternatives framework"
 
+config AMPERE_ERRATUM_AC03_CPU_36
+        bool "AmpereOne: AC03_CPU_36: CPU can take an invalid exception, if an asynchronous exception to EL2 occurs while EL2 software is changing the EL2 exception controls."
+	default y
+	help
+	  This option adds an alternative code sequence to work around Ampere
+	  erratum AC03_CPU_36 on AmpereOne.
+
+	  If an async exception happens at the same time as an update to the
+	  controls for the target EL for async exceptions, an exception can be
+	  delivered to the wrong EL. For example, an exception may be routed
+	  from EL2 to EL1.
+
+	  The workaround masks all asynchronous exception types when writing
+	  to HCR_EL2.
+
+	  If unsure, say Y.
+
 config AMPERE_ERRATUM_AC03_CPU_38
         bool "AmpereOne: AC03_CPU_38: Certain bits in the Virtualization Translation Control Register and Translation Control Registers do not follow RES0 semantics"
 	default y
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 2639d3633073d..e7781f7e7f7a7 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1136,14 +1136,28 @@ 
 	__val;							\
 })
 
+#define __sysreg_is_hcr_el2(r)					\
+	(__builtin_strcmp("hcr_el2", __stringify(r)) == 0)
+#define __hcr_el2_ac03_cpu_36(r)				\
+	(IS_ENABLED(CONFIG_AMPERE_ERRATUM_AC03_CPU_36) &&	\
+	 __sysreg_is_hcr_el2(r) &&				\
+	 alternative_has_cap_unlikely(ARM64_WORKAROUND_AMPERE_AC03_CPU_36))
+
 /*
  * The "Z" constraint normally means a zero immediate, but when combined with
  * the "%x0" template means XZR.
  */
 #define write_sysreg(v, r) do {					\
 	u64 __val = (u64)(v);					\
-	asm volatile("msr " __stringify(r) ", %x0"		\
-		     : : "rZ" (__val));				\
+	if (__hcr_el2_ac03_cpu_36(r)) {				\
+		u64 __daif;					\
+		asm volatile("mrs %0, daif; msr daifset, #0xf;"	\
+			     "msr hcr_el2, %x1; msr daif, %0"	\
+		: "=&r"(__daif) : "rZ" (__val));		\
+	} else {						\
+		asm volatile("msr " __stringify(r) ", %x0"	\
+			     : : "rZ" (__val));			\
+	}							\
 } while (0)
 
 /*
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index b55f5f7057502..89be85bf631fd 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -549,6 +549,13 @@  static const struct midr_range erratum_spec_ssbs_list[] = {
 };
 #endif
 
+#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_36
+static const struct midr_range erratum_ac03_cpu_36_list[] = {
+	MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+	{},
+};
+#endif
+
 #ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
 static const struct midr_range erratum_ac03_cpu_38_list[] = {
 	MIDR_ALL_VERSIONS(MIDR_AMPERE1),
@@ -869,6 +876,13 @@  const struct arm64_cpu_capabilities arm64_errata[] = {
 		ERRATA_MIDR_RANGE_LIST(erratum_spec_unpriv_load_list),
 	},
 #endif
+#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_36
+	{
+		.desc = "AmpereOne erratum AC03_CPU_36",
+		.capability = ARM64_WORKAROUND_AMPERE_AC03_CPU_36,
+		ERRATA_MIDR_RANGE_LIST(erratum_ac03_cpu_36_list),
+	},
+#endif
 #ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
 	{
 		.desc = "AmpereOne erratum AC03_CPU_38",
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 772c1b008e437..f430fd5900d15 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -93,6 +93,7 @@  WORKAROUND_2077057
 WORKAROUND_2457168
 WORKAROUND_2645198
 WORKAROUND_2658417
+WORKAROUND_AMPERE_AC03_CPU_36
 WORKAROUND_AMPERE_AC03_CPU_38
 WORKAROUND_TRBE_OVERWRITE_FILL_MODE
 WORKAROUND_TSB_FLUSH_FAILURE