diff mbox series

[v4] arm64: errata: Fix exec handling in erratum 1418040 workaround

Message ID 20211217211920.2004032-1-scott@os.amperecomputing.com (mailing list archive)
State New, archived
Headers show
Series [v4] arm64: errata: Fix exec handling in erratum 1418040 workaround | expand

Commit Message

D Scott Phillips Dec. 17, 2021, 9:19 p.m. UTC
The erratum 1418040 workaround enables vct access trapping when executing
compat threads. The workaround is applied when switching between tasks, but
the need for the workaround could also change at an exec(), when a
non-compat task execs a compat binary or vice versa. Apply the workaround
in arch_setup_new_exec().

This leaves a small window of time between SET_PERSONALITY and
arch_setup_new_exec where preemption could occur and confuse the old
workaround logic that compares TIF_32BIT between prev and next. Instead, we
can just read cntkctl to make sure it's in the state that the next task
needs. I measured cntkctl read time to be about the same as a mov from a
general-purpose register on N1. Update the workaround logic to examine the
current value of cntkctl instead of the previous task's compat state.

Fixes: d49f7d7376d0 ("arm64: Move handling of erratum 1418040 into C code")
Signed-off-by: D Scott Phillips <scott@os.amperecomputing.com>
Cc: <stable@vger.kernel.org> # 5.4.x
---

 v4: - Move exec() handling into arch_setup_new_exec(), drop prev32==next32
       comparison to fix possible confusion in the small window between
       SET_PERSONALITY() and arch_setup_new_exec(). (Catalin)

 v3: - Un-nest conditionals (Marc)

 v2: - Use sysreg_clear_set instead of open coding (Marc)
     - guard this_cpu_has_cap() check under IS_ENABLED() to avoid tons of
       WARN_ON(preemptible()) when built with !CONFIG_ARM64_ERRATUM_1418040

 arch/arm64/kernel/process.c | 34 ++++++++++++----------------------
 1 file changed, 12 insertions(+), 22 deletions(-)

Comments

D Scott Phillips Dec. 17, 2021, 9:36 p.m. UTC | #1
D Scott Phillips <scott@os.amperecomputing.com> writes:

> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index aacf2f5559a8..b37ff23e625e 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -439,34 +439,23 @@ static void entry_task_switch(struct task_struct *next)
>  
>  /*
>   * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
> - * Assuming the virtual counter is enabled at the beginning of times:
> - *
> - * - disable access when switching from a 64bit task to a 32bit task
> - * - enable access when switching from a 32bit task to a 64bit task
> + * Ensure access is disabled when switching to a 32bit task, ensure
> + * access is enabled when switching to a 64bit task.
>   */
> -static void erratum_1418040_thread_switch(struct task_struct *prev,
> -					  struct task_struct *next)
> +static void erratum_1418040_thread_switch(struct task_struct *next)
>  {
> -	bool prev32, next32;
> -	u64 val;
> -
> -	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040))
> -		return;
> +	preempt_disable();
>  
> -	prev32 = is_compat_thread(task_thread_info(prev));
> -	next32 = is_compat_thread(task_thread_info(next));
> -
> -	if (prev32 == next32 || !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
> +	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
> +	    !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
>  		return;

Oops, missing preempt_enable() here.
Marc Zyngier Dec. 20, 2021, 12:03 p.m. UTC | #2
On Fri, 17 Dec 2021 21:19:20 +0000,
D Scott Phillips <scott@os.amperecomputing.com> wrote:
> 
> The erratum 1418040 workaround enables vct access trapping when executing

nit: s/vct/CNTVCT_EL1/.

> compat threads. The workaround is applied when switching between tasks, but
> the need for the workaround could also change at an exec(), when a
> non-compat task execs a compat binary or vice versa. Apply the workaround
> in arch_setup_new_exec().
> 
> The leaves a small window of time between SET_PERSONALITY and
> arch_setup_new_exec where preemption could occur and confuse the old
> workaround logic that compares TIF_32BIT between prev and next. Instead, we
> can just read cntkctl to make sure it's in the state that the next task
> needs. I measured cntkctl read time to be about the same as a mov from a
> general-purpose register on N1. Update the workaround logic to examine the
> current value of cntkctl instead of the previous task's compat state.
> 
> Fixes: d49f7d7376d0 ("arm64: Move handling of erratum 1418040 into C code")
> Signed-off-by: D Scott Phillips <scott@os.amperecomputing.com>
> Cc: <stable@vger.kernel.org> # 5.4.x
> ---
> 
>  v4: - Move exec() handling into arch_setup_new_exec(), drop prev32==next32
>        comparison to fix possible confusion in the small window between
>        SET_PERSONALITY() and arch_setup_new_exec(). (Catalin)
> 
>  v3: - Un-nest conditionals (Marc)
> 
>  v2: - Use sysreg_clear_set instead of open coding (Marc)
>      - guard this_cpu_has_cap() check under IS_ENABLED() to avoid tons of
>        WARN_ON(preemptible()) when built with !CONFIG_ARM64_ERRATUM_1418040
> 
>  arch/arm64/kernel/process.c | 34 ++++++++++++----------------------
>  1 file changed, 12 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index aacf2f5559a8..b37ff23e625e 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -439,34 +439,23 @@ static void entry_task_switch(struct task_struct *next)
>  
>  /*
>   * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
> - * Assuming the virtual counter is enabled at the beginning of times:
> - *
> - * - disable access when switching from a 64bit task to a 32bit task
> - * - enable access when switching from a 32bit task to a 64bit task
> + * Ensure access is disabled when switching to a 32bit task, ensure
> + * access is enabled when switching to a 64bit task.
>   */
> -static void erratum_1418040_thread_switch(struct task_struct *prev,
> -					  struct task_struct *next)
> +static void erratum_1418040_thread_switch(struct task_struct *next)
>  {
> -	bool prev32, next32;
> -	u64 val;
> -
> -	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040))
> -		return;
> +	preempt_disable();

I'd rather avoid this on the __switch_to() path. We're guaranteed to
be non-preemptible when called from there, and we want it to be as
fast as possible. It would also avoid the bug on the early return just
below.

>
> -	prev32 = is_compat_thread(task_thread_info(prev));
> -	next32 = is_compat_thread(task_thread_info(next));
> -
> -	if (prev32 == next32 || !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
> +	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
> +	    !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
>  		return;
>  
> -	val = read_sysreg(cntkctl_el1);
> -
> -	if (!next32)
> -		val |= ARCH_TIMER_USR_VCT_ACCESS_EN;
> +	if (is_compat_thread(task_thread_info(next)))
> +		sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
>  	else
> -		val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN;
> +		sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
>  
> -	write_sysreg(val, cntkctl_el1);
> +	preempt_enable();
>  }
>  
>  /*
> @@ -501,7 +490,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
>  	contextidr_thread_switch(next);
>  	entry_task_switch(next);
>  	ssbs_thread_switch(next);
> -	erratum_1418040_thread_switch(prev, next);
> +	erratum_1418040_thread_switch(next);
>  	ptrauth_thread_switch_user(next);
>  
>  	/*
> @@ -611,6 +600,7 @@ void arch_setup_new_exec(void)
>  	current->mm->context.flags = mmflags;
>  	ptrauth_thread_init_user();
>  	mte_thread_init_user();
> +	erratum_1418040_thread_switch(current);

But what is the point of this now? As you enter __switch_to(), the TIF
flags are set in stone for this particular return to userspace.

Since you are now evaluating the state of CNTKCTL_EL1 on each and
every switch, you are guaranteed to set the enable bit to the right
value on each return to userspace, even if you have gone via
SET_PERSONALITY().

Am I missing something?

Thanks,

	M.
D Scott Phillips Dec. 20, 2021, 4:40 p.m. UTC | #3
Marc Zyngier <maz@kernel.org> writes:

> On Fri, 17 Dec 2021 21:19:20 +0000,
> D Scott Phillips <scott@os.amperecomputing.com> wrote:
>> 
>> The erratum 1418040 workaround enables vct access trapping when executing
>
> nit: s/vct/CNTVCT_EL1/.

fixed, thanks

>> compat threads. The workaround is applied when switching between tasks, but
>> the need for the workaround could also change at an exec(), when a
>> non-compat task execs a compat binary or vice versa. Apply the workaround
>> in arch_setup_new_exec().
>> 
>> The leaves a small window of time between SET_PERSONALITY and
>> arch_setup_new_exec where preemption could occur and confuse the old
>> workaround logic that compares TIF_32BIT between prev and next. Instead, we
>> can just read cntkctl to make sure it's in the state that the next task
>> needs. I measured cntkctl read time to be about the same as a mov from a
>> general-purpose register on N1. Update the workaround logic to examine the
>> current value of cntkctl instead of the previous task's compat state.
>> 
>> Fixes: d49f7d7376d0 ("arm64: Move handling of erratum 1418040 into C code")
>> Signed-off-by: D Scott Phillips <scott@os.amperecomputing.com>
>> Cc: <stable@vger.kernel.org> # 5.4.x
>> ---
>> 
>>  v4: - Move exec() handling into arch_setup_new_exec(), drop prev32==next32
>>        comparison to fix possible confusion in the small window between
>>        SET_PERSONALITY() and arch_setup_new_exec(). (Catalin)
>> 
>>  v3: - Un-nest conditionals (Marc)
>> 
>>  v2: - Use sysreg_clear_set instead of open coding (Marc)
>>      - guard this_cpu_has_cap() check under IS_ENABLED() to avoid tons of
>>        WARN_ON(preemptible()) when built with !CONFIG_ARM64_ERRATUM_1418040
>> 
>>  arch/arm64/kernel/process.c | 34 ++++++++++++----------------------
>>  1 file changed, 12 insertions(+), 22 deletions(-)
>> 
>> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
>> index aacf2f5559a8..b37ff23e625e 100644
>> --- a/arch/arm64/kernel/process.c
>> +++ b/arch/arm64/kernel/process.c
>> @@ -439,34 +439,23 @@ static void entry_task_switch(struct task_struct *next)
>>  
>>  /*
>>   * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
>> - * Assuming the virtual counter is enabled at the beginning of times:
>> - *
>> - * - disable access when switching from a 64bit task to a 32bit task
>> - * - enable access when switching from a 32bit task to a 64bit task
>> + * Ensure access is disabled when switching to a 32bit task, ensure
>> + * access is enabled when switching to a 64bit task.
>>   */
>> -static void erratum_1418040_thread_switch(struct task_struct *prev,
>> -					  struct task_struct *next)
>> +static void erratum_1418040_thread_switch(struct task_struct *next)
>>  {
>> -	bool prev32, next32;
>> -	u64 val;
>> -
>> -	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040))
>> -		return;
>> +	preempt_disable();
>
> I'd rather avoid this on the __switch_to() path. We're guaranteed to
> be non-preemptible when called from there, and we want it to be as
> fast as possible. It would also avoid the bug on the early return just
> below.

Yes, makes sense. I've added an erratum_1418040_new_exec() helper that
does preempt_disable/erratum_1418040_thread_switch(current)/preempt_enable,
and called it from arch_setup_new_exec().

>>
>> -	prev32 = is_compat_thread(task_thread_info(prev));
>> -	next32 = is_compat_thread(task_thread_info(next));
>> -
>> -	if (prev32 == next32 || !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
>> +	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
>> +	    !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
>>  		return;
>>  
>> -	val = read_sysreg(cntkctl_el1);
>> -
>> -	if (!next32)
>> -		val |= ARCH_TIMER_USR_VCT_ACCESS_EN;
>> +	if (is_compat_thread(task_thread_info(next)))
>> +		sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
>>  	else
>> -		val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN;
>> +		sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
>>  
>> -	write_sysreg(val, cntkctl_el1);
>> +	preempt_enable();
>>  }
>>  
>>  /*
>> @@ -501,7 +490,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
>>  	contextidr_thread_switch(next);
>>  	entry_task_switch(next);
>>  	ssbs_thread_switch(next);
>> -	erratum_1418040_thread_switch(prev, next);
>> +	erratum_1418040_thread_switch(next);
>>  	ptrauth_thread_switch_user(next);
>>  
>>  	/*
>> @@ -611,6 +600,7 @@ void arch_setup_new_exec(void)
>>  	current->mm->context.flags = mmflags;
>>  	ptrauth_thread_init_user();
>>  	mte_thread_init_user();
>> +	erratum_1418040_thread_switch(current);
>
> But what is the point of this now? As you enter __switch_to(), the TIF
> flags are set in stone for this particular return to userspace.
>
> Since you are now evaluating the state of CNTKCTL_EL1 on each and
> every switch, you are guaranteed to set the enable bit to the right
> value on each return to userspace, even if you have gone via
> SET_PERSONALITY().
>
> Am I missing something?

The workaround in __switch_to isn't happening for every return to
userspace, but rather for every scheduler task switch. When a process
exec()s, no switch happens at that point. From the scheduler point of
view, this is still the same task.  So in the time period between
exec()ing a compat task from non-compat (or vice versa) and the first
time it gets switched out, we would apply the wrong workaround state,
unless we make a change to cntkctl from exec() before returning back to
EL0.

Thanks,

Scott
Marc Zyngier Dec. 20, 2021, 5:36 p.m. UTC | #4
On Mon, 20 Dec 2021 16:40:45 +0000,
D Scott Phillips <scott@os.amperecomputing.com> wrote:
> 
> Marc Zyngier <maz@kernel.org> writes:
> 
> > On Fri, 17 Dec 2021 21:19:20 +0000,
> > D Scott Phillips <scott@os.amperecomputing.com> wrote:
> >> 
> >> The erratum 1418040 workaround enables vct access trapping when executing
> >
> > nit: s/vct/CNTVCT_EL1/.
> 
> fixed, thanks
> 
> >> compat threads. The workaround is applied when switching between tasks, but
> >> the need for the workaround could also change at an exec(), when a
> >> non-compat task execs a compat binary or vice versa. Apply the workaround
> >> in arch_setup_new_exec().
> >> 
> >> The leaves a small window of time between SET_PERSONALITY and
> >> arch_setup_new_exec where preemption could occur and confuse the old
> >> workaround logic that compares TIF_32BIT between prev and next. Instead, we
> >> can just read cntkctl to make sure it's in the state that the next task
> >> needs. I measured cntkctl read time to be about the same as a mov from a
> >> general-purpose register on N1. Update the workaround logic to examine the
> >> current value of cntkctl instead of the previous task's compat state.
> >> 
> >> Fixes: d49f7d7376d0 ("arm64: Move handling of erratum 1418040 into C code")
> >> Signed-off-by: D Scott Phillips <scott@os.amperecomputing.com>
> >> Cc: <stable@vger.kernel.org> # 5.4.x
> >> ---
> >> 
> >>  v4: - Move exec() handling into arch_setup_new_exec(), drop prev32==next32
> >>        comparison to fix possible confusion in the small window between
> >>        SET_PERSONALITY() and arch_setup_new_exec(). (Catalin)
> >> 
> >>  v3: - Un-nest conditionals (Marc)
> >> 
> >>  v2: - Use sysreg_clear_set instead of open coding (Marc)
> >>      - guard this_cpu_has_cap() check under IS_ENABLED() to avoid tons of
> >>        WARN_ON(preemptible()) when built with !CONFIG_ARM64_ERRATUM_1418040
> >> 
> >>  arch/arm64/kernel/process.c | 34 ++++++++++++----------------------
> >>  1 file changed, 12 insertions(+), 22 deletions(-)
> >> 
> >> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> >> index aacf2f5559a8..b37ff23e625e 100644
> >> --- a/arch/arm64/kernel/process.c
> >> +++ b/arch/arm64/kernel/process.c
> >> @@ -439,34 +439,23 @@ static void entry_task_switch(struct task_struct *next)
> >>  
> >>  /*
> >>   * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
> >> - * Assuming the virtual counter is enabled at the beginning of times:
> >> - *
> >> - * - disable access when switching from a 64bit task to a 32bit task
> >> - * - enable access when switching from a 32bit task to a 64bit task
> >> + * Ensure access is disabled when switching to a 32bit task, ensure
> >> + * access is enabled when switching to a 64bit task.
> >>   */
> >> -static void erratum_1418040_thread_switch(struct task_struct *prev,
> >> -					  struct task_struct *next)
> >> +static void erratum_1418040_thread_switch(struct task_struct *next)
> >>  {
> >> -	bool prev32, next32;
> >> -	u64 val;
> >> -
> >> -	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040))
> >> -		return;
> >> +	preempt_disable();
> >
> > I'd rather avoid this on the __switch_to() path. We're guaranteed to
> > be non-preemptible when called from there, and we want it to be as
> > fast as possible. It would also avoid the bug on the early return just
> > below.
> 
> Yes, makes sense. I've added an erratum_1418040_new_exec() helper that
> does preempt_disable/erratum_1418040_thread_switch(current)/preempt_enable,
> and called it from arch_setup_new_exec().

Yes, that's much better.

> 
> >>
> >> -	prev32 = is_compat_thread(task_thread_info(prev));
> >> -	next32 = is_compat_thread(task_thread_info(next));
> >> -
> >> -	if (prev32 == next32 || !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
> >> +	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
> >> +	    !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
> >>  		return;
> >>  
> >> -	val = read_sysreg(cntkctl_el1);
> >> -
> >> -	if (!next32)
> >> -		val |= ARCH_TIMER_USR_VCT_ACCESS_EN;
> >> +	if (is_compat_thread(task_thread_info(next)))
> >> +		sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
> >>  	else
> >> -		val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN;
> >> +		sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
> >>  
> >> -	write_sysreg(val, cntkctl_el1);
> >> +	preempt_enable();
> >>  }
> >>  
> >>  /*
> >> @@ -501,7 +490,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
> >>  	contextidr_thread_switch(next);
> >>  	entry_task_switch(next);
> >>  	ssbs_thread_switch(next);
> >> -	erratum_1418040_thread_switch(prev, next);
> >> +	erratum_1418040_thread_switch(next);
> >>  	ptrauth_thread_switch_user(next);
> >>  
> >>  	/*
> >> @@ -611,6 +600,7 @@ void arch_setup_new_exec(void)
> >>  	current->mm->context.flags = mmflags;
> >>  	ptrauth_thread_init_user();
> >>  	mte_thread_init_user();
> >> +	erratum_1418040_thread_switch(current);
> >
> > But what is the point of this now? As you enter __switch_to(), the TIF
> > flags are set in stone for this particular return to userspace.
> >
> > Since you are now evaluating the state of CNTKCTL_EL1 on each and
> > every switch, you are guaranteed to set the enable bit to the right
> > value on each return to userspace, even if you have gone via
> > SET_PERSONALITY().
> >
> > Am I missing something?
> 
> The workaround in __switch_to isn't happening for every return to
> userspace, but rather for every scheduler task switch. When a process
> exec()s, no switch happens at that point. From the scheduler point of
> view, this is still the same task.  So in the time period between
> exec()ing a compat task from non-compat (or vice versa) and the first
> time it gets switched out, we would apply the wrong workaround state,
> unless we make a change to cntkctl from exec() before returning back to
> EL0.

Right. For $reason, I keep equating task switch and return to user.
Thanks for spelling it out for me.

	M.
diff mbox series

Patch

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index aacf2f5559a8..b37ff23e625e 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -439,34 +439,23 @@  static void entry_task_switch(struct task_struct *next)
 
 /*
  * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
- * Assuming the virtual counter is enabled at the beginning of times:
- *
- * - disable access when switching from a 64bit task to a 32bit task
- * - enable access when switching from a 32bit task to a 64bit task
+ * Ensure access is disabled when switching to a 32bit task, ensure
+ * access is enabled when switching to a 64bit task.
  */
-static void erratum_1418040_thread_switch(struct task_struct *prev,
-					  struct task_struct *next)
+static void erratum_1418040_thread_switch(struct task_struct *next)
 {
-	bool prev32, next32;
-	u64 val;
-
-	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040))
-		return;
+	preempt_disable();
 
-	prev32 = is_compat_thread(task_thread_info(prev));
-	next32 = is_compat_thread(task_thread_info(next));
-
-	if (prev32 == next32 || !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
+	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
+	    !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
 		return;
 
-	val = read_sysreg(cntkctl_el1);
-
-	if (!next32)
-		val |= ARCH_TIMER_USR_VCT_ACCESS_EN;
+	if (is_compat_thread(task_thread_info(next)))
+		sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
 	else
-		val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN;
+		sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
 
-	write_sysreg(val, cntkctl_el1);
+	preempt_enable();
 }
 
 /*
@@ -501,7 +490,7 @@  __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	contextidr_thread_switch(next);
 	entry_task_switch(next);
 	ssbs_thread_switch(next);
-	erratum_1418040_thread_switch(prev, next);
+	erratum_1418040_thread_switch(next);
 	ptrauth_thread_switch_user(next);
 
 	/*
@@ -611,6 +600,7 @@  void arch_setup_new_exec(void)
 	current->mm->context.flags = mmflags;
 	ptrauth_thread_init_user();
 	mte_thread_init_user();
+	erratum_1418040_thread_switch(current);
 
 	if (task_spec_ssb_noexec(current)) {
 		arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,