diff mbox series

[v2,1/2] arm64: Move handling of erratum 1418040 into C code

Message ID 20200731083358.50058-2-maz@kernel.org (mailing list archive)
State New, archived
Headers show
Series arm64: Allow erratum 1418040 for late CPUs | expand

Commit Message

Marc Zyngier July 31, 2020, 8:33 a.m. UTC
Instead of dealing with erratum 1418040 on each entry and exit,
let's move the handling to __switch_to() instead, which has
several advantages:

- It can be applied when it matters (switching between 32 and 64
  bit tasks).
- It is written in C (yay!)
- It can rely on static keys rather than alternatives

Tested-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kernel/entry.S   | 21 ---------------------
 arch/arm64/kernel/process.c | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 21 deletions(-)

Comments

Will Deacon July 31, 2020, 10:41 a.m. UTC | #1
On Fri, Jul 31, 2020 at 09:33:57AM +0100, Marc Zyngier wrote:
> Instead of dealing with erratum 1418040 on each entry and exit,
> let's move the handling to __switch_to() instead, which has
> several advantages:
> 
> - It can be applied when it matters (switching between 32 and 64
>   bit tasks).
> - It is written in C (yay!)
> - It can rely on static keys rather than alternatives
> 
> Tested-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>  arch/arm64/kernel/entry.S   | 21 ---------------------
>  arch/arm64/kernel/process.c | 35 +++++++++++++++++++++++++++++++++++
>  2 files changed, 35 insertions(+), 21 deletions(-)

[...]

> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 6089638c7d43..8bbf066224ab 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -515,6 +515,40 @@ static void entry_task_switch(struct task_struct *next)
>  	__this_cpu_write(__entry_task, next);
>  }
>  
> +/*
> + * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
> + * Assuming the virtual counter is enabled at the beginning of times:
> + *
> + * - disable access when switching from a 64bit task to a 32bit task
> + * - enable access when switching from a 32bit task to a 64bit task
> + */
> +static __always_inline

Do we need the __always_inline? None of the other calls from __switch_to()
have it.

> +void erratum_1418040_thread_switch(struct task_struct *prev,
> +				   struct task_struct *next)
> +{
> +	bool prev32, next32;
> +	u64 val;
> +
> +	if (!(IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) &&
> +	      cpus_have_const_cap(ARM64_WORKAROUND_1418040)))
> +		return;
> +
> +	prev32 = is_compat_thread(task_thread_info(prev));
> +	next32 = is_compat_thread(task_thread_info(next));
> +
> +	if (prev32 == next32)
> +		return;
> +
> +	val = read_sysreg(cntkctl_el1);
> +
> +	if (prev32 & !next32)

I know they're bools but this is perverse!

Why can't it just be:

	if (next32)
		val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN;
	else
		val |= ARCH_TIMER_USR_VCT_ACCESS_EN;

?

Will
Marc Zyngier July 31, 2020, 11:14 a.m. UTC | #2
On 2020-07-31 11:41, Will Deacon wrote:
> On Fri, Jul 31, 2020 at 09:33:57AM +0100, Marc Zyngier wrote:
>> Instead of dealing with erratum 1418040 on each entry and exit,
>> let's move the handling to __switch_to() instead, which has
>> several advantages:
>> 
>> - It can be applied when it matters (switching between 32 and 64
>>   bit tasks).
>> - It is written in C (yay!)
>> - It can rely on static keys rather than alternatives
>> 
>> Tested-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>> ---
>>  arch/arm64/kernel/entry.S   | 21 ---------------------
>>  arch/arm64/kernel/process.c | 35 +++++++++++++++++++++++++++++++++++
>>  2 files changed, 35 insertions(+), 21 deletions(-)
> 
> [...]
> 
>> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
>> index 6089638c7d43..8bbf066224ab 100644
>> --- a/arch/arm64/kernel/process.c
>> +++ b/arch/arm64/kernel/process.c
>> @@ -515,6 +515,40 @@ static void entry_task_switch(struct task_struct 
>> *next)
>>  	__this_cpu_write(__entry_task, next);
>>  }
>> 
>> +/*
>> + * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
>> + * Assuming the virtual counter is enabled at the beginning of times:
>> + *
>> + * - disable access when switching from a 64bit task to a 32bit task
>> + * - enable access when switching from a 32bit task to a 64bit task
>> + */
>> +static __always_inline
> 
> Do we need the __always_inline? None of the other calls from 
> __switch_to()
> have it.

Suggestion from Stephen. In my experience, it doesn't change much as
most things get inlined anyway. Happy to drop it.

> 
>> +void erratum_1418040_thread_switch(struct task_struct *prev,
>> +				   struct task_struct *next)
>> +{
>> +	bool prev32, next32;
>> +	u64 val;
>> +
>> +	if (!(IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) &&
>> +	      cpus_have_const_cap(ARM64_WORKAROUND_1418040)))
>> +		return;
>> +
>> +	prev32 = is_compat_thread(task_thread_info(prev));
>> +	next32 = is_compat_thread(task_thread_info(next));
>> +
>> +	if (prev32 == next32)
>> +		return;
>> +
>> +	val = read_sysreg(cntkctl_el1);
>> +
>> +	if (prev32 & !next32)
> 
> I know they're bools but this is perverse!

Well, this is me writing this code, so don't attribute to perversity
what can adequately be explained by a silly typo... ;-)

> Why can't it just be:
> 
> 	if (next32)
> 		val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN;
> 	else
> 		val |= ARCH_TIMER_USR_VCT_ACCESS_EN;
> 
> ?

Yup, that's better. Do you want to apply these changes directly,
or should I respin it?

         M.
Will Deacon July 31, 2020, 11:32 a.m. UTC | #3
On Fri, Jul 31, 2020 at 12:14:40PM +0100, Marc Zyngier wrote:
> On 2020-07-31 11:41, Will Deacon wrote:
> > On Fri, Jul 31, 2020 at 09:33:57AM +0100, Marc Zyngier wrote:
> > > diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> > > index 6089638c7d43..8bbf066224ab 100644
> > > --- a/arch/arm64/kernel/process.c
> > > +++ b/arch/arm64/kernel/process.c
> > > @@ -515,6 +515,40 @@ static void entry_task_switch(struct
> > > task_struct *next)
> > >  	__this_cpu_write(__entry_task, next);
> > >  }
> > > 
> > > +/*
> > > + * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
> > > + * Assuming the virtual counter is enabled at the beginning of times:
> > > + *
> > > + * - disable access when switching from a 64bit task to a 32bit task
> > > + * - enable access when switching from a 32bit task to a 64bit task
> > > + */
> > > +static __always_inline
> > 
> > Do we need the __always_inline? None of the other calls from
> > __switch_to()
> > have it.
> 
> Suggestion from Stephen. In my experience, it doesn't change much as
> most things get inlined anyway. Happy to drop it.

Yes, please. We can add it back if it's shown to be a problem.

> > > +void erratum_1418040_thread_switch(struct task_struct *prev,
> > > +				   struct task_struct *next)
> > > +{
> > > +	bool prev32, next32;
> > > +	u64 val;
> > > +
> > > +	if (!(IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) &&
> > > +	      cpus_have_const_cap(ARM64_WORKAROUND_1418040)))
> > > +		return;
> > > +
> > > +	prev32 = is_compat_thread(task_thread_info(prev));
> > > +	next32 = is_compat_thread(task_thread_info(next));
> > > +
> > > +	if (prev32 == next32)
> > > +		return;
> > > +
> > > +	val = read_sysreg(cntkctl_el1);
> > > +
> > > +	if (prev32 & !next32)
> > 
> > I know they're bools but this is perverse!
> 
> Well, this is me writing this code, so don't attribute to perversity
> what can adequately be explained by a silly typo... ;-)
> 
> > Why can't it just be:
> > 
> > 	if (next32)
> > 		val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN;
> > 	else
> > 		val |= ARCH_TIMER_USR_VCT_ACCESS_EN;
> > 
> > ?
> 
> Yup, that's better. Do you want to apply these changes directly,
> or should I respin it?

If you don't mind respinning, that would be best please. You can add my Ack
as well, as I think this is one for Catalin now (I don't know if we're
getting an -rc8 or not).

Will
Stephen Boyd July 31, 2020, 6 p.m. UTC | #4
Quoting Will Deacon (2020-07-31 04:32:28)
> On Fri, Jul 31, 2020 at 12:14:40PM +0100, Marc Zyngier wrote:
> > On 2020-07-31 11:41, Will Deacon wrote:
> > > On Fri, Jul 31, 2020 at 09:33:57AM +0100, Marc Zyngier wrote:
> > > > diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> > > > index 6089638c7d43..8bbf066224ab 100644
> > > > --- a/arch/arm64/kernel/process.c
> > > > +++ b/arch/arm64/kernel/process.c
> > > > @@ -515,6 +515,40 @@ static void entry_task_switch(struct
> > > > task_struct *next)
> > > >   __this_cpu_write(__entry_task, next);
> > > >  }
> > > > 
> > > > +/*
> > > > + * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
> > > > + * Assuming the virtual counter is enabled at the beginning of times:
> > > > + *
> > > > + * - disable access when switching from a 64bit task to a 32bit task
> > > > + * - enable access when switching from a 32bit task to a 64bit task
> > > > + */
> > > > +static __always_inline
> > > 
> > > Do we need the __always_inline? None of the other calls from
> > > __switch_to()
> > > have it.
> > 
> > Suggestion from Stephen. In my experience, it doesn't change much as
> > most things get inlined anyway. Happy to drop it.
> 
> Yes, please. We can add it back if it's shown to be a problem.

Just for my own edification, why is __always_inline undesirable? Should
there be an always inline version of the function that has the static
key so that the erratum path is kept out of the switch_to() default
path?
Catalin Marinas Aug. 1, 2020, 11:41 a.m. UTC | #5
On Fri, Jul 31, 2020 at 11:00:34AM -0700, Stephen Boyd wrote:
> Quoting Will Deacon (2020-07-31 04:32:28)
> > On Fri, Jul 31, 2020 at 12:14:40PM +0100, Marc Zyngier wrote:
> > > On 2020-07-31 11:41, Will Deacon wrote:
> > > > On Fri, Jul 31, 2020 at 09:33:57AM +0100, Marc Zyngier wrote:
> > > > > diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> > > > > index 6089638c7d43..8bbf066224ab 100644
> > > > > --- a/arch/arm64/kernel/process.c
> > > > > +++ b/arch/arm64/kernel/process.c
> > > > > @@ -515,6 +515,40 @@ static void entry_task_switch(struct
> > > > > task_struct *next)
> > > > >   __this_cpu_write(__entry_task, next);
> > > > >  }
> > > > > 
> > > > > +/*
> > > > > + * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
> > > > > + * Assuming the virtual counter is enabled at the beginning of times:
> > > > > + *
> > > > > + * - disable access when switching from a 64bit task to a 32bit task
> > > > > + * - enable access when switching from a 32bit task to a 64bit task
> > > > > + */
> > > > > +static __always_inline
> > > > 
> > > > Do we need the __always_inline? None of the other calls from
> > > > __switch_to()
> > > > have it.
> > > 
> > > Suggestion from Stephen. In my experience, it doesn't change much as
> > > most things get inlined anyway. Happy to drop it.
> > 
> > Yes, please. We can add it back if it's shown to be a problem.
> 
> Just for my own edification, why is __always_inline undesirable? Should
> there be an always inline version of the function that has the static
> key so that the erratum path is kept out of the switch_to() default
> path?

It's rather unnecessary, so it just makes the code a bit more
unreadable. I usually go by two questions:

1. Is the function required to be inlined for correct execution?

2. Is there a visible performance benefit by using __always_inline?
   (e.g. the compiler does a bad job)

For a static function called only in one place, the compiler usually
inlines it.

I can't speak on behalf of Will but from my perspective I'm against
adding __always_inline just because it's harmless (and occasionally I
edit it out of patches I apply manually).
diff mbox series

Patch

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 35de8ba60e3d..44445d471442 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -169,19 +169,6 @@  alternative_cb_end
 	stp	x28, x29, [sp, #16 * 14]
 
 	.if	\el == 0
-	.if	\regsize == 32
-	/*
-	 * If we're returning from a 32-bit task on a system affected by
-	 * 1418040 then re-enable userspace access to the virtual counter.
-	 */
-#ifdef CONFIG_ARM64_ERRATUM_1418040
-alternative_if ARM64_WORKAROUND_1418040
-	mrs	x0, cntkctl_el1
-	orr	x0, x0, #2	// ARCH_TIMER_USR_VCT_ACCESS_EN
-	msr	cntkctl_el1, x0
-alternative_else_nop_endif
-#endif
-	.endif
 	clear_gp_regs
 	mrs	x21, sp_el0
 	ldr_this_cpu	tsk, __entry_task, x20
@@ -337,14 +324,6 @@  alternative_else_nop_endif
 	tst	x22, #PSR_MODE32_BIT		// native task?
 	b.eq	3f
 
-#ifdef CONFIG_ARM64_ERRATUM_1418040
-alternative_if ARM64_WORKAROUND_1418040
-	mrs	x0, cntkctl_el1
-	bic	x0, x0, #2			// ARCH_TIMER_USR_VCT_ACCESS_EN
-	msr	cntkctl_el1, x0
-alternative_else_nop_endif
-#endif
-
 #ifdef CONFIG_ARM64_ERRATUM_845719
 alternative_if ARM64_WORKAROUND_845719
 #ifdef CONFIG_PID_IN_CONTEXTIDR
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6089638c7d43..8bbf066224ab 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -515,6 +515,40 @@  static void entry_task_switch(struct task_struct *next)
 	__this_cpu_write(__entry_task, next);
 }
 
+/*
+ * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
+ * Assuming the virtual counter is enabled at the beginning of times:
+ *
+ * - disable access when switching from a 64bit task to a 32bit task
+ * - enable access when switching from a 32bit task to a 64bit task
+ */
+static __always_inline
+void erratum_1418040_thread_switch(struct task_struct *prev,
+				   struct task_struct *next)
+{
+	bool prev32, next32;
+	u64 val;
+
+	if (!(IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) &&
+	      cpus_have_const_cap(ARM64_WORKAROUND_1418040)))
+		return;
+
+	prev32 = is_compat_thread(task_thread_info(prev));
+	next32 = is_compat_thread(task_thread_info(next));
+
+	if (prev32 == next32)
+		return;
+
+	val = read_sysreg(cntkctl_el1);
+
+	if (prev32 & !next32)
+		val |= ARCH_TIMER_USR_VCT_ACCESS_EN;
+	else
+		val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN;
+
+	write_sysreg(val, cntkctl_el1);
+}
+
 /*
  * Thread switching.
  */
@@ -530,6 +564,7 @@  __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	entry_task_switch(next);
 	uao_thread_switch(next);
 	ssbs_thread_switch(next);
+	erratum_1418040_thread_switch(prev, next);
 
 	/*
 	 * Complete any pending TLB or cache maintenance on this CPU in case