diff mbox series

[v5,1/1] MIPS: Fix idle VS timer enqueue

Message ID 20250228100509.91121-2-marco.crivellari@suse.com (mailing list archive)
State Superseded
Headers show
Series MIPS: Fix idle VS timer enqueue | expand

Commit Message

Marco Crivellari Feb. 28, 2025, 10:05 a.m. UTC
MIPS re-enables interrupts on its idle routine and performs
a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep.

The IRQs firing between the check and the 'wait' instruction may set the
TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs
interrupting __r4k_wait() rollback their return address to the
beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked
again before going back to sleep.

However idle IRQs can also queue timers that may require a tick
reprogramming through a new generic idle loop iteration but those timers
would go unnoticed here because __r4k_wait() only checks
TIF_NEED_RESCHED. It doesn't check for pending timers.

Fix this with fast-forwarding idle IRQs return address to the end of the
idle routine instead of the beginning, so that the generic idle loop
handles both TIF_NEED_RESCHED and pending timers.

CONFIG_CPU_MICROMIPS has been removed along with the nop instructions.
There, NOPs are 2 byte in size, so change the code with 3 _ssnop which are
always 4 byte and remove the ifdef. Added ehb to make sure the hazard
is always cleared.

Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
---
 arch/mips/kernel/genex.S | 42 ++++++++++++++++++++++------------------
 arch/mips/kernel/idle.c  |  1 -
 2 files changed, 23 insertions(+), 20 deletions(-)

Comments

Frederic Weisbecker Feb. 28, 2025, 1:32 p.m. UTC | #1
Le Fri, Feb 28, 2025 at 11:05:09AM +0100, Marco Crivellari a écrit :
> MIPS re-enables interrupts on its idle routine and performs
> a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep.
> 
> The IRQs firing between the check and the 'wait' instruction may set the
> TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs
> interrupting __r4k_wait() rollback their return address to the
> beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked
> again before going back to sleep.
> 
> However idle IRQs can also queue timers that may require a tick
> reprogramming through a new generic idle loop iteration but those timers
> would go unnoticed here because __r4k_wait() only checks
> TIF_NEED_RESCHED. It doesn't check for pending timers.
> 
> Fix this with fast-forwarding idle IRQs return address to the end of the
> idle routine instead of the beginning, so that the generic idle loop
> handles both TIF_NEED_RESCHED and pending timers.
> 
> CONFIG_CPU_MICROMIPS has been removed along with the nop instructions.
> There, NOPs are 2 byte in size, so change the code with 3 _ssnop which are
> always 4 byte and remove the ifdef. Added ehb to make sure the hazard
> is always cleared.
> 
> Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>

Acked-by: Frederic Weisbecker <frederic@kernel.org>
Maciej W. Rozycki March 2, 2025, 12:54 a.m. UTC | #2
On Fri, 28 Feb 2025, Marco Crivellari wrote:

> diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> index a572ce36a24f..474738d9124e 100644
> --- a/arch/mips/kernel/genex.S
> +++ b/arch/mips/kernel/genex.S
> @@ -104,27 +104,30 @@ handle_vcei:
>  
>  	__FINIT
>  
> -	.align	5	/* 32 byte rollback region */
> +	.align	5
>  LEAF(__r4k_wait)
>  	.set	push
>  	.set	noreorder
> -	/* start of rollback region */
> -	LONG_L	t0, TI_FLAGS($28)
> -	nop
> -	andi	t0, _TIF_NEED_RESCHED
> -	bnez	t0, 1f
> -	 nop
> -	nop
> -	nop
> -#ifdef CONFIG_CPU_MICROMIPS
> -	nop
> -	nop
> -	nop
> -	nop
> -#endif
> +	/* start of idle interrupt region */
> +	MFC0	t0, CP0_STATUS
> +	/* Enable Interrput */
                        ^^
 Typo here; also please capitalise sentences and use full stops.

> +	ori 	t0, 0x1f

 No time for a thorough review here as I'm heading for a holiday right 
away, but I can see you still have a coprocessor move hazard here with 
MIPS III hardware.  The incoming value of $t0 to ORI is not what MFC0 has 
obtained.  It's the value from before MFC0.

> +	xori	t0, 0x1e

 And then it's only this XORI that sees the output from MFC0 (though 
there's actually a race between the two competing writes to $t0), so 
effectively you clobber the CP0.Status register...

> +	MTC0	t0, CP0_STATUS

 ... here.  You need to schedule your instructions differently.  But do 
you need `.set noreorder' in the first place?  Can you maybe find a way to 
avoid it, making all the hazard avoidance stuff much easier?

  Maciej
Huacai Chen March 3, 2025, 8:13 a.m. UTC | #3
Hi, Maciej,

On Sun, Mar 2, 2025 at 8:54 AM Maciej W. Rozycki <macro@orcam.me.uk> wrote:
>
> On Fri, 28 Feb 2025, Marco Crivellari wrote:
>
> > diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> > index a572ce36a24f..474738d9124e 100644
> > --- a/arch/mips/kernel/genex.S
> > +++ b/arch/mips/kernel/genex.S
> > @@ -104,27 +104,30 @@ handle_vcei:
> >
> >       __FINIT
> >
> > -     .align  5       /* 32 byte rollback region */
> > +     .align  5
> >  LEAF(__r4k_wait)
> >       .set    push
> >       .set    noreorder
> > -     /* start of rollback region */
> > -     LONG_L  t0, TI_FLAGS($28)
> > -     nop
> > -     andi    t0, _TIF_NEED_RESCHED
> > -     bnez    t0, 1f
> > -      nop
> > -     nop
> > -     nop
> > -#ifdef CONFIG_CPU_MICROMIPS
> > -     nop
> > -     nop
> > -     nop
> > -     nop
> > -#endif
> > +     /* start of idle interrupt region */
> > +     MFC0    t0, CP0_STATUS
> > +     /* Enable Interrput */
>                         ^^
>  Typo here; also please capitalise sentences and use full stops.
>
> > +     ori     t0, 0x1f
>
>  No time for a thorough review here as I'm heading for a holiday right
> away, but I can see you still have a coprocessor move hazard here with
> MIPS III hardware.  The incoming value of $t0 to ORI is not what MFC0 has
> obtained.  It's the value from before MFC0.
If this is a problem, then the current local_irq_enable() is also
wrong for all MIPS III hardware, because this patch uses the same
instruction sequence of local_irq_enable().


Huacai

>
> > +     xori    t0, 0x1e
>
>  And then it's only this XORI that sees the output from MFC0 (though
> there's actually a race between the two competing writes to $t0), so
> effectively you clobber the CP0.Status register...
>
> > +     MTC0    t0, CP0_STATUS
>
>  ... here.  You need to schedule your instructions differently.  But do
> you need `.set noreorder' in the first place?  Can you maybe find a way to
> avoid it, making all the hazard avoidance stuff much easier?
>
>   Maciej
Marco Crivellari March 5, 2025, 2:13 p.m. UTC | #4
Hi,

> +     /* Enable Interrput */
>                        ^^
> Typo here; also please capitalise sentences and use full stops.

Sorry, I probably forgot to run checkpatch this time.

>If this is a problem, then the current local_irq_enable() is also
>wrong for all MIPS III hardware, because this patch uses the same
>instruction sequence of local_irq_enable().

This is the doubt I have indeed.
Quoting from the manual (about M4K):

"The Spacing column shown in Table 2.6 and Table 2.7 indicates the
number of unrelated instructions (such as NOPs or SSNOPs) that,
prior to the capabilities of Release 2, would need to be placed
between the producer and consumer of the hazard in order to ensure
that the effects of the first instruction are seen by the second instruction."

The "Spacing column" value is 3, indeed.

"With the hazard elimination instructions available in Release 2, the
preferred method to eliminate hazards is to place one of the
instructions listed in Table 2.8 between the producer and consumer of the
hazard. Execution hazards can be removed by using the EHB [...]"

Not sure if I'm missing something here.

Thanks (to everyone)!

On Mon, Mar 3, 2025 at 9:13 AM Huacai Chen <chenhuacai@kernel.org> wrote:
>
> Hi, Maciej,
>
> On Sun, Mar 2, 2025 at 8:54 AM Maciej W. Rozycki <macro@orcam.me.uk> wrote:
> >
> > On Fri, 28 Feb 2025, Marco Crivellari wrote:
> >
> > > diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> > > index a572ce36a24f..474738d9124e 100644
> > > --- a/arch/mips/kernel/genex.S
> > > +++ b/arch/mips/kernel/genex.S
> > > @@ -104,27 +104,30 @@ handle_vcei:
> > >
> > >       __FINIT
> > >
> > > -     .align  5       /* 32 byte rollback region */
> > > +     .align  5
> > >  LEAF(__r4k_wait)
> > >       .set    push
> > >       .set    noreorder
> > > -     /* start of rollback region */
> > > -     LONG_L  t0, TI_FLAGS($28)
> > > -     nop
> > > -     andi    t0, _TIF_NEED_RESCHED
> > > -     bnez    t0, 1f
> > > -      nop
> > > -     nop
> > > -     nop
> > > -#ifdef CONFIG_CPU_MICROMIPS
> > > -     nop
> > > -     nop
> > > -     nop
> > > -     nop
> > > -#endif
> > > +     /* start of idle interrupt region */
> > > +     MFC0    t0, CP0_STATUS
> > > +     /* Enable Interrput */
> >                         ^^
> >  Typo here; also please capitalise sentences and use full stops.
> >
> > > +     ori     t0, 0x1f
> >
> >  No time for a thorough review here as I'm heading for a holiday right
> > away, but I can see you still have a coprocessor move hazard here with
> > MIPS III hardware.  The incoming value of $t0 to ORI is not what MFC0 has
> > obtained.  It's the value from before MFC0.
> If this is a problem, then the current local_irq_enable() is also
> wrong for all MIPS III hardware, because this patch uses the same
> instruction sequence of local_irq_enable().
>
>
> Huacai
>
> >
> > > +     xori    t0, 0x1e
> >
> >  And then it's only this XORI that sees the output from MFC0 (though
> > there's actually a race between the two competing writes to $t0), so
> > effectively you clobber the CP0.Status register...
> >
> > > +     MTC0    t0, CP0_STATUS
> >
> >  ... here.  You need to schedule your instructions differently.  But do
> > you need `.set noreorder' in the first place?  Can you maybe find a way to
> > avoid it, making all the hazard avoidance stuff much easier?
> >
> >   Maciej



--

Marco Crivellari

L3 Support Engineer, Technology & Product




marco.crivellari@suse.com
diff mbox series

Patch

diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index a572ce36a24f..474738d9124e 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -104,27 +104,30 @@  handle_vcei:
 
 	__FINIT
 
-	.align	5	/* 32 byte rollback region */
+	.align	5
 LEAF(__r4k_wait)
 	.set	push
 	.set	noreorder
-	/* start of rollback region */
-	LONG_L	t0, TI_FLAGS($28)
-	nop
-	andi	t0, _TIF_NEED_RESCHED
-	bnez	t0, 1f
-	 nop
-	nop
-	nop
-#ifdef CONFIG_CPU_MICROMIPS
-	nop
-	nop
-	nop
-	nop
-#endif
+	/* start of idle interrupt region */
+	MFC0	t0, CP0_STATUS
+	/* Enable Interrput */
+	ori 	t0, 0x1f
+	xori	t0, 0x1e
+	MTC0	t0, CP0_STATUS
+	_ssnop
+	_ssnop
+	_ssnop
+	_ehb
 	.set	MIPS_ISA_ARCH_LEVEL_RAW
+	/*
+	 * If an interrupt lands here, between enabling interrupts above and
+	 * going idle on the next instruction, we must *NOT* go idle since the
+	 * interrupt could have set TIF_NEED_RESCHED or caused a timer to need
+	 * resched. Fall through -- see rollback_handler below -- and have
+	 * the idle loop take care of things.
+	 */
 	wait
-	/* end of rollback region (the region size must be power of two) */
+	/* end of idle interrupt region */
 1:
 	jr	ra
 	 nop
@@ -136,9 +139,10 @@  LEAF(__r4k_wait)
 	.set	push
 	.set	noat
 	MFC0	k0, CP0_EPC
-	PTR_LA	k1, __r4k_wait
-	ori	k0, 0x1f	/* 32 byte rollback region */
-	xori	k0, 0x1f
+	PTR_LA	k1, 1b
+	/* 36 byte idle interrupt region */
+	ori 	k0, 0x1f
+	PTR_ADDIU	k0, 5
 	bne	k0, k1, \handler
 	MTC0	k0, CP0_EPC
 	.set pop
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index 5abc8b7340f8..1f74c0589f7e 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -37,7 +37,6 @@  static void __cpuidle r3081_wait(void)
 
 void __cpuidle r4k_wait(void)
 {
-	raw_local_irq_enable();
 	__r4k_wait();
 	raw_local_irq_disable();
 }