diff mbox series

[v6,1/1] MIPS: Fix idle VS timer enqueue

Message ID 20250315194002.13778-2-marco.crivellari@suse.com (mailing list archive)
State New
Headers show
Series MIPS: Fix idle VS timer enqueue | expand

Commit Message

Marco Crivellari March 15, 2025, 7:40 p.m. UTC
MIPS re-enables interrupts on its idle routine and performs
a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep.

The IRQs firing between the check and the 'wait' instruction may set the
TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs
interrupting __r4k_wait() rollback their return address to the
beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked
again before going back to sleep.

However idle IRQs can also queue timers that may require a tick
reprogramming through a new generic idle loop iteration but those timers
would go unnoticed here because __r4k_wait() only checks
TIF_NEED_RESCHED. It doesn't check for pending timers.

Fix this with fast-forwarding idle IRQs return address to the end of the
idle routine instead of the beginning, so that the generic idle loop
handles both TIF_NEED_RESCHED and pending timers.

CONFIG_CPU_MICROMIPS has been removed along with the nop instructions.
There, NOPs are 2 byte in size, so change the code with 3 _ssnop which are
always 4 byte and remove the ifdef. Added ehb to make sure the hazard
is always cleared.

Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
---
 arch/mips/kernel/genex.S | 42 ++++++++++++++++++++++------------------
 arch/mips/kernel/idle.c  |  1 -
 2 files changed, 23 insertions(+), 20 deletions(-)

Comments

Thomas Bogendoerfer March 19, 2025, 11:07 a.m. UTC | #1
On Sat, Mar 15, 2025 at 08:40:02PM +0100, Marco Crivellari wrote:
> MIPS re-enables interrupts on its idle routine and performs
> a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep.
> 
> The IRQs firing between the check and the 'wait' instruction may set the
> TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs
> interrupting __r4k_wait() rollback their return address to the
> beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked
> again before going back to sleep.
> 
> However idle IRQs can also queue timers that may require a tick
> reprogramming through a new generic idle loop iteration but those timers
> would go unnoticed here because __r4k_wait() only checks
> TIF_NEED_RESCHED. It doesn't check for pending timers.

can you give a commit ID, when this change got introduced ?

> Fix this with fast-forwarding idle IRQs return address to the end of the
> idle routine instead of the beginning, so that the generic idle loop
> handles both TIF_NEED_RESCHED and pending timers.
> 
> CONFIG_CPU_MICROMIPS has been removed along with the nop instructions.
> There, NOPs are 2 byte in size, so change the code with 3 _ssnop which are
> always 4 byte and remove the ifdef. Added ehb to make sure the hazard
> is always cleared.
> 
> Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
> ---
>  arch/mips/kernel/genex.S | 42 ++++++++++++++++++++++------------------
>  arch/mips/kernel/idle.c  |  1 -
>  2 files changed, 23 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> index a572ce36a24f..4e012421d00f 100644
> --- a/arch/mips/kernel/genex.S
> +++ b/arch/mips/kernel/genex.S
> @@ -104,27 +104,30 @@ handle_vcei:
>  
>  	__FINIT
>  
> -	.align	5	/* 32 byte rollback region */
> +	.align	5
>  LEAF(__r4k_wait)
>  	.set	push
>  	.set	noreorder
> -	/* start of rollback region */
> -	LONG_L	t0, TI_FLAGS($28)
> -	nop
> -	andi	t0, _TIF_NEED_RESCHED
> -	bnez	t0, 1f
> -	 nop
> -	nop
> -	nop
> -#ifdef CONFIG_CPU_MICROMIPS
> -	nop
> -	nop
> -	nop
> -	nop
> -#endif
> +	/* Start of idle interrupt region. */
> +	MFC0	t0, CP0_STATUS
> +	/* Enable interrupt. */
> +	ori 	t0, 0x1f
> +	xori	t0, 0x1e
> +	MTC0	t0, CP0_STATUS
> +	_ssnop
> +	_ssnop
> +	_ssnop
> +	_ehb
>  	.set	MIPS_ISA_ARCH_LEVEL_RAW
> +	/*
> +	 * If an interrupt lands here, between enabling interrupts above and
> +	 * going idle on the next instruction, we must *NOT* go idle since the
> +	 * interrupt could have set TIF_NEED_RESCHED or caused a timer to need
> +	 * resched. Fall through -- see rollback_handler below -- and have
> +	 * the idle loop take care of things.
> +	 */
>  	wait
> -	/* end of rollback region (the region size must be power of two) */
> +	/* End of idle interrupt region. */
>  1:

please give this label a name for example __r4k_wait_exit and do a
runtime check that it really has 36 bytes offset to __r4k_wait

>  	jr	ra
>  	 nop
> @@ -136,9 +139,10 @@ LEAF(__r4k_wait)
>  	.set	push
>  	.set	noat
>  	MFC0	k0, CP0_EPC
> -	PTR_LA	k1, __r4k_wait
> -	ori	k0, 0x1f	/* 32 byte rollback region */
> -	xori	k0, 0x1f
> +	PTR_LA	k1, 1b

this is part of a macro, so I don't think using a commonly used label name
is a safe thing, that's why I want a named label here.

> +	/* 36 byte idle interrupt region. */
> +	ori 	k0, 0x1f
> +	PTR_ADDIU	k0, 5
>  	bne	k0, k1, \handler
>  	MTC0	k0, CP0_EPC
>  	.set pop
Frederic Weisbecker March 19, 2025, 2:06 p.m. UTC | #2
Le Wed, Mar 19, 2025 at 12:07:07PM +0100, Thomas Bogendoerfer a écrit :
> On Sat, Mar 15, 2025 at 08:40:02PM +0100, Marco Crivellari wrote:
> > MIPS re-enables interrupts on its idle routine and performs
> > a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep.
> > 
> > The IRQs firing between the check and the 'wait' instruction may set the
> > TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs
> > interrupting __r4k_wait() rollback their return address to the
> > beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked
> > again before going back to sleep.
> > 
> > However idle IRQs can also queue timers that may require a tick
> > reprogramming through a new generic idle loop iteration but those timers
> > would go unnoticed here because __r4k_wait() only checks
> > TIF_NEED_RESCHED. It doesn't check for pending timers.
> 
> can you give a commit ID, when this change got introduced ?

That would be:

     Fixes: c65a5480ff29 ("[MIPS] Fix potential latency problem due to non-atomic cpu_wait.")
Thomas Bogendoerfer March 19, 2025, 2:42 p.m. UTC | #3
On Wed, Mar 19, 2025 at 03:06:12PM +0100, Frederic Weisbecker wrote:
> Le Wed, Mar 19, 2025 at 12:07:07PM +0100, Thomas Bogendoerfer a écrit :
> > On Sat, Mar 15, 2025 at 08:40:02PM +0100, Marco Crivellari wrote:
> > > MIPS re-enables interrupts on its idle routine and performs
> > > a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep.
> > > 
> > > The IRQs firing between the check and the 'wait' instruction may set the
> > > TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs
> > > interrupting __r4k_wait() rollback their return address to the
> > > beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked
> > > again before going back to sleep.
> > > 
> > > However idle IRQs can also queue timers that may require a tick
> > > reprogramming through a new generic idle loop iteration but those timers
> > > would go unnoticed here because __r4k_wait() only checks
> > > TIF_NEED_RESCHED. It doesn't check for pending timers.
> > 
> > can you give a commit ID, when this change got introduced ?
> 
> That would be:
> 
>      Fixes: c65a5480ff29 ("[MIPS] Fix potential latency problem due to non-atomic cpu_wait.")

hmm, so even then checking TIF_NEED_RESCHED wasn't enough  (we are talking
about 2.6.27) ?

Thomas.
Frederic Weisbecker March 19, 2025, 2:43 p.m. UTC | #4
Le Wed, Mar 19, 2025 at 12:07:07PM +0100, Thomas Bogendoerfer a écrit :
> On Sat, Mar 15, 2025 at 08:40:02PM +0100, Marco Crivellari wrote:
> > MIPS re-enables interrupts on its idle routine and performs
> > a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep.
> > 
> > The IRQs firing between the check and the 'wait' instruction may set the
> > TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs
> > interrupting __r4k_wait() rollback their return address to the
> > beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked
> > again before going back to sleep.
> > 
> > However idle IRQs can also queue timers that may require a tick
> > reprogramming through a new generic idle loop iteration but those timers
> > would go unnoticed here because __r4k_wait() only checks
> > TIF_NEED_RESCHED. It doesn't check for pending timers.
> 
> can you give a commit ID, when this change got introduced ?
> 
> > Fix this with fast-forwarding idle IRQs return address to the end of the
> > idle routine instead of the beginning, so that the generic idle loop
> > handles both TIF_NEED_RESCHED and pending timers.
> > 
> > CONFIG_CPU_MICROMIPS has been removed along with the nop instructions.
> > There, NOPs are 2 byte in size, so change the code with 3 _ssnop which are
> > always 4 byte and remove the ifdef. Added ehb to make sure the hazard
> > is always cleared.
> > 
> > Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
> > ---
> >  arch/mips/kernel/genex.S | 42 ++++++++++++++++++++++------------------
> >  arch/mips/kernel/idle.c  |  1 -
> >  2 files changed, 23 insertions(+), 20 deletions(-)
> > 
> > diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> > index a572ce36a24f..4e012421d00f 100644
> > --- a/arch/mips/kernel/genex.S
> > +++ b/arch/mips/kernel/genex.S
> > @@ -104,27 +104,30 @@ handle_vcei:
> >  
> >  	__FINIT
> >  
> > -	.align	5	/* 32 byte rollback region */
> > +	.align	5
> >  LEAF(__r4k_wait)
> >  	.set	push
> >  	.set	noreorder
> > -	/* start of rollback region */
> > -	LONG_L	t0, TI_FLAGS($28)
> > -	nop
> > -	andi	t0, _TIF_NEED_RESCHED
> > -	bnez	t0, 1f
> > -	 nop
> > -	nop
> > -	nop
> > -#ifdef CONFIG_CPU_MICROMIPS
> > -	nop
> > -	nop
> > -	nop
> > -	nop
> > -#endif
> > +	/* Start of idle interrupt region. */
> > +	MFC0	t0, CP0_STATUS
> > +	/* Enable interrupt. */
> > +	ori 	t0, 0x1f
> > +	xori	t0, 0x1e
> > +	MTC0	t0, CP0_STATUS
> > +	_ssnop
> > +	_ssnop
> > +	_ssnop
> > +	_ehb
> >  	.set	MIPS_ISA_ARCH_LEVEL_RAW
> > +	/*
> > +	 * If an interrupt lands here, between enabling interrupts above and
> > +	 * going idle on the next instruction, we must *NOT* go idle since the
> > +	 * interrupt could have set TIF_NEED_RESCHED or caused a timer to need
> > +	 * resched. Fall through -- see rollback_handler below -- and have
> > +	 * the idle loop take care of things.
> > +	 */
> >  	wait
> > -	/* end of rollback region (the region size must be power of two) */
> > +	/* End of idle interrupt region. */
> >  1:
> 
> please give this label a name for example __r4k_wait_exit and do a
> runtime check that it really has 36 bytes offset to __r4k_wait

Where would be the best place for that?

arch/mips/kernel/setup.c:setup_arch() maybe?

Thanks.
Frederic Weisbecker March 19, 2025, 3:01 p.m. UTC | #5
Le Wed, Mar 19, 2025 at 03:42:16PM +0100, Thomas Bogendoerfer a écrit :
> On Wed, Mar 19, 2025 at 03:06:12PM +0100, Frederic Weisbecker wrote:
> > Le Wed, Mar 19, 2025 at 12:07:07PM +0100, Thomas Bogendoerfer a écrit :
> > > On Sat, Mar 15, 2025 at 08:40:02PM +0100, Marco Crivellari wrote:
> > > > MIPS re-enables interrupts on its idle routine and performs
> > > > a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep.
> > > > 
> > > > The IRQs firing between the check and the 'wait' instruction may set the
> > > > TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs
> > > > interrupting __r4k_wait() rollback their return address to the
> > > > beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked
> > > > again before going back to sleep.
> > > > 
> > > > However idle IRQs can also queue timers that may require a tick
> > > > reprogramming through a new generic idle loop iteration but those timers
> > > > would go unnoticed here because __r4k_wait() only checks
> > > > TIF_NEED_RESCHED. It doesn't check for pending timers.
> > > 
> > > can you give a commit ID, when this change got introduced ?
> > 
> > That would be:
> > 
> >      Fixes: c65a5480ff29 ("[MIPS] Fix potential latency problem due to non-atomic cpu_wait.")
> 
> hmm, so even then checking TIF_NEED_RESCHED wasn't enough  (we are talking
> about 2.6.27) ?

Right, the real issue dates even back earlier because the aforementioned fix only
handled the TIF_NEED_RESCHED part of the problem but the timer part of the problem
was there before.

However nohz was introduced only earlier the same year (2.6.21). We probably
don't need to dig much further...

> 
> Thomas.
> 
> -- 
> Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> good idea.                                                [ RFC1925, 2.3 ]
Thomas Bogendoerfer March 19, 2025, 3:31 p.m. UTC | #6
On Wed, Mar 19, 2025 at 03:43:13PM +0100, Frederic Weisbecker wrote:
> > >  	.set	MIPS_ISA_ARCH_LEVEL_RAW
> > > +	/*
> > > +	 * If an interrupt lands here, between enabling interrupts above and
> > > +	 * going idle on the next instruction, we must *NOT* go idle since the
> > > +	 * interrupt could have set TIF_NEED_RESCHED or caused a timer to need
> > > +	 * resched. Fall through -- see rollback_handler below -- and have
> > > +	 * the idle loop take care of things.
> > > +	 */
> > >  	wait
> > > -	/* end of rollback region (the region size must be power of two) */
> > > +	/* End of idle interrupt region. */
> > >  1:
> > 
> > please give this label a name for example __r4k_wait_exit and do a
> > runtime check that it really has 36 bytes offset to __r4k_wait
> 
> Where would be the best place for that?
> 
> arch/mips/kernel/setup.c:setup_arch() maybe?

scratch runtime check, a compile check is what I wanted to write...

something like

.if ((__r4k_wait_exit - __r4k_wait) != 36)
.err
.endif

Thomas.
Marco Crivellari March 20, 2025, 8:44 a.m. UTC | #7
Hi,

yesterday I made this changes:

@@ -128,7 +128,11 @@ LEAF(__r4k_wait)
        */
       wait
       /* End of idle interrupt region. */
-1:
+SYM_INNER_LABEL(__r4k_wait_exit, SYM_L_LOCAL)
+       /* Check idle interrupt region size. */
+       .ifne   __r4k_wait_exit - __r4k_wait - 36
+       .error  "Idle interrupt region size mismatch: expected 36 bytes."
+       .endif
       jr      ra
        nop
       .set    pop
@@ -139,7 +143,7 @@ LEAF(__r4k_wait)
       .set    push
       .set    noat
       MFC0    k0, CP0_EPC
-       PTR_LA  k1, 1b
+       PTR_LA  k1, __r4k_wait_exit
       /* 36 byte idle interrupt region. */
       ori     k0, 0x1f
       PTR_ADDIU       k0, 5


Would it be ok to have the check done in this way?

Thanks


On Wed, Mar 19, 2025 at 11:09 PM Thomas Bogendoerfer
<tsbogend@alpha.franken.de> wrote:
>
> On Wed, Mar 19, 2025 at 03:43:13PM +0100, Frederic Weisbecker wrote:
> > > >   .set    MIPS_ISA_ARCH_LEVEL_RAW
> > > > + /*
> > > > +  * If an interrupt lands here, between enabling interrupts above and
> > > > +  * going idle on the next instruction, we must *NOT* go idle since the
> > > > +  * interrupt could have set TIF_NEED_RESCHED or caused a timer to need
> > > > +  * resched. Fall through -- see rollback_handler below -- and have
> > > > +  * the idle loop take care of things.
> > > > +  */
> > > >   wait
> > > > - /* end of rollback region (the region size must be power of two) */
> > > > + /* End of idle interrupt region. */
> > > >  1:
> > >
> > > please give this label a name for example __r4k_wait_exit and do a
> > > runtime check that it really has 36 bytes offset to __r4k_wait
> >
> > Where would be the best place for that?
> >
> > arch/mips/kernel/setup.c:setup_arch() maybe?
>
> scratch runtime check, a compile check is what I wanted to write...
>
> something like
>
> .if ((__r4k_wait_exit - __r4k_wait) != 36)
> .err
> .endif
>
> Thomas.
>
> --
> Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> good idea.                                                [ RFC1925, 2.3 ]
Thomas Bogendoerfer March 21, 2025, 9:38 a.m. UTC | #8
On Thu, Mar 20, 2025 at 09:44:23AM +0100, Marco Crivellari wrote:
> Hi,
> 
> yesterday I made this changes:
> 
> @@ -128,7 +128,11 @@ LEAF(__r4k_wait)
>         */
>        wait
>        /* End of idle interrupt region. */
> -1:
> +SYM_INNER_LABEL(__r4k_wait_exit, SYM_L_LOCAL)

do we really need that for a symbol local to that file ?

> +       /* Check idle interrupt region size. */
> +       .ifne   __r4k_wait_exit - __r4k_wait - 36

I have to say, that I prefer my .if statement, since this clearly spells out
the comparision in the expression. Is there a reason for your version ?

Thomas.
Marco Crivellari March 21, 2025, 10:44 a.m. UTC | #9
On Fri, Mar 21, 2025 at 10:38 AM Thomas Bogendoerfer
<tsbogend@alpha.franken.de> wrote:
> do we really need that for a symbol local to that file ?

Not really, I can use the label directly.

On Fri, Mar 21, 2025 at 10:38 AM Thomas Bogendoerfer
<tsbogend@alpha.franken.de> wrote:
>
> > +       /* Check idle interrupt region size. */
> > +       .ifne   __r4k_wait_exit - __r4k_wait - 36
>
> I have to say, that I prefer my .if statement, since this clearly spells out
> the comparision in the expression. Is there a reason for your version ?

Sure, let's keep your version.
Can we use the "error" message above? (""Idle interrupt region size
mismatch: expected 36 bytes."").
Or at least something similar, I think it is easier to understand the
reason for the error. What do you think?

Thanks!
Thomas Bogendoerfer March 21, 2025, 11:17 a.m. UTC | #10
On Fri, Mar 21, 2025 at 11:44:10AM +0100, Marco Crivellari wrote:
> On Fri, Mar 21, 2025 at 10:38 AM Thomas Bogendoerfer
> <tsbogend@alpha.franken.de> wrote:
> > do we really need that for a symbol local to that file ?
> 
> Not really, I can use the label directly.
> 
> On Fri, Mar 21, 2025 at 10:38 AM Thomas Bogendoerfer
> <tsbogend@alpha.franken.de> wrote:
> >
> > > +       /* Check idle interrupt region size. */
> > > +       .ifne   __r4k_wait_exit - __r4k_wait - 36
> >
> > I have to say, that I prefer my .if statement, since this clearly spells out
> > the comparision in the expression. Is there a reason for your version ?
> 
> Sure, let's keep your version.
> Can we use the "error" message above? (""Idle interrupt region size
> mismatch: expected 36 bytes."").
> Or at least something similar, I think it is easier to understand the
> reason for the error. What do you think?

the error message works for me :-)

Thomas.
Maciej W. Rozycki March 21, 2025, 11:53 a.m. UTC | #11
On Sat, 15 Mar 2025, Marco Crivellari wrote:

> diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> index a572ce36a24f..4e012421d00f 100644
> --- a/arch/mips/kernel/genex.S
> +++ b/arch/mips/kernel/genex.S
> @@ -104,27 +104,30 @@ handle_vcei:
>  
>  	__FINIT
>  
> -	.align	5	/* 32 byte rollback region */
> +	.align	5
>  LEAF(__r4k_wait)
>  	.set	push
>  	.set	noreorder
> -	/* start of rollback region */
> -	LONG_L	t0, TI_FLAGS($28)
> -	nop
> -	andi	t0, _TIF_NEED_RESCHED
> -	bnez	t0, 1f
> -	 nop
> -	nop
> -	nop
> -#ifdef CONFIG_CPU_MICROMIPS
> -	nop
> -	nop
> -	nop
> -	nop
> -#endif
> +	/* Start of idle interrupt region. */
> +	MFC0	t0, CP0_STATUS
> +	/* Enable interrupt. */
> +	ori 	t0, 0x1f

 This instruction sequence still suffers from the coprocessor move delay 
hazard.  How many times do I need to request to get it fixed (counting 
three so far)?

  Maciej
Maciej W. Rozycki March 21, 2025, 11:55 a.m. UTC | #12
On Fri, 21 Mar 2025, Thomas Bogendoerfer wrote:

> > > > +       /* Check idle interrupt region size. */
> > > > +       .ifne   __r4k_wait_exit - __r4k_wait - 36
> > >
> > > I have to say, that I prefer my .if statement, since this clearly spells out
> > > the comparision in the expression. Is there a reason for your version ?
> > 
> > Sure, let's keep your version.
> > Can we use the "error" message above? (""Idle interrupt region size
> > mismatch: expected 36 bytes."").
> > Or at least something similar, I think it is easier to understand the
> > reason for the error. What do you think?
> 
> the error message works for me :-)

 I'm glad seeing this being sorted properly, thank you!

  Maciej
Marco Crivellari March 21, 2025, 4:15 p.m. UTC | #13
Hi Maciej,

On Fri, Mar 21, 2025 at 12:53 PM Maciej W. Rozycki <macro@orcam.me.uk> wrote:
>  This instruction sequence still suffers from the coprocessor move delay
> hazard.  How many times do I need to request to get it fixed (counting
> three so far)?

Can I have more details about this?

I can see it is the same code present also in local_irq_enable()
(arch_local_irq_enable()),
and from the manual I've seen:

"The Spacing column shown in Table 2.6 and Table 2.7 indicates the
number of unrelated instructions (such as NOPs or SSNOPs) that,
prior to the capabilities of Release 2, would need to be placed
between the producer and consumer of the hazard in order to ensure
that the effects of the first instruction are seen by the second instruction."

The "Spacing column" value is 3, indeed.

"With the hazard elimination instructions available in Release 2, the
preferred method to eliminate hazards is to place one of the
instructions listed in Table 2.8 between the producer and consumer of the
hazard. Execution hazards can be removed by using the EHB [...]"

What am I missing?

Thanks in advance


--

Marco Crivellari

L3 Support Engineer, Technology & Product




marco.crivellari@suse.com
Maciej W. Rozycki March 21, 2025, 8:11 p.m. UTC | #14
On Fri, 21 Mar 2025, Marco Crivellari wrote:

> >  This instruction sequence still suffers from the coprocessor move delay
> > hazard.  How many times do I need to request to get it fixed (counting
> > three so far)?
> 
> Can I have more details about this?
> 
> I can see it is the same code present also in local_irq_enable()
> (arch_local_irq_enable()),

 Unlike `__r4k_wait' that code is not in a `.set noreorder' region and 
the assembler will therefore resolve the hazard by inserting a NOP where 
required by the architecture level requested (with `-march=...', etc.).  
Try compiling that function for a MIPS III configuration such as 
decstation_r4k_defconfig or just by hand with `-march=mips3' and see 
what machine code is produced.

> and from the manual I've seen:
> 
> "The Spacing column shown in Table 2.6 and Table 2.7 indicates the
> number of unrelated instructions (such as NOPs or SSNOPs) that,
> prior to the capabilities of Release 2, would need to be placed
> between the producer and consumer of the hazard in order to ensure
> that the effects of the first instruction are seen by the second instruction."
> 
> The "Spacing column" value is 3, indeed.
> 
> "With the hazard elimination instructions available in Release 2, the
> preferred method to eliminate hazards is to place one of the
> instructions listed in Table 2.8 between the producer and consumer of the
> hazard. Execution hazards can be removed by using the EHB [...]"

 Whatever manual you quote it refers to MIPS Release 2, which is only 
dated 2003 (following Release 1 from 2001), but `__r4k_wait' has to 
continue handling older architecture revisions going back to MIPS III 
ISA from 1991.  We also support MIPS I ISA from 1985 which has further 
instruction scheduling requirements, but `__r4k_wait' is for newer 
processors only, because older ones had no WAIT instruction, so we can 
ignore them (but note that the MIPS I load delay slot is regardless 
observed in current code in the form of a NOP inserted after LONG_L).

> What am I missing?

 This is common MIPS knowledge really, encoded in the GNU toolchain and 
especially GAS since forever.  While I can't cite a canonical reference, 
the hazard is listed e.g. in Table 13.1 "Instructions with scheduling 
implications" and Table 13.3 "R4xxx/R5000 Coprocessor 0 Hazards" from 
"IDT MIPS Microprocessor Family Software Reference Manual," Version 2.0, 
from October 1996.  I do believe the document is available online.

 I'm fairly sure the hazard is also listed there in Dominic Sweetman's 
"See MIPS Run Linux," but I don't have my copy handy right now.

 Best is to avoid using a `.set noreorder' region in the first place.  
But is it really needed here?  Does the rollback area have to be of a 
hardcoded size rather than one calculated by the assembler based on 
actual machine code produced?  It seems to me having it calculated would 
reduce complexity here and let us use the EI instruction where available 
as well.

 HTH,

  Maciej
Frederic Weisbecker March 21, 2025, 9:09 p.m. UTC | #15
Le Fri, Mar 21, 2025 at 11:53:54AM +0000, Maciej W. Rozycki a écrit :
> On Sat, 15 Mar 2025, Marco Crivellari wrote:
> 
> > diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> > index a572ce36a24f..4e012421d00f 100644
> > --- a/arch/mips/kernel/genex.S
> > +++ b/arch/mips/kernel/genex.S
> > @@ -104,27 +104,30 @@ handle_vcei:
> >  
> >  	__FINIT
> >  
> > -	.align	5	/* 32 byte rollback region */
> > +	.align	5
> >  LEAF(__r4k_wait)
> >  	.set	push
> >  	.set	noreorder
> > -	/* start of rollback region */
> > -	LONG_L	t0, TI_FLAGS($28)
> > -	nop
> > -	andi	t0, _TIF_NEED_RESCHED
> > -	bnez	t0, 1f
> > -	 nop
> > -	nop
> > -	nop
> > -#ifdef CONFIG_CPU_MICROMIPS
> > -	nop
> > -	nop
> > -	nop
> > -	nop
> > -#endif
> > +	/* Start of idle interrupt region. */
> > +	MFC0	t0, CP0_STATUS
> > +	/* Enable interrupt. */
> > +	ori 	t0, 0x1f
> 
>  This instruction sequence still suffers from the coprocessor move delay 
> hazard.  How many times do I need to request to get it fixed (counting 
> three so far)?

This is because your request had follow-ups from Huacai and Marco that
were left unanswered:

     https://lore.kernel.org/all/CAAhV-H5ptAzHTPAr1bxrgByZrnFmMK8zJ68Z++RwC=NHWjqZmw@mail.gmail.com/
     https://lore.kernel.org/all/CAAofZF4HAczyRmuRe-JmQ2wcZatevLwGTOMLf1V1okGbj7q5Wg@mail.gmail.com/

We have detected this longstanding architecture specific timer handling bug on
loongson and MIPS and we could have just dropped a report and let you guys deal with
it. Instead we decided to spend time ourselves (especially Marco) working on
fixes for these architectures we don't run and which we are not familiar with,
alongway taking reviews seriously and patiently re-iterating accordingly.

So please be gentle with us.

Thanks.
Maciej W. Rozycki March 22, 2025, 4:08 p.m. UTC | #16
On Fri, 21 Mar 2025, Frederic Weisbecker wrote:

> > > diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> > > index a572ce36a24f..4e012421d00f 100644
> > > --- a/arch/mips/kernel/genex.S
> > > +++ b/arch/mips/kernel/genex.S
> > > @@ -104,27 +104,30 @@ handle_vcei:
> > >  
> > >  	__FINIT
> > >  
> > > -	.align	5	/* 32 byte rollback region */
> > > +	.align	5
> > >  LEAF(__r4k_wait)
> > >  	.set	push
> > >  	.set	noreorder
> > > -	/* start of rollback region */
> > > -	LONG_L	t0, TI_FLAGS($28)
> > > -	nop
> > > -	andi	t0, _TIF_NEED_RESCHED
> > > -	bnez	t0, 1f
> > > -	 nop
> > > -	nop
> > > -	nop
> > > -#ifdef CONFIG_CPU_MICROMIPS
> > > -	nop
> > > -	nop
> > > -	nop
> > > -	nop
> > > -#endif
> > > +	/* Start of idle interrupt region. */
> > > +	MFC0	t0, CP0_STATUS
> > > +	/* Enable interrupt. */
> > > +	ori 	t0, 0x1f
> > 
> >  This instruction sequence still suffers from the coprocessor move delay 
> > hazard.  How many times do I need to request to get it fixed (counting 
> > three so far)?
> 
> This is because your request had follow-ups from Huacai and Marco that
> were left unanswered:
> 
>      https://lore.kernel.org/all/CAAhV-H5ptAzHTPAr1bxrgByZrnFmMK8zJ68Z++RwC=NHWjqZmw@mail.gmail.com/

 The conclusion made there is however wrong: `local_irq_enable' code 
plays no tricks with instruction scheduling and lets the toolchain 
resolve any pipeline hazards automatically, while `__r4k_wait' arranges 
for instructions to be scheduled by hand and any hazards resolved by the 
human writer of the code.  There's even explicit `.set reorder' in 
`local_irq_enable', which is redundant, because it's the default mode 
for inline assembly.

 And I can't emphasise it enough: manual instruction scheduling is tough
and ought to be restricted to cases where there is no other way really, 
such as for placing an instruction in a branch delay slot where there is 
a data antidependency between the branch and the delay-slot instruction.  
Yet this approach has often been used by code authors for other reasons 
(or I daresay no reason at all), leaving it up to the maintainers to 
keep the code working in the changing conditions while the submitter has 
long gone.  I converted some of such code in the past, but it also takes 
time and effort that does not come for free.

>      https://lore.kernel.org/all/CAAofZF4HAczyRmuRe-JmQ2wcZatevLwGTOMLf1V1okGbj7q5Wg@mail.gmail.com/

 I missed that one, sorry.  A ping would have helped, and I never have 
an issue with being pinged.  I do hope I have now addressed that concern 
with my other reply.

 Thank you for the pointers.

> We have detected this longstanding architecture specific timer handling bug on
> loongson and MIPS and we could have just dropped a report and let you guys deal with
> it. Instead we decided to spend time ourselves (especially Marco) working on
> fixes for these architectures we don't run and which we are not familiar with,
> alongway taking reviews seriously and patiently re-iterating accordingly.

 Thank you for your effort, really appreciated.  Any fixes need to be 
technically correct however, it makes no sense to get one bug replaced 
with another one.  We've got enough technical debt accumulated already 
with a platform that no longer has any commercial support and relies 
solely on voluteers keeping it alive in their limited spare time.  I do 
have a long list of outstanding issues to address and ever so little 
time to take care of them, with hardware problems additionally kicking 
in and distracting every so often too.

> So please be gentle with us.

 As always, but also emphatic on this occasion.  We're in the same boat 
really, striving against the lack of resources and issues piling, and 
now we've made some progress.  Thank you for your understanding.

  Maciej
Frederic Weisbecker March 25, 2025, 1:31 p.m. UTC | #17
Le Sat, Mar 22, 2025 at 04:08:31PM +0000, Maciej W. Rozycki a écrit :
> On Fri, 21 Mar 2025, Frederic Weisbecker wrote:
> 
> > > > diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> > > > index a572ce36a24f..4e012421d00f 100644
> > > > --- a/arch/mips/kernel/genex.S
> > > > +++ b/arch/mips/kernel/genex.S
> > > > @@ -104,27 +104,30 @@ handle_vcei:
> > > >  
> > > >  	__FINIT
> > > >  
> > > > -	.align	5	/* 32 byte rollback region */
> > > > +	.align	5
> > > >  LEAF(__r4k_wait)
> > > >  	.set	push
> > > >  	.set	noreorder
> > > > -	/* start of rollback region */
> > > > -	LONG_L	t0, TI_FLAGS($28)
> > > > -	nop
> > > > -	andi	t0, _TIF_NEED_RESCHED
> > > > -	bnez	t0, 1f
> > > > -	 nop
> > > > -	nop
> > > > -	nop
> > > > -#ifdef CONFIG_CPU_MICROMIPS
> > > > -	nop
> > > > -	nop
> > > > -	nop
> > > > -	nop
> > > > -#endif
> > > > +	/* Start of idle interrupt region. */
> > > > +	MFC0	t0, CP0_STATUS
> > > > +	/* Enable interrupt. */
> > > > +	ori 	t0, 0x1f
> > > 
> > >  This instruction sequence still suffers from the coprocessor move delay 
> > > hazard.  How many times do I need to request to get it fixed (counting 
> > > three so far)?
> > 
> > This is because your request had follow-ups from Huacai and Marco that
> > were left unanswered:
> > 
> >      https://lore.kernel.org/all/CAAhV-H5ptAzHTPAr1bxrgByZrnFmMK8zJ68Z++RwC=NHWjqZmw@mail.gmail.com/
> 
>  The conclusion made there is however wrong: `local_irq_enable' code 
> plays no tricks with instruction scheduling and lets the toolchain 
> resolve any pipeline hazards automatically, while `__r4k_wait' arranges 
> for instructions to be scheduled by hand and any hazards resolved by the 
> human writer of the code.  There's even explicit `.set reorder' in 
> `local_irq_enable', which is redundant, because it's the default mode 
> for inline assembly.
> 
>  And I can't emphasise it enough: manual instruction scheduling is tough
> and ought to be restricted to cases where there is no other way really, 
> such as for placing an instruction in a branch delay slot where there is 
> a data antidependency between the branch and the delay-slot instruction.  
> Yet this approach has often been used by code authors for other reasons 
> (or I daresay no reason at all), leaving it up to the maintainers to 
> keep the code working in the changing conditions while the submitter has 
> long gone.  I converted some of such code in the past, but it also takes 
> time and effort that does not come for free.
> 
> >      https://lore.kernel.org/all/CAAofZF4HAczyRmuRe-JmQ2wcZatevLwGTOMLf1V1okGbj7q5Wg@mail.gmail.com/
> 
>  I missed that one, sorry.  A ping would have helped, and I never have 
> an issue with being pinged.  I do hope I have now addressed that concern 
> with my other reply.

Hopefully, I'll let Marco follow-up on that as I must confess I'm lost
with these details. But your help has been very valuable!

> 
> > We have detected this longstanding architecture specific timer handling bug on
> > loongson and MIPS and we could have just dropped a report and let you guys deal with
> > it. Instead we decided to spend time ourselves (especially Marco) working on
> > fixes for these architectures we don't run and which we are not familiar with,
> > alongway taking reviews seriously and patiently re-iterating accordingly.
> 
>  Thank you for your effort, really appreciated.  Any fixes need to be 
> technically correct however, it makes no sense to get one bug replaced 
> with another one.  We've got enough technical debt accumulated already 
> with a platform that no longer has any commercial support and relies 
> solely on voluteers keeping it alive in their limited spare time.  I do 
> have a long list of outstanding issues to address and ever so little 
> time to take care of them, with hardware problems additionally kicking 
> in and distracting every so often too.

Yeah I totally understand that!

> 
> > So please be gentle with us.
> 
>  As always, but also emphatic on this occasion.  We're in the same boat 
> really, striving against the lack of resources and issues piling, and 
> now we've made some progress.  Thank you for your understanding.

Heh I know... Thanks a lot!

> 
>   Maciej
Marco Crivellari March 25, 2025, 2:08 p.m. UTC | #18
Hi Maciej,

Thanks a lot for all the information.

>  Unlike `__r4k_wait' that code is not in a `.set noreorder' region and
> the assembler will therefore resolve the hazard by inserting a NOP where
> required by the architecture level requested (with `-march=...', etc.).
> Try compiling that function for a MIPS III configuration such as
> decstation_r4k_defconfig or just by hand with `-march=mips3' and see
> what machine code is produced.

I tried with the configuration you suggested, and indeed I can see a NOP.

>  Whatever manual you quote it refers to MIPS Release 2, which is only
> dated 2003

About the MIPS manual, anyhow, it is "MIPS32 M4 Processor Core" (year 2008).
Maybe I've also picked the wrong manual.

I've also found the manual you mentioned online, thanks.

>  Best is to avoid using a `.set noreorder' region in the first place.
> But is it really needed here?  Does the rollback area have to be of a
> hardcoded size rather than one calculated by the assembler based on
> actual machine code produced?  It seems to me having it calculated would
> reduce complexity here and let us use the EI instruction where available
> as well.

Well, considering the complexity and how the code looks fragile even with
a small change, yes, that's likely better to avoid noreorder.

I think I'm going to need some guidance here.
Please, correct me where something is wrong.

1)
When you say "let us use the EI instruction where available" are you
referring to do
something like below?

#if defined(CONFIG_CPU_HAS_DIEI)
ei
#else
MFC0    t0, CP0_STATUS
ori     t0, 0x1f
xori    t0, 0x1e
MTC0    t0, CP0_STATUS
#endif

2)
Removing "noreorder" would let the compiler add "nops" where they are needed.
But that still means the 3 ssnop and ehb are still needed, right?

My subsequent dumb question is: there is the guarantee that the
compiler will not
reorder / change something we did?
This question also came after reading the manual you quoted (paragraph
"Coprocessor Hazards"):

"For example, after an mtc0 to the Status register which changes an
interrupt mask bit,
there will be two further instructions before the interrupt is
actually enabled or disabled.
[...]
To cope with these situations usually requires the programmer to take explicit
action to prevent the assembler from scheduling inappropriate
instructions after a
dangerous mtc0. This is done by using the .set noreorder directive,
discussed below"

3)
Considering the size is determined by the compiler, the check about
the idle interrupt
size region should not be needed, correct?

4)
ori and PTR_ADDIU should be removed of course from the rollback handler macro.
Can I have some hints about the needed change?
Using QEMU is always working, so I'm not sure if what I will change is
also correct.


Many thanks in advance, also for your time!




On Fri, Mar 21, 2025 at 9:11 PM Maciej W. Rozycki <macro@orcam.me.uk> wrote:
>
> On Fri, 21 Mar 2025, Marco Crivellari wrote:
>
> > >  This instruction sequence still suffers from the coprocessor move delay
> > > hazard.  How many times do I need to request to get it fixed (counting
> > > three so far)?
> >
> > Can I have more details about this?
> >
> > I can see it is the same code present also in local_irq_enable()
> > (arch_local_irq_enable()),
>
>  Unlike `__r4k_wait' that code is not in a `.set noreorder' region and
> the assembler will therefore resolve the hazard by inserting a NOP where
> required by the architecture level requested (with `-march=...', etc.).
> Try compiling that function for a MIPS III configuration such as
> decstation_r4k_defconfig or just by hand with `-march=mips3' and see
> what machine code is produced.
>
> > and from the manual I've seen:
> >
> > "The Spacing column shown in Table 2.6 and Table 2.7 indicates the
> > number of unrelated instructions (such as NOPs or SSNOPs) that,
> > prior to the capabilities of Release 2, would need to be placed
> > between the producer and consumer of the hazard in order to ensure
> > that the effects of the first instruction are seen by the second instruction."
> >
> > The "Spacing column" value is 3, indeed.
> >
> > "With the hazard elimination instructions available in Release 2, the
> > preferred method to eliminate hazards is to place one of the
> > instructions listed in Table 2.8 between the producer and consumer of the
> > hazard. Execution hazards can be removed by using the EHB [...]"
>
>  Whatever manual you quote it refers to MIPS Release 2, which is only
> dated 2003 (following Release 1 from 2001), but `__r4k_wait' has to
> continue handling older architecture revisions going back to MIPS III
> ISA from 1991.  We also support MIPS I ISA from 1985 which has further
> instruction scheduling requirements, but `__r4k_wait' is for newer
> processors only, because older ones had no WAIT instruction, so we can
> ignore them (but note that the MIPS I load delay slot is regardless
> observed in current code in the form of a NOP inserted after LONG_L).
>
> > What am I missing?
>
>  This is common MIPS knowledge really, encoded in the GNU toolchain and
> especially GAS since forever.  While I can't cite a canonical reference,
> the hazard is listed e.g. in Table 13.1 "Instructions with scheduling
> implications" and Table 13.3 "R4xxx/R5000 Coprocessor 0 Hazards" from
> "IDT MIPS Microprocessor Family Software Reference Manual," Version 2.0,
> from October 1996.  I do believe the document is available online.
>
>  I'm fairly sure the hazard is also listed there in Dominic Sweetman's
> "See MIPS Run Linux," but I don't have my copy handy right now.
>
>  Best is to avoid using a `.set noreorder' region in the first place.
> But is it really needed here?  Does the rollback area have to be of a
> hardcoded size rather than one calculated by the assembler based on
> actual machine code produced?  It seems to me having it calculated would
> reduce complexity here and let us use the EI instruction where available
> as well.
>
>  HTH,
>
>   Maciej



--

Marco Crivellari

L3 Support Engineer, Technology & Product




marco.crivellari@suse.com
diff mbox series

Patch

diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index a572ce36a24f..4e012421d00f 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -104,27 +104,30 @@  handle_vcei:
 
 	__FINIT
 
-	.align	5	/* 32 byte rollback region */
+	.align	5
 LEAF(__r4k_wait)
 	.set	push
 	.set	noreorder
-	/* start of rollback region */
-	LONG_L	t0, TI_FLAGS($28)
-	nop
-	andi	t0, _TIF_NEED_RESCHED
-	bnez	t0, 1f
-	 nop
-	nop
-	nop
-#ifdef CONFIG_CPU_MICROMIPS
-	nop
-	nop
-	nop
-	nop
-#endif
+	/* Start of idle interrupt region. */
+	MFC0	t0, CP0_STATUS
+	/* Enable interrupt. */
+	ori 	t0, 0x1f
+	xori	t0, 0x1e
+	MTC0	t0, CP0_STATUS
+	_ssnop
+	_ssnop
+	_ssnop
+	_ehb
 	.set	MIPS_ISA_ARCH_LEVEL_RAW
+	/*
+	 * If an interrupt lands here, between enabling interrupts above and
+	 * going idle on the next instruction, we must *NOT* go idle since the
+	 * interrupt could have set TIF_NEED_RESCHED or caused a timer to need
+	 * resched. Fall through -- see rollback_handler below -- and have
+	 * the idle loop take care of things.
+	 */
 	wait
-	/* end of rollback region (the region size must be power of two) */
+	/* End of idle interrupt region. */
 1:
 	jr	ra
 	 nop
@@ -136,9 +139,10 @@  LEAF(__r4k_wait)
 	.set	push
 	.set	noat
 	MFC0	k0, CP0_EPC
-	PTR_LA	k1, __r4k_wait
-	ori	k0, 0x1f	/* 32 byte rollback region */
-	xori	k0, 0x1f
+	PTR_LA	k1, 1b
+	/* 36 byte idle interrupt region. */
+	ori 	k0, 0x1f
+	PTR_ADDIU	k0, 5
 	bne	k0, k1, \handler
 	MTC0	k0, CP0_EPC
 	.set pop
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index 5abc8b7340f8..1f74c0589f7e 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -37,7 +37,6 @@  static void __cpuidle r3081_wait(void)
 
 void __cpuidle r4k_wait(void)
 {
-	raw_local_irq_enable();
 	__r4k_wait();
 	raw_local_irq_disable();
 }