diff mbox series

[v6,1/1] MIPS: Fix idle VS timer enqueue

Message ID 20250315194002.13778-2-marco.crivellari@suse.com (mailing list archive)
State New
Headers show
Series MIPS: Fix idle VS timer enqueue | expand

Commit Message

Marco Crivellari March 15, 2025, 7:40 p.m. UTC
MIPS re-enables interrupts on its idle routine and performs
a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep.

The IRQs firing between the check and the 'wait' instruction may set the
TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs
interrupting __r4k_wait() rollback their return address to the
beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked
again before going back to sleep.

However idle IRQs can also queue timers that may require a tick
reprogramming through a new generic idle loop iteration but those timers
would go unnoticed here because __r4k_wait() only checks
TIF_NEED_RESCHED. It doesn't check for pending timers.

Fix this with fast-forwarding idle IRQs return address to the end of the
idle routine instead of the beginning, so that the generic idle loop
handles both TIF_NEED_RESCHED and pending timers.

CONFIG_CPU_MICROMIPS has been removed along with the nop instructions.
There, NOPs are 2 byte in size, so change the code with 3 _ssnop which are
always 4 byte and remove the ifdef. Added ehb to make sure the hazard
is always cleared.

Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
---
 arch/mips/kernel/genex.S | 42 ++++++++++++++++++++++------------------
 arch/mips/kernel/idle.c  |  1 -
 2 files changed, 23 insertions(+), 20 deletions(-)

Comments

Thomas Bogendoerfer March 19, 2025, 11:07 a.m. UTC | #1
On Sat, Mar 15, 2025 at 08:40:02PM +0100, Marco Crivellari wrote:
> MIPS re-enables interrupts on its idle routine and performs
> a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep.
> 
> The IRQs firing between the check and the 'wait' instruction may set the
> TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs
> interrupting __r4k_wait() rollback their return address to the
> beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked
> again before going back to sleep.
> 
> However idle IRQs can also queue timers that may require a tick
> reprogramming through a new generic idle loop iteration but those timers
> would go unnoticed here because __r4k_wait() only checks
> TIF_NEED_RESCHED. It doesn't check for pending timers.

can you give a commit ID, when this change got introduced ?

> Fix this with fast-forwarding idle IRQs return address to the end of the
> idle routine instead of the beginning, so that the generic idle loop
> handles both TIF_NEED_RESCHED and pending timers.
> 
> CONFIG_CPU_MICROMIPS has been removed along with the nop instructions.
> There, NOPs are 2 byte in size, so change the code with 3 _ssnop which are
> always 4 byte and remove the ifdef. Added ehb to make sure the hazard
> is always cleared.
> 
> Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
> ---
>  arch/mips/kernel/genex.S | 42 ++++++++++++++++++++++------------------
>  arch/mips/kernel/idle.c  |  1 -
>  2 files changed, 23 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> index a572ce36a24f..4e012421d00f 100644
> --- a/arch/mips/kernel/genex.S
> +++ b/arch/mips/kernel/genex.S
> @@ -104,27 +104,30 @@ handle_vcei:
>  
>  	__FINIT
>  
> -	.align	5	/* 32 byte rollback region */
> +	.align	5
>  LEAF(__r4k_wait)
>  	.set	push
>  	.set	noreorder
> -	/* start of rollback region */
> -	LONG_L	t0, TI_FLAGS($28)
> -	nop
> -	andi	t0, _TIF_NEED_RESCHED
> -	bnez	t0, 1f
> -	 nop
> -	nop
> -	nop
> -#ifdef CONFIG_CPU_MICROMIPS
> -	nop
> -	nop
> -	nop
> -	nop
> -#endif
> +	/* Start of idle interrupt region. */
> +	MFC0	t0, CP0_STATUS
> +	/* Enable interrupt. */
> +	ori 	t0, 0x1f
> +	xori	t0, 0x1e
> +	MTC0	t0, CP0_STATUS
> +	_ssnop
> +	_ssnop
> +	_ssnop
> +	_ehb
>  	.set	MIPS_ISA_ARCH_LEVEL_RAW
> +	/*
> +	 * If an interrupt lands here, between enabling interrupts above and
> +	 * going idle on the next instruction, we must *NOT* go idle since the
> +	 * interrupt could have set TIF_NEED_RESCHED or caused a timer to need
> +	 * resched. Fall through -- see rollback_handler below -- and have
> +	 * the idle loop take care of things.
> +	 */
>  	wait
> -	/* end of rollback region (the region size must be power of two) */
> +	/* End of idle interrupt region. */
>  1:

please give this label a name for example __r4k_wait_exit and do a
runtime check that it really has 36 bytes offset to __r4k_wait

>  	jr	ra
>  	 nop
> @@ -136,9 +139,10 @@ LEAF(__r4k_wait)
>  	.set	push
>  	.set	noat
>  	MFC0	k0, CP0_EPC
> -	PTR_LA	k1, __r4k_wait
> -	ori	k0, 0x1f	/* 32 byte rollback region */
> -	xori	k0, 0x1f
> +	PTR_LA	k1, 1b

this is part of a macro, so I don't think using a commonly used label name
is a safe thing, that's why I want a named label here.

> +	/* 36 byte idle interrupt region. */
> +	ori 	k0, 0x1f
> +	PTR_ADDIU	k0, 5
>  	bne	k0, k1, \handler
>  	MTC0	k0, CP0_EPC
>  	.set pop
Frederic Weisbecker March 19, 2025, 2:06 p.m. UTC | #2
Le Wed, Mar 19, 2025 at 12:07:07PM +0100, Thomas Bogendoerfer a écrit :
> On Sat, Mar 15, 2025 at 08:40:02PM +0100, Marco Crivellari wrote:
> > MIPS re-enables interrupts on its idle routine and performs
> > a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep.
> > 
> > The IRQs firing between the check and the 'wait' instruction may set the
> > TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs
> > interrupting __r4k_wait() rollback their return address to the
> > beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked
> > again before going back to sleep.
> > 
> > However idle IRQs can also queue timers that may require a tick
> > reprogramming through a new generic idle loop iteration but those timers
> > would go unnoticed here because __r4k_wait() only checks
> > TIF_NEED_RESCHED. It doesn't check for pending timers.
> 
> can you give a commit ID, when this change got introduced ?

That would be:

     Fixes: c65a5480ff29 ("[MIPS] Fix potential latency problem due to non-atomic cpu_wait.")
Thomas Bogendoerfer March 19, 2025, 2:42 p.m. UTC | #3
On Wed, Mar 19, 2025 at 03:06:12PM +0100, Frederic Weisbecker wrote:
> Le Wed, Mar 19, 2025 at 12:07:07PM +0100, Thomas Bogendoerfer a écrit :
> > On Sat, Mar 15, 2025 at 08:40:02PM +0100, Marco Crivellari wrote:
> > > MIPS re-enables interrupts on its idle routine and performs
> > > a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep.
> > > 
> > > The IRQs firing between the check and the 'wait' instruction may set the
> > > TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs
> > > interrupting __r4k_wait() rollback their return address to the
> > > beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked
> > > again before going back to sleep.
> > > 
> > > However idle IRQs can also queue timers that may require a tick
> > > reprogramming through a new generic idle loop iteration but those timers
> > > would go unnoticed here because __r4k_wait() only checks
> > > TIF_NEED_RESCHED. It doesn't check for pending timers.
> > 
> > can you give a commit ID, when this change got introduced ?
> 
> That would be:
> 
>      Fixes: c65a5480ff29 ("[MIPS] Fix potential latency problem due to non-atomic cpu_wait.")

hmm, so even then checking TIF_NEED_RESCHED wasn't enough  (we are talking
about 2.6.27) ?

Thomas.
Frederic Weisbecker March 19, 2025, 2:43 p.m. UTC | #4
Le Wed, Mar 19, 2025 at 12:07:07PM +0100, Thomas Bogendoerfer a écrit :
> On Sat, Mar 15, 2025 at 08:40:02PM +0100, Marco Crivellari wrote:
> > MIPS re-enables interrupts on its idle routine and performs
> > a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep.
> > 
> > The IRQs firing between the check and the 'wait' instruction may set the
> > TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs
> > interrupting __r4k_wait() rollback their return address to the
> > beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked
> > again before going back to sleep.
> > 
> > However idle IRQs can also queue timers that may require a tick
> > reprogramming through a new generic idle loop iteration but those timers
> > would go unnoticed here because __r4k_wait() only checks
> > TIF_NEED_RESCHED. It doesn't check for pending timers.
> 
> can you give a commit ID, when this change got introduced ?
> 
> > Fix this with fast-forwarding idle IRQs return address to the end of the
> > idle routine instead of the beginning, so that the generic idle loop
> > handles both TIF_NEED_RESCHED and pending timers.
> > 
> > CONFIG_CPU_MICROMIPS has been removed along with the nop instructions.
> > There, NOPs are 2 byte in size, so change the code with 3 _ssnop which are
> > always 4 byte and remove the ifdef. Added ehb to make sure the hazard
> > is always cleared.
> > 
> > Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
> > ---
> >  arch/mips/kernel/genex.S | 42 ++++++++++++++++++++++------------------
> >  arch/mips/kernel/idle.c  |  1 -
> >  2 files changed, 23 insertions(+), 20 deletions(-)
> > 
> > diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> > index a572ce36a24f..4e012421d00f 100644
> > --- a/arch/mips/kernel/genex.S
> > +++ b/arch/mips/kernel/genex.S
> > @@ -104,27 +104,30 @@ handle_vcei:
> >  
> >  	__FINIT
> >  
> > -	.align	5	/* 32 byte rollback region */
> > +	.align	5
> >  LEAF(__r4k_wait)
> >  	.set	push
> >  	.set	noreorder
> > -	/* start of rollback region */
> > -	LONG_L	t0, TI_FLAGS($28)
> > -	nop
> > -	andi	t0, _TIF_NEED_RESCHED
> > -	bnez	t0, 1f
> > -	 nop
> > -	nop
> > -	nop
> > -#ifdef CONFIG_CPU_MICROMIPS
> > -	nop
> > -	nop
> > -	nop
> > -	nop
> > -#endif
> > +	/* Start of idle interrupt region. */
> > +	MFC0	t0, CP0_STATUS
> > +	/* Enable interrupt. */
> > +	ori 	t0, 0x1f
> > +	xori	t0, 0x1e
> > +	MTC0	t0, CP0_STATUS
> > +	_ssnop
> > +	_ssnop
> > +	_ssnop
> > +	_ehb
> >  	.set	MIPS_ISA_ARCH_LEVEL_RAW
> > +	/*
> > +	 * If an interrupt lands here, between enabling interrupts above and
> > +	 * going idle on the next instruction, we must *NOT* go idle since the
> > +	 * interrupt could have set TIF_NEED_RESCHED or caused a timer to need
> > +	 * resched. Fall through -- see rollback_handler below -- and have
> > +	 * the idle loop take care of things.
> > +	 */
> >  	wait
> > -	/* end of rollback region (the region size must be power of two) */
> > +	/* End of idle interrupt region. */
> >  1:
> 
> please give this label a name for example __r4k_wait_exit and do a
> runtime check that it really has 36 bytes offset to __r4k_wait

Where would be the best place for that?

arch/mips/kernel/setup.c:setup_arch() maybe?

Thanks.
Frederic Weisbecker March 19, 2025, 3:01 p.m. UTC | #5
Le Wed, Mar 19, 2025 at 03:42:16PM +0100, Thomas Bogendoerfer a écrit :
> On Wed, Mar 19, 2025 at 03:06:12PM +0100, Frederic Weisbecker wrote:
> > Le Wed, Mar 19, 2025 at 12:07:07PM +0100, Thomas Bogendoerfer a écrit :
> > > On Sat, Mar 15, 2025 at 08:40:02PM +0100, Marco Crivellari wrote:
> > > > MIPS re-enables interrupts on its idle routine and performs
> > > > a TIF_NEED_RESCHED check afterwards before putting the CPU to sleep.
> > > > 
> > > > The IRQs firing between the check and the 'wait' instruction may set the
> > > > TIF_NEED_RESCHED flag. In order to deal with this possible race, IRQs
> > > > interrupting __r4k_wait() rollback their return address to the
> > > > beginning of __r4k_wait() so that TIF_NEED_RESCHED is checked
> > > > again before going back to sleep.
> > > > 
> > > > However idle IRQs can also queue timers that may require a tick
> > > > reprogramming through a new generic idle loop iteration but those timers
> > > > would go unnoticed here because __r4k_wait() only checks
> > > > TIF_NEED_RESCHED. It doesn't check for pending timers.
> > > 
> > > can you give a commit ID, when this change got introduced ?
> > 
> > That would be:
> > 
> >      Fixes: c65a5480ff29 ("[MIPS] Fix potential latency problem due to non-atomic cpu_wait.")
> 
> hmm, so even then checking TIF_NEED_RESCHED wasn't enough  (we are talking
> about 2.6.27) ?

Right, the real issue dates even back earlier because the aforementioned fix only
handled the TIF_NEED_RESCHED part of the problem but the timer part of the problem
was there before.

However nohz was introduced only earlier the same year (2.6.21). We probably
don't need to dig much further...

> 
> Thomas.
> 
> -- 
> Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> good idea.                                                [ RFC1925, 2.3 ]
Thomas Bogendoerfer March 19, 2025, 3:31 p.m. UTC | #6
On Wed, Mar 19, 2025 at 03:43:13PM +0100, Frederic Weisbecker wrote:
> > >  	.set	MIPS_ISA_ARCH_LEVEL_RAW
> > > +	/*
> > > +	 * If an interrupt lands here, between enabling interrupts above and
> > > +	 * going idle on the next instruction, we must *NOT* go idle since the
> > > +	 * interrupt could have set TIF_NEED_RESCHED or caused a timer to need
> > > +	 * resched. Fall through -- see rollback_handler below -- and have
> > > +	 * the idle loop take care of things.
> > > +	 */
> > >  	wait
> > > -	/* end of rollback region (the region size must be power of two) */
> > > +	/* End of idle interrupt region. */
> > >  1:
> > 
> > please give this label a name for example __r4k_wait_exit and do a
> > runtime check that it really has 36 bytes offset to __r4k_wait
> 
> Where would be the best place for that?
> 
> arch/mips/kernel/setup.c:setup_arch() maybe?

scratch runtime check, a compile check is what I wanted to write...

something like

.if ((__r4k_wait_exit - __r4k_wait) != 36)
.err
.endif

Thomas.
Marco Crivellari March 20, 2025, 8:44 a.m. UTC | #7
Hi,

yesterday I made this changes:

@@ -128,7 +128,11 @@ LEAF(__r4k_wait)
        */
       wait
       /* End of idle interrupt region. */
-1:
+SYM_INNER_LABEL(__r4k_wait_exit, SYM_L_LOCAL)
+       /* Check idle interrupt region size. */
+       .ifne   __r4k_wait_exit - __r4k_wait - 36
+       .error  "Idle interrupt region size mismatch: expected 36 bytes."
+       .endif
       jr      ra
        nop
       .set    pop
@@ -139,7 +143,7 @@ LEAF(__r4k_wait)
       .set    push
       .set    noat
       MFC0    k0, CP0_EPC
-       PTR_LA  k1, 1b
+       PTR_LA  k1, __r4k_wait_exit
       /* 36 byte idle interrupt region. */
       ori     k0, 0x1f
       PTR_ADDIU       k0, 5


Would it be ok to have the check done in this way?

Thanks


On Wed, Mar 19, 2025 at 11:09 PM Thomas Bogendoerfer
<tsbogend@alpha.franken.de> wrote:
>
> On Wed, Mar 19, 2025 at 03:43:13PM +0100, Frederic Weisbecker wrote:
> > > >   .set    MIPS_ISA_ARCH_LEVEL_RAW
> > > > + /*
> > > > +  * If an interrupt lands here, between enabling interrupts above and
> > > > +  * going idle on the next instruction, we must *NOT* go idle since the
> > > > +  * interrupt could have set TIF_NEED_RESCHED or caused a timer to need
> > > > +  * resched. Fall through -- see rollback_handler below -- and have
> > > > +  * the idle loop take care of things.
> > > > +  */
> > > >   wait
> > > > - /* end of rollback region (the region size must be power of two) */
> > > > + /* End of idle interrupt region. */
> > > >  1:
> > >
> > > please give this label a name for example __r4k_wait_exit and do a
> > > runtime check that it really has 36 bytes offset to __r4k_wait
> >
> > Where would be the best place for that?
> >
> > arch/mips/kernel/setup.c:setup_arch() maybe?
>
> scratch runtime check, a compile check is what I wanted to write...
>
> something like
>
> .if ((__r4k_wait_exit - __r4k_wait) != 36)
> .err
> .endif
>
> Thomas.
>
> --
> Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> good idea.                                                [ RFC1925, 2.3 ]
Thomas Bogendoerfer March 21, 2025, 9:38 a.m. UTC | #8
On Thu, Mar 20, 2025 at 09:44:23AM +0100, Marco Crivellari wrote:
> Hi,
> 
> yesterday I made this changes:
> 
> @@ -128,7 +128,11 @@ LEAF(__r4k_wait)
>         */
>        wait
>        /* End of idle interrupt region. */
> -1:
> +SYM_INNER_LABEL(__r4k_wait_exit, SYM_L_LOCAL)

do we really need that for a symbol local to that file ?

> +       /* Check idle interrupt region size. */
> +       .ifne   __r4k_wait_exit - __r4k_wait - 36

I have to say, that I prefer my .if statement, since this clearly spells out
the comparision in the expression. Is there a reason for your version ?

Thomas.
Marco Crivellari March 21, 2025, 10:44 a.m. UTC | #9
On Fri, Mar 21, 2025 at 10:38 AM Thomas Bogendoerfer
<tsbogend@alpha.franken.de> wrote:
> do we really need that for a symbol local to that file ?

Not really, I can use the label directly.

On Fri, Mar 21, 2025 at 10:38 AM Thomas Bogendoerfer
<tsbogend@alpha.franken.de> wrote:
>
> > +       /* Check idle interrupt region size. */
> > +       .ifne   __r4k_wait_exit - __r4k_wait - 36
>
> I have to say, that I prefer my .if statement, since this clearly spells out
> the comparision in the expression. Is there a reason for your version ?

Sure, let's keep your version.
Can we use the "error" message above? (""Idle interrupt region size
mismatch: expected 36 bytes."").
Or at least something similar, I think it is easier to understand the
reason for the error. What do you think?

Thanks!
Thomas Bogendoerfer March 21, 2025, 11:17 a.m. UTC | #10
On Fri, Mar 21, 2025 at 11:44:10AM +0100, Marco Crivellari wrote:
> On Fri, Mar 21, 2025 at 10:38 AM Thomas Bogendoerfer
> <tsbogend@alpha.franken.de> wrote:
> > do we really need that for a symbol local to that file ?
> 
> Not really, I can use the label directly.
> 
> On Fri, Mar 21, 2025 at 10:38 AM Thomas Bogendoerfer
> <tsbogend@alpha.franken.de> wrote:
> >
> > > +       /* Check idle interrupt region size. */
> > > +       .ifne   __r4k_wait_exit - __r4k_wait - 36
> >
> > I have to say, that I prefer my .if statement, since this clearly spells out
> > the comparision in the expression. Is there a reason for your version ?
> 
> Sure, let's keep your version.
> Can we use the "error" message above? (""Idle interrupt region size
> mismatch: expected 36 bytes."").
> Or at least something similar, I think it is easier to understand the
> reason for the error. What do you think?

the error message works for me :-)

Thomas.
Maciej W. Rozycki March 21, 2025, 11:53 a.m. UTC | #11
On Sat, 15 Mar 2025, Marco Crivellari wrote:

> diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> index a572ce36a24f..4e012421d00f 100644
> --- a/arch/mips/kernel/genex.S
> +++ b/arch/mips/kernel/genex.S
> @@ -104,27 +104,30 @@ handle_vcei:
>  
>  	__FINIT
>  
> -	.align	5	/* 32 byte rollback region */
> +	.align	5
>  LEAF(__r4k_wait)
>  	.set	push
>  	.set	noreorder
> -	/* start of rollback region */
> -	LONG_L	t0, TI_FLAGS($28)
> -	nop
> -	andi	t0, _TIF_NEED_RESCHED
> -	bnez	t0, 1f
> -	 nop
> -	nop
> -	nop
> -#ifdef CONFIG_CPU_MICROMIPS
> -	nop
> -	nop
> -	nop
> -	nop
> -#endif
> +	/* Start of idle interrupt region. */
> +	MFC0	t0, CP0_STATUS
> +	/* Enable interrupt. */
> +	ori 	t0, 0x1f

 This instruction sequence still suffers from the coprocessor move delay 
hazard.  How many times do I need to request to get it fixed (counting 
three so far)?

  Maciej
Maciej W. Rozycki March 21, 2025, 11:55 a.m. UTC | #12
On Fri, 21 Mar 2025, Thomas Bogendoerfer wrote:

> > > > +       /* Check idle interrupt region size. */
> > > > +       .ifne   __r4k_wait_exit - __r4k_wait - 36
> > >
> > > I have to say, that I prefer my .if statement, since this clearly spells out
> > > the comparision in the expression. Is there a reason for your version ?
> > 
> > Sure, let's keep your version.
> > Can we use the "error" message above? (""Idle interrupt region size
> > mismatch: expected 36 bytes."").
> > Or at least something similar, I think it is easier to understand the
> > reason for the error. What do you think?
> 
> the error message works for me :-)

 I'm glad seeing this being sorted properly, thank you!

  Maciej
Marco Crivellari March 21, 2025, 4:15 p.m. UTC | #13
Hi Maciej,

On Fri, Mar 21, 2025 at 12:53 PM Maciej W. Rozycki <macro@orcam.me.uk> wrote:
>  This instruction sequence still suffers from the coprocessor move delay
> hazard.  How many times do I need to request to get it fixed (counting
> three so far)?

Can I have more details about this?

I can see it is the same code present also in local_irq_enable()
(arch_local_irq_enable()),
and from the manual I've seen:

"The Spacing column shown in Table 2.6 and Table 2.7 indicates the
number of unrelated instructions (such as NOPs or SSNOPs) that,
prior to the capabilities of Release 2, would need to be placed
between the producer and consumer of the hazard in order to ensure
that the effects of the first instruction are seen by the second instruction."

The "Spacing column" value is 3, indeed.

"With the hazard elimination instructions available in Release 2, the
preferred method to eliminate hazards is to place one of the
instructions listed in Table 2.8 between the producer and consumer of the
hazard. Execution hazards can be removed by using the EHB [...]"

What am I missing?

Thanks in advance


--

Marco Crivellari

L3 Support Engineer, Technology & Product




marco.crivellari@suse.com
Maciej W. Rozycki March 21, 2025, 8:11 p.m. UTC | #14
On Fri, 21 Mar 2025, Marco Crivellari wrote:

> >  This instruction sequence still suffers from the coprocessor move delay
> > hazard.  How many times do I need to request to get it fixed (counting
> > three so far)?
> 
> Can I have more details about this?
> 
> I can see it is the same code present also in local_irq_enable()
> (arch_local_irq_enable()),

 Unlike `__r4k_wait' that code is not in a `.set noreorder' region and 
the assembler will therefore resolve the hazard by inserting a NOP where 
required by the architecture level requested (with `-march=...', etc.).  
Try compiling that function for a MIPS III configuration such as 
decstation_r4k_defconfig or just by hand with `-march=mips3' and see 
what machine code is produced.

> and from the manual I've seen:
> 
> "The Spacing column shown in Table 2.6 and Table 2.7 indicates the
> number of unrelated instructions (such as NOPs or SSNOPs) that,
> prior to the capabilities of Release 2, would need to be placed
> between the producer and consumer of the hazard in order to ensure
> that the effects of the first instruction are seen by the second instruction."
> 
> The "Spacing column" value is 3, indeed.
> 
> "With the hazard elimination instructions available in Release 2, the
> preferred method to eliminate hazards is to place one of the
> instructions listed in Table 2.8 between the producer and consumer of the
> hazard. Execution hazards can be removed by using the EHB [...]"

 Whatever manual you quote it refers to MIPS Release 2, which is only 
dated 2003 (following Release 1 from 2001), but `__r4k_wait' has to 
continue handling older architecture revisions going back to MIPS III 
ISA from 1991.  We also support MIPS I ISA from 1985 which has further 
instruction scheduling requirements, but `__r4k_wait' is for newer 
processors only, because older ones had no WAIT instruction, so we can 
ignore them (but note that the MIPS I load delay slot is regardless 
observed in current code in the form of a NOP inserted after LONG_L).

> What am I missing?

 This is common MIPS knowledge really, encoded in the GNU toolchain and 
especially GAS since forever.  While I can't cite a canonical reference, 
the hazard is listed e.g. in Table 13.1 "Instructions with scheduling 
implications" and Table 13.3 "R4xxx/R5000 Coprocessor 0 Hazards" from 
"IDT MIPS Microprocessor Family Software Reference Manual," Version 2.0, 
from October 1996.  I do believe the document is available online.

 I'm fairly sure the hazard is also listed there in Dominic Sweetman's 
"See MIPS Run Linux," but I don't have my copy handy right now.

 Best is to avoid using a `.set noreorder' region in the first place.  
But is it really needed here?  Does the rollback area have to be of a 
hardcoded size rather than one calculated by the assembler based on 
actual machine code produced?  It seems to me having it calculated would 
reduce complexity here and let us use the EI instruction where available 
as well.

 HTH,

  Maciej
Frederic Weisbecker March 21, 2025, 9:09 p.m. UTC | #15
Le Fri, Mar 21, 2025 at 11:53:54AM +0000, Maciej W. Rozycki a écrit :
> On Sat, 15 Mar 2025, Marco Crivellari wrote:
> 
> > diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> > index a572ce36a24f..4e012421d00f 100644
> > --- a/arch/mips/kernel/genex.S
> > +++ b/arch/mips/kernel/genex.S
> > @@ -104,27 +104,30 @@ handle_vcei:
> >  
> >  	__FINIT
> >  
> > -	.align	5	/* 32 byte rollback region */
> > +	.align	5
> >  LEAF(__r4k_wait)
> >  	.set	push
> >  	.set	noreorder
> > -	/* start of rollback region */
> > -	LONG_L	t0, TI_FLAGS($28)
> > -	nop
> > -	andi	t0, _TIF_NEED_RESCHED
> > -	bnez	t0, 1f
> > -	 nop
> > -	nop
> > -	nop
> > -#ifdef CONFIG_CPU_MICROMIPS
> > -	nop
> > -	nop
> > -	nop
> > -	nop
> > -#endif
> > +	/* Start of idle interrupt region. */
> > +	MFC0	t0, CP0_STATUS
> > +	/* Enable interrupt. */
> > +	ori 	t0, 0x1f
> 
>  This instruction sequence still suffers from the coprocessor move delay 
> hazard.  How many times do I need to request to get it fixed (counting 
> three so far)?

This is because your request had follow-ups from Huacai and Marco that
were left unanswered:

     https://lore.kernel.org/all/CAAhV-H5ptAzHTPAr1bxrgByZrnFmMK8zJ68Z++RwC=NHWjqZmw@mail.gmail.com/
     https://lore.kernel.org/all/CAAofZF4HAczyRmuRe-JmQ2wcZatevLwGTOMLf1V1okGbj7q5Wg@mail.gmail.com/

We have detected this longstanding architecture specific timer handling bug on
loongson and MIPS and we could have just dropped a report and let you guys deal with
it. Instead we decided to spend time ourselves (especially Marco) working on
fixes for these architectures we don't run and which we are not familiar with,
alongway taking reviews seriously and patiently re-iterating accordingly.

So please be gentle with us.

Thanks.
Maciej W. Rozycki March 22, 2025, 4:08 p.m. UTC | #16
On Fri, 21 Mar 2025, Frederic Weisbecker wrote:

> > > diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> > > index a572ce36a24f..4e012421d00f 100644
> > > --- a/arch/mips/kernel/genex.S
> > > +++ b/arch/mips/kernel/genex.S
> > > @@ -104,27 +104,30 @@ handle_vcei:
> > >  
> > >  	__FINIT
> > >  
> > > -	.align	5	/* 32 byte rollback region */
> > > +	.align	5
> > >  LEAF(__r4k_wait)
> > >  	.set	push
> > >  	.set	noreorder
> > > -	/* start of rollback region */
> > > -	LONG_L	t0, TI_FLAGS($28)
> > > -	nop
> > > -	andi	t0, _TIF_NEED_RESCHED
> > > -	bnez	t0, 1f
> > > -	 nop
> > > -	nop
> > > -	nop
> > > -#ifdef CONFIG_CPU_MICROMIPS
> > > -	nop
> > > -	nop
> > > -	nop
> > > -	nop
> > > -#endif
> > > +	/* Start of idle interrupt region. */
> > > +	MFC0	t0, CP0_STATUS
> > > +	/* Enable interrupt. */
> > > +	ori 	t0, 0x1f
> > 
> >  This instruction sequence still suffers from the coprocessor move delay 
> > hazard.  How many times do I need to request to get it fixed (counting 
> > three so far)?
> 
> This is because your request had follow-ups from Huacai and Marco that
> were left unanswered:
> 
>      https://lore.kernel.org/all/CAAhV-H5ptAzHTPAr1bxrgByZrnFmMK8zJ68Z++RwC=NHWjqZmw@mail.gmail.com/

 The conclusion made there is however wrong: `local_irq_enable' code 
plays no tricks with instruction scheduling and lets the toolchain 
resolve any pipeline hazards automatically, while `__r4k_wait' arranges 
for instructions to be scheduled by hand and any hazards resolved by the 
human writer of the code.  There's even explicit `.set reorder' in 
`local_irq_enable', which is redundant, because it's the default mode 
for inline assembly.

 And I can't emphasise it enough: manual instruction scheduling is tough
and ought to be restricted to cases where there is no other way really, 
such as for placing an instruction in a branch delay slot where there is 
a data antidependency between the branch and the delay-slot instruction.  
Yet this approach has often been used by code authors for other reasons 
(or I daresay no reason at all), leaving it up to the maintainers to 
keep the code working in the changing conditions while the submitter has 
long gone.  I converted some of such code in the past, but it also takes 
time and effort that does not come for free.

>      https://lore.kernel.org/all/CAAofZF4HAczyRmuRe-JmQ2wcZatevLwGTOMLf1V1okGbj7q5Wg@mail.gmail.com/

 I missed that one, sorry.  A ping would have helped, and I never have 
an issue with being pinged.  I do hope I have now addressed that concern 
with my other reply.

 Thank you for the pointers.

> We have detected this longstanding architecture specific timer handling bug on
> loongson and MIPS and we could have just dropped a report and let you guys deal with
> it. Instead we decided to spend time ourselves (especially Marco) working on
> fixes for these architectures we don't run and which we are not familiar with,
> alongway taking reviews seriously and patiently re-iterating accordingly.

 Thank you for your effort, really appreciated.  Any fixes need to be 
technically correct however, it makes no sense to get one bug replaced 
with another one.  We've got enough technical debt accumulated already 
with a platform that no longer has any commercial support and relies 
solely on voluteers keeping it alive in their limited spare time.  I do 
have a long list of outstanding issues to address and ever so little 
time to take care of them, with hardware problems additionally kicking 
in and distracting every so often too.

> So please be gentle with us.

 As always, but also emphatic on this occasion.  We're in the same boat 
really, striving against the lack of resources and issues piling, and 
now we've made some progress.  Thank you for your understanding.

  Maciej
Frederic Weisbecker March 25, 2025, 1:31 p.m. UTC | #17
Le Sat, Mar 22, 2025 at 04:08:31PM +0000, Maciej W. Rozycki a écrit :
> On Fri, 21 Mar 2025, Frederic Weisbecker wrote:
> 
> > > > diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> > > > index a572ce36a24f..4e012421d00f 100644
> > > > --- a/arch/mips/kernel/genex.S
> > > > +++ b/arch/mips/kernel/genex.S
> > > > @@ -104,27 +104,30 @@ handle_vcei:
> > > >  
> > > >  	__FINIT
> > > >  
> > > > -	.align	5	/* 32 byte rollback region */
> > > > +	.align	5
> > > >  LEAF(__r4k_wait)
> > > >  	.set	push
> > > >  	.set	noreorder
> > > > -	/* start of rollback region */
> > > > -	LONG_L	t0, TI_FLAGS($28)
> > > > -	nop
> > > > -	andi	t0, _TIF_NEED_RESCHED
> > > > -	bnez	t0, 1f
> > > > -	 nop
> > > > -	nop
> > > > -	nop
> > > > -#ifdef CONFIG_CPU_MICROMIPS
> > > > -	nop
> > > > -	nop
> > > > -	nop
> > > > -	nop
> > > > -#endif
> > > > +	/* Start of idle interrupt region. */
> > > > +	MFC0	t0, CP0_STATUS
> > > > +	/* Enable interrupt. */
> > > > +	ori 	t0, 0x1f
> > > 
> > >  This instruction sequence still suffers from the coprocessor move delay 
> > > hazard.  How many times do I need to request to get it fixed (counting 
> > > three so far)?
> > 
> > This is because your request had follow-ups from Huacai and Marco that
> > were left unanswered:
> > 
> >      https://lore.kernel.org/all/CAAhV-H5ptAzHTPAr1bxrgByZrnFmMK8zJ68Z++RwC=NHWjqZmw@mail.gmail.com/
> 
>  The conclusion made there is however wrong: `local_irq_enable' code 
> plays no tricks with instruction scheduling and lets the toolchain 
> resolve any pipeline hazards automatically, while `__r4k_wait' arranges 
> for instructions to be scheduled by hand and any hazards resolved by the 
> human writer of the code.  There's even explicit `.set reorder' in 
> `local_irq_enable', which is redundant, because it's the default mode 
> for inline assembly.
> 
>  And I can't emphasise it enough: manual instruction scheduling is tough
> and ought to be restricted to cases where there is no other way really, 
> such as for placing an instruction in a branch delay slot where there is 
> a data antidependency between the branch and the delay-slot instruction.  
> Yet this approach has often been used by code authors for other reasons 
> (or I daresay no reason at all), leaving it up to the maintainers to 
> keep the code working in the changing conditions while the submitter has 
> long gone.  I converted some of such code in the past, but it also takes 
> time and effort that does not come for free.
> 
> >      https://lore.kernel.org/all/CAAofZF4HAczyRmuRe-JmQ2wcZatevLwGTOMLf1V1okGbj7q5Wg@mail.gmail.com/
> 
>  I missed that one, sorry.  A ping would have helped, and I never have 
> an issue with being pinged.  I do hope I have now addressed that concern 
> with my other reply.

Hopefully, I'll let Marco follow-up on that as I must confess I'm lost
with these details. But your help has been very valuable!

> 
> > We have detected this longstanding architecture specific timer handling bug on
> > loongson and MIPS and we could have just dropped a report and let you guys deal with
> > it. Instead we decided to spend time ourselves (especially Marco) working on
> > fixes for these architectures we don't run and which we are not familiar with,
> > alongway taking reviews seriously and patiently re-iterating accordingly.
> 
>  Thank you for your effort, really appreciated.  Any fixes need to be 
> technically correct however, it makes no sense to get one bug replaced 
> with another one.  We've got enough technical debt accumulated already 
> with a platform that no longer has any commercial support and relies 
> solely on voluteers keeping it alive in their limited spare time.  I do 
> have a long list of outstanding issues to address and ever so little 
> time to take care of them, with hardware problems additionally kicking 
> in and distracting every so often too.

Yeah I totally understand that!

> 
> > So please be gentle with us.
> 
>  As always, but also emphatic on this occasion.  We're in the same boat 
> really, striving against the lack of resources and issues piling, and 
> now we've made some progress.  Thank you for your understanding.

Heh I know... Thanks a lot!

> 
>   Maciej
Marco Crivellari March 25, 2025, 2:08 p.m. UTC | #18
Hi Maciej,

Thanks a lot for all the information.

>  Unlike `__r4k_wait' that code is not in a `.set noreorder' region and
> the assembler will therefore resolve the hazard by inserting a NOP where
> required by the architecture level requested (with `-march=...', etc.).
> Try compiling that function for a MIPS III configuration such as
> decstation_r4k_defconfig or just by hand with `-march=mips3' and see
> what machine code is produced.

I tried with the configuration you suggested, and indeed I can see a NOP.

>  Whatever manual you quote it refers to MIPS Release 2, which is only
> dated 2003

About the MIPS manual, anyhow, it is "MIPS32 M4 Processor Core" (year 2008).
Maybe I've also picked the wrong manual.

I've also found the manual you mentioned online, thanks.

>  Best is to avoid using a `.set noreorder' region in the first place.
> But is it really needed here?  Does the rollback area have to be of a
> hardcoded size rather than one calculated by the assembler based on
> actual machine code produced?  It seems to me having it calculated would
> reduce complexity here and let us use the EI instruction where available
> as well.

Well, considering the complexity and how the code looks fragile even with
a small change, yes, that's likely better to avoid noreorder.

I think I'm going to need some guidance here.
Please, correct me where something is wrong.

1)
When you say "let us use the EI instruction where available" are you
referring to do
something like below?

#if defined(CONFIG_CPU_HAS_DIEI)
ei
#else
MFC0    t0, CP0_STATUS
ori     t0, 0x1f
xori    t0, 0x1e
MTC0    t0, CP0_STATUS
#endif

2)
Removing "noreorder" would let the compiler add "nops" where they are needed.
But that still means the 3 ssnop and ehb are still needed, right?

My subsequent dumb question is: there is the guarantee that the
compiler will not
reorder / change something we did?
This question also came after reading the manual you quoted (paragraph
"Coprocessor Hazards"):

"For example, after an mtc0 to the Status register which changes an
interrupt mask bit,
there will be two further instructions before the interrupt is
actually enabled or disabled.
[...]
To cope with these situations usually requires the programmer to take explicit
action to prevent the assembler from scheduling inappropriate
instructions after a
dangerous mtc0. This is done by using the .set noreorder directive,
discussed below"

3)
Considering the size is determined by the compiler, the check about
the idle interrupt
size region should not be needed, correct?

4)
ori and PTR_ADDIU should be removed of course from the rollback handler macro.
Can I have some hints about the needed change?
Using QEMU is always working, so I'm not sure if what I will change is
also correct.


Many thanks in advance, also for your time!




On Fri, Mar 21, 2025 at 9:11 PM Maciej W. Rozycki <macro@orcam.me.uk> wrote:
>
> On Fri, 21 Mar 2025, Marco Crivellari wrote:
>
> > >  This instruction sequence still suffers from the coprocessor move delay
> > > hazard.  How many times do I need to request to get it fixed (counting
> > > three so far)?
> >
> > Can I have more details about this?
> >
> > I can see it is the same code present also in local_irq_enable()
> > (arch_local_irq_enable()),
>
>  Unlike `__r4k_wait' that code is not in a `.set noreorder' region and
> the assembler will therefore resolve the hazard by inserting a NOP where
> required by the architecture level requested (with `-march=...', etc.).
> Try compiling that function for a MIPS III configuration such as
> decstation_r4k_defconfig or just by hand with `-march=mips3' and see
> what machine code is produced.
>
> > and from the manual I've seen:
> >
> > "The Spacing column shown in Table 2.6 and Table 2.7 indicates the
> > number of unrelated instructions (such as NOPs or SSNOPs) that,
> > prior to the capabilities of Release 2, would need to be placed
> > between the producer and consumer of the hazard in order to ensure
> > that the effects of the first instruction are seen by the second instruction."
> >
> > The "Spacing column" value is 3, indeed.
> >
> > "With the hazard elimination instructions available in Release 2, the
> > preferred method to eliminate hazards is to place one of the
> > instructions listed in Table 2.8 between the producer and consumer of the
> > hazard. Execution hazards can be removed by using the EHB [...]"
>
>  Whatever manual you quote it refers to MIPS Release 2, which is only
> dated 2003 (following Release 1 from 2001), but `__r4k_wait' has to
> continue handling older architecture revisions going back to MIPS III
> ISA from 1991.  We also support MIPS I ISA from 1985 which has further
> instruction scheduling requirements, but `__r4k_wait' is for newer
> processors only, because older ones had no WAIT instruction, so we can
> ignore them (but note that the MIPS I load delay slot is regardless
> observed in current code in the form of a NOP inserted after LONG_L).
>
> > What am I missing?
>
>  This is common MIPS knowledge really, encoded in the GNU toolchain and
> especially GAS since forever.  While I can't cite a canonical reference,
> the hazard is listed e.g. in Table 13.1 "Instructions with scheduling
> implications" and Table 13.3 "R4xxx/R5000 Coprocessor 0 Hazards" from
> "IDT MIPS Microprocessor Family Software Reference Manual," Version 2.0,
> from October 1996.  I do believe the document is available online.
>
>  I'm fairly sure the hazard is also listed there in Dominic Sweetman's
> "See MIPS Run Linux," but I don't have my copy handy right now.
>
>  Best is to avoid using a `.set noreorder' region in the first place.
> But is it really needed here?  Does the rollback area have to be of a
> hardcoded size rather than one calculated by the assembler based on
> actual machine code produced?  It seems to me having it calculated would
> reduce complexity here and let us use the EI instruction where available
> as well.
>
>  HTH,
>
>   Maciej



--

Marco Crivellari

L3 Support Engineer, Technology & Product




marco.crivellari@suse.com
Huacai Chen March 26, 2025, 1:20 a.m. UTC | #19
On Tue, Mar 25, 2025 at 10:09 PM Marco Crivellari
<marco.crivellari@suse.com> wrote:
>
> Hi Maciej,
>
> Thanks a lot for all the information.
>
> >  Unlike `__r4k_wait' that code is not in a `.set noreorder' region and
> > the assembler will therefore resolve the hazard by inserting a NOP where
> > required by the architecture level requested (with `-march=...', etc.).
> > Try compiling that function for a MIPS III configuration such as
> > decstation_r4k_defconfig or just by hand with `-march=mips3' and see
> > what machine code is produced.
>
> I tried with the configuration you suggested, and indeed I can see a NOP.
>
> >  Whatever manual you quote it refers to MIPS Release 2, which is only
> > dated 2003
>
> About the MIPS manual, anyhow, it is "MIPS32 M4 Processor Core" (year 2008).
> Maybe I've also picked the wrong manual.
>
> I've also found the manual you mentioned online, thanks.
>
> >  Best is to avoid using a `.set noreorder' region in the first place.
> > But is it really needed here?  Does the rollback area have to be of a
> > hardcoded size rather than one calculated by the assembler based on
> > actual machine code produced?  It seems to me having it calculated would
> > reduce complexity here and let us use the EI instruction where available
> > as well.
>
> Well, considering the complexity and how the code looks fragile even with
> a small change, yes, that's likely better to avoid noreorder.
In my opinion keeping "noreorder" is the simplest, which means just
add an "nop" after MFC0 in the current version.

Huacai

>
> I think I'm going to need some guidance here.
> Please, correct me where something is wrong.
>
> 1)
> When you say "let us use the EI instruction where available" are you
> referring to do
> something like below?
>
> #if defined(CONFIG_CPU_HAS_DIEI)
> ei
> #else
> MFC0    t0, CP0_STATUS
> ori     t0, 0x1f
> xori    t0, 0x1e
> MTC0    t0, CP0_STATUS
> #endif
>
> 2)
> Removing "noreorder" would let the compiler add "nops" where they are needed.
> But that still means the 3 ssnop and ehb are still needed, right?
>
> My subsequent dumb question is: there is the guarantee that the
> compiler will not
> reorder / change something we did?
> This question also came after reading the manual you quoted (paragraph
> "Coprocessor Hazards"):
>
> "For example, after an mtc0 to the Status register which changes an
> interrupt mask bit,
> there will be two further instructions before the interrupt is
> actually enabled or disabled.
> [...]
> To cope with these situations usually requires the programmer to take explicit
> action to prevent the assembler from scheduling inappropriate
> instructions after a
> dangerous mtc0. This is done by using the .set noreorder directive,
> discussed below"
>
> 3)
> Considering the size is determined by the compiler, the check about
> the idle interrupt
> size region should not be needed, correct?
>
> 4)
> ori and PTR_ADDIU should be removed of course from the rollback handler macro.
> Can I have some hints about the needed change?
> Using QEMU is always working, so I'm not sure if what I will change is
> also correct.
>
>
> Many thanks in advance, also for your time!
>
>
>
>
> On Fri, Mar 21, 2025 at 9:11 PM Maciej W. Rozycki <macro@orcam.me.uk> wrote:
> >
> > On Fri, 21 Mar 2025, Marco Crivellari wrote:
> >
> > > >  This instruction sequence still suffers from the coprocessor move delay
> > > > hazard.  How many times do I need to request to get it fixed (counting
> > > > three so far)?
> > >
> > > Can I have more details about this?
> > >
> > > I can see it is the same code present also in local_irq_enable()
> > > (arch_local_irq_enable()),
> >
> >  Unlike `__r4k_wait' that code is not in a `.set noreorder' region and
> > the assembler will therefore resolve the hazard by inserting a NOP where
> > required by the architecture level requested (with `-march=...', etc.).
> > Try compiling that function for a MIPS III configuration such as
> > decstation_r4k_defconfig or just by hand with `-march=mips3' and see
> > what machine code is produced.
> >
> > > and from the manual I've seen:
> > >
> > > "The Spacing column shown in Table 2.6 and Table 2.7 indicates the
> > > number of unrelated instructions (such as NOPs or SSNOPs) that,
> > > prior to the capabilities of Release 2, would need to be placed
> > > between the producer and consumer of the hazard in order to ensure
> > > that the effects of the first instruction are seen by the second instruction."
> > >
> > > The "Spacing column" value is 3, indeed.
> > >
> > > "With the hazard elimination instructions available in Release 2, the
> > > preferred method to eliminate hazards is to place one of the
> > > instructions listed in Table 2.8 between the producer and consumer of the
> > > hazard. Execution hazards can be removed by using the EHB [...]"
> >
> >  Whatever manual you quote it refers to MIPS Release 2, which is only
> > dated 2003 (following Release 1 from 2001), but `__r4k_wait' has to
> > continue handling older architecture revisions going back to MIPS III
> > ISA from 1991.  We also support MIPS I ISA from 1985 which has further
> > instruction scheduling requirements, but `__r4k_wait' is for newer
> > processors only, because older ones had no WAIT instruction, so we can
> > ignore them (but note that the MIPS I load delay slot is regardless
> > observed in current code in the form of a NOP inserted after LONG_L).
> >
> > > What am I missing?
> >
> >  This is common MIPS knowledge really, encoded in the GNU toolchain and
> > especially GAS since forever.  While I can't cite a canonical reference,
> > the hazard is listed e.g. in Table 13.1 "Instructions with scheduling
> > implications" and Table 13.3 "R4xxx/R5000 Coprocessor 0 Hazards" from
> > "IDT MIPS Microprocessor Family Software Reference Manual," Version 2.0,
> > from October 1996.  I do believe the document is available online.
> >
> >  I'm fairly sure the hazard is also listed there in Dominic Sweetman's
> > "See MIPS Run Linux," but I don't have my copy handy right now.
> >
> >  Best is to avoid using a `.set noreorder' region in the first place.
> > But is it really needed here?  Does the rollback area have to be of a
> > hardcoded size rather than one calculated by the assembler based on
> > actual machine code produced?  It seems to me having it calculated would
> > reduce complexity here and let us use the EI instruction where available
> > as well.
> >
> >  HTH,
> >
> >   Maciej
>
>
>
> --
>
> Marco Crivellari
>
> L3 Support Engineer, Technology & Product
>
>
>
>
> marco.crivellari@suse.com
Marco Crivellari March 26, 2025, 9:46 a.m. UTC | #20
I'm mostly thinking about future changes in this part of the code.
But if it is ok what we have now, and future changes are not a
problem, let's keep this version.

Would this be ok with you @Maciej?

If so, the region is now 40 bytes. This is what I did yesterday:

@@ -110,6 +110,7 @@ LEAF(__r4k_wait)
       .set    noreorder
       /* Start of idle interrupt region. */
       MFC0    t0, CP0_STATUS
+       nop
       /* Enable interrupt. */
       ori     t0, 0x1f
       xori    t0, 0x1e
@@ -128,7 +129,11 @@ LEAF(__r4k_wait)
        */
       wait
       /* End of idle interrupt region. */
-1:
+__r4k_wait_exit:
+       /* Check idle interrupt region size. */
+       .if ((__r4k_wait_exit - __r4k_wait) != 40)
+       .error  "Idle interrupt region size mismatch: expected 40 bytes."
+       .endif
       jr      ra
        nop
       .set    pop
@@ -139,10 +144,10 @@ LEAF(__r4k_wait)
       .set    push
       .set    noat
       MFC0    k0, CP0_EPC
-       PTR_LA  k1, 1b
-       /* 36 byte idle interrupt region. */
+       PTR_LA  k1, __r4k_wait_exit
+       /* 40 byte idle interrupt region. */
       ori     k0, 0x1f
-       PTR_ADDIU       k0, 5
+       PTR_ADDIU       k0, 9
       bne     k0, k1, \handler
       MTC0    k0, CP0_EPC
       .set pop

Under QEMU is working, but I'm not so sure the latest part is correct.

Thanks!



On Wed, Mar 26, 2025 at 2:20 AM Huacai Chen <chenhuacai@kernel.org> wrote:
>
> On Tue, Mar 25, 2025 at 10:09 PM Marco Crivellari
> <marco.crivellari@suse.com> wrote:
> >
> > Hi Maciej,
> >
> > Thanks a lot for all the information.
> >
> > >  Unlike `__r4k_wait' that code is not in a `.set noreorder' region and
> > > the assembler will therefore resolve the hazard by inserting a NOP where
> > > required by the architecture level requested (with `-march=...', etc.).
> > > Try compiling that function for a MIPS III configuration such as
> > > decstation_r4k_defconfig or just by hand with `-march=mips3' and see
> > > what machine code is produced.
> >
> > I tried with the configuration you suggested, and indeed I can see a NOP.
> >
> > >  Whatever manual you quote it refers to MIPS Release 2, which is only
> > > dated 2003
> >
> > About the MIPS manual, anyhow, it is "MIPS32 M4 Processor Core" (year 2008).
> > Maybe I've also picked the wrong manual.
> >
> > I've also found the manual you mentioned online, thanks.
> >
> > >  Best is to avoid using a `.set noreorder' region in the first place.
> > > But is it really needed here?  Does the rollback area have to be of a
> > > hardcoded size rather than one calculated by the assembler based on
> > > actual machine code produced?  It seems to me having it calculated would
> > > reduce complexity here and let us use the EI instruction where available
> > > as well.
> >
> > Well, considering the complexity and how the code looks fragile even with
> > a small change, yes, that's likely better to avoid noreorder.
> In my opinion keeping "noreorder" is the simplest, which means just
> add an "nop" after MFC0 in the current version.
>
> Huacai
>
> >
> > I think I'm going to need some guidance here.
> > Please, correct me where something is wrong.
> >
> > 1)
> > When you say "let us use the EI instruction where available" are you
> > referring to do
> > something like below?
> >
> > #if defined(CONFIG_CPU_HAS_DIEI)
> > ei
> > #else
> > MFC0    t0, CP0_STATUS
> > ori     t0, 0x1f
> > xori    t0, 0x1e
> > MTC0    t0, CP0_STATUS
> > #endif
> >
> > 2)
> > Removing "noreorder" would let the compiler add "nops" where they are needed.
> > But that still means the 3 ssnop and ehb are still needed, right?
> >
> > My subsequent dumb question is: there is the guarantee that the
> > compiler will not
> > reorder / change something we did?
> > This question also came after reading the manual you quoted (paragraph
> > "Coprocessor Hazards"):
> >
> > "For example, after an mtc0 to the Status register which changes an
> > interrupt mask bit,
> > there will be two further instructions before the interrupt is
> > actually enabled or disabled.
> > [...]
> > To cope with these situations usually requires the programmer to take explicit
> > action to prevent the assembler from scheduling inappropriate
> > instructions after a
> > dangerous mtc0. This is done by using the .set noreorder directive,
> > discussed below"
> >
> > 3)
> > Considering the size is determined by the compiler, the check about
> > the idle interrupt
> > size region should not be needed, correct?
> >
> > 4)
> > ori and PTR_ADDIU should be removed of course from the rollback handler macro.
> > Can I have some hints about the needed change?
> > Using QEMU is always working, so I'm not sure if what I will change is
> > also correct.
> >
> >
> > Many thanks in advance, also for your time!
> >
> >
> >
> >
> > On Fri, Mar 21, 2025 at 9:11 PM Maciej W. Rozycki <macro@orcam.me.uk> wrote:
> > >
> > > On Fri, 21 Mar 2025, Marco Crivellari wrote:
> > >
> > > > >  This instruction sequence still suffers from the coprocessor move delay
> > > > > hazard.  How many times do I need to request to get it fixed (counting
> > > > > three so far)?
> > > >
> > > > Can I have more details about this?
> > > >
> > > > I can see it is the same code present also in local_irq_enable()
> > > > (arch_local_irq_enable()),
> > >
> > >  Unlike `__r4k_wait' that code is not in a `.set noreorder' region and
> > > the assembler will therefore resolve the hazard by inserting a NOP where
> > > required by the architecture level requested (with `-march=...', etc.).
> > > Try compiling that function for a MIPS III configuration such as
> > > decstation_r4k_defconfig or just by hand with `-march=mips3' and see
> > > what machine code is produced.
> > >
> > > > and from the manual I've seen:
> > > >
> > > > "The Spacing column shown in Table 2.6 and Table 2.7 indicates the
> > > > number of unrelated instructions (such as NOPs or SSNOPs) that,
> > > > prior to the capabilities of Release 2, would need to be placed
> > > > between the producer and consumer of the hazard in order to ensure
> > > > that the effects of the first instruction are seen by the second instruction."
> > > >
> > > > The "Spacing column" value is 3, indeed.
> > > >
> > > > "With the hazard elimination instructions available in Release 2, the
> > > > preferred method to eliminate hazards is to place one of the
> > > > instructions listed in Table 2.8 between the producer and consumer of the
> > > > hazard. Execution hazards can be removed by using the EHB [...]"
> > >
> > >  Whatever manual you quote it refers to MIPS Release 2, which is only
> > > dated 2003 (following Release 1 from 2001), but `__r4k_wait' has to
> > > continue handling older architecture revisions going back to MIPS III
> > > ISA from 1991.  We also support MIPS I ISA from 1985 which has further
> > > instruction scheduling requirements, but `__r4k_wait' is for newer
> > > processors only, because older ones had no WAIT instruction, so we can
> > > ignore them (but note that the MIPS I load delay slot is regardless
> > > observed in current code in the form of a NOP inserted after LONG_L).
> > >
> > > > What am I missing?
> > >
> > >  This is common MIPS knowledge really, encoded in the GNU toolchain and
> > > especially GAS since forever.  While I can't cite a canonical reference,
> > > the hazard is listed e.g. in Table 13.1 "Instructions with scheduling
> > > implications" and Table 13.3 "R4xxx/R5000 Coprocessor 0 Hazards" from
> > > "IDT MIPS Microprocessor Family Software Reference Manual," Version 2.0,
> > > from October 1996.  I do believe the document is available online.
> > >
> > >  I'm fairly sure the hazard is also listed there in Dominic Sweetman's
> > > "See MIPS Run Linux," but I don't have my copy handy right now.
> > >
> > >  Best is to avoid using a `.set noreorder' region in the first place.
> > > But is it really needed here?  Does the rollback area have to be of a
> > > hardcoded size rather than one calculated by the assembler based on
> > > actual machine code produced?  It seems to me having it calculated would
> > > reduce complexity here and let us use the EI instruction where available
> > > as well.
> > >
> > >  HTH,
> > >
> > >   Maciej
> >
> >
> >
> > --
> >
> > Marco Crivellari
> >
> > L3 Support Engineer, Technology & Product
> >
> >
> >
> >
> > marco.crivellari@suse.com



--

Marco Crivellari

L3 Support Engineer, Technology & Product




marco.crivellari@suse.com
Thomas Bogendoerfer March 26, 2025, 11:34 a.m. UTC | #21
On Wed, Mar 26, 2025 at 10:46:08AM +0100, Marco Crivellari wrote:
> I'm mostly thinking about future changes in this part of the code.
> But if it is ok what we have now, and future changes are not a
> problem, let's keep this version.
> 
> Would this be ok with you @Maciej?
> 
> If so, the region is now 40 bytes. This is what I did yesterday:
> 
> @@ -110,6 +110,7 @@ LEAF(__r4k_wait)
>        .set    noreorder
>        /* Start of idle interrupt region. */
>        MFC0    t0, CP0_STATUS
> +       nop
>        /* Enable interrupt. */
>        ori     t0, 0x1f
>        xori    t0, 0x1e
> @@ -128,7 +129,11 @@ LEAF(__r4k_wait)
>         */
>        wait
>        /* End of idle interrupt region. */
> -1:
> +__r4k_wait_exit:
> +       /* Check idle interrupt region size. */
> +       .if ((__r4k_wait_exit - __r4k_wait) != 40)
> +       .error  "Idle interrupt region size mismatch: expected 40 bytes."
> +       .endif
>        jr      ra
>         nop
>        .set    pop
> @@ -139,10 +144,10 @@ LEAF(__r4k_wait)
>        .set    push
>        .set    noat
>        MFC0    k0, CP0_EPC
> -       PTR_LA  k1, 1b
> -       /* 36 byte idle interrupt region. */
> +       PTR_LA  k1, __r4k_wait_exit
> +       /* 40 byte idle interrupt region. */

IMHO, we can't extend this above 36 bytes, because the interrupt could
hit at the instruction before the wait, which is then in the
next 32byte block.

I was thinking about aligning __r4k_wait 16 byte earlier and place
the wait at the end of the 

Something like:

	.align 5
	.align 4
LEAF(__r4k_wait)
	// irq enable sequence
	wait // align to end of the 32byte block
_r4k_wait_exit:

>        ori     k0, 0x1f
> -       PTR_ADDIU       k0, 5
> +       PTR_ADDIU       k0, 9

this needs to
	PTR_ADDIU       k0, 1

then. and __r4k_wait_exit - __r4k_wait is 48

But there might better ways...

Thomas.
Maciej W. Rozycki March 26, 2025, 4:01 p.m. UTC | #22
On Wed, 26 Mar 2025, Marco Crivellari wrote:

> I'm mostly thinking about future changes in this part of the code.
> But if it is ok what we have now, and future changes are not a
> problem, let's keep this version.
> 
> Would this be ok with you @Maciej?

 Nope, I think it's the wrong direction and I have a longer reply in the 
queue, but I yet need some time to complete it.  I just got off the plane 
having been away from home for a month and I have other stuff to do first.  
If it was broken for ~30 years, it can wait a couple days yet.

  Maciej
Maciej W. Rozycki March 28, 2025, 10:42 a.m. UTC | #23
Hi Marco,

> >  Whatever manual you quote it refers to MIPS Release 2, which is only
> > dated 2003
> 
> About the MIPS manual, anyhow, it is "MIPS32 M4 Processor Core" (year 2008).
> Maybe I've also picked the wrong manual.

 It has to be M4K there, and that isn't the most fortunate choice, because 
it's a manual for the specific microarchitecture and then one that doesn't 
usually run Linux, because it has no TLB.  For MIPS Release 1 onwards the 
architecture manuals are a better choice, but still they don't necessarily 
get the details right for older legacy MIPS ISA revisions.

 For your reference the architecture manuals are:

- "MIPS32 Architecture For Programmers, Volume I: Introduction to the 
   MIPS32 Architecture",

- "MIPS32 Architecture For Programmers, Volume II: The MIPS32 Instruction 
   Set",

- "MIPS32 Architecture For Programmers, Volume III: The MIPS32 Privileged 
   Resource Architecture",

and their MIPS64 counterparts, released repeatedly as the architecture 
evolved with the document's major revision number matching the respective 
architecture release.  I do believe the most recent revisions continue 
being available from the MIPS web site.  But as the titles imply these 
manuals document the architecture and not the intricacies of the MIPS 
assembly language dialect or the various aspects of programming for the 
architecture.  For that you need another book.

> >  Best is to avoid using a `.set noreorder' region in the first place.
> > But is it really needed here?  Does the rollback area have to be of a
> > hardcoded size rather than one calculated by the assembler based on
> > actual machine code produced?  It seems to me having it calculated would
> > reduce complexity here and let us use the EI instruction where available
> > as well.
> 
> Well, considering the complexity and how the code looks fragile even with
> a small change, yes, that's likely better to avoid noreorder.

 FAOD this is my general observation, regardless of its applicability 
here.

> I think I'm going to need some guidance here.
> Please, correct me where something is wrong.
> 
> 1)
> When you say "let us use the EI instruction where available" are you
> referring to do
> something like below?
> 
> #if defined(CONFIG_CPU_HAS_DIEI)
> ei
> #else
> MFC0    t0, CP0_STATUS
> ori     t0, 0x1f
> xori    t0, 0x1e
> MTC0    t0, CP0_STATUS
> #endif

 I think let's just simplify this stuff a lot and make use of the existing 
code infrastructure.  Since we move interrupt enabling into `__r4k_wait', 
we can just get rid of the C wrapper the existence of which makes no sense 
anymore along with the extra nesting of function calls, and have something 
along the lines of:

	.section .cpuidle.text,"ax"
	/* Align to 32 bytes for the maximum idle interrupt region size.  */
	.align	5
LEAF(r4k_wait)
	/* Keep the ISA bit clear for calculations on local labels here.  */
0:	.fill	0
	/* Start of idle interrupt region.  */
	local_irq_enable
	/*
	 * If an interrupt lands here, before going idle on the next
	 * instruction, we must *NOT* go idle since the interrupt could
	 * have set TIF_NEED_RESCHED or caused a timer to need resched.
	 * Fall through -- see rollback_handler below -- and have the
	 * idle loop take care of things.
	 */
1:	.fill	0
	/* The R2 EI/EHB sequence takes 8 bytes, otherwise pad up.  */
	.if	1b - 0b > 32
	.error	"overlong idle interrupt region"
	.elseif	1b - 0b > 8
	.align	4
	.endif
2:	.fill	0
	.equ	r4k_wait_idle_size, 2b - 0b
	/* End of idle interrupt region; size has to be a power of 2.  */
	.set	MIPS_ISA_ARCH_LEVEL_RAW
r4k_wait_insn:
	wait
	.set	mips0
	local_irq_disable
	jr	ra
	END(r4k_wait)
	.previous

	.macro	BUILD_ROLLBACK_PROLOGUE handler
	FEXPORT(rollback_\handler)
	.set	push
	.set	noat
	MFC0	k0, CP0_EPC
	/* Subtract 2 to let the ISA bit propagate through the mask.  */
	PTR_LA	k1, r4k_wait_insn - 2
	ori	k0, r4k_wait_idle_size - 2
	bne	k0, k1, \handler
	MTC0	k0, CP0_EPC
	.set pop
	.endm

There's some complication here coming from the need to factor in the ISA 
bit in the microMIPS mode; something that hasn't been discussed so far.  
The `.fill 0' approach is a hack and it has struck me that we need to add 
a `.noinsn' pseudo-op to GAS for this purpose, complementing `.insn', but 
we need to stick with the hack for now anyway as it will take years until 
we can rely on a new feature in the assembler.

 NB don't refer to a local label from a macro as such a reference may end 
up pointing not where you want it to from the place the macro is pasted 
at.  For example with your v6 code BUILD_ROLLBACK_PROLOGUE refers to the 
intended label in `__r4k_wait' when pasted for `handle_int', but then to 
the label in `except_vec4' instead when pasted for `except_vec_vi' further 
down.

> 2)
> Removing "noreorder" would let the compiler add "nops" where they are needed.

 The assembler, not the compiler.

> But that still means the 3 ssnop and ehb are still needed, right?

 Yes, in the pessimistic case, which the code above avoids where not 
needed.  E.g. for R2 we now have:

807f1700 <r4k_wait>:
807f1700:	41606020 	ei
807f1704:	000000c0 	ehb

807f1708 <r4k_wait_insn>:
807f1708:	42000020 	wait
807f170c:	41606000 	di
807f1710:	000000c0 	ehb
807f1714:	03e00008 	jr	ra
807f1718:	00000000 	nop
	...

-- nice and sweet, and for the R10k likewise:

a80000000084e540 <r4k_wait>:
a80000000084e540:	400c6000 	mfc0	t0,$12
a80000000084e544:	358c0001 	ori	t0,t0,0x1
a80000000084e548:	408c6000 	mtc0	t0,$12
a80000000084e54c:	00000000 	nop

a80000000084e550 <r4k_wait_insn>:
a80000000084e550:	42000020 	wait
a80000000084e554: 	400c6000 	mfc0	t0,$12
a80000000084e558: 	358c0001 	ori	t0,t0,0x1
a80000000084e55c: 	398c0001 	xori	t0,t0,0x1
a80000000084e560: 	408c6000 	mtc0	t0,$12
a80000000084e564: 	03e00008 	jr	ra
a80000000084e568: 	00000000 	nop
	...

-- because it's fully interlocked, so no extra NOPs other than to pad the 
idle interrupt region to a power-of-two size.

> My subsequent dumb question is: there is the guarantee that the
> compiler will not
> reorder / change something we did?

 Not in this case; the assembler isn't that smart (which is why compiled 
code is usually a better choice where feasible) and except for one case 
all it can do is adding extra NOPs between instructions to avoid pipeline 
hazards (and then ones coming from data dependencies in register use 
only), including ones between non-adjacent instruction pairs.

 The only exception are jumps/branches where the assembler will schedule 
their delay slot where possible by swapping the jump/branch with the 
immediately preceding instruction where no data dependency exists between 
the two instructions.  Otherwise the delay slot will be filled with a NOP.

 For example with the code sequences above the last instruction produced 
by `local_irq_disable' could be scheduled into the delay slot of `jr ra'.  
It wouldn't be an issue here however and it doesn't actually happen with 
GAS, likely because `local_irq_disable' is a user macro.  For built-in 
instruction macros such as `la' the swapping of the final instruction does 
happen.

> This question also came after reading the manual you quoted (paragraph
> "Coprocessor Hazards"):
> 
> "For example, after an mtc0 to the Status register which changes an
> interrupt mask bit,
> there will be two further instructions before the interrupt is
> actually enabled or disabled.
> [...]
> To cope with these situations usually requires the programmer to take explicit
> action to prevent the assembler from scheduling inappropriate
> instructions after a
> dangerous mtc0. This is done by using the .set noreorder directive,
> discussed below"

 Technically it is correct and likely the original MIPSCO assembler from 
1985 or one supplied with IRIX were smarter, but GAS won't itself ever 
reorder instructions other than to fill branch delay slots, so we don't 
have to be worried.  Here's the relevant comment from GAS sources:

      /* There are a lot of optimizations we could do that we don't.
	 In particular, we do not, in general, reorder instructions.
	 If you use gcc with optimization, it will reorder
	 instructions and generally do much more optimization then we
	 do here; repeating all that work in the assembler would only
	 benefit hand written assembly code, and does not seem worth
	 it.  */

And in cases like this where we just don't want a bunch of instructions to 
be moved across a certain point we don't actually have to schedule any 
code by hand as a lone:

	.set	noreorder
	.set	reorder

barrier will do (or actually just a label should as well).

> 3)
> Considering the size is determined by the compiler, the check about
> the idle interrupt
> size region should not be needed, correct?

 I gave it some thought and concluded that the interrupt handling path has 
to be optimised for performance and the idle routine does not.  Therefore 
I think we need to stick to the power-of-two size for the idle interrupt 
region, because in that case the check for an interrupt arriving within 
`r4k_wait' but ahead of the WAIT instruction can be done with a pair of 
ALU operations and a single branch.  Anything more complex would require 
more operations in the interrupt handling path.

> 4)
> ori and PTR_ADDIU should be removed of course from the rollback handler macro.

 I can't imagine how we'd advance past WAIT without these instructions, 
what do you have in mind?

> Can I have some hints about the needed change?
> Using QEMU is always working, so I'm not sure if what I will change is
> also correct.

 Any verification with real hardware can only be based on the probability 
of an interrupt arriving at the right time and the right conditions for a 
pending timer.  I'd expect such an event to be relatively rare, so for the 
large part I think we need to rely on correct code generation rather than 
run-time verification.  Making use of the existing infrastructure rather 
than having an ad-hoc handcoded variant improves robustness here and also 
means no change will likely be needed should any further platform variant 
of the various hazard resolution macros be added in the future.

 NB how do you actually verify this stuff with QEMU?  Is it by injecting 
an interrupt by hand at a chosen code location via GDB attached to QEMU's 
built-in debug stub?

> Many thanks in advance, also for your time!

 You're welcome.

 Below I've included a complete change based on the outline above.  It 
seems to do the right thing for a couple of my configurations, but I've 
only eyeballed the resulting code and haven't tried running it.  Most of 
my hardware doesn't implement the WAIT instruction anyway.

 The missing `.cpuidle.text' section assignment is a separate preexisting 
problem and the fix might be worth splitting off into a preparatory patch, 
for backporting or documentation purposes.

 You can add my:

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>

on top of yours when using this code, since it's still ultimately yours, 
just massaged a little.  FWIW I went ahead and chose to cook this piece up 
myself since I realised how complex this issue actually is and that it 
would take us forever to get all the individual aspects nailed over e-mail 
communication.

 Let me know if you find anything here unclear or have any questions or 
comments.

  Maciej

---
 arch/mips/include/asm/idle.h |    3 --
 arch/mips/kernel/genex.S     |   59 ++++++++++++++++++++++++-------------------
 arch/mips/kernel/idle.c      |    7 -----
 3 files changed, 34 insertions(+), 35 deletions(-)

linux-mips-idle-vs-timer.diff
Index: linux-macro/arch/mips/include/asm/idle.h
===================================================================
--- linux-macro.orig/arch/mips/include/asm/idle.h
+++ linux-macro/arch/mips/include/asm/idle.h
@@ -6,8 +6,7 @@
 #include <linux/linkage.h>
 
 extern void (*cpu_wait)(void);
-extern void r4k_wait(void);
-extern asmlinkage void __r4k_wait(void);
+extern asmlinkage void r4k_wait(void);
 extern void r4k_wait_irqoff(void);
 
 static inline int using_rollback_handler(void)
Index: linux-macro/arch/mips/kernel/genex.S
===================================================================
--- linux-macro.orig/arch/mips/kernel/genex.S
+++ linux-macro/arch/mips/kernel/genex.S
@@ -104,41 +104,48 @@ NESTED(except_vec3_r4000, 0, sp)
 
 	__FINIT
 
-	.align	5	/* 32 byte rollback region */
-LEAF(__r4k_wait)
-	.set	push
-	.set	noreorder
-	/* start of rollback region */
-	LONG_L	t0, TI_FLAGS($28)
-	nop
-	andi	t0, _TIF_NEED_RESCHED
-	bnez	t0, 1f
-	 nop
-	nop
-	nop
-#ifdef CONFIG_CPU_MICROMIPS
-	nop
-	nop
-	nop
-	nop
-#endif
+	.section .cpuidle.text,"ax"
+	/* Align to 32 bytes for the maximum idle interrupt region size.  */
+	.align	5
+LEAF(r4k_wait)
+	/* Keep the ISA bit clear for calculations on local labels here.  */
+0:	.fill	0
+	/* Start of idle interrupt region.  */
+	local_irq_enable
+	/*
+	 * If an interrupt lands here, before going idle on the next
+	 * instruction, we must *NOT* go idle since the interrupt could
+	 * have set TIF_NEED_RESCHED or caused a timer to need resched.
+	 * Fall through -- see rollback_handler below -- and have the
+	 * idle loop take care of things.
+	 */
+1:	.fill	0
+	/* The R2 EI/EHB sequence takes 8 bytes, otherwise pad up.  */
+	.if	1b - 0b > 32
+	.error	"overlong idle interrupt region"
+	.elseif	1b - 0b > 8
+	.align	4
+	.endif
+2:	.fill	0
+	.equ	r4k_wait_idle_size, 2b - 0b
+	/* End of idle interrupt region; size has to be a power of 2.  */
 	.set	MIPS_ISA_ARCH_LEVEL_RAW
+r4k_wait_insn:
 	wait
-	/* end of rollback region (the region size must be power of two) */
-1:
+	.set	mips0
+	local_irq_disable
 	jr	ra
-	 nop
-	.set	pop
-	END(__r4k_wait)
+	END(r4k_wait)
+	.previous
 
 	.macro	BUILD_ROLLBACK_PROLOGUE handler
 	FEXPORT(rollback_\handler)
 	.set	push
 	.set	noat
 	MFC0	k0, CP0_EPC
-	PTR_LA	k1, __r4k_wait
-	ori	k0, 0x1f	/* 32 byte rollback region */
-	xori	k0, 0x1f
+	/* Subtract 2 to let the ISA bit propagate through the mask.  */
+	PTR_LA	k1, r4k_wait_insn - 2
+	ori	k0, r4k_wait_idle_size - 2
 	bne	k0, k1, \handler
 	MTC0	k0, CP0_EPC
 	.set pop
Index: linux-macro/arch/mips/kernel/idle.c
===================================================================
--- linux-macro.orig/arch/mips/kernel/idle.c
+++ linux-macro/arch/mips/kernel/idle.c
@@ -35,13 +35,6 @@ static void __cpuidle r3081_wait(void)
 	write_c0_conf(cfg | R30XX_CONF_HALT);
 }
 
-void __cpuidle r4k_wait(void)
-{
-	raw_local_irq_enable();
-	__r4k_wait();
-	raw_local_irq_disable();
-}
-
 /*
  * This variant is preferable as it allows testing need_resched and going to
  * sleep depending on the outcome atomically.  Unfortunately the "It is
Maciej W. Rozycki March 28, 2025, 2:18 p.m. UTC | #24
On Fri, 28 Mar 2025, Maciej W. Rozycki wrote:

> just massaged a little.  FWIW I went ahead and chose to cook this piece up 
> myself since I realised how complex this issue actually is and that it 
> would take us forever to get all the individual aspects nailed over e-mail 
> communication.

 And yet after this many of internal iterations I did manage to miss one 
bit.  In the optimised version proposed we need to explicitly skip over 
the WAIT instruction like this:

r4k_wait_insn:
	wait
r4k_wait_exit:

and then:

	.set	noreorder
	bne	k0, k1, \handler
	 PTR_ADDIU	k0, r4k_wait_exit - r4k_wait_insn + 2
	.set	reorder

(and here we have a legitimate use for `.set noreorder' to avoid wasting a 
NOP for the branch delay slot due to a data antidependency on $k0; it's 
fine to clobber $k0 in the branch-taken case as by definition the register 
is dead at the exit from this macro).

 Updated patch follows.  I think we also need to replace "rollback" with 
another name as with new code we don't roll back anymore.

  Maciej

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
---
 arch/mips/include/asm/idle.h |    3 --
 arch/mips/kernel/genex.S     |   63 +++++++++++++++++++++++++------------------
 arch/mips/kernel/idle.c      |    7 ----
 3 files changed, 38 insertions(+), 35 deletions(-)

linux-mips-idle-vs-timer.diff
Index: linux-macro/arch/mips/include/asm/idle.h
===================================================================
--- linux-macro.orig/arch/mips/include/asm/idle.h
+++ linux-macro/arch/mips/include/asm/idle.h
@@ -6,8 +6,7 @@
 #include <linux/linkage.h>
 
 extern void (*cpu_wait)(void);
-extern void r4k_wait(void);
-extern asmlinkage void __r4k_wait(void);
+extern asmlinkage void r4k_wait(void);
 extern void r4k_wait_irqoff(void);
 
 static inline int using_rollback_handler(void)
Index: linux-macro/arch/mips/kernel/genex.S
===================================================================
--- linux-macro.orig/arch/mips/kernel/genex.S
+++ linux-macro/arch/mips/kernel/genex.S
@@ -104,42 +104,53 @@ NESTED(except_vec3_r4000, 0, sp)
 
 	__FINIT
 
-	.align	5	/* 32 byte rollback region */
-LEAF(__r4k_wait)
-	.set	push
-	.set	noreorder
-	/* start of rollback region */
-	LONG_L	t0, TI_FLAGS($28)
-	nop
-	andi	t0, _TIF_NEED_RESCHED
-	bnez	t0, 1f
-	 nop
-	nop
-	nop
-#ifdef CONFIG_CPU_MICROMIPS
-	nop
-	nop
-	nop
-	nop
-#endif
+	.section .cpuidle.text,"ax"
+	/* Align to 32 bytes for the maximum idle interrupt region size.  */
+	.align	5
+LEAF(r4k_wait)
+	/* Keep the ISA bit clear for calculations on local labels here.  */
+0:	.fill	0
+	/* Start of idle interrupt region.  */
+	local_irq_enable
+	/*
+	 * If an interrupt lands here, before going idle on the next
+	 * instruction, we must *NOT* go idle since the interrupt could
+	 * have set TIF_NEED_RESCHED or caused a timer to need resched.
+	 * Fall through -- see rollback_handler below -- and have the
+	 * idle loop take care of things.
+	 */
+1:	.fill	0
+	/* The R2 EI/EHB sequence takes 8 bytes, otherwise pad up.  */
+	.if	1b - 0b > 32
+	.error	"overlong idle interrupt region"
+	.elseif	1b - 0b > 8
+	.align	4
+	.endif
+2:	.fill	0
+	.equ	r4k_wait_idle_size, 2b - 0b
+	/* End of idle interrupt region; size has to be a power of 2.  */
 	.set	MIPS_ISA_ARCH_LEVEL_RAW
+r4k_wait_insn:
 	wait
-	/* end of rollback region (the region size must be power of two) */
-1:
+r4k_wait_exit:
+	.set	mips0
+	local_irq_disable
 	jr	ra
-	 nop
-	.set	pop
-	END(__r4k_wait)
+	END(r4k_wait)
+	.previous
 
 	.macro	BUILD_ROLLBACK_PROLOGUE handler
 	FEXPORT(rollback_\handler)
 	.set	push
 	.set	noat
 	MFC0	k0, CP0_EPC
-	PTR_LA	k1, __r4k_wait
-	ori	k0, 0x1f	/* 32 byte rollback region */
-	xori	k0, 0x1f
+	/* Subtract/add 2 to let the ISA bit propagate through the mask.  */
+	PTR_LA	k1, r4k_wait_insn - 2
+	ori	k0, r4k_wait_idle_size - 2
+	.set	noreorder
 	bne	k0, k1, \handler
+	 PTR_ADDIU	k0, r4k_wait_exit - r4k_wait_insn + 2
+	.set	reorder
 	MTC0	k0, CP0_EPC
 	.set pop
 	.endm
Index: linux-macro/arch/mips/kernel/idle.c
===================================================================
--- linux-macro.orig/arch/mips/kernel/idle.c
+++ linux-macro/arch/mips/kernel/idle.c
@@ -35,13 +35,6 @@ static void __cpuidle r3081_wait(void)
 	write_c0_conf(cfg | R30XX_CONF_HALT);
 }
 
-void __cpuidle r4k_wait(void)
-{
-	raw_local_irq_enable();
-	__r4k_wait();
-	raw_local_irq_disable();
-}
-
 /*
  * This variant is preferable as it allows testing need_resched and going to
  * sleep depending on the outcome atomically.  Unfortunately the "It is
diff mbox series

Patch

diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index a572ce36a24f..4e012421d00f 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -104,27 +104,30 @@  handle_vcei:
 
 	__FINIT
 
-	.align	5	/* 32 byte rollback region */
+	.align	5
 LEAF(__r4k_wait)
 	.set	push
 	.set	noreorder
-	/* start of rollback region */
-	LONG_L	t0, TI_FLAGS($28)
-	nop
-	andi	t0, _TIF_NEED_RESCHED
-	bnez	t0, 1f
-	 nop
-	nop
-	nop
-#ifdef CONFIG_CPU_MICROMIPS
-	nop
-	nop
-	nop
-	nop
-#endif
+	/* Start of idle interrupt region. */
+	MFC0	t0, CP0_STATUS
+	/* Enable interrupt. */
+	ori 	t0, 0x1f
+	xori	t0, 0x1e
+	MTC0	t0, CP0_STATUS
+	_ssnop
+	_ssnop
+	_ssnop
+	_ehb
 	.set	MIPS_ISA_ARCH_LEVEL_RAW
+	/*
+	 * If an interrupt lands here, between enabling interrupts above and
+	 * going idle on the next instruction, we must *NOT* go idle since the
+	 * interrupt could have set TIF_NEED_RESCHED or caused a timer to need
+	 * resched. Fall through -- see rollback_handler below -- and have
+	 * the idle loop take care of things.
+	 */
 	wait
-	/* end of rollback region (the region size must be power of two) */
+	/* End of idle interrupt region. */
 1:
 	jr	ra
 	 nop
@@ -136,9 +139,10 @@  LEAF(__r4k_wait)
 	.set	push
 	.set	noat
 	MFC0	k0, CP0_EPC
-	PTR_LA	k1, __r4k_wait
-	ori	k0, 0x1f	/* 32 byte rollback region */
-	xori	k0, 0x1f
+	PTR_LA	k1, 1b
+	/* 36 byte idle interrupt region. */
+	ori 	k0, 0x1f
+	PTR_ADDIU	k0, 5
 	bne	k0, k1, \handler
 	MTC0	k0, CP0_EPC
 	.set pop
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index 5abc8b7340f8..1f74c0589f7e 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -37,7 +37,6 @@  static void __cpuidle r3081_wait(void)
 
 void __cpuidle r4k_wait(void)
 {
-	raw_local_irq_enable();
 	__r4k_wait();
 	raw_local_irq_disable();
 }