diff mbox series

[1/2] MIPS: Loongson-3: Enable COP2 usage in kernel

Message ID 1588153121-28507-1-git-send-email-chenhc@lemote.com (mailing list archive)
State Superseded
Headers show
Series [1/2] MIPS: Loongson-3: Enable COP2 usage in kernel | expand

Commit Message

Huacai Chen April 29, 2020, 9:38 a.m. UTC
From: chenj <chenj@lemote.com>

Loongson-3's COP2 is Multi-Media coprocessor, it is disabled in kernel
mode by default. However, gslq/gssq (16-bytes load/store instructions)
overrides the instruction format of lwc2/swc2. If we wan't to use gslq/
gssq for optimization in kernel, we should enable COP2 usage in kernel.
---
 arch/mips/boot/compressed/head.S   |  7 +++++++
 arch/mips/include/asm/mipsregs.h   |  1 +
 arch/mips/include/asm/stackframe.h | 12 ++++++++++++
 arch/mips/kernel/head.S            | 16 ++++++++++++++++
 arch/mips/kernel/r4k_switch.S      |  3 +++
 arch/mips/kernel/traps.c           |  3 +++
 6 files changed, 42 insertions(+)

Comments

Thomas Bogendoerfer April 29, 2020, 6:33 p.m. UTC | #1
On Wed, Apr 29, 2020 at 05:38:40PM +0800, Huacai Chen wrote:
> From: chenj <chenj@lemote.com>
> 
> Loongson-3's COP2 is Multi-Media coprocessor, it is disabled in kernel
> mode by default. However, gslq/gssq (16-bytes load/store instructions)
> overrides the instruction format of lwc2/swc2. If we wan't to use gslq/
> gssq for optimization in kernel, we should enable COP2 usage in kernel.

What aboout context switches ? Or is the copro only used by one kernel
driver ?

> diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
> index ce40fbf..0f71540 100644
> --- a/arch/mips/include/asm/mipsregs.h
> +++ b/arch/mips/include/asm/mipsregs.h
> @@ -386,6 +386,7 @@
>  #define ST0_CU1			0x20000000
>  #define ST0_CU2			0x40000000
>  #define ST0_CU3			0x80000000
> +#define ST0_MM			0x40000000	/* Loongson-3 naming */

please use ST0_CU2, so everybody understands it's COO2

> @@ -450,7 +450,11 @@
>   */
>  		.macro	CLI
>  		mfc0	t0, CP0_STATUS
> +#ifdef CONFIG_CPU_LOONGSON64
> +		li	t1, ST0_CU0 | ST0_MM | STATMASK
> +#else
>  		li	t1, ST0_CU0 | STATMASK
> +#endif

you are doing this three time in this file. How about doing

#ifdef CONFIG_CPU_LOONGSON64
#define ST0_MASK	ST0_CU0 | ST0_CU2
#else
#define ST0_MASK	ST0_CU0
#endif

and use ST0_MASK ?

> diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
> index 3b02ffe..cdac82d 100644
> --- a/arch/mips/kernel/head.S
> +++ b/arch/mips/kernel/head.S
> @@ -45,18 +45,34 @@
>  
>  	.macro	setup_c0_status_pri
>  #ifdef CONFIG_64BIT
> +#ifdef CONFIG_CPU_LOONGSON64
> +	setup_c0_status ST0_KX|ST0_MM 0
> +#else
>  	setup_c0_status ST0_KX 0
> +#endif

same thing here.

> --- a/arch/mips/kernel/r4k_switch.S
> +++ b/arch/mips/kernel/r4k_switch.S
> @@ -53,6 +53,9 @@
>  	nor	a3, $0, a3
>  	and	a2, a3
>  	or	a2, t1
> +#ifdef CONFIG_CPU_LOONGSON64
> +	or	a2, ST0_MM
> +#endif

this looks wrong. If THERAD_STATUS is setup correct, you don't need
to mess with ST0_CU2 here.

Thomas.
Huacai Chen April 30, 2020, 7:54 a.m. UTC | #2
Hi, Thomas,


On Thu, Apr 30, 2020 at 2:33 AM Thomas Bogendoerfer
<tsbogend@alpha.franken.de> wrote:
>
> On Wed, Apr 29, 2020 at 05:38:40PM +0800, Huacai Chen wrote:
> > From: chenj <chenj@lemote.com>
> >
> > Loongson-3's COP2 is Multi-Media coprocessor, it is disabled in kernel
> > mode by default. However, gslq/gssq (16-bytes load/store instructions)
> > overrides the instruction format of lwc2/swc2. If we wan't to use gslq/
> > gssq for optimization in kernel, we should enable COP2 usage in kernel.
>
> What aboout context switches ? Or is the copro only used by one kernel
> driver ?
This patch only enable Multi-Media coprocessor (COP2) in the kernel,
which means it will lose ST0_CU2 when a process go back to user space
(user space use COP2 will trigger an exception and then grab COP2,
which is similar to FPU). And as a result, we need to modify
r4k_switch.S because the new process doesn't contain CU2 in
THERAD_STATUS.
>
> > diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
> > index ce40fbf..0f71540 100644
> > --- a/arch/mips/include/asm/mipsregs.h
> > +++ b/arch/mips/include/asm/mipsregs.h
> > @@ -386,6 +386,7 @@
> >  #define ST0_CU1                      0x20000000
> >  #define ST0_CU2                      0x40000000
> >  #define ST0_CU3                      0x80000000
> > +#define ST0_MM                       0x40000000      /* Loongson-3 naming */
>
> please use ST0_CU2, so everybody understands it's COO2
I see that there is already an alias  ST0_XX for ST0_CU3, and I think
use a ST0_MM for ST0_CU2 is more meaningful in some places (at least
in traps.c where ST0_XX is also used). If there are places only used
to describe the CU Mask (such as in stackframe.h), I will use ST0_CU2.

>
> > @@ -450,7 +450,11 @@
> >   */
> >               .macro  CLI
> >               mfc0    t0, CP0_STATUS
> > +#ifdef CONFIG_CPU_LOONGSON64
> > +             li      t1, ST0_CU0 | ST0_MM | STATMASK
> > +#else
> >               li      t1, ST0_CU0 | STATMASK
> > +#endif
>
> you are doing this three time in this file. How about doing
>
> #ifdef CONFIG_CPU_LOONGSON64
> #define ST0_MASK        ST0_CU0 | ST0_CU2
> #else
> #define ST0_MASK        ST0_CU0
> #endif
>
> and use ST0_MASK ?
OK, I will define and use ST0_CUMASK here.

>
> > diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
> > index 3b02ffe..cdac82d 100644
> > --- a/arch/mips/kernel/head.S
> > +++ b/arch/mips/kernel/head.S
> > @@ -45,18 +45,34 @@
> >
> >       .macro  setup_c0_status_pri
> >  #ifdef CONFIG_64BIT
> > +#ifdef CONFIG_CPU_LOONGSON64
> > +     setup_c0_status ST0_KX|ST0_MM 0
> > +#else
> >       setup_c0_status ST0_KX 0
> > +#endif
>
> same thing here.
OK, I will define and use ST0_SET here.

>
> > --- a/arch/mips/kernel/r4k_switch.S
> > +++ b/arch/mips/kernel/r4k_switch.S
> > @@ -53,6 +53,9 @@
> >       nor     a3, $0, a3
> >       and     a2, a3
> >       or      a2, t1
> > +#ifdef CONFIG_CPU_LOONGSON64
> > +     or      a2, ST0_MM
> > +#endif
>
> this looks wrong. If THERAD_STATUS is setup correct, you don't need
> to mess with ST0_CU2 here.
See above please, THERAD_STATUS doesn't include ST0_CU2 because it
lose COP2 in user-space.

>
> Thomas.
>
> --
> Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> good idea.                                                [ RFC1925, 2.3 ]
Maciej W. Rozycki May 1, 2020, 12:44 a.m. UTC | #3
On Thu, 30 Apr 2020, Huacai Chen wrote:

> > > diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
> > > index ce40fbf..0f71540 100644
> > > --- a/arch/mips/include/asm/mipsregs.h
> > > +++ b/arch/mips/include/asm/mipsregs.h
> > > @@ -386,6 +386,7 @@
> > >  #define ST0_CU1                      0x20000000
> > >  #define ST0_CU2                      0x40000000
> > >  #define ST0_CU3                      0x80000000
> > > +#define ST0_MM                       0x40000000      /* Loongson-3 naming */
> >
> > please use ST0_CU2, so everybody understands it's COO2
> I see that there is already an alias  ST0_XX for ST0_CU3, and I think
> use a ST0_MM for ST0_CU2 is more meaningful in some places (at least
> in traps.c where ST0_XX is also used). If there are places only used
> to describe the CU Mask (such as in stackframe.h), I will use ST0_CU2.

 Well ST0_XX is not an alias, because the bit has a different meaning that 
has nothing to do with CP3.  It just happens to share the bit position in 
CP0.Status with ST0_CU3.  Yes, ST0_XX is misplaced and misnamed as it 
applies to R10k processors only, but it is our legacy from the old days of 
chaos and some three processor types supported.  This is similar to say 
ST0_ERL vs ST0_IEP, which also share the bit position in CP0.Status, but 
have different meanings each.

 All this could have been cleaned up (e.g. s/ST0_XX/R10K_ST0_XX/) if 
someone had the incentive; I occasionally had and poked at these macros, 
but apparently missed this one and a couple of other ones.  Maybe on some 
rainy autumn evening...

 However ST0_MM does enable CP2, even if a specific implementation, making 
it no different from ST0_CU2 really.

  Maciej
Huacai Chen May 14, 2020, 5:44 a.m. UTC | #4
Hi, Maiej,

On Thu, May 14, 2020 at 6:42 AM Maciej W. Rozycki <macro@linux-mips.org> wrote:
>
> On Thu, 30 Apr 2020, Huacai Chen wrote:
>
> > > > diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
> > > > index ce40fbf..0f71540 100644
> > > > --- a/arch/mips/include/asm/mipsregs.h
> > > > +++ b/arch/mips/include/asm/mipsregs.h
> > > > @@ -386,6 +386,7 @@
> > > >  #define ST0_CU1                      0x20000000
> > > >  #define ST0_CU2                      0x40000000
> > > >  #define ST0_CU3                      0x80000000
> > > > +#define ST0_MM                       0x40000000      /* Loongson-3 naming */
> > >
> > > please use ST0_CU2, so everybody understands it's COO2
> > I see that there is already an alias  ST0_XX for ST0_CU3, and I think
> > use a ST0_MM for ST0_CU2 is more meaningful in some places (at least
> > in traps.c where ST0_XX is also used). If there are places only used
> > to describe the CU Mask (such as in stackframe.h), I will use ST0_CU2.
>
>  Well ST0_XX is not an alias, because the bit has a different meaning that
> has nothing to do with CP3.  It just happens to share the bit position in
> CP0.Status with ST0_CU3.  Yes, ST0_XX is misplaced and misnamed as it
> applies to R10k processors only, but it is our legacy from the old days of
> chaos and some three processor types supported.  This is similar to say
> ST0_ERL vs ST0_IEP, which also share the bit position in CP0.Status, but
> have different meanings each.
>
>  All this could have been cleaned up (e.g. s/ST0_XX/R10K_ST0_XX/) if
> someone had the incentive; I occasionally had and poked at these macros,
> but apparently missed this one and a couple of other ones.  Maybe on some
> rainy autumn evening...
>
>  However ST0_MM does enable CP2, even if a specific implementation, making
> it no different from ST0_CU2 really.
I have send a new version, could you please review that?Thank you.

>
>   Maciej
>
>
Huacai
diff mbox series

Patch

diff --git a/arch/mips/boot/compressed/head.S b/arch/mips/boot/compressed/head.S
index 409cb48..855ca8c 100644
--- a/arch/mips/boot/compressed/head.S
+++ b/arch/mips/boot/compressed/head.S
@@ -14,11 +14,18 @@ 
 
 #include <asm/asm.h>
 #include <asm/regdef.h>
+#include <asm/mipsregs.h>
 
 	.set noreorder
 	.cprestore
 	LEAF(start)
 start:
+#ifdef CONFIG_CPU_LOONGSON64
+	mfc0    t0, CP0_STATUS
+	or	t0, ST0_MM
+	mtc0    t0, CP0_STATUS
+#endif
+
 	/* Save boot rom start args */
 	move	s0, a0
 	move	s1, a1
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index ce40fbf..0f71540 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -386,6 +386,7 @@ 
 #define ST0_CU1			0x20000000
 #define ST0_CU2			0x40000000
 #define ST0_CU3			0x80000000
+#define ST0_MM			0x40000000	/* Loongson-3 naming */
 #define ST0_XX			0x80000000	/* MIPS IV naming */
 
 /*
diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h
index 4d6ad90..c204850 100644
--- a/arch/mips/include/asm/stackframe.h
+++ b/arch/mips/include/asm/stackframe.h
@@ -450,7 +450,11 @@ 
  */
 		.macro	CLI
 		mfc0	t0, CP0_STATUS
+#ifdef CONFIG_CPU_LOONGSON64
+		li	t1, ST0_CU0 | ST0_MM | STATMASK
+#else
 		li	t1, ST0_CU0 | STATMASK
+#endif
 		or	t0, t1
 		xori	t0, STATMASK
 		mtc0	t0, CP0_STATUS
@@ -463,7 +467,11 @@ 
  */
 		.macro	STI
 		mfc0	t0, CP0_STATUS
+#ifdef CONFIG_CPU_LOONGSON64
+		li	t1, ST0_CU0 | ST0_MM | STATMASK
+#else
 		li	t1, ST0_CU0 | STATMASK
+#endif
 		or	t0, t1
 		xori	t0, STATMASK & ~1
 		mtc0	t0, CP0_STATUS
@@ -477,7 +485,11 @@ 
  */
 		.macro	KMODE
 		mfc0	t0, CP0_STATUS
+#ifdef CONFIG_CPU_LOONGSON64
+		li	t1, ST0_CU0 | ST0_MM | (STATMASK & ~1)
+#else
 		li	t1, ST0_CU0 | (STATMASK & ~1)
+#endif
 #if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
 		andi	t2, t0, ST0_IEP
 		srl	t2, 2
diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index 3b02ffe..cdac82d 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -45,18 +45,34 @@ 
 
 	.macro	setup_c0_status_pri
 #ifdef CONFIG_64BIT
+#ifdef CONFIG_CPU_LOONGSON64
+	setup_c0_status ST0_KX|ST0_MM 0
+#else
 	setup_c0_status ST0_KX 0
+#endif
+#else
+#ifdef CONFIG_CPU_LOONGSON64
+	setup_c0_status ST0_MM 0
 #else
 	setup_c0_status 0 0
 #endif
+#endif
 	.endm
 
 	.macro	setup_c0_status_sec
 #ifdef CONFIG_64BIT
+#ifdef CONFIG_CPU_LOONGSON64
+	setup_c0_status ST0_KX|ST0_MM ST0_BEV
+#else
 	setup_c0_status ST0_KX ST0_BEV
+#endif
+#else
+#ifdef CONFIG_CPU_LOONGSON64
+	setup_c0_status ST0_MM ST0_BEV
 #else
 	setup_c0_status 0 ST0_BEV
 #endif
+#endif
 	.endm
 
 #ifndef CONFIG_NO_EXCEPT_FILL
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index 58232ae..154ae7d 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -53,6 +53,9 @@ 
 	nor	a3, $0, a3
 	and	a2, a3
 	or	a2, t1
+#ifdef CONFIG_CPU_LOONGSON64
+	or	a2, ST0_MM
+#endif
 	mtc0	a2, CP0_STATUS
 	move	v0, a0
 	jr	ra
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 31968cb..1731436 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2114,6 +2114,9 @@  static void configure_status(void)
 #ifdef CONFIG_64BIT
 	status_set |= ST0_FR|ST0_KX|ST0_SX|ST0_UX;
 #endif
+#ifdef CONFIG_CPU_LOONGSON64
+	status_set |= ST0_MM;
+#endif
 	if (current_cpu_data.isa_level & MIPS_CPU_ISA_IV)
 		status_set |= ST0_XX;
 	if (cpu_has_dsp)