diff mbox series

[RFC,1/6] powerpc/64s: Fix assembly to support larger values of THREAD_SIZE

Message ID 20221104172737.391978-2-ajd@linux.ibm.com (mailing list archive)
State Handled Elsewhere
Headers show
Series VMAP_STACK support for book3s64 | expand

Commit Message

Andrew Donnellan Nov. 4, 2022, 5:27 p.m. UTC
When CONFIG_VMAP_STACK is enabled, we set THREAD_SIZE to be at least the
size of a page.

There are a few bits of assembly in the book3s64 code that use THREAD_SIZE in
immediate mode instructions, which can only take an operand of up to 16
bits signed, which isn't quite large enough.

Fix these spots to use a scratch register or use two immediate mode
instructions instead, so we can later enable VMAP_STACK.

Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
---
 arch/powerpc/include/asm/asm-compat.h   | 2 ++
 arch/powerpc/kernel/entry_64.S          | 4 +++-
 arch/powerpc/kernel/irq.c               | 8 ++++++--
 arch/powerpc/kernel/misc_64.S           | 4 +++-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 ++-
 5 files changed, 16 insertions(+), 5 deletions(-)

Comments

Christophe Leroy Nov. 4, 2022, 5:51 p.m. UTC | #1
Le 04/11/2022 à 18:27, Andrew Donnellan a écrit :
> When CONFIG_VMAP_STACK is enabled, we set THREAD_SIZE to be at least the
> size of a page.
> 
> There are a few bits of assembly in the book3s64 code that use THREAD_SIZE in
> immediate mode instructions, which can only take an operand of up to 16
> bits signed, which isn't quite large enough.
> 
> Fix these spots to use a scratch register or use two immediate mode
> instructions instead, so we can later enable VMAP_STACK.
> 
> Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/asm-compat.h   | 2 ++
>   arch/powerpc/kernel/entry_64.S          | 4 +++-
>   arch/powerpc/kernel/irq.c               | 8 ++++++--
>   arch/powerpc/kernel/misc_64.S           | 4 +++-
>   arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 ++-
>   5 files changed, 16 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h
> index 2bc53c646ccd..30dd7813bf3b 100644
> --- a/arch/powerpc/include/asm/asm-compat.h
> +++ b/arch/powerpc/include/asm/asm-compat.h
> @@ -11,6 +11,7 @@
>   #define PPC_LL		stringify_in_c(ld)
>   #define PPC_STL		stringify_in_c(std)
>   #define PPC_STLU	stringify_in_c(stdu)
> +#define PPC_STLUX	stringify_in_c(stdux)
>   #define PPC_LCMPI	stringify_in_c(cmpdi)
>   #define PPC_LCMPLI	stringify_in_c(cmpldi)
>   #define PPC_LCMP	stringify_in_c(cmpd)
> @@ -45,6 +46,7 @@
>   #define PPC_LL		stringify_in_c(lwz)
>   #define PPC_STL		stringify_in_c(stw)
>   #define PPC_STLU	stringify_in_c(stwu)
> +#define PPC_STLUX	stringify_in_c(stwux)
>   #define PPC_LCMPI	stringify_in_c(cmpwi)
>   #define PPC_LCMPLI	stringify_in_c(cmplwi)
>   #define PPC_LCMP	stringify_in_c(cmpw)
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index 3e2e37e6ecab..af25db6e0205 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -238,7 +238,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
>   	/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
>   	   because we don't need to leave the 288-byte ABI gap at the
>   	   top of the kernel stack. */
> -	addi	r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
> +	li	r9,0
> +	ori	r9,r9,THREAD_SIZE-SWITCH_FRAME_SIZE
> +	add	r7,r7,r9

So you assume THREAD_SIZE is never more than 64k? Is that a valid 
assumption?

What about the below instead:

	addis	r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE@ha
	addi	r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE@l

>   
>   	/*
>   	 * PMU interrupts in radix may come in here. They will use r1, not
> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> index 9ede61a5a469..098cf6adceec 100644
> --- a/arch/powerpc/kernel/irq.c
> +++ b/arch/powerpc/kernel/irq.c
> @@ -204,7 +204,9 @@ static __always_inline void call_do_softirq(const void *sp)
>   {
>   	/* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
>   	asm volatile (
> -		 PPC_STLU "	%%r1, %[offset](%[sp])	;"
> +		"li		%%r0, 0			;"
> +		"ori		%%r0, %%r0, %[offset]	;"

Same, you assume offset to be max 64k, is that correct?

What about
		lis		r0, offset@h
		ori		r0, r0, offset@l

> +		 PPC_STLUX "	%%r1, %[sp], %%r0	;"
>   		"mr		%%r1, %[sp]		;"
>   		"bl		%[callee]		;"
>   		 PPC_LL "	%%r1, 0(%%r1)		;"
> @@ -256,7 +258,9 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
>   
>   	/* Temporarily switch r1 to sp, call __do_irq() then restore r1. */
>   	asm volatile (
> -		 PPC_STLU "	%%r1, %[offset](%[sp])	;"
> +		"li		%%r0, 0			;"
> +		"ori		%%r0, %%r0, %[offset]	;"
> +		 PPC_STLUX "	%%r1, %[sp], %%r0	;"

Same

>   		"mr		%%r4, %%r1		;"
>   		"mr		%%r1, %[sp]		;"
>   		"bl		%[callee]		;"
> diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
> index 36184cada00b..ff71b98500a3 100644
> --- a/arch/powerpc/kernel/misc_64.S
> +++ b/arch/powerpc/kernel/misc_64.S
> @@ -384,7 +384,9 @@ _GLOBAL(kexec_sequence)
>   	std	r0,16(r1)
>   
>   	/* switch stacks to newstack -- &kexec_stack.stack */
> -	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
> +	li	r0,0
> +	ori	r0,r0,THREAD_SIZE-STACK_FRAME_OVERHEAD
> +	stdux	r1,r3,r0

Same

>   	mr	r1,r3
>   
>   	li	r0,0
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 37f50861dd98..d05e3d324f4d 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -2686,7 +2686,8 @@ kvmppc_bad_host_intr:
>   	mr	r9, r1
>   	std	r1, PACAR1(r13)
>   	ld	r1, PACAEMERGSP(r13)
> -	subi	r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE
> +	subi	r1, r1, THREAD_SIZE/2
> +	subi	r1, r1, INT_FRAME_SIZE

Same, what about

	subis	r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE@ha
	subi	r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE@l

>   	std	r9, 0(r1)
>   	std	r0, GPR0(r1)
>   	std	r9, GPR1(r1)
Andrew Donnellan April 26, 2023, 7:03 a.m. UTC | #2
On Fri, 2022-11-04 at 17:51 +0000, Christophe Leroy wrote:
> 
> 
> Le 04/11/2022 à 18:27, Andrew Donnellan a écrit :
> > When CONFIG_VMAP_STACK is enabled, we set THREAD_SIZE to be at
> > least the
> > size of a page.
> > 
> > There are a few bits of assembly in the book3s64 code that use
> > THREAD_SIZE in
> > immediate mode instructions, which can only take an operand of up
> > to 16
> > bits signed, which isn't quite large enough.
> > 
> > Fix these spots to use a scratch register or use two immediate mode
> > instructions instead, so we can later enable VMAP_STACK.
> > 
> > Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
> > ---
> >   arch/powerpc/include/asm/asm-compat.h   | 2 ++
> >   arch/powerpc/kernel/entry_64.S          | 4 +++-
> >   arch/powerpc/kernel/irq.c               | 8 ++++++--
> >   arch/powerpc/kernel/misc_64.S           | 4 +++-
> >   arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 ++-
> >   5 files changed, 16 insertions(+), 5 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/asm-compat.h
> > b/arch/powerpc/include/asm/asm-compat.h
> > index 2bc53c646ccd..30dd7813bf3b 100644
> > --- a/arch/powerpc/include/asm/asm-compat.h
> > +++ b/arch/powerpc/include/asm/asm-compat.h
> > @@ -11,6 +11,7 @@
> >   #define PPC_LL                stringify_in_c(ld)
> >   #define PPC_STL               stringify_in_c(std)
> >   #define PPC_STLU      stringify_in_c(stdu)
> > +#define PPC_STLUX      stringify_in_c(stdux)
> >   #define PPC_LCMPI     stringify_in_c(cmpdi)
> >   #define PPC_LCMPLI    stringify_in_c(cmpldi)
> >   #define PPC_LCMP      stringify_in_c(cmpd)
> > @@ -45,6 +46,7 @@
> >   #define PPC_LL                stringify_in_c(lwz)
> >   #define PPC_STL               stringify_in_c(stw)
> >   #define PPC_STLU      stringify_in_c(stwu)
> > +#define PPC_STLUX      stringify_in_c(stwux)
> >   #define PPC_LCMPI     stringify_in_c(cmpwi)
> >   #define PPC_LCMPLI    stringify_in_c(cmplwi)
> >   #define PPC_LCMP      stringify_in_c(cmpw)
> > diff --git a/arch/powerpc/kernel/entry_64.S
> > b/arch/powerpc/kernel/entry_64.S
> > index 3e2e37e6ecab..af25db6e0205 100644
> > --- a/arch/powerpc/kernel/entry_64.S
> > +++ b/arch/powerpc/kernel/entry_64.S
> > @@ -238,7 +238,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
> >         /* Note: this uses SWITCH_FRAME_SIZE rather than
> > INT_FRAME_SIZE
> >            because we don't need to leave the 288-byte ABI gap at
> > the
> >            top of the kernel stack. */
> > -       addi    r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
> > +       li      r9,0
> > +       ori     r9,r9,THREAD_SIZE-SWITCH_FRAME_SIZE
> > +       add     r7,r7,r9
> 
> So you assume THREAD_SIZE is never more than 64k? Is that a valid 
> assumption?

It looks like PPC_PAGE_SHIFT can be up to 18, which would make
THREAD_SIZE 256K, but that's only if you have 256K pages, which is a
44x specific feature. Otherwise AFAICT you can't get THREAD_SHIFT
larger than 16 and therefore THREAD_SIZE <= 64K.


> 
> What about the below instead:
> 
>         addis   r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE@ha
>         addi    r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE@l

That looks better anyway, thanks.

> 
> >   
> >         /*
> >          * PMU interrupts in radix may come in here. They will use
> > r1, not
> > diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> > index 9ede61a5a469..098cf6adceec 100644
> > --- a/arch/powerpc/kernel/irq.c
> > +++ b/arch/powerpc/kernel/irq.c
> > @@ -204,7 +204,9 @@ static __always_inline void
> > call_do_softirq(const void *sp)
> >   {
> >         /* Temporarily switch r1 to sp, call __do_softirq() then
> > restore r1. */
> >         asm volatile (
> > -                PPC_STLU "     %%r1, %[offset](%[sp])  ;"
> > +               "li             %%r0, 0                 ;"
> > +               "ori            %%r0, %%r0, %[offset]   ;"
> 
> Same, you assume offset to be max 64k, is that correct?
> 
> What about
>                 lis             r0, offset@h
>                 ori             r0, r0, offset@l
> 
> > +                PPC_STLUX "    %%r1, %[sp], %%r0       ;"
> >                 "mr             %%r1, %[sp]             ;"
> >                 "bl             %[callee]               ;"
> >                  PPC_LL "       %%r1, 0(%%r1)           ;"
> > @@ -256,7 +258,9 @@ static __always_inline void call_do_irq(struct
> > pt_regs *regs, void *sp)
> >   
> >         /* Temporarily switch r1 to sp, call __do_irq() then
> > restore r1. */
> >         asm volatile (
> > -                PPC_STLU "     %%r1, %[offset](%[sp])  ;"
> > +               "li             %%r0, 0                 ;"
> > +               "ori            %%r0, %%r0, %[offset]   ;"
> > +                PPC_STLUX "    %%r1, %[sp], %%r0       ;"
> 
> Same
> 
> >                 "mr             %%r4, %%r1              ;"
> >                 "mr             %%r1, %[sp]             ;"
> >                 "bl             %[callee]               ;"
> > diff --git a/arch/powerpc/kernel/misc_64.S
> > b/arch/powerpc/kernel/misc_64.S
> > index 36184cada00b..ff71b98500a3 100644
> > --- a/arch/powerpc/kernel/misc_64.S
> > +++ b/arch/powerpc/kernel/misc_64.S
> > @@ -384,7 +384,9 @@ _GLOBAL(kexec_sequence)
> >         std     r0,16(r1)
> >   
> >         /* switch stacks to newstack -- &kexec_stack.stack */
> > -       stdu    r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
> > +       li      r0,0
> > +       ori     r0,r0,THREAD_SIZE-STACK_FRAME_OVERHEAD
> > +       stdux   r1,r3,r0
> 
> Same
> 
> >         mr      r1,r3
> >   
> >         li      r0,0
> > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > index 37f50861dd98..d05e3d324f4d 100644
> > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > @@ -2686,7 +2686,8 @@ kvmppc_bad_host_intr:
> >         mr      r9, r1
> >         std     r1, PACAR1(r13)
> >         ld      r1, PACAEMERGSP(r13)
> > -       subi    r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE
> > +       subi    r1, r1, THREAD_SIZE/2
> > +       subi    r1, r1, INT_FRAME_SIZE
> 
> Same, what about
> 
>         subis   r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE@ha
>         subi    r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE@l
> 
> >         std     r9, 0(r1)
> >         std     r0, GPR0(r1)
> >         std     r9, GPR1(r1)
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h
index 2bc53c646ccd..30dd7813bf3b 100644
--- a/arch/powerpc/include/asm/asm-compat.h
+++ b/arch/powerpc/include/asm/asm-compat.h
@@ -11,6 +11,7 @@ 
 #define PPC_LL		stringify_in_c(ld)
 #define PPC_STL		stringify_in_c(std)
 #define PPC_STLU	stringify_in_c(stdu)
+#define PPC_STLUX	stringify_in_c(stdux)
 #define PPC_LCMPI	stringify_in_c(cmpdi)
 #define PPC_LCMPLI	stringify_in_c(cmpldi)
 #define PPC_LCMP	stringify_in_c(cmpd)
@@ -45,6 +46,7 @@ 
 #define PPC_LL		stringify_in_c(lwz)
 #define PPC_STL		stringify_in_c(stw)
 #define PPC_STLU	stringify_in_c(stwu)
+#define PPC_STLUX	stringify_in_c(stwux)
 #define PPC_LCMPI	stringify_in_c(cmpwi)
 #define PPC_LCMPLI	stringify_in_c(cmplwi)
 #define PPC_LCMP	stringify_in_c(cmpw)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 3e2e37e6ecab..af25db6e0205 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -238,7 +238,9 @@  END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
 	   because we don't need to leave the 288-byte ABI gap at the
 	   top of the kernel stack. */
-	addi	r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
+	li	r9,0
+	ori	r9,r9,THREAD_SIZE-SWITCH_FRAME_SIZE
+	add	r7,r7,r9
 
 	/*
 	 * PMU interrupts in radix may come in here. They will use r1, not
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 9ede61a5a469..098cf6adceec 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -204,7 +204,9 @@  static __always_inline void call_do_softirq(const void *sp)
 {
 	/* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
 	asm volatile (
-		 PPC_STLU "	%%r1, %[offset](%[sp])	;"
+		"li		%%r0, 0			;"
+		"ori		%%r0, %%r0, %[offset]	;"
+		 PPC_STLUX "	%%r1, %[sp], %%r0	;"
 		"mr		%%r1, %[sp]		;"
 		"bl		%[callee]		;"
 		 PPC_LL "	%%r1, 0(%%r1)		;"
@@ -256,7 +258,9 @@  static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
 
 	/* Temporarily switch r1 to sp, call __do_irq() then restore r1. */
 	asm volatile (
-		 PPC_STLU "	%%r1, %[offset](%[sp])	;"
+		"li		%%r0, 0			;"
+		"ori		%%r0, %%r0, %[offset]	;"
+		 PPC_STLUX "	%%r1, %[sp], %%r0	;"
 		"mr		%%r4, %%r1		;"
 		"mr		%%r1, %[sp]		;"
 		"bl		%[callee]		;"
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 36184cada00b..ff71b98500a3 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -384,7 +384,9 @@  _GLOBAL(kexec_sequence)
 	std	r0,16(r1)
 
 	/* switch stacks to newstack -- &kexec_stack.stack */
-	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
+	li	r0,0
+	ori	r0,r0,THREAD_SIZE-STACK_FRAME_OVERHEAD
+	stdux	r1,r3,r0
 	mr	r1,r3
 
 	li	r0,0
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 37f50861dd98..d05e3d324f4d 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2686,7 +2686,8 @@  kvmppc_bad_host_intr:
 	mr	r9, r1
 	std	r1, PACAR1(r13)
 	ld	r1, PACAEMERGSP(r13)
-	subi	r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE
+	subi	r1, r1, THREAD_SIZE/2
+	subi	r1, r1, INT_FRAME_SIZE
 	std	r9, 0(r1)
 	std	r0, GPR0(r1)
 	std	r9, GPR1(r1)