Message ID | 20221104172737.391978-2-ajd@linux.ibm.com (mailing list archive) |
---|---|
State | Handled Elsewhere |
Headers | show |
Series | VMAP_STACK support for book3s64 | expand |
Le 04/11/2022 à 18:27, Andrew Donnellan a écrit : > When CONFIG_VMAP_STACK is enabled, we set THREAD_SIZE to be at least the > size of a page. > > There's a few bits of assembly in the book3s64 code that use THREAD_SIZE in > immediate mode instructions, which can only take an operand of up to 16 > bits signed, which isn't quite large enough. > > Fix these spots to use a scratch register or use two immediate mode > instructions instead, so we can later enable VMAP_STACK. > > Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com> > --- > arch/powerpc/include/asm/asm-compat.h | 2 ++ > arch/powerpc/kernel/entry_64.S | 4 +++- > arch/powerpc/kernel/irq.c | 8 ++++++-- > arch/powerpc/kernel/misc_64.S | 4 +++- > arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 ++- > 5 files changed, 16 insertions(+), 5 deletions(-) > > diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h > index 2bc53c646ccd..30dd7813bf3b 100644 > --- a/arch/powerpc/include/asm/asm-compat.h > +++ b/arch/powerpc/include/asm/asm-compat.h > @@ -11,6 +11,7 @@ > #define PPC_LL stringify_in_c(ld) > #define PPC_STL stringify_in_c(std) > #define PPC_STLU stringify_in_c(stdu) > +#define PPC_STLUX stringify_in_c(stdux) > #define PPC_LCMPI stringify_in_c(cmpdi) > #define PPC_LCMPLI stringify_in_c(cmpldi) > #define PPC_LCMP stringify_in_c(cmpd) > @@ -45,6 +46,7 @@ > #define PPC_LL stringify_in_c(lwz) > #define PPC_STL stringify_in_c(stw) > #define PPC_STLU stringify_in_c(stwu) > +#define PPC_STLUX stringify_in_c(stwux) > #define PPC_LCMPI stringify_in_c(cmpwi) > #define PPC_LCMPLI stringify_in_c(cmplwi) > #define PPC_LCMP stringify_in_c(cmpw) > diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S > index 3e2e37e6ecab..af25db6e0205 100644 > --- a/arch/powerpc/kernel/entry_64.S > +++ b/arch/powerpc/kernel/entry_64.S > @@ -238,7 +238,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) > /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE > because we don't need 
to leave the 288-byte ABI gap at the > top of the kernel stack. */ > - addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE > + li r9,0 > + ori r9,r9,THREAD_SIZE-SWITCH_FRAME_SIZE > + add r7,r7,r9 So you assume THREAD_SIZE is never more than 64k ? Is that a valid assumption ? What about the below instead: addis r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE@ha addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE@l > > /* > * PMU interrupts in radix may come in here. They will use r1, not > diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c > index 9ede61a5a469..098cf6adceec 100644 > --- a/arch/powerpc/kernel/irq.c > +++ b/arch/powerpc/kernel/irq.c > @@ -204,7 +204,9 @@ static __always_inline void call_do_softirq(const void *sp) > { > /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */ > asm volatile ( > - PPC_STLU " %%r1, %[offset](%[sp]) ;" > + "li %%r0, 0 ;" > + "ori %%r0, %%r0, %[offset] ;" Same, you assume offset to be max 64k, is that correct ? What about lis r0, offset@h ori r0, r0, offset@l > + PPC_STLUX " %%r1, %[sp], %%r0 ;" > "mr %%r1, %[sp] ;" > "bl %[callee] ;" > PPC_LL " %%r1, 0(%%r1) ;" > @@ -256,7 +258,9 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) > > /* Temporarily switch r1 to sp, call __do_irq() then restore r1. 
*/ > asm volatile ( > - PPC_STLU " %%r1, %[offset](%[sp]) ;" > + "li %%r0, 0 ;" > + "ori %%r0, %%r0, %[offset] ;" > + PPC_STLUX " %%r1, %[sp], %%r0 ;" Same > "mr %%r4, %%r1 ;" > "mr %%r1, %[sp] ;" > "bl %[callee] ;" > diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S > index 36184cada00b..ff71b98500a3 100644 > --- a/arch/powerpc/kernel/misc_64.S > +++ b/arch/powerpc/kernel/misc_64.S > @@ -384,7 +384,9 @@ _GLOBAL(kexec_sequence) > std r0,16(r1) > > /* switch stacks to newstack -- &kexec_stack.stack */ > - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) > + li r0,0 > + ori r0,r0,THREAD_SIZE-STACK_FRAME_OVERHEAD > + stdux r1,r3,r0 Same > mr r1,r3 > > li r0,0 > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > index 37f50861dd98..d05e3d324f4d 100644 > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > @@ -2686,7 +2686,8 @@ kvmppc_bad_host_intr: > mr r9, r1 > std r1, PACAR1(r13) > ld r1, PACAEMERGSP(r13) > - subi r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE > + subi r1, r1, THREAD_SIZE/2 > + subi r1, r1, INT_FRAME_SIZE Same, what about subis r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE@ha subi r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE@l > std r9, 0(r1) > std r0, GPR0(r1) > std r9, GPR1(r1)
On Fri, 2022-11-04 at 17:51 +0000, Christophe Leroy wrote: > > > Le 04/11/2022 à 18:27, Andrew Donnellan a écrit : > > When CONFIG_VMAP_STACK is enabled, we set THREAD_SIZE to be at > > least the > > size of a page. > > > > There's a few bits of assembly in the book3s64 code that use > > THREAD_SIZE in > > immediate mode instructions, which can only take an operand of up > > to 16 > > bits signed, which isn't quite large enough. > > > > Fix these spots to use a scratch register or use two immediate mode > > instructions instead, so we can later enable VMAP_STACK. > > > > Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com> > > --- > > arch/powerpc/include/asm/asm-compat.h | 2 ++ > > arch/powerpc/kernel/entry_64.S | 4 +++- > > arch/powerpc/kernel/irq.c | 8 ++++++-- > > arch/powerpc/kernel/misc_64.S | 4 +++- > > arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 ++- > > 5 files changed, 16 insertions(+), 5 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/asm-compat.h > > b/arch/powerpc/include/asm/asm-compat.h > > index 2bc53c646ccd..30dd7813bf3b 100644 > > --- a/arch/powerpc/include/asm/asm-compat.h > > +++ b/arch/powerpc/include/asm/asm-compat.h > > @@ -11,6 +11,7 @@ > > #define PPC_LL stringify_in_c(ld) > > #define PPC_STL stringify_in_c(std) > > #define PPC_STLU stringify_in_c(stdu) > > +#define PPC_STLUX stringify_in_c(stdux) > > #define PPC_LCMPI stringify_in_c(cmpdi) > > #define PPC_LCMPLI stringify_in_c(cmpldi) > > #define PPC_LCMP stringify_in_c(cmpd) > > @@ -45,6 +46,7 @@ > > #define PPC_LL stringify_in_c(lwz) > > #define PPC_STL stringify_in_c(stw) > > #define PPC_STLU stringify_in_c(stwu) > > +#define PPC_STLUX stringify_in_c(stwux) > > #define PPC_LCMPI stringify_in_c(cmpwi) > > #define PPC_LCMPLI stringify_in_c(cmplwi) > > #define PPC_LCMP stringify_in_c(cmpw) > > diff --git a/arch/powerpc/kernel/entry_64.S > > b/arch/powerpc/kernel/entry_64.S > > index 3e2e37e6ecab..af25db6e0205 100644 > > --- a/arch/powerpc/kernel/entry_64.S > > +++ 
b/arch/powerpc/kernel/entry_64.S > > @@ -238,7 +238,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) > > /* Note: this uses SWITCH_FRAME_SIZE rather than > > INT_FRAME_SIZE > > because we don't need to leave the 288-byte ABI gap at > > the > > top of the kernel stack. */ > > - addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE > > + li r9,0 > > + ori r9,r9,THREAD_SIZE-SWITCH_FRAME_SIZE > > + add r7,r7,r9 > > So you assume THREAD_SIZE is never more than 64k ? Is that a valid > assumption ? It looks like PPC_PAGE_SHIFT can be up to 18, which would make THREAD_SIZE 256K, but that's only if you have 256K pages, which is a 44x specific feature. Otherwise AFAICT you can't get THREAD_SHIFT larger than 16 and therefore THREAD_SIZE <= 64K. > > What about the below instead: > > addis r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE@ha > addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE@l That looks better anyway, thanks. > > > > > /* > > * PMU interrupts in radix may come in here. They will use > > r1, not > > diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c > > index 9ede61a5a469..098cf6adceec 100644 > > --- a/arch/powerpc/kernel/irq.c > > +++ b/arch/powerpc/kernel/irq.c > > @@ -204,7 +204,9 @@ static __always_inline void > > call_do_softirq(const void *sp) > > { > > /* Temporarily switch r1 to sp, call __do_softirq() then > > restore r1. */ > > asm volatile ( > > - PPC_STLU " %%r1, %[offset](%[sp]) ;" > > + "li %%r0, 0 ;" > > + "ori %%r0, %%r0, %[offset] ;" > > Same, you assume offset to be max 64k, is that correct ? > > What about > lis r0, offset@h > ori r0, r0, offset@l > > > + PPC_STLUX " %%r1, %[sp], %%r0 ;" > > "mr %%r1, %[sp] ;" > > "bl %[callee] ;" > > PPC_LL " %%r1, 0(%%r1) ;" > > @@ -256,7 +258,9 @@ static __always_inline void call_do_irq(struct > > pt_regs *regs, void *sp) > > > > /* Temporarily switch r1 to sp, call __do_irq() then > > restore r1. 
*/ > > asm volatile ( > > - PPC_STLU " %%r1, %[offset](%[sp]) ;" > > + "li %%r0, 0 ;" > > + "ori %%r0, %%r0, %[offset] ;" > > + PPC_STLUX " %%r1, %[sp], %%r0 ;" > > Same > > > "mr %%r4, %%r1 ;" > > "mr %%r1, %[sp] ;" > > "bl %[callee] ;" > > diff --git a/arch/powerpc/kernel/misc_64.S > > b/arch/powerpc/kernel/misc_64.S > > index 36184cada00b..ff71b98500a3 100644 > > --- a/arch/powerpc/kernel/misc_64.S > > +++ b/arch/powerpc/kernel/misc_64.S > > @@ -384,7 +384,9 @@ _GLOBAL(kexec_sequence) > > std r0,16(r1) > > > > /* switch stacks to newstack -- &kexec_stack.stack */ > > - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) > > + li r0,0 > > + ori r0,r0,THREAD_SIZE-STACK_FRAME_OVERHEAD > > + stdux r1,r3,r0 > > Same > > > mr r1,r3 > > > > li r0,0 > > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S > > b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > > index 37f50861dd98..d05e3d324f4d 100644 > > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S > > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > > @@ -2686,7 +2686,8 @@ kvmppc_bad_host_intr: > > mr r9, r1 > > std r1, PACAR1(r13) > > ld r1, PACAEMERGSP(r13) > > - subi r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE > > + subi r1, r1, THREAD_SIZE/2 > > + subi r1, r1, INT_FRAME_SIZE > > Same, what about > > subis r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE@ha > subi r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE@l > > > std r9, 0(r1) > > std r0, GPR0(r1) > > std r9, GPR1(r1)
diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 2bc53c646ccd..30dd7813bf3b 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -11,6 +11,7 @@ #define PPC_LL stringify_in_c(ld) #define PPC_STL stringify_in_c(std) #define PPC_STLU stringify_in_c(stdu) +#define PPC_STLUX stringify_in_c(stdux) #define PPC_LCMPI stringify_in_c(cmpdi) #define PPC_LCMPLI stringify_in_c(cmpldi) #define PPC_LCMP stringify_in_c(cmpd) @@ -45,6 +46,7 @@ #define PPC_LL stringify_in_c(lwz) #define PPC_STL stringify_in_c(stw) #define PPC_STLU stringify_in_c(stwu) +#define PPC_STLUX stringify_in_c(stwux) #define PPC_LCMPI stringify_in_c(cmpwi) #define PPC_LCMPLI stringify_in_c(cmplwi) #define PPC_LCMP stringify_in_c(cmpw) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3e2e37e6ecab..af25db6e0205 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -238,7 +238,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE because we don't need to leave the 288-byte ABI gap at the top of the kernel stack. */ - addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE + li r9,0 + ori r9,r9,THREAD_SIZE-SWITCH_FRAME_SIZE + add r7,r7,r9 /* * PMU interrupts in radix may come in here. They will use r1, not diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 9ede61a5a469..098cf6adceec 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -204,7 +204,9 @@ static __always_inline void call_do_softirq(const void *sp) { /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. 
*/ asm volatile ( - PPC_STLU " %%r1, %[offset](%[sp]) ;" + "li %%r0, 0 ;" + "ori %%r0, %%r0, %[offset] ;" + PPC_STLUX " %%r1, %[sp], %%r0 ;" "mr %%r1, %[sp] ;" "bl %[callee] ;" PPC_LL " %%r1, 0(%%r1) ;" @@ -256,7 +258,9 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */ asm volatile ( - PPC_STLU " %%r1, %[offset](%[sp]) ;" + "li %%r0, 0 ;" + "ori %%r0, %%r0, %[offset] ;" + PPC_STLUX " %%r1, %[sp], %%r0 ;" "mr %%r4, %%r1 ;" "mr %%r1, %[sp] ;" "bl %[callee] ;" diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 36184cada00b..ff71b98500a3 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -384,7 +384,9 @@ _GLOBAL(kexec_sequence) std r0,16(r1) /* switch stacks to newstack -- &kexec_stack.stack */ - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) + li r0,0 + ori r0,r0,THREAD_SIZE-STACK_FRAME_OVERHEAD + stdux r1,r3,r0 mr r1,r3 li r0,0 diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 37f50861dd98..d05e3d324f4d 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2686,7 +2686,8 @@ kvmppc_bad_host_intr: mr r9, r1 std r1, PACAR1(r13) ld r1, PACAEMERGSP(r13) - subi r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE + subi r1, r1, THREAD_SIZE/2 + subi r1, r1, INT_FRAME_SIZE std r9, 0(r1) std r0, GPR0(r1) std r9, GPR1(r1)
When CONFIG_VMAP_STACK is enabled, we set THREAD_SIZE to be at least the size of a page. There are a few bits of assembly in the book3s64 code that use THREAD_SIZE in immediate mode instructions, which can only take an operand of up to 16 bits signed, which isn't quite large enough. Fix these spots to use a scratch register or use two immediate mode instructions instead, so we can later enable VMAP_STACK. Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com> --- arch/powerpc/include/asm/asm-compat.h | 2 ++ arch/powerpc/kernel/entry_64.S | 4 +++- arch/powerpc/kernel/irq.c | 8 ++++++-- arch/powerpc/kernel/misc_64.S | 4 +++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 ++- 5 files changed, 16 insertions(+), 5 deletions(-)