Message ID | 1537970184-44348-4-git-send-email-julien.thierry@arm.com (mailing list archive)
---|---
State | New, archived
Series | Ensure stack is aligned for kernel entries
On Wed, Sep 26, 2018 at 02:56:20PM +0100, Julien Thierry wrote:
> Arm64 SP needs to be aligned to 16 bytes before being used as base address
> for loads and stores. When taking some valid exceptions from EL1 (e.g. irq,
> dbg, data abort), there is no guarantee that SP_EL1 was aligned when
> taking the exception.
>
> Pad the stack on EL1 entries when misaligned.
>
> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
> ---
>  arch/arm64/include/asm/assembler.h | 9 +++++++++
>  arch/arm64/kernel/entry.S | 32 ++++++++++++++++++++++++++++++--
>  2 files changed, 39 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 0bcc98d..a0a5415 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -701,4 +701,13 @@
>  .Lyield_out_\@ :
>  .endm
>
> +/*
> + * Echange content of register xt with sp.

Exchange contents of register Xt with SP

> + */
> + .macro xchg_sp xt
> + add sp, sp, \xt // sp' = sp + xt
> + sub \xt, sp, \xt // xt' = sp' - xt = sp + xt - xt = sp
> + sub sp, sp, \xt // sp'' = sp' - xt' = sp + xt - sp = xt
> + .endm

Having said that, this is clearly inspired by the code in kernel_ventry for
dealing with stack overflow, yet the macro you've got here cannot be reused
there.

Maybe we're better off only macro-ising:

	.macro sp_fetch_add xt
	add sp, sp, \xt
	sub \xt, sp, \xt
	.endm

and then letting the callers handle the rest, or build a swizzle_sp macro
on top of it?

> +
>  #endif /* __ASM_ASSEMBLER_H */
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index fc5842b..8fb66e4 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -59,6 +59,19 @@
>  .endr
>  .endm
>
> + .macro force_stack_align
> + xchg_sp x0
> + str x1, [x0] // store x1 far away from S_SP

So this temporarily trashes pt_regs.regs[0] if the stack is already aligned,
right? I think that's fine, but we should comment /that/ rather than vaguely
saying it's far away from S_SP. In fact, having a block comment above this
macro saying exactly what is clobbered would generally be a good idea (esp
as you nobble the flags in your next patch).

> +
> + // aligned_sp[S_SP] = old_sp
> + bic x1, x0, #0xf // align down to 16-byte
> + str x0, [x1, #S_SP]
> +
> + ldr x1, [x0]
> + bic x0, x0, #0xf // x0 = aligned_sp
> + xchg_sp x0
> + .endm
> +
>  /*
>   * Bad Abort numbers
>   *-----------------
> @@ -158,6 +171,10 @@ alternative_cb_end
>  .if \regsize == 32
>  mov w0, w0 // zero upper 32 bits of x0
>  .endif
> + .if \el != 0
> + force_stack_align
> + .endif
> +
>  stp x0, x1, [sp, #16 * 0]
>  stp x2, x3, [sp, #16 * 1]
>  stp x4, x5, [sp, #16 * 2]
> @@ -184,7 +201,8 @@ alternative_cb_end
>  apply_ssbd 1, x22, x23
>
>  .else
> - add x21, sp, #S_FRAME_SIZE
> + ldr x21, [sp, #S_SP]
> + add x21, x21, #S_FRAME_SIZE // adjust stored sp

Can we not just store the corrected SP in force_stack_align, rather than
store something bogus and then have to add the frame size back here?

Will
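For illustration, a rough and untested sketch of the suggested split: the
sp_fetch_add body is the one quoted above, while the swizzle_sp body is only
an assumption about how it might be built on top (it is not code from this
series):

	/* sp += xt, and xt gets the old sp; callers finish whatever swap they need */
	.macro sp_fetch_add xt
	add sp, sp, \xt		// sp' = sp + xt
	sub \xt, sp, \xt	// xt' = sp' - xt = old sp
	.endm

	/* Full SP <-> Xt exchange, built on top of sp_fetch_add */
	.macro swizzle_sp xt
	sp_fetch_add \xt	// sp = old sp + old xt, xt = old sp
	sub sp, sp, \xt		// sp = (old sp + old xt) - old sp = old xt
	.endm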
On 07/11/18 21:59, Will Deacon wrote:
> On Wed, Sep 26, 2018 at 02:56:20PM +0100, Julien Thierry wrote:
>> Arm64 SP needs to be aligned to 16 bytes before being used as base address
>> for loads and stores. When taking some valid exceptions from EL1 (e.g. irq,
>> dbg, data abort), there is no guarantee that SP_EL1 was aligned when
>> taking the exception.
>>
>> Pad the stack on EL1 entries when misaligned.
>>
>> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
>> ---
>>  arch/arm64/include/asm/assembler.h | 9 +++++++++
>>  arch/arm64/kernel/entry.S | 32 ++++++++++++++++++++++++++++++--
>>  2 files changed, 39 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
>> index 0bcc98d..a0a5415 100644
>> --- a/arch/arm64/include/asm/assembler.h
>> +++ b/arch/arm64/include/asm/assembler.h
>> @@ -701,4 +701,13 @@
>>  .Lyield_out_\@ :
>>  .endm
>>
>> +/*
>> + * Echange content of register xt with sp.
>
> Exchange contents of register Xt with SP
>
>> + */
>> + .macro xchg_sp xt
>> + add sp, sp, \xt // sp' = sp + xt
>> + sub \xt, sp, \xt // xt' = sp' - xt = sp + xt - xt = sp
>> + sub sp, sp, \xt // sp'' = sp' - xt' = sp + xt - sp = xt
>> + .endm
>
> Having said that, this is clearly inspired by the code in kernel_ventry for
> dealing with stack overflow, yet the macro you've got here cannot be reused
> there.
>
> Maybe we're better off only macro-ising:
>
> 	.macro sp_fetch_add xt
> 	add sp, sp, \xt
> 	sub \xt, sp, \xt
> 	.endm
>
> and then letting the callers handle the rest, or build a swizzle_sp macro
> on top of it?
>

Hmm, I'll try to do that and see how it looks.

>> +
>>  #endif /* __ASM_ASSEMBLER_H */
>> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
>> index fc5842b..8fb66e4 100644
>> --- a/arch/arm64/kernel/entry.S
>> +++ b/arch/arm64/kernel/entry.S
>> @@ -59,6 +59,19 @@
>>  .endr
>>  .endm
>>
>> + .macro force_stack_align
>> + xchg_sp x0
>> + str x1, [x0] // store x1 far away from S_SP
>
> So this temporarily trashes pt_regs.regs[0] if the stack is already aligned,
> right? I think that's fine, but we should comment /that/ rather than vaguely
> saying it's far away from S_SP. In fact, having a block comment above this
> macro saying exactly what is clobbered would generally be a good idea (esp
> as you nobble the flags in your next patch).
>

Is it a big deal to trash pt_regs.regs[0] if the stack is already aligned?
This is a freshly allocated stack frame, and we expect pt_regs.regs[0] to be
overwritten afterwards. I don't mind adding a comment about that, I'm just
wondering about the relevance of it.

However, I do agree that I should add a comment about what this does with
pt_regs.sp since the code using that macro needs to be aware of it.

>> +
>> + // aligned_sp[S_SP] = old_sp
>> + bic x1, x0, #0xf // align down to 16-byte
>> + str x0, [x1, #S_SP]
>> +
>> + ldr x1, [x0]
>> + bic x0, x0, #0xf // x0 = aligned_sp
>> + xchg_sp x0
>> + .endm
>> +
>>  /*
>>   * Bad Abort numbers
>>   *-----------------
>> @@ -158,6 +171,10 @@ alternative_cb_end
>>  .if \regsize == 32
>>  mov w0, w0 // zero upper 32 bits of x0
>>  .endif
>> + .if \el != 0
>> + force_stack_align
>> + .endif
>> +
>>  stp x0, x1, [sp, #16 * 0]
>>  stp x2, x3, [sp, #16 * 1]
>>  stp x4, x5, [sp, #16 * 2]
>> @@ -184,7 +201,8 @@ alternative_cb_end
>>  apply_ssbd 1, x22, x23
>>
>>  .else
>> - add x21, sp, #S_FRAME_SIZE
>> + ldr x21, [sp, #S_SP]
>> + add x21, x21, #S_FRAME_SIZE // adjust stored sp
>
> Can we not just store the corrected SP in force_stack_align, rather than
> store something bogus and then have to add the frame size back here?

We can, it would add 2 instructions to the alignment macro. I think I mainly
tried to keep the alignment stuff short in case we want to add the fast path
from the next patch, but if we're keeping it anyway we might as well add the
code to save the correct SP.

Thanks,
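For illustration, an untested guess at the shape of the change being agreed
on here, i.e. force_stack_align saving the frame-adjusted SP itself (so
kernel_entry can use the stored value directly) and carrying the requested
clobber comment; macro and offset names are taken from the patch above, but
this body is not the code that was eventually posted:

	/*
	 * Align SP down to 16 bytes and save the SP the exception was taken
	 * with (current SP + S_FRAME_SIZE) in pt_regs.sp.
	 * Writes pt_regs.sp and temporarily scribbles over the regs[0] slot
	 * of the new frame (rewritten by the stp that follows); x0 and x1
	 * are preserved, flags are not touched.
	 */
	.macro force_stack_align
	xchg_sp x0			// x0 = old sp, sp = old x0
	str x1, [x0]			// stash x1 in the fresh frame

	bic x1, x0, #0xf		// x1 = aligned sp
	add x0, x0, #S_FRAME_SIZE	// x0 = SP at exception entry
	str x0, [x1, #S_SP]		// aligned_sp[S_SP] = corrected sp
	sub x0, x0, #S_FRAME_SIZE	// back to the (unaligned) frame base

	ldr x1, [x0]			// restore x1
	bic x0, x0, #0xf		// x0 = aligned sp
	xchg_sp x0			// sp = aligned sp, x0 = old x0
	.endm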
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 0bcc98d..a0a5415 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -701,4 +701,13 @@
 .Lyield_out_\@ :
 	.endm
 
+/*
+ * Echange content of register xt with sp.
+ */
+	.macro xchg_sp xt
+	add sp, sp, \xt		// sp' = sp + xt
+	sub \xt, sp, \xt	// xt' = sp' - xt = sp + xt - xt = sp
+	sub sp, sp, \xt		// sp'' = sp' - xt' = sp + xt - sp = xt
+	.endm
+
 #endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index fc5842b..8fb66e4 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -59,6 +59,19 @@
 	.endr
 	.endm
 
+	.macro force_stack_align
+	xchg_sp x0
+	str x1, [x0]		// store x1 far away from S_SP
+
+	// aligned_sp[S_SP] = old_sp
+	bic x1, x0, #0xf	// align down to 16-byte
+	str x0, [x1, #S_SP]
+
+	ldr x1, [x0]
+	bic x0, x0, #0xf	// x0 = aligned_sp
+	xchg_sp x0
+	.endm
+
 /*
  * Bad Abort numbers
  *-----------------
@@ -158,6 +171,10 @@ alternative_cb_end
 	.if \regsize == 32
 	mov w0, w0		// zero upper 32 bits of x0
 	.endif
+	.if \el != 0
+	force_stack_align
+	.endif
+
 	stp x0, x1, [sp, #16 * 0]
 	stp x2, x3, [sp, #16 * 1]
 	stp x4, x5, [sp, #16 * 2]
@@ -184,7 +201,8 @@ alternative_cb_end
 	apply_ssbd 1, x22, x23
 
 	.else
-	add x21, sp, #S_FRAME_SIZE
+	ldr x21, [sp, #S_SP]
+	add x21, x21, #S_FRAME_SIZE	// adjust stored sp
 	get_thread_info tsk
 	/* Save the task's original addr_limit and set USER_DS */
 	ldr x20, [tsk, #TSK_TI_ADDR_LIMIT]
@@ -327,7 +345,6 @@ alternative_else_nop_endif
 
 	msr elr_el1, x21	// set up the return data
 	msr spsr_el1, x22
-	ldp x0, x1, [sp, #16 * 0]
 	ldp x2, x3, [sp, #16 * 1]
 	ldp x4, x5, [sp, #16 * 2]
 	ldp x6, x7, [sp, #16 * 3]
@@ -343,7 +360,18 @@ alternative_else_nop_endif
 	ldp x26, x27, [sp, #16 * 13]
 	ldp x28, x29, [sp, #16 * 14]
 	ldr lr, [sp, #S_LR]
+
+	/* Restore x0, x1 and sp */
+	.if \el != 0
+	mov x1, sp
+	ldr x0, [sp, #S_SP]
+	mov sp, x0
+	ldp x0, x1, [x1, #16 * 0]
+	.else
+	ldp x0, x1, [sp, #16 * 0]
 	add sp, sp, #S_FRAME_SIZE	// restore sp
+	.endif
+
 	/*
 	 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on eret context synchronization
 	 * when returning from IPI handler, and when returning to user-space.
Arm64 SP needs to be aligned to 16 bytes before being used as base address
for loads and stores. When taking some valid exceptions from EL1 (e.g. irq,
dbg, data abort), there is no guarantee that SP_EL1 was aligned when
taking the exception.

Pad the stack on EL1 entries when misaligned.

Signed-off-by: Julien Thierry <julien.thierry@arm.com>
---
 arch/arm64/include/asm/assembler.h | 9 +++++++++
 arch/arm64/kernel/entry.S | 32 ++++++++++++++++++++++++++++++--
 2 files changed, 39 insertions(+), 2 deletions(-)
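For illustration, a worked example of the padding with a made-up SP value
(neither the address nor the snippet comes from the patch):

	// SP_EL1 at exception entry: 0xffff000008003f08 (bits [3:0] = 0x8, misaligned)
	mov x0, sp			// x0 = possibly misaligned SP
	bic x1, x0, #0xf		// clear bits [3:0]
	// aligned SP:                0xffff000008003f00 (8 bytes of padding)
	mov sp, x1			// SP may now be used as a base register
	// In the patch, the pre-padding value is saved in pt_regs.sp so the
	// exception return path can restore it exactly before eret.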