diff mbox

[resend,1/2] arm64: assembler: add utility macros to push/pop stack frames

Message ID 20180328124129.6459-2-ard.biesheuvel@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

Ard Biesheuvel March 28, 2018, 12:41 p.m. UTC
We are going to add code to all the NEON crypto routines that will
turn them into non-leaf functions, so we need to manage the stack
frames. To make this less tedious and error prone, add some macros
that take the number of callee saved registers to preserve and the
extra size to allocate in the stack frame (for locals) and emit
the ldp/stp sequences.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/include/asm/assembler.h | 58 ++++++++++++++++++++
 1 file changed, 58 insertions(+)

Comments

Dave Martin March 28, 2018, 4:34 p.m. UTC | #1
On Wed, Mar 28, 2018 at 02:41:28PM +0200, Ard Biesheuvel wrote:
> We are going to add code to all the NEON crypto routines that will
> turn them into non-leaf functions, so we need to manage the stack
> frames. To make this less tedious and error prone, add some macros
> that take the number of callee saved registers to preserve and the

Apologies for the delay in looking at these patches...

Anyway:

Nit: for all instances of "callee saved" in this patch, do you mean             "caller saved"?

A few stylistic comments below, but I don't consider them essential to
address unless someone feels like it.

Otherwise,
Reviewed-by: Dave Martin <Dave.Martin@arm.com>

> extra size to allocate in the stack frame (for locals) and emit
> the ldp/stp sequences.
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
>  arch/arm64/include/asm/assembler.h | 58 ++++++++++++++++++++
>  1 file changed, 58 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 053d83e8db6f..d354eb7f2f0c 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -565,4 +565,62 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
>  #endif
>  	.endm
>  
> +	/*
> +	 * frame_push - Push @regcount callee saved registers to the stack,
> +	 *              starting at x19, as well as x29/x30, and set x29 to
> +	 *              the new value of sp. Add @extra bytes of stack space
> +	 *              for locals.
> +	 */
> +	.macro		frame_push, regcount:req, extra
> +	__frame		st, \regcount, \extra
> +	.endm
> +
> +	/*
> +	 * frame_pop  - Pop the callee saved registers from the stack that were
> +	 *              pushed in the most recent call to frame_push, as well
> +	 *              as x29/x30 and any extra stack space that may have been
> +	 *              allocated.
> +	 */
> +	.macro		frame_pop
> +	__frame		ld
> +	.endm
> +
> +	.macro		__frame_regs, reg1, reg2, op, num
> +	.if		.Lframe_regcount == \num
> +	\op\()r		\reg1, [sp, #(\num + 1) * 8]
> +	.elseif		.Lframe_regcount > \num
> +	\op\()p		\reg1, \reg2, [sp, #(\num + 1) * 8]
> +	.endif
> +	.endm
> +
> +	.macro		__frame, op, regcount, extra=0
> +	.ifc		\op, st
> +	.if		(\regcount) < 0 || (\regcount) > 10
> +	.error		"regcount should be in the range [0 ... 10]"
> +	.endif
> +	.if		((\extra) % 16) != 0
> +	.error		"extra should be a multiple of 16 bytes"
> +	.endif
> +	.set		.Lframe_regcount, \regcount
> +	.set		.Lframe_extra, \extra
> +	.set		.Lframe_local_offset, ((\regcount + 3) / 2) * 16
> +	stp		x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]!
> +	mov		x29, sp
> +	.endif
> +
> +	__frame_regs	x19, x20, \op, 1
> +	__frame_regs	x21, x22, \op, 3
> +	__frame_regs	x23, x24, \op, 5
> +	__frame_regs	x25, x26, \op, 7
> +	__frame_regs	x27, x28, \op, 9
> +
> +	.ifc		\op, ld
> +	.if		.Lframe_regcount == -1

We could also have

	.ifc		\op, st
	.ifdef		.Lframe_regcount
	.if		.Lframe_regcount != -1
	.error [...]

on the push side, which would trip on the first nested frame_push
rather than waiting until a frame_pop appears.

Your existing code could be retained to guard against a double pop.

> +	.error		"frame_push/frame_pop may not be nested"
> +	.endif
> +	ldp		x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra
> +	.set		.Lframe_regcount, -1
> +	.endif
> +	.endm
> +
>  #endif	/* __ASM_ASSEMBLER_H */
> -- 
> 2.11.0
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Ard Biesheuvel March 29, 2018, 8:54 a.m. UTC | #2
On 28 March 2018 at 17:34, Dave Martin <Dave.Martin@arm.com> wrote:
> On Wed, Mar 28, 2018 at 02:41:28PM +0200, Ard Biesheuvel wrote:
>> We are going to add code to all the NEON crypto routines that will
>> turn them into non-leaf functions, so we need to manage the stack
>> frames. To make this less tedious and error prone, add some macros
>> that take the number of callee saved registers to preserve and the
>
> Apologies for the delay in looking at these patches...
>
> Anyway:
>
> Nit: for all instances of "callee saved" in this patch, do you mean             "caller saved"?
>

'Caller saved' means that the caller needs to stack/unstack a register
itself if it needs its value to be preserved across a function call.
'Callee saved' means that the caller can rely on the callee to ensure
that the register will retain its value.

So we are dealing with the latter here, afaict. Or am I missing something?

> A few stylistic comments below, but I don't consider them essential to
> address unless someone feels like it.
>
> Otherwise,
> Reviewed-by: Dave Martin <Dave.Martin@arm.com>
>

Thanks.

>> extra size to allocate in the stack frame (for locals) and emit
>> the ldp/stp sequences.
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> ---
>>  arch/arm64/include/asm/assembler.h | 58 ++++++++++++++++++++
>>  1 file changed, 58 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
>> index 053d83e8db6f..d354eb7f2f0c 100644
>> --- a/arch/arm64/include/asm/assembler.h
>> +++ b/arch/arm64/include/asm/assembler.h
>> @@ -565,4 +565,62 @@ USER(\label, ic  ivau, \tmp2)                    // invalidate I line PoU
>>  #endif
>>       .endm
>>
>> +     /*
>> +      * frame_push - Push @regcount callee saved registers to the stack,
>> +      *              starting at x19, as well as x29/x30, and set x29 to
>> +      *              the new value of sp. Add @extra bytes of stack space
>> +      *              for locals.
>> +      */
>> +     .macro          frame_push, regcount:req, extra
>> +     __frame         st, \regcount, \extra
>> +     .endm
>> +
>> +     /*
>> +      * frame_pop  - Pop the callee saved registers from the stack that were
>> +      *              pushed in the most recent call to frame_push, as well
>> +      *              as x29/x30 and any extra stack space that may have been
>> +      *              allocated.
>> +      */
>> +     .macro          frame_pop
>> +     __frame         ld
>> +     .endm
>> +
>> +     .macro          __frame_regs, reg1, reg2, op, num
>> +     .if             .Lframe_regcount == \num
>> +     \op\()r         \reg1, [sp, #(\num + 1) * 8]
>> +     .elseif         .Lframe_regcount > \num
>> +     \op\()p         \reg1, \reg2, [sp, #(\num + 1) * 8]
>> +     .endif
>> +     .endm
>> +
>> +     .macro          __frame, op, regcount, extra=0
>> +     .ifc            \op, st
>> +     .if             (\regcount) < 0 || (\regcount) > 10
>> +     .error          "regcount should be in the range [0 ... 10]"
>> +     .endif
>> +     .if             ((\extra) % 16) != 0
>> +     .error          "extra should be a multiple of 16 bytes"
>> +     .endif
>> +     .set            .Lframe_regcount, \regcount
>> +     .set            .Lframe_extra, \extra
>> +     .set            .Lframe_local_offset, ((\regcount + 3) / 2) * 16
>> +     stp             x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]!
>> +     mov             x29, sp
>> +     .endif
>> +
>> +     __frame_regs    x19, x20, \op, 1
>> +     __frame_regs    x21, x22, \op, 3
>> +     __frame_regs    x23, x24, \op, 5
>> +     __frame_regs    x25, x26, \op, 7
>> +     __frame_regs    x27, x28, \op, 9
>> +
>> +     .ifc            \op, ld
>> +     .if             .Lframe_regcount == -1
>
> We could also have
>
>         .ifc            \op, st
>         .ifdef          .Lframe_regcount
>         .if             .Lframe_regcount != -1
>         .error [...]
>
> on the push side, which would trip on the first nested frame_push
> rather than waiting until a frame_pop appears.
>
> Your existing code could be retained to guard against a double pop.
>

Nice. I'll try that.

>> +     .error          "frame_push/frame_pop may not be nested"
>> +     .endif
>> +     ldp             x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra
>> +     .set            .Lframe_regcount, -1
>> +     .endif
>> +     .endm
>> +
>>  #endif       /* __ASM_ASSEMBLER_H */
>> --
>> 2.11.0
>>
>>
>> _______________________________________________
>> linux-arm-kernel mailing list
>> linux-arm-kernel@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Dave Martin March 29, 2018, 9:28 a.m. UTC | #3
On Thu, Mar 29, 2018 at 09:54:31AM +0100, Ard Biesheuvel wrote:
> On 28 March 2018 at 17:34, Dave Martin <Dave.Martin@arm.com> wrote:
> > On Wed, Mar 28, 2018 at 02:41:28PM +0200, Ard Biesheuvel wrote:
> >> We are going to add code to all the NEON crypto routines that will
> >> turn them into non-leaf functions, so we need to manage the stack
> >> frames. To make this less tedious and error prone, add some macros
> >> that take the number of callee saved registers to preserve and the
> >
> > Apologies for the delay in looking at these patches...
> >
> > Anyway:
> >
> > Nit: for all instances of "callee saved" in this patch, do you mean             "caller saved"?
> >
> 
> 'Caller saved' means that the caller needs to stack/unstack a register
> itself if it needs its value to be preserved across a function call.
> 'Callee saved' means that the caller can rely on the callee to ensure
> that the register will retain its value.
> 
> So we are dealing with the latter here, afaict. Or am I missing something?

Yes, I confused myself.  In preparation for calling kernel_neon_begin
etc., we would potentially need to save some caller-save registers.  But
that's not what the macros in this patch are about.

> > A few stylistic comments below, but I don't consider them essential to
> > address unless someone feels like it.
> >
> > Otherwise,
> > Reviewed-by: Dave Martin <Dave.Martin@arm.com>
> >
> 
> Thanks.
> 
> >> extra size to allocate in the stack frame (for locals) and emit
> >> the ldp/stp sequences.
> >>
> >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> >> ---
> >>  arch/arm64/include/asm/assembler.h | 58 ++++++++++++++++++++
> >>  1 file changed, 58 insertions(+)
> >>
> >> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h

[...]

> >> +     .macro          __frame, op, regcount, extra=0

[...]

> >> +     .ifc            \op, ld
> >> +     .if             .Lframe_regcount == -1
> >
> > We could also have
> >
> >         .ifc            \op, st
> >         .ifdef          .Lframe_regcount
> >         .if             .Lframe_regcount != -1
> >         .error [...]
> >
> > on the push side, which would trip on the first nested frame_push
> > rather than waiting until a frame_pop appears.
> >
> > Your existing code could be retained to guard against a double pop.
> >
> 
> Nice. I'll try that.

OK, cool

[...]

Cheers
---Dave
diff mbox

Patch

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 053d83e8db6f..d354eb7f2f0c 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -565,4 +565,62 @@  USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
 #endif
 	.endm
 
+	/*
+	 * frame_push - Push @regcount callee saved registers to the stack,
+	 *              starting at x19, as well as x29/x30, and set x29 to
+	 *              the new value of sp. Add @extra bytes of stack space
+	 *              for locals.
+	 */
+	.macro		frame_push, regcount:req, extra
+	__frame		st, \regcount, \extra
+	.endm
+
+	/*
+	 * frame_pop  - Pop the callee saved registers from the stack that were
+	 *              pushed in the most recent call to frame_push, as well
+	 *              as x29/x30 and any extra stack space that may have been
+	 *              allocated.
+	 */
+	.macro		frame_pop
+	__frame		ld
+	.endm
+
+	.macro		__frame_regs, reg1, reg2, op, num
+	.if		.Lframe_regcount == \num
+	\op\()r		\reg1, [sp, #(\num + 1) * 8]
+	.elseif		.Lframe_regcount > \num
+	\op\()p		\reg1, \reg2, [sp, #(\num + 1) * 8]
+	.endif
+	.endm
+
+	.macro		__frame, op, regcount, extra=0
+	.ifc		\op, st
+	.if		(\regcount) < 0 || (\regcount) > 10
+	.error		"regcount should be in the range [0 ... 10]"
+	.endif
+	.if		((\extra) % 16) != 0
+	.error		"extra should be a multiple of 16 bytes"
+	.endif
+	.set		.Lframe_regcount, \regcount
+	.set		.Lframe_extra, \extra
+	.set		.Lframe_local_offset, ((\regcount + 3) / 2) * 16
+	stp		x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]!
+	mov		x29, sp
+	.endif
+
+	__frame_regs	x19, x20, \op, 1
+	__frame_regs	x21, x22, \op, 3
+	__frame_regs	x23, x24, \op, 5
+	__frame_regs	x25, x26, \op, 7
+	__frame_regs	x27, x28, \op, 9
+
+	.ifc		\op, ld
+	.if		.Lframe_regcount == -1
+	.error		"frame_push/frame_pop may not be nested"
+	.endif
+	ldp		x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra
+	.set		.Lframe_regcount, -1
+	.endif
+	.endm
+
 #endif	/* __ASM_ASSEMBLER_H */