[02/30] ARM: assembler: introduce adr_l, ldr_l and str_l macros
diff mbox

Message ID 20170814125411.22604-3-ard.biesheuvel@linaro.org
State New
Headers show

Commit Message

Ard Biesheuvel Aug. 14, 2017, 12:53 p.m. UTC
Like arm64, ARM supports position independent code sequences that
produce symbol references with a greater reach than the ordinary
adr/ldr instructions.

Currently, we use open coded instruction sequences involving literals
and arithmetic operations. Instead, we can use movw/movt pairs on v7
CPUs, circumventing the D-cache entirely. For older CPUs, we can emit
the literal into a subsection, allowing it to be emitted out of line
while retaining the ability to perform arithmetic on label offsets.

E.g., on pre-v7 CPUs, we can emit a PC-relative reference as follows:

       ldr          <reg>, 222f
  111: add          <reg>, <reg>, pc
       .subsection  1
  222: .long        <sym> - (111b + 8)
       .previous

This is allowed by the assembler because, unlike ordinary sections,
subsections are combined into a single section into the object file,
and so the label references are not true cross-section references that
are visible as relocations. Note that we could even do something like

       add          <reg>, pc, #(222f - 111f) & ~0xfff
       ldr          <reg>, [<reg>, #(222f - 111f) & 0xfff]
  111: add          <reg>, <reg>, pc
       .subsection  1
  222: .long        <sym> - (111b + 8)
       .previous

if it turns out that the 4 KB range of the ldr instruction is insufficient
to reach the literal in the subsection, although this is currently not a
problem (of the 98 objects built from .S files in a multi_v7_defconfig
build, only 11 have .text sections that are over 1 KB, and the largest one
[entry-armv.o] is 3308 bytes)

Subsections have been available in binutils since 2004 at least, so
they should not cause any issues with older toolchains.

So use the above to implement the macros mov_l, adr_l, adrm_l (using ldm
to load multiple literals at once), ldr_l and str_l, all of which will
use movw/movt pairs on v7 and later CPUs, and use PC-relative literals
otherwise.

Cc: Russell King <linux@armlinux.org.uk>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/include/asm/assembler.h | 71 ++++++++++++++++++++
 1 file changed, 71 insertions(+)

Comments

Dave Martin Aug. 14, 2017, 3:29 p.m. UTC | #1
On Mon, Aug 14, 2017 at 01:53:43PM +0100, Ard Biesheuvel wrote:
> Like arm64, ARM supports position independent code sequences that
> produce symbol references with a greater reach than the ordinary
> adr/ldr instructions.
> 
> Currently, we use open coded instruction sequences involving literals
> and arithmetic operations. Instead, we can use movw/movt pairs on v7
> CPUs, circumventing the D-cache entirely. For older CPUs, we can emit
> the literal into a subsection, allowing it to be emitted out of line
> while retaining the ability to perform arithmetic on label offsets.
> 
> E.g., on pre-v7 CPUs, we can emit a PC-relative reference as follows:
> 
>        ldr          <reg>, 222f
>   111: add          <reg>, <reg>, pc
>        .subsection  1
>   222: .long        <sym> - (111b + 8)
>        .previous
> 
> This is allowed by the assembler because, unlike ordinary sections,
> subsections are combined into a single section into the object file,
> and so the label references are not true cross-section references that
> are visible as relocations. Note that we could even do something like
> 
>        add          <reg>, pc, #(222f - 111f) & ~0xfff
>        ldr          <reg>, [<reg>, #(222f - 111f) & 0xfff]
>   111: add          <reg>, <reg>, pc
>        .subsection  1
>   222: .long        <sym> - (111b + 8)
>        .previous

This is reinventing ldr=

I seem to remember ldr= barfing on things that .long happily accepts
though, was this the reason?


> if it turns out that the 4 KB range of the ldr instruction is insufficient
> to reach the literal in the subsection, although this is currently not a
> problem (of the 98 objects built from .S files in a multi_v7_defconfig
> build, only 11 have .text sections that are over 1 KB, and the largest one
> [entry-armv.o] is 3308 bytes)
> 
> Subsections have been available in binutils since 2004 at least, so
> they should not cause any issues with older toolchains.

(I also believe this to be an ancient feature, but I've not done the
digging to prove it.)

> So use the above to implement the macros mov_l, adr_l, adrm_l (using ldm
> to load multiple literals at once), ldr_l and str_l, all of which will
> use movw/movt pairs on v7 and later CPUs, and use PC-relative literals
> otherwise.
> 
> Cc: Russell King <linux@armlinux.org.uk>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
>  arch/arm/include/asm/assembler.h | 71 ++++++++++++++++++++
>  1 file changed, 71 insertions(+)
> 
> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
> index ad301f107dd2..516ebaf4ff38 100644
> --- a/arch/arm/include/asm/assembler.h
> +++ b/arch/arm/include/asm/assembler.h
> @@ -518,4 +518,75 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
>  #endif
>  	.endm
>  
> +#ifdef CONFIG_THUMB2_KERNEL
> +#define	ARM_PC_BIAS		4
> +#else
> +#define	ARM_PC_BIAS		8
> +#endif
> +
> +	.macro		__adldst_l, op, reg, sym, tmp, c
> +	.if		__LINUX_ARM_ARCH__ < 7
> +	ldr\c		\tmp, 111f
> +	.subsection	1
> +	.align		2
> +111:	.long		\sym - (222f + ARM_PC_BIAS)

See above comment about ldr=.

> +	.previous
> +	.else
> +	W(movw\c\())	\tmp, #:lower16:\sym - (222f + ARM_PC_BIAS)
> +	W(movt\c\())	\tmp, #:upper16:\sym - (222f + ARM_PC_BIAS)

Why W()?

There are no narrow forms of these instructions anyway -- if there were
then they couldn't accommodate a 16-bit immediate.

> +	.endif
> +222:
> +	.ifc		\op, add
> +	add\c		\reg, \tmp, pc
> +	.elseif		CONFIG_THUMB2_KERNEL == 1
> +	add		\tmp, \tmp, pc
> +	\op\c		\reg, [\tmp]

Shame 
	\op\c		\reg, [pc, \tmp]
doesn't work.

But it doesn't, apparently.

> +	.else
> +	\op\c		\reg, [pc, \tmp]
> +	.endif
> +	.endm
> +
> +	/*
> +	 * mov_l - move a constant value or [relocated] address into a register
> +	 */
> +	.macro		mov_l, dst:req, imm:req, cond
> +	.if		__LINUX_ARM_ARCH__ < 7
> +	ldr\cond	\dst, =\imm
> +	.else
> +	W(movw\cond\())	\dst, #:lower16:\imm
> +	W(movt\cond\())	\dst, #:upper16:\imm
> +	.endif
> +	.endm
> +
> +	/*
> +	 * adr_l - adr pseudo-op with unlimited range
> +	 *
> +	 * @dst: destination register
> +	 * @sym: name of the symbol
> +	 */
> +	.macro		adr_l, dst:req, sym:req, cond
> +	__adldst_l	add, \dst, \sym, \dst, \cond
> +	.endm
> +
> +	/*
> +	 * ldr_l - ldr <literal> pseudo-op with unlimited range
> +	 *
> +	 * @dst: destination register
> +	 * @sym: name of the symbol
> +	 */
> +	.macro		ldr_l, dst:req, sym:req, cond
> +	__adldst_l	ldr, \dst, \sym, \dst, \cond
> +	.endm
> +
> +	/*
> +	 * str_l - str <literal> pseudo-op with unlimited range
> +	 *
> +	 * @src: source register
> +	 * @sym: name of the symbol
> +	 * @tmp: mandatory scratch register
> +	 */
> +	.macro		str_l, src:req, sym:req, tmp:req, cond
> +	__adldst_l	str, \src, \sym, \tmp, \cond
> +	.endm

Cheers
---Dave
Dave Martin Aug. 14, 2017, 3:32 p.m. UTC | #2
On Mon, Aug 14, 2017 at 01:53:43PM +0100, Ard Biesheuvel wrote:
> Like arm64, ARM supports position independent code sequences that
> produce symbol references with a greater reach than the ordinary
> adr/ldr instructions.
> 
> Currently, we use open coded instruction sequences involving literals
> and arithmetic operations. Instead, we can use movw/movt pairs on v7
> CPUs, circumventing the D-cache entirely. For older CPUs, we can emit
> the literal into a subsection, allowing it to be emitted out of line
> while retaining the ability to perform arithmetic on label offsets.
> 
> E.g., on pre-v7 CPUs, we can emit a PC-relative reference as follows:
> 
>        ldr          <reg>, 222f
>   111: add          <reg>, <reg>, pc
>        .subsection  1
>   222: .long        <sym> - (111b + 8)
>        .previous
> 
> This is allowed by the assembler because, unlike ordinary sections,
> subsections are combined into a single section into the object file,
> and so the label references are not true cross-section references that
> are visible as relocations. Note that we could even do something like
> 
>        add          <reg>, pc, #(222f - 111f) & ~0xfff
>        ldr          <reg>, [<reg>, #(222f - 111f) & 0xfff]
>   111: add          <reg>, <reg>, pc
>        .subsection  1
>   222: .long        <sym> - (111b + 8)
>        .previous
> 
> if it turns out that the 4 KB range of the ldr instruction is insufficient
> to reach the literal in the subsection, although this is currently not a
> problem (of the 98 objects built from .S files in a multi_v7_defconfig
> build, only 11 have .text sections that are over 1 KB, and the largest one
> [entry-armv.o] is 3308 bytes)
> 
> Subsections have been available in binutils since 2004 at least, so
> they should not cause any issues with older toolchains.
> 
> So use the above to implement the macros mov_l, adr_l, adrm_l (using ldm

I don't see adrm_l in this patch.

> to load multiple literals at once), ldr_l and str_l, all of which will
> use movw/movt pairs on v7 and later CPUs, and use PC-relative literals
> otherwise.

Also...

By default, I'd assume that we should port _all_ uses of :upper16:/
:lower16: to use these.  Does this series consciously do that?  Are
there any exceptions?

[...]

Cheers
---Dave
Ard Biesheuvel Aug. 14, 2017, 3:38 p.m. UTC | #3
On 14 August 2017 at 16:29, Dave Martin <Dave.Martin@arm.com> wrote:
> On Mon, Aug 14, 2017 at 01:53:43PM +0100, Ard Biesheuvel wrote:
>> Like arm64, ARM supports position independent code sequences that
>> produce symbol references with a greater reach than the ordinary
>> adr/ldr instructions.
>>
>> Currently, we use open coded instruction sequences involving literals
>> and arithmetic operations. Instead, we can use movw/movt pairs on v7
>> CPUs, circumventing the D-cache entirely. For older CPUs, we can emit
>> the literal into a subsection, allowing it to be emitted out of line
>> while retaining the ability to perform arithmetic on label offsets.
>>
>> E.g., on pre-v7 CPUs, we can emit a PC-relative reference as follows:
>>
>>        ldr          <reg>, 222f
>>   111: add          <reg>, <reg>, pc
>>        .subsection  1
>>   222: .long        <sym> - (111b + 8)
>>        .previous
>>
>> This is allowed by the assembler because, unlike ordinary sections,
>> subsections are combined into a single section into the object file,
>> and so the label references are not true cross-section references that
>> are visible as relocations. Note that we could even do something like
>>
>>        add          <reg>, pc, #(222f - 111f) & ~0xfff
>>        ldr          <reg>, [<reg>, #(222f - 111f) & 0xfff]
>>   111: add          <reg>, <reg>, pc
>>        .subsection  1
>>   222: .long        <sym> - (111b + 8)
>>        .previous
>
> This is reinventing ldr=
>
> I seem to remember ldr= barfing on things that .long happily accepts
> though, was this the reason?
>

Yes. ldr = does not accept expressions involving symbols, only plain
symbols or expressions that evaluate to constants.

So something like

ldr <reg>, =<sym> - <label>

is rejected while the equivalent

ldr <reg>, 0f
0: .long <sym> - <label>

does work.



>> if it turns out that the 4 KB range of the ldr instruction is insufficient
>> to reach the literal in the subsection, although this is currently not a
>> problem (of the 98 objects built from .S files in a multi_v7_defconfig
>> build, only 11 have .text sections that are over 1 KB, and the largest one
>> [entry-armv.o] is 3308 bytes)
>>
>> Subsections have been available in binutils since 2004 at least, so
>> they should not cause any issues with older toolchains.
>
> (I also believe this to be an ancient feature, but I've not done the
> digging to prove it.)
>

OK

>> So use the above to implement the macros mov_l, adr_l, adrm_l (using ldm
>> to load multiple literals at once), ldr_l and str_l, all of which will
>> use movw/movt pairs on v7 and later CPUs, and use PC-relative literals
>> otherwise.
>>
>> Cc: Russell King <linux@armlinux.org.uk>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> ---
>>  arch/arm/include/asm/assembler.h | 71 ++++++++++++++++++++
>>  1 file changed, 71 insertions(+)
>>
>> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
>> index ad301f107dd2..516ebaf4ff38 100644
>> --- a/arch/arm/include/asm/assembler.h
>> +++ b/arch/arm/include/asm/assembler.h
>> @@ -518,4 +518,75 @@ THUMB(   orr     \reg , \reg , #PSR_T_BIT        )
>>  #endif
>>       .endm
>>
>> +#ifdef CONFIG_THUMB2_KERNEL
>> +#define      ARM_PC_BIAS             4
>> +#else
>> +#define      ARM_PC_BIAS             8
>> +#endif
>> +
>> +     .macro          __adldst_l, op, reg, sym, tmp, c
>> +     .if             __LINUX_ARM_ARCH__ < 7
>> +     ldr\c           \tmp, 111f
>> +     .subsection     1
>> +     .align          2
>> +111: .long           \sym - (222f + ARM_PC_BIAS)
>
> See above comment about ldr=.
>
>> +     .previous
>> +     .else
>> +     W(movw\c\())    \tmp, #:lower16:\sym - (222f + ARM_PC_BIAS)
>> +     W(movt\c\())    \tmp, #:upper16:\sym - (222f + ARM_PC_BIAS)
>
> Why W()?
>
> There are no narrow forms of these instructions anyway -- if there were
> then they couldn't accommodate a 16-bit immediate.
>

That's a trick, actually, which I failed to add a comment for.

We use .arm sections in the thumb2 kernel, and using these macros
there would result in the wrong offset to be used. Adding the .w
suffix forces an error in the assembler which even results in a fairly
meaningful error message complaining about using .w in ARM code.

>> +     .endif
>> +222:
>> +     .ifc            \op, add
>> +     add\c           \reg, \tmp, pc
>> +     .elseif         CONFIG_THUMB2_KERNEL == 1
>> +     add             \tmp, \tmp, pc
>> +     \op\c           \reg, [\tmp]
>
> Shame
>         \op\c           \reg, [pc, \tmp]
> doesn't work.
>
> But it doesn't, apparently.
>

No, thumb2 does not allow that

>> +     .else
>> +     \op\c           \reg, [pc, \tmp]
>> +     .endif
>> +     .endm
>> +
>> +     /*
>> +      * mov_l - move a constant value or [relocated] address into a register
>> +      */
>> +     .macro          mov_l, dst:req, imm:req, cond
>> +     .if             __LINUX_ARM_ARCH__ < 7
>> +     ldr\cond        \dst, =\imm
>> +     .else
>> +     W(movw\cond\()) \dst, #:lower16:\imm
>> +     W(movt\cond\()) \dst, #:upper16:\imm
>> +     .endif
>> +     .endm
>> +
>> +     /*
>> +      * adr_l - adr pseudo-op with unlimited range
>> +      *
>> +      * @dst: destination register
>> +      * @sym: name of the symbol
>> +      */
>> +     .macro          adr_l, dst:req, sym:req, cond
>> +     __adldst_l      add, \dst, \sym, \dst, \cond
>> +     .endm
>> +
>> +     /*
>> +      * ldr_l - ldr <literal> pseudo-op with unlimited range
>> +      *
>> +      * @dst: destination register
>> +      * @sym: name of the symbol
>> +      */
>> +     .macro          ldr_l, dst:req, sym:req, cond
>> +     __adldst_l      ldr, \dst, \sym, \dst, \cond
>> +     .endm
>> +
>> +     /*
>> +      * str_l - str <literal> pseudo-op with unlimited range
>> +      *
>> +      * @src: source register
>> +      * @sym: name of the symbol
>> +      * @tmp: mandatory scratch register
>> +      */
>> +     .macro          str_l, src:req, sym:req, tmp:req, cond
>> +     __adldst_l      str, \src, \sym, \tmp, \cond
>> +     .endm
>
> Cheers
> ---Dave
Ard Biesheuvel Aug. 14, 2017, 3:40 p.m. UTC | #4
On 14 August 2017 at 16:32, Dave Martin <Dave.Martin@arm.com> wrote:
> On Mon, Aug 14, 2017 at 01:53:43PM +0100, Ard Biesheuvel wrote:
>> Like arm64, ARM supports position independent code sequences that
>> produce symbol references with a greater reach than the ordinary
>> adr/ldr instructions.
>>
>> Currently, we use open coded instruction sequences involving literals
>> and arithmetic operations. Instead, we can use movw/movt pairs on v7
>> CPUs, circumventing the D-cache entirely. For older CPUs, we can emit
>> the literal into a subsection, allowing it to be emitted out of line
>> while retaining the ability to perform arithmetic on label offsets.
>>
>> E.g., on pre-v7 CPUs, we can emit a PC-relative reference as follows:
>>
>>        ldr          <reg>, 222f
>>   111: add          <reg>, <reg>, pc
>>        .subsection  1
>>   222: .long        <sym> - (111b + 8)
>>        .previous
>>
>> This is allowed by the assembler because, unlike ordinary sections,
>> subsections are combined into a single section into the object file,
>> and so the label references are not true cross-section references that
>> are visible as relocations. Note that we could even do something like
>>
>>        add          <reg>, pc, #(222f - 111f) & ~0xfff
>>        ldr          <reg>, [<reg>, #(222f - 111f) & 0xfff]
>>   111: add          <reg>, <reg>, pc
>>        .subsection  1
>>   222: .long        <sym> - (111b + 8)
>>        .previous
>>
>> if it turns out that the 4 KB range of the ldr instruction is insufficient
>> to reach the literal in the subsection, although this is currently not a
>> problem (of the 98 objects built from .S files in a multi_v7_defconfig
>> build, only 11 have .text sections that are over 1 KB, and the largest one
>> [entry-armv.o] is 3308 bytes)
>>
>> Subsections have been available in binutils since 2004 at least, so
>> they should not cause any issues with older toolchains.
>>
>> So use the above to implement the macros mov_l, adr_l, adrm_l (using ldm
>
> I don't see adrm_l in this patch.
>

Oops (2)

Nico already mentioned that, and I failed to fix the commit log. I
added it at some point, but it wasn't really useful

>> to load multiple literals at once), ldr_l and str_l, all of which will
>> use movw/movt pairs on v7 and later CPUs, and use PC-relative literals
>> otherwise.
>
> Also...
>
> By default, I'd assume that we should port _all_ uses of :upper16:/
> :lower16: to use these.  Does this series consciously do that?  Are
> there any exceptions?
>

There aren't that many. Anything that refers to absolute symbols will
break under CONFIG_RELOCATABLE and I haven't noticed any issues (I
tested extensively with Thumb2)

I don't mind open coded movw/movt for relative references in code that
is tightly coupled to a platform that guarantees v7+ so I didn't do a
full sweep. Also, I started with 50+ patches and tried to remove the
ones that are mostly orthogonal to the KASLR stuff.
Dave Martin Aug. 14, 2017, 3:50 p.m. UTC | #5
On Mon, Aug 14, 2017 at 04:38:02PM +0100, Ard Biesheuvel wrote:
> On 14 August 2017 at 16:29, Dave Martin <Dave.Martin@arm.com> wrote:
> > On Mon, Aug 14, 2017 at 01:53:43PM +0100, Ard Biesheuvel wrote:
> >> Like arm64, ARM supports position independent code sequences that
> >> produce symbol references with a greater reach than the ordinary
> >> adr/ldr instructions.
> >>
> >> Currently, we use open coded instruction sequences involving literals
> >> and arithmetic operations. Instead, we can use movw/movt pairs on v7
> >> CPUs, circumventing the D-cache entirely. For older CPUs, we can emit
> >> the literal into a subsection, allowing it to be emitted out of line
> >> while retaining the ability to perform arithmetic on label offsets.
> >>
> >> E.g., on pre-v7 CPUs, we can emit a PC-relative reference as follows:
> >>
> >>        ldr          <reg>, 222f
> >>   111: add          <reg>, <reg>, pc
> >>        .subsection  1
> >>   222: .long        <sym> - (111b + 8)
> >>        .previous
> >>
> >> This is allowed by the assembler because, unlike ordinary sections,
> >> subsections are combined into a single section into the object file,
> >> and so the label references are not true cross-section references that
> >> are visible as relocations. Note that we could even do something like
> >>
> >>        add          <reg>, pc, #(222f - 111f) & ~0xfff
> >>        ldr          <reg>, [<reg>, #(222f - 111f) & 0xfff]
> >>   111: add          <reg>, <reg>, pc
> >>        .subsection  1
> >>   222: .long        <sym> - (111b + 8)
> >>        .previous
> >
> > This is reinventing ldr=
> >
> > I seem to remember ldr= barfing on things that .long happily accepts
> > though, was this the reason?
> >
> 
> Yes. ldr = does not accept expressions involving symbols, only plain
> symbols or expressions that evaluate to constants.
> 
> So something like
> 
> ldr <reg>, =<sym> - <label>
> 
> is rejected while the equivalent
> 
> ldr <reg>, 0f
> 0: .long <sym> - <label>
> 
> does work.

I wouldn't bother trying to rationalise gas' behaviour here.  I think
it's an accident of implementation rather than there being some
fundamental reason for it.

AFAICT gas could quite happily resolve ldr= in exactly the same way as
.long and thus not have this problem.  But we can't rewrite history.

[...]

> >> +     .macro          __adldst_l, op, reg, sym, tmp, c
> >> +     .if             __LINUX_ARM_ARCH__ < 7
> >> +     ldr\c           \tmp, 111f
> >> +     .subsection     1
> >> +     .align          2
> >> +111: .long           \sym - (222f + ARM_PC_BIAS)
> >
> > See above comment about ldr=.
> >
> >> +     .previous
> >> +     .else
> >> +     W(movw\c\())    \tmp, #:lower16:\sym - (222f + ARM_PC_BIAS)
> >> +     W(movt\c\())    \tmp, #:upper16:\sym - (222f + ARM_PC_BIAS)
> >
> > Why W()?
> >
> > There are no narrow forms of these instructions anyway -- if there were
> > then they couldn't accommodate a 16-bit immediate.
> >
> 
> That's a trick, actually, which I failed to add a comment for.
> 
> We use .arm sections in the thumb2 kernel, and using these macros
> there would result in the wrong offset to be used. Adding the .w
> suffix forces an error in the assembler which even results in a fairly
> meaningful error message complaining about using .w in ARM code.

Ewww... I think it'd be best to add a comment explaining that.

There's a fair change someone will trip over this at some point (or
worse, "fix" the assembly errors).

> 
> >> +     .endif
> >> +222:
> >> +     .ifc            \op, add
> >> +     add\c           \reg, \tmp, pc
> >> +     .elseif         CONFIG_THUMB2_KERNEL == 1
> >> +     add             \tmp, \tmp, pc
> >> +     \op\c           \reg, [\tmp]
> >
> > Shame
> >         \op\c           \reg, [pc, \tmp]
> > doesn't work.
> >
> > But it doesn't, apparently.
> >
> 
> No, thumb2 does not allow that

Meh.  Oh well.

[...]

Cheers
---Dave
Dave Martin Aug. 14, 2017, 3:53 p.m. UTC | #6
On Mon, Aug 14, 2017 at 04:40:55PM +0100, Ard Biesheuvel wrote:
> On 14 August 2017 at 16:32, Dave Martin <Dave.Martin@arm.com> wrote:
> > On Mon, Aug 14, 2017 at 01:53:43PM +0100, Ard Biesheuvel wrote:

[...]

> >> So use the above to implement the macros mov_l, adr_l, adrm_l (using ldm
> >
> > I don't see adrm_l in this patch.
> >
> 
> Oops (2)
> 
> Nico already mentioned that, and I failed to fix the commit log. I
> added it at some point, but it wasn't really useful

Thought it might be something like that.

> >> to load multiple literals at once), ldr_l and str_l, all of which will
> >> use movw/movt pairs on v7 and later CPUs, and use PC-relative literals
> >> otherwise.
> >
> > Also...
> >
> > By default, I'd assume that we should port _all_ uses of :upper16:/
> > :lower16: to use these.  Does this series consciously do that?  Are
> > there any exceptions?
> >
> 
> There aren't that many. Anything that refers to absolute symbols will
> break under CONFIG_RELOCATABLE and I haven't noticed any issues (I
> tested extensively with Thumb2)
> 
> I don't mind open coded movw/movt for relative references in code that
> is tightly coupled to a platform that guarantees v7+ so I didn't do a
> full sweep. Also, I started with 50+ patches and tried to remove the
> ones that are mostly orthogonal to the KASLR stuff.

OK, that sounds reasonable.

Cheers
---Dave
Nicolas Pitre Aug. 14, 2017, 4:18 p.m. UTC | #7
On Mon, 14 Aug 2017, Dave Martin wrote:

> On Mon, Aug 14, 2017 at 04:38:02PM +0100, Ard Biesheuvel wrote:
> > That's a trick, actually, which I failed to add a comment for.

Shame shame shame !

> > We use .arm sections in the thumb2 kernel, and using these macros
> > there would result in the wrong offset to be used. Adding the .w
> > suffix forces an error in the assembler which even results in a fairly
> > meaningful error message complaining about using .w in ARM code.
> 
> Ewww... I think it'd be best to add a comment explaining that.

Absolutely!


Nicolas
Ard Biesheuvel Aug. 14, 2017, 4:22 p.m. UTC | #8
On 14 August 2017 at 17:18, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Mon, 14 Aug 2017, Dave Martin wrote:
>
>> On Mon, Aug 14, 2017 at 04:38:02PM +0100, Ard Biesheuvel wrote:
>> > That's a trick, actually, which I failed to add a comment for.
>
> Shame shame shame !
>
>> > We use .arm sections in the thumb2 kernel, and using these macros
>> > there would result in the wrong offset to be used. Adding the .w
>> > suffix forces an error in the assembler which even results in a fairly
>> > meaningful error message complaining about using .w in ARM code.
>>
>> Ewww... I think it'd be best to add a comment explaining that.
>
> Absolutely!
>

Yeah, mea culpa.

But if people have better ideas how to avoid this situation, I am all ears.
Nicolas Pitre Aug. 14, 2017, 4:33 p.m. UTC | #9
On Mon, 14 Aug 2017, Ard Biesheuvel wrote:

> On 14 August 2017 at 17:18, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> > On Mon, 14 Aug 2017, Dave Martin wrote:
> >
> >> On Mon, Aug 14, 2017 at 04:38:02PM +0100, Ard Biesheuvel wrote:
> >> > That's a trick, actually, which I failed to add a comment for.
> >
> > Shame shame shame !
> >
> >> > We use .arm sections in the thumb2 kernel, and using these macros
> >> > there would result in the wrong offset to be used. Adding the .w
> >> > suffix forces an error in the assembler which even results in a fairly
> >> > meaningful error message complaining about using .w in ARM code.
> >>
> >> Ewww... I think it'd be best to add a comment explaining that.
> >
> > Absolutely!
> >
> 
> Yeah, mea culpa.
> 
> But if people have better ideas how to avoid this situation, I am all ears.

Just document it.  ;-)


Nicolas
Russell King - ARM Linux admin Aug. 14, 2017, 4:42 p.m. UTC | #10
On Mon, Aug 14, 2017 at 05:22:39PM +0100, Ard Biesheuvel wrote:
> On 14 August 2017 at 17:18, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> > On Mon, 14 Aug 2017, Dave Martin wrote:
> >
> >> On Mon, Aug 14, 2017 at 04:38:02PM +0100, Ard Biesheuvel wrote:
> >> > That's a trick, actually, which I failed to add a comment for.
> >
> > Shame shame shame !
> >
> >> > We use .arm sections in the thumb2 kernel, and using these macros
> >> > there would result in the wrong offset to be used. Adding the .w
> >> > suffix forces an error in the assembler which even results in a fairly
> >> > meaningful error message complaining about using .w in ARM code.
> >>
> >> Ewww... I think it'd be best to add a comment explaining that.
> >
> > Absolutely!
> >
> 
> Yeah, mea culpa.
> 
> But if people have better ideas how to avoid this situation, I am all ears.

Have you tested building an ARMv7M kernel with your patches - ARMv7M is
Thumb only, so can't contain any ARM code.  If not, please try
mps2_defconfig.
Ard Biesheuvel Aug. 14, 2017, 4:56 p.m. UTC | #11
On 14 August 2017 at 17:42, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
> On Mon, Aug 14, 2017 at 05:22:39PM +0100, Ard Biesheuvel wrote:
>> On 14 August 2017 at 17:18, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
>> > On Mon, 14 Aug 2017, Dave Martin wrote:
>> >
>> >> On Mon, Aug 14, 2017 at 04:38:02PM +0100, Ard Biesheuvel wrote:
>> >> > That's a trick, actually, which I failed to add a comment for.
>> >
>> > Shame shame shame !
>> >
>> >> > We use .arm sections in the thumb2 kernel, and using these macros
>> >> > there would result in the wrong offset to be used. Adding the .w
>> >> > suffix forces an error in the assembler which even results in a fairly
>> >> > meaningful error message complaining about using .w in ARM code.
>> >>
>> >> Ewww... I think it'd be best to add a comment explaining that.
>> >
>> > Absolutely!
>> >
>>
>> Yeah, mea culpa.
>>
>> But if people have better ideas how to avoid this situation, I am all ears.
>
> Have you tested building an ARMv7M kernel with your patches - ARMv7M is
> Thumb only, so can't contain any ARM code.  If not, please try
> mps2_defconfig.
>

mps2_defconfig builds without error with these patches. I don't have
the hardware so I can't boot it, unfortunately.

Patch
diff mbox

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index ad301f107dd2..516ebaf4ff38 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -518,4 +518,75 @@  THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 #endif
 	.endm
 
+#ifdef CONFIG_THUMB2_KERNEL
+#define	ARM_PC_BIAS		4
+#else
+#define	ARM_PC_BIAS		8
+#endif
+
+	.macro		__adldst_l, op, reg, sym, tmp, c
+	.if		__LINUX_ARM_ARCH__ < 7
+	ldr\c		\tmp, 111f
+	.subsection	1
+	.align		2
+111:	.long		\sym - (222f + ARM_PC_BIAS)
+	.previous
+	.else
+	W(movw\c\())	\tmp, #:lower16:\sym - (222f + ARM_PC_BIAS)
+	W(movt\c\())	\tmp, #:upper16:\sym - (222f + ARM_PC_BIAS)
+	.endif
+222:
+	.ifc		\op, add
+	add\c		\reg, \tmp, pc
+	.elseif		CONFIG_THUMB2_KERNEL == 1
+	add		\tmp, \tmp, pc
+	\op\c		\reg, [\tmp]
+	.else
+	\op\c		\reg, [pc, \tmp]
+	.endif
+	.endm
+
+	/*
+	 * mov_l - move a constant value or [relocated] address into a register
+	 */
+	.macro		mov_l, dst:req, imm:req, cond
+	.if		__LINUX_ARM_ARCH__ < 7
+	ldr\cond	\dst, =\imm
+	.else
+	W(movw\cond\())	\dst, #:lower16:\imm
+	W(movt\cond\())	\dst, #:upper16:\imm
+	.endif
+	.endm
+
+	/*
+	 * adr_l - adr pseudo-op with unlimited range
+	 *
+	 * @dst: destination register
+	 * @sym: name of the symbol
+	 */
+	.macro		adr_l, dst:req, sym:req, cond
+	__adldst_l	add, \dst, \sym, \dst, \cond
+	.endm
+
+	/*
+	 * ldr_l - ldr <literal> pseudo-op with unlimited range
+	 *
+	 * @dst: destination register
+	 * @sym: name of the symbol
+	 */
+	.macro		ldr_l, dst:req, sym:req, cond
+	__adldst_l	ldr, \dst, \sym, \dst, \cond
+	.endm
+
+	/*
+	 * str_l - str <literal> pseudo-op with unlimited range
+	 *
+	 * @src: source register
+	 * @sym: name of the symbol
+	 * @tmp: mandatory scratch register
+	 */
+	.macro		str_l, src:req, sym:req, tmp:req, cond
+	__adldst_l	str, \src, \sym, \tmp, \cond
+	.endm
+
 #endif /* __ASM_ASSEMBLER_H__ */