
[RFC,v3,2/3] arm64/kernel: don't ban ADRP to work around Cortex-A53 erratum #843419

Message ID 20180214113645.16793-3-ard.biesheuvel@linaro.org (mailing list archive)
State New, archived

Commit Message

Ard Biesheuvel Feb. 14, 2018, 11:36 a.m. UTC
Working around Cortex-A53 erratum #843419 involves special handling of
ADRP instructions that end up in the last two instruction slots of a
4k page, or whose output register gets overwritten without having been
read. (Note that the latter instruction sequence is never emitted by
a properly functioning compiler)

Normally, this gets taken care of by the linker, which can spot such
sequences at final link time, and insert a veneer if the ADRP ends up
at a vulnerable offset. However, Linux kernel modules are partially
linked ELF objects, and so there is no 'final link time' other than the
runtime loading of the module, at which time all the static relocations
are resolved.

For this reason, we have implemented the #843419 workaround for modules
by avoiding ADRP instructions altogether: we build modules with the large
code model, and pass -mpc-relative-literal-loads to recent versions of
GCC that may otherwise emit adrp/ldr pairs to perform literal loads.
However, this
workaround forces us to keep literal data mixed with the instructions
in the executable .text segment, and literal data may inadvertently
turn into an exploitable speculative gadget depending on the relative
offsets of arbitrary symbols.

So let's reimplement this workaround in a way that allows us to switch
back to the small code model and drop the -mpc-relative-literal-loads
GCC switch, by patching affected ADRP instructions at runtime:
- ADRP instructions that do not appear at 4k relative offset 0xff8 or
  0xffc are ignored
- ADRP instructions that are within 1 MB of their target symbol are
  converted into ADR instructions
- remaining ADRP instructions are redirected via a veneer that performs
  the load using an unaffected movn/movk sequence.
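
Condensed, the three cases above amount to the decision below. This is an
illustrative sketch only, not code from this patch; the actual
implementation is reloc_adrp_erratum_843419() and module_emit_adrp_veneer()
further down.

    /*
     * Sketch: classify one R_AARCH64_ADR_PREL_PG_HI21[_NC] relocation.
     * 'place' is the address of the ADRP instruction, 'val' the symbol
     * value plus addend.
     */
    enum adrp_fixup { ADRP_KEEP, ADRP_TO_ADR, ADRP_TO_VENEER };

    static enum adrp_fixup classify_adrp(__le32 *place, u64 val)
    {
            /* byte offset from the ADRP to the target page (what ADR would encode) */
            s64 delta = (s64)(val & ~0xfffULL) - (s64)(u64)place;

            /* only the last two instruction slots of a 4k page are affected */
            if (((u64)place & 0xfff) < 0xff8)
                    return ADRP_KEEP;

            /* ADR has a +/-1 MB range (21-bit signed immediate) */
            if (delta >= -(s64)SZ_1M && delta < (s64)SZ_1M)
                    return ADRP_TO_ADR;     /* clear opcode bit 31 */

            return ADRP_TO_VENEER;          /* branch to a movn/movk/movk veneer */
    }

The veneer itself (module_emit_adrp_veneer() below) materializes the
symbol's page address into the ADRP's destination register with a
movn/movk/movk sequence and then branches back to the instruction
following the ADRP.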

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/Kconfig                  | 11 ++-
 arch/arm64/Makefile                 |  5 --
 arch/arm64/include/asm/module.h     |  2 +
 arch/arm64/kernel/module-plts.c     | 76 +++++++++++++++++++-
 arch/arm64/kernel/module.c          | 32 ++++++++-
 arch/arm64/kernel/reloc_test_core.c |  4 +-
 arch/arm64/kernel/reloc_test_syms.S | 12 +++-
 7 files changed, 120 insertions(+), 22 deletions(-)

Comments

Mark Rutland Feb. 23, 2018, 5:15 p.m. UTC | #1
On Wed, Feb 14, 2018 at 11:36:44AM +0000, Ard Biesheuvel wrote:
> +static bool reloc_adrp_erratum_843419(struct module *mod, __le32 *place,
> +				      u64 val)
> +{
> +	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419))
> +		return false;
> +
> +	/* only ADRP instructions at the end of a 4k page are affected */
> +	if (((u64)place & 0xfff) < 0xff8)
> +		return false;
> +
> +	/* patch ADRP to ADR if it is in range */
> +	if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
> +			    AARCH64_INSN_IMM_ADR)) {
> +		((u8 *)place)[3] &= 0x7f; /* clear opcode bit 31 */

That's a bit hideous, and broken for BE, I think.

Can we not construct the equivalent ADR using the usual insn code, and
insert it?

Thanks,
Mark.
Ard Biesheuvel Feb. 23, 2018, 5:17 p.m. UTC | #2
On 23 February 2018 at 17:15, Mark Rutland <mark.rutland@arm.com> wrote:
> On Wed, Feb 14, 2018 at 11:36:44AM +0000, Ard Biesheuvel wrote:
>> +static bool reloc_adrp_erratum_843419(struct module *mod, __le32 *place,
>> +                                   u64 val)
>> +{
>> +     if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419))
>> +             return false;
>> +
>> +     /* only ADRP instructions at the end of a 4k page are affected */
>> +     if (((u64)place & 0xfff) < 0xff8)
>> +             return false;
>> +
>> +     /* patch ADRP to ADR if it is in range */
>> +     if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
>> +                         AARCH64_INSN_IMM_ADR)) {
>> +             ((u8 *)place)[3] &= 0x7f; /* clear opcode bit 31 */
>
> That's a bit hideous, and broken for BE, I think.
>

Instructions are always LE

> Can we not construct the equivalent ADR using the usual insn code, and
> insert it?
>

Sure.
Mark Rutland Feb. 23, 2018, 5:25 p.m. UTC | #3
On Fri, Feb 23, 2018 at 05:17:31PM +0000, Ard Biesheuvel wrote:
> On 23 February 2018 at 17:15, Mark Rutland <mark.rutland@arm.com> wrote:
> > On Wed, Feb 14, 2018 at 11:36:44AM +0000, Ard Biesheuvel wrote:
> >> +static bool reloc_adrp_erratum_843419(struct module *mod, __le32 *place,
> >> +                                   u64 val)
> >> +{
> >> +     if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419))
> >> +             return false;
> >> +
> >> +     /* only ADRP instructions at the end of a 4k page are affected */
> >> +     if (((u64)place & 0xfff) < 0xff8)
> >> +             return false;
> >> +
> >> +     /* patch ADRP to ADR if it is in range */
> >> +     if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
> >> +                         AARCH64_INSN_IMM_ADR)) {
> >> +             ((u8 *)place)[3] &= 0x7f; /* clear opcode bit 31 */
> >
> > That's a bit hideous, and broken for BE, I think.
> 
> Instructions are always LE

Ugh; I forgot that single didn't have endianness. :(

> > Can we not construct the equivalent ADR using the usual insn code, and
> > insert it?
> 
> Sure.

Cheers!

Mark.
Ard Biesheuvel Feb. 24, 2018, 5:54 p.m. UTC | #4
On 23 February 2018 at 17:25, Mark Rutland <mark.rutland@arm.com> wrote:
> On Fri, Feb 23, 2018 at 05:17:31PM +0000, Ard Biesheuvel wrote:
>> On 23 February 2018 at 17:15, Mark Rutland <mark.rutland@arm.com> wrote:
>> > On Wed, Feb 14, 2018 at 11:36:44AM +0000, Ard Biesheuvel wrote:
>> >> +static bool reloc_adrp_erratum_843419(struct module *mod, __le32 *place,
>> >> +                                   u64 val)
>> >> +{
>> >> +     if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419))
>> >> +             return false;
>> >> +
>> >> +     /* only ADRP instructions at the end of a 4k page are affected */
>> >> +     if (((u64)place & 0xfff) < 0xff8)
>> >> +             return false;
>> >> +
>> >> +     /* patch ADRP to ADR if it is in range */
>> >> +     if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
>> >> +                         AARCH64_INSN_IMM_ADR)) {
>> >> +             ((u8 *)place)[3] &= 0x7f; /* clear opcode bit 31 */
>> >
>> > That's a bit hideous, and broken for BE, I think.
>>
>> Instructions are always LE
>
> Ugh; I forgot that single didn't have endianness. :(
>
>> > Can we not construct the equivalent ADR using the usual insn code, and
>> > insert it?
>>
>> Sure.
>
> Cheers!
>

Umm, it appears we don't have any code to emit ADR or ADRP
instructions, and I am reluctant to add a bunch of routines that are
only ever used to flick a single opcode bit.
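
A minimal sketch of the same single-bit flip written through the __le32
accessors instead of poking byte 3 directly (illustration only; the helper
name is made up, and the patch below keeps the byte store):

    static void adrp_to_adr(__le32 *place)
    {
            u32 insn = le32_to_cpup(place);   /* instruction words are always LE */

            insn &= ~BIT(31);                 /* bit 31: 1 = ADRP, 0 = ADR */
            *place = cpu_to_le32(insn);
    }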
Mark Rutland Feb. 26, 2018, 10:53 a.m. UTC | #5
On Sat, Feb 24, 2018 at 05:54:01PM +0000, Ard Biesheuvel wrote:
> On 23 February 2018 at 17:25, Mark Rutland <mark.rutland@arm.com> wrote:
> > On Fri, Feb 23, 2018 at 05:17:31PM +0000, Ard Biesheuvel wrote:
> >> On 23 February 2018 at 17:15, Mark Rutland <mark.rutland@arm.com> wrote:
> >> > On Wed, Feb 14, 2018 at 11:36:44AM +0000, Ard Biesheuvel wrote:

> >> >> +     /* patch ADRP to ADR if it is in range */
> >> >> +     if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
> >> >> +                         AARCH64_INSN_IMM_ADR)) {
> >> >> +             ((u8 *)place)[3] &= 0x7f; /* clear opcode bit 31 */

> >> > Can we not construct the equivalent ADR using the usual insn code, and
> >> > insert it?
> >>
> >> Sure.
> >
> > Cheers!
> 
> Umm, it appears we don't have any code to emit ADR or ADRP
> instructions, and I am reluctant to add a bunch of routines that are
> only ever used to flick a single opcode bit.

Fair enough.

Mark.
Will Deacon March 5, 2018, 5:18 p.m. UTC | #6
On Wed, Feb 14, 2018 at 11:36:44AM +0000, Ard Biesheuvel wrote:
> Working around Cortex-A53 erratum #843419 involves special handling of
> ADRP instructions that end up in the last two instruction slots of a
> 4k page, or whose output register gets overwritten without having been
> read. (Note that the latter instruction sequence is never emitted by
> a properly functioning compiler)

Does the workaround currently implemented in the linker also make this
same assumption? If not, I'm a little wary that we're making an assumption
about compiler behaviour with no way to detect whether it's been violated or
not.

> diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
> index ea640f92fe5a..93b808056cb4 100644
> --- a/arch/arm64/kernel/module-plts.c
> +++ b/arch/arm64/kernel/module-plts.c
> @@ -41,6 +41,46 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
>  	return (u64)&plt[i];
>  }
>  
> +#ifdef CONFIG_ARM64_ERRATUM_843419
> +u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val)
> +{
> +	struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
> +							  &mod->arch.init;
> +	struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr;
> +	int i = pltsec->plt_num_entries++;
> +	u32 mov0, mov1, mov2, br;
> +	int rd;
> +
> +	BUG_ON(pltsec->plt_num_entries > pltsec->plt_max_entries);

I'd prefer just to fail loading the module, but I see we already have
a BUG_ON for the existing PLT code. Oh well.

> +
> +	/* get the destination register of the ADRP instruction */
> +	rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD,
> +					  le32_to_cpup((__le32 *)loc));
> +
> +	/* generate the veneer instructions */
> +	mov0 = aarch64_insn_gen_movewide(rd, (u16)~val, 0,
> +					 AARCH64_INSN_VARIANT_64BIT,
> +					 AARCH64_INSN_MOVEWIDE_INVERSE);
> +	mov1 = aarch64_insn_gen_movewide(rd, (u16)(val >> 16), 16,
> +					 AARCH64_INSN_VARIANT_64BIT,
> +					 AARCH64_INSN_MOVEWIDE_KEEP);
> +	mov2 = aarch64_insn_gen_movewide(rd, (u16)(val >> 32), 32,
> +					 AARCH64_INSN_VARIANT_64BIT,
> +					 AARCH64_INSN_MOVEWIDE_KEEP);
> +	br = aarch64_insn_gen_branch_imm((u64)&plt[i].br, (u64)loc + 4,
> +					 AARCH64_INSN_BRANCH_NOLINK);
> +
> +	plt[i] = (struct plt_entry){
> +			cpu_to_le32(mov0),
> +			cpu_to_le32(mov1),
> +			cpu_to_le32(mov2),
> +			cpu_to_le32(br)
> +		};
> +
> +	return (u64)&plt[i];
> +}
> +#endif
> +
>  #define cmp_3way(a,b)	((a) < (b) ? -1 : (a) > (b))
>  
>  static int cmp_rela(const void *a, const void *b)
> @@ -68,16 +108,21 @@ static bool duplicate_rel(const Elf64_Rela *rela, int num)
>  }
>  
>  static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
> -			       Elf64_Word dstidx)
> +			       Elf64_Word dstidx, Elf_Shdr *dstsec)
>  {
>  	unsigned int ret = 0;
>  	Elf64_Sym *s;
>  	int i;
>  
>  	for (i = 0; i < num; i++) {
> +		u64 min_align;
> +
>  		switch (ELF64_R_TYPE(rela[i].r_info)) {
>  		case R_AARCH64_JUMP26:
>  		case R_AARCH64_CALL26:
> +			if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
> +				break;
> +
>  			/*
>  			 * We only have to consider branch targets that resolve
>  			 * to symbols that are defined in a different section.
> @@ -109,6 +154,31 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
>  			if (rela[i].r_addend != 0 || !duplicate_rel(rela, i))
>  				ret++;
>  			break;
> +		case R_AARCH64_ADR_PREL_PG_HI21_NC:
> +		case R_AARCH64_ADR_PREL_PG_HI21:
> +			if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419))
> +				break;
> +
> +			/*
> +			 * Determine the minimal safe alignment for this ADRP
> +			 * instruction: the section alignment at which it is
> +			 * guaranteed not to appear at a vulnerable offset.
> +			 */
> +			min_align = 2 << ffz(rela[i].r_offset | 0x7);

I'm struggling to decipher this, can you give me a hint please? Why 0x7? Is
the "2" because of the two vulnerable offsets?

Will
Ard Biesheuvel March 5, 2018, 5:26 p.m. UTC | #7
On 5 March 2018 at 17:18, Will Deacon <will.deacon@arm.com> wrote:
> On Wed, Feb 14, 2018 at 11:36:44AM +0000, Ard Biesheuvel wrote:
>> Working around Cortex-A53 erratum #843419 involves special handling of
>> ADRP instructions that end up in the last two instruction slots of a
>> 4k page, or whose output register gets overwritten without having been
>> read. (Note that the latter instruction sequence is never emitted by
>> a properly functioning compiler)
>
> Does the workaround currently implemented in the linker also make this
> same assumption? If not, I'm a little wary that we're making an assumption
> about compiler behaviour with no way to detect whether it's been violated or
> not.
>

I can check, but I don't see how a compiler would ever choose 'adrp'
when it emits what amounts to a nop instruction.

>> diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
>> index ea640f92fe5a..93b808056cb4 100644
>> --- a/arch/arm64/kernel/module-plts.c
>> +++ b/arch/arm64/kernel/module-plts.c
>> @@ -41,6 +41,46 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
>>       return (u64)&plt[i];
>>  }
>>
>> +#ifdef CONFIG_ARM64_ERRATUM_843419
>> +u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val)
>> +{
>> +     struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
>> +                                                       &mod->arch.init;
>> +     struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr;
>> +     int i = pltsec->plt_num_entries++;
>> +     u32 mov0, mov1, mov2, br;
>> +     int rd;
>> +
>> +     BUG_ON(pltsec->plt_num_entries > pltsec->plt_max_entries);
>
> I'd prefer just to fail loading the module, but I see we already have
> a BUG_ON for the existing PLT code. Oh well.
>

I can fix that in a followup patch

>> +
>> +     /* get the destination register of the ADRP instruction */
>> +     rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD,
>> +                                       le32_to_cpup((__le32 *)loc));
>> +
>> +     /* generate the veneer instructions */
>> +     mov0 = aarch64_insn_gen_movewide(rd, (u16)~val, 0,
>> +                                      AARCH64_INSN_VARIANT_64BIT,
>> +                                      AARCH64_INSN_MOVEWIDE_INVERSE);
>> +     mov1 = aarch64_insn_gen_movewide(rd, (u16)(val >> 16), 16,
>> +                                      AARCH64_INSN_VARIANT_64BIT,
>> +                                      AARCH64_INSN_MOVEWIDE_KEEP);
>> +     mov2 = aarch64_insn_gen_movewide(rd, (u16)(val >> 32), 32,
>> +                                      AARCH64_INSN_VARIANT_64BIT,
>> +                                      AARCH64_INSN_MOVEWIDE_KEEP);
>> +     br = aarch64_insn_gen_branch_imm((u64)&plt[i].br, (u64)loc + 4,
>> +                                      AARCH64_INSN_BRANCH_NOLINK);
>> +
>> +     plt[i] = (struct plt_entry){
>> +                     cpu_to_le32(mov0),
>> +                     cpu_to_le32(mov1),
>> +                     cpu_to_le32(mov2),
>> +                     cpu_to_le32(br)
>> +             };
>> +
>> +     return (u64)&plt[i];
>> +}
>> +#endif
>> +
>>  #define cmp_3way(a,b)        ((a) < (b) ? -1 : (a) > (b))
>>
>>  static int cmp_rela(const void *a, const void *b)
>> @@ -68,16 +108,21 @@ static bool duplicate_rel(const Elf64_Rela *rela, int num)
>>  }
>>
>>  static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
>> -                            Elf64_Word dstidx)
>> +                            Elf64_Word dstidx, Elf_Shdr *dstsec)
>>  {
>>       unsigned int ret = 0;
>>       Elf64_Sym *s;
>>       int i;
>>
>>       for (i = 0; i < num; i++) {
>> +             u64 min_align;
>> +
>>               switch (ELF64_R_TYPE(rela[i].r_info)) {
>>               case R_AARCH64_JUMP26:
>>               case R_AARCH64_CALL26:
>> +                     if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
>> +                             break;
>> +
>>                       /*
>>                        * We only have to consider branch targets that resolve
>>                        * to symbols that are defined in a different section.
>> @@ -109,6 +154,31 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
>>                       if (rela[i].r_addend != 0 || !duplicate_rel(rela, i))
>>                               ret++;
>>                       break;
>> +             case R_AARCH64_ADR_PREL_PG_HI21_NC:
>> +             case R_AARCH64_ADR_PREL_PG_HI21:
>> +                     if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419))
>> +                             break;
>> +
>> +                     /*
>> +                      * Determine the minimal safe alignment for this ADRP
>> +                      * instruction: the section alignment at which it is
>> +                      * guaranteed not to appear at a vulnerable offset.
>> +                      */
>> +                     min_align = 2 << ffz(rela[i].r_offset | 0x7);
>
> I'm struggling to decipher this, can you give me a hint please? Why 0x7? Is
> the "2" because of the two vulnerable offsets?
>

What it basically does is preserve 0 bits in the address of the instruction.

Given that ADRP instructions at addresses ending in 0xff8 or 0xffc
(modulo 4 KiB) may be vulnerable, if any of bits [11:3] is zero, we can
increase the alignment of this section to ensure that bit remains zero,
guaranteeing that the resulting address won't end in 0xff8 or 0xffc.

Bits 0 .. 2 don't count, hence the 0x7. If bit 3 is zero, we can align
to '1 << (3 + 1)' == '2 << 3', and bit 3 will remain zero.
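
To make the arithmetic concrete, two hypothetical r_offset values
(illustration only, using the kernel's ffz()):

    r_offset = 0x1ff8:  0x1ff8 | 0x7 = 0x1fff, ffz() = 13,
                        min_align = 2 << 13 = 0x4000.  That exceeds 4 KiB,
                        so no section alignment helps and a veneer slot is
                        reserved instead.

    r_offset = 0x0eec:  0x0eec | 0x7 = 0x0eef, ffz() = 4,
                        min_align = 2 << 4 = 0x20.  Aligning the section to
                        32 bytes keeps bit 4 of the instruction's address
                        zero, so it can never land at page offset 0xff8 or
                        0xffc.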
Will Deacon March 5, 2018, 5:34 p.m. UTC | #8
On Mon, Mar 05, 2018 at 05:26:35PM +0000, Ard Biesheuvel wrote:
> On 5 March 2018 at 17:18, Will Deacon <will.deacon@arm.com> wrote:
> > On Wed, Feb 14, 2018 at 11:36:44AM +0000, Ard Biesheuvel wrote:
> >> Working around Cortex-A53 erratum #843419 involves special handling of
> >> ADRP instructions that end up in the last two instruction slots of a
> >> 4k page, or whose output register gets overwritten without having been
> >> read. (Note that the latter instruction sequence is never emitted by
> >> a properly functioning compiler)
> >
> > Does the workaround currently implemented in the linker also make this
> > same assumption? If not, I'm a little wary that we're making an assumption
> > about compiler behaviour with no way to detect whether it's been violated or
> > not.
> >
> 
> I can check, but I don't see how a compiler would ever choose 'adrp'
> when it emits what amounts to a nop instruction.

Agreed, but it wouldn't be the first time we've seen compilers do weird
things.

> >> diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
> >> index ea640f92fe5a..93b808056cb4 100644
> >> --- a/arch/arm64/kernel/module-plts.c
> >> +++ b/arch/arm64/kernel/module-plts.c
> >> @@ -41,6 +41,46 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
> >>       return (u64)&plt[i];
> >>  }
> >>
> >> +#ifdef CONFIG_ARM64_ERRATUM_843419
> >> +u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val)
> >> +{
> >> +     struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
> >> +                                                       &mod->arch.init;
> >> +     struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr;
> >> +     int i = pltsec->plt_num_entries++;
> >> +     u32 mov0, mov1, mov2, br;
> >> +     int rd;
> >> +
> >> +     BUG_ON(pltsec->plt_num_entries > pltsec->plt_max_entries);
> >
> > I'd prefer just to fail loading the module, but I see we already have
> > a BUG_ON for the existing PLT code. Oh well.
> >
> 
> I can fix that in a followup patch

Thanks.

> >> +
> >> +     /* get the destination register of the ADRP instruction */
> >> +     rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD,
> >> +                                       le32_to_cpup((__le32 *)loc));
> >> +
> >> +     /* generate the veneer instructions */
> >> +     mov0 = aarch64_insn_gen_movewide(rd, (u16)~val, 0,
> >> +                                      AARCH64_INSN_VARIANT_64BIT,
> >> +                                      AARCH64_INSN_MOVEWIDE_INVERSE);
> >> +     mov1 = aarch64_insn_gen_movewide(rd, (u16)(val >> 16), 16,
> >> +                                      AARCH64_INSN_VARIANT_64BIT,
> >> +                                      AARCH64_INSN_MOVEWIDE_KEEP);
> >> +     mov2 = aarch64_insn_gen_movewide(rd, (u16)(val >> 32), 32,
> >> +                                      AARCH64_INSN_VARIANT_64BIT,
> >> +                                      AARCH64_INSN_MOVEWIDE_KEEP);
> >> +     br = aarch64_insn_gen_branch_imm((u64)&plt[i].br, (u64)loc + 4,
> >> +                                      AARCH64_INSN_BRANCH_NOLINK);
> >> +
> >> +     plt[i] = (struct plt_entry){
> >> +                     cpu_to_le32(mov0),
> >> +                     cpu_to_le32(mov1),
> >> +                     cpu_to_le32(mov2),
> >> +                     cpu_to_le32(br)
> >> +             };
> >> +
> >> +     return (u64)&plt[i];
> >> +}
> >> +#endif
> >> +
> >>  #define cmp_3way(a,b)        ((a) < (b) ? -1 : (a) > (b))
> >>
> >>  static int cmp_rela(const void *a, const void *b)
> >> @@ -68,16 +108,21 @@ static bool duplicate_rel(const Elf64_Rela *rela, int num)
> >>  }
> >>
> >>  static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
> >> -                            Elf64_Word dstidx)
> >> +                            Elf64_Word dstidx, Elf_Shdr *dstsec)
> >>  {
> >>       unsigned int ret = 0;
> >>       Elf64_Sym *s;
> >>       int i;
> >>
> >>       for (i = 0; i < num; i++) {
> >> +             u64 min_align;
> >> +
> >>               switch (ELF64_R_TYPE(rela[i].r_info)) {
> >>               case R_AARCH64_JUMP26:
> >>               case R_AARCH64_CALL26:
> >> +                     if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
> >> +                             break;
> >> +
> >>                       /*
> >>                        * We only have to consider branch targets that resolve
> >>                        * to symbols that are defined in a different section.
> >> @@ -109,6 +154,31 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
> >>                       if (rela[i].r_addend != 0 || !duplicate_rel(rela, i))
> >>                               ret++;
> >>                       break;
> >> +             case R_AARCH64_ADR_PREL_PG_HI21_NC:
> >> +             case R_AARCH64_ADR_PREL_PG_HI21:
> >> +                     if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419))
> >> +                             break;
> >> +
> >> +                     /*
> >> +                      * Determine the minimal safe alignment for this ADRP
> >> +                      * instruction: the section alignment at which it is
> >> +                      * guaranteed not to appear at a vulnerable offset.
> >> +                      */
> >> +                     min_align = 2 << ffz(rela[i].r_offset | 0x7);
> >
> > I'm struggling to decipher this, can you give me a hint please? Why 0x7? Is
> > the "2" because of the two vulnerable offsets?
> >
> 
> What it basically does is preserve 0 bits in the address of the instruction.
> 
> Given that ADRP instructions at addresses ending in 0xff8 or 0xffc
> (modulo 4 KiB) may be vulnerable, if any of bits [11:3] is zero, we can
> increase the alignment of this section to ensure that bit remains zero,
> guaranteeing that the resulting address won't end in 0xff8 or 0xffc.
> 
> Bits 0 .. 2 don't count, hence the 0x7. If bit 3 is zero, we can align
> to '1 << (3 + 1)' == '2 << 3', and bit 3 will remain zero.

Please put this in a comment ;)

Will
Ard Biesheuvel March 5, 2018, 5:41 p.m. UTC | #9
On 5 March 2018 at 17:34, Will Deacon <will.deacon@arm.com> wrote:
> On Mon, Mar 05, 2018 at 05:26:35PM +0000, Ard Biesheuvel wrote:
>> On 5 March 2018 at 17:18, Will Deacon <will.deacon@arm.com> wrote:
>> > On Wed, Feb 14, 2018 at 11:36:44AM +0000, Ard Biesheuvel wrote:
>> >> Working around Cortex-A53 erratum #843419 involves special handling of
>> >> ADRP instructions that end up in the last two instruction slots of a
>> >> 4k page, or whose output register gets overwritten without having been
>> >> read. (Note that the latter instruction sequence is never emitted by
>> >> a properly functioning compiler)
>> >
>> > Does the workaround currently implemented in the linker also make this
>> > same assumption? If not, I'm a little wary that we're making an assumption
>> > about compiler behaviour with no way to detect whether it's been violated or
>> > not.
>> >
>>
>> I can check, but I don't see how a compiler would ever choose 'adrp'
>> when it emits what amounts to a nop instruction.
>
> Agreed, but it wouldn't be the first time we've seen compilers do weird
> things.
>

I just checked ld.bfd: it only checks for sequence 1, which is the one
that affects ADRP instructions at offsets 0xff8/0xffc modulo 4 KB.
It actually checks more elaborately whether the sequence is in fact
vulnerable rather than assuming any ADRP instruction at such an offset
may be vulnerable, but in my testing of the current patches, I only
get a handful of them anyway of which the majority are resolved by
patching ADRP->ADR.

Sequence 2 is described as follows in the docs:

"""
Sequence 2:
1) An ADRP instruction, which writes to a register Rn.
2) Another instruction which writes to Rn.
o This cannot be a branch or an ADRP.
o This cannot read Rn.
3) Another instruction.
o This cannot be a branch.
o This cannot write Rn.
o This may optionally read Rn.
4) A load or store instruction from the "Load/store register (unsigned
immediate)" encoding class, using Rn as the base address register.
"""

and ld.bfd does not check for it at all.



>> >> diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
>> >> index ea640f92fe5a..93b808056cb4 100644
>> >> --- a/arch/arm64/kernel/module-plts.c
>> >> +++ b/arch/arm64/kernel/module-plts.c
>> >> @@ -41,6 +41,46 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
>> >>       return (u64)&plt[i];
>> >>  }
>> >>
>> >> +#ifdef CONFIG_ARM64_ERRATUM_843419
>> >> +u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val)
>> >> +{
>> >> +     struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
>> >> +                                                       &mod->arch.init;
>> >> +     struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr;
>> >> +     int i = pltsec->plt_num_entries++;
>> >> +     u32 mov0, mov1, mov2, br;
>> >> +     int rd;
>> >> +
>> >> +     BUG_ON(pltsec->plt_num_entries > pltsec->plt_max_entries);
>> >
>> > I'd prefer just to fail loading the module, but I see we already have
>> > a BUG_ON for the existing PLT code. Oh well.
>> >
>>
>> I can fix that in a followup patch
>
> Thanks.
>
>> >> +
>> >> +     /* get the destination register of the ADRP instruction */
>> >> +     rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD,
>> >> +                                       le32_to_cpup((__le32 *)loc));
>> >> +
>> >> +     /* generate the veneer instructions */
>> >> +     mov0 = aarch64_insn_gen_movewide(rd, (u16)~val, 0,
>> >> +                                      AARCH64_INSN_VARIANT_64BIT,
>> >> +                                      AARCH64_INSN_MOVEWIDE_INVERSE);
>> >> +     mov1 = aarch64_insn_gen_movewide(rd, (u16)(val >> 16), 16,
>> >> +                                      AARCH64_INSN_VARIANT_64BIT,
>> >> +                                      AARCH64_INSN_MOVEWIDE_KEEP);
>> >> +     mov2 = aarch64_insn_gen_movewide(rd, (u16)(val >> 32), 32,
>> >> +                                      AARCH64_INSN_VARIANT_64BIT,
>> >> +                                      AARCH64_INSN_MOVEWIDE_KEEP);
>> >> +     br = aarch64_insn_gen_branch_imm((u64)&plt[i].br, (u64)loc + 4,
>> >> +                                      AARCH64_INSN_BRANCH_NOLINK);
>> >> +
>> >> +     plt[i] = (struct plt_entry){
>> >> +                     cpu_to_le32(mov0),
>> >> +                     cpu_to_le32(mov1),
>> >> +                     cpu_to_le32(mov2),
>> >> +                     cpu_to_le32(br)
>> >> +             };
>> >> +
>> >> +     return (u64)&plt[i];
>> >> +}
>> >> +#endif
>> >> +
>> >>  #define cmp_3way(a,b)        ((a) < (b) ? -1 : (a) > (b))
>> >>
>> >>  static int cmp_rela(const void *a, const void *b)
>> >> @@ -68,16 +108,21 @@ static bool duplicate_rel(const Elf64_Rela *rela, int num)
>> >>  }
>> >>
>> >>  static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
>> >> -                            Elf64_Word dstidx)
>> >> +                            Elf64_Word dstidx, Elf_Shdr *dstsec)
>> >>  {
>> >>       unsigned int ret = 0;
>> >>       Elf64_Sym *s;
>> >>       int i;
>> >>
>> >>       for (i = 0; i < num; i++) {
>> >> +             u64 min_align;
>> >> +
>> >>               switch (ELF64_R_TYPE(rela[i].r_info)) {
>> >>               case R_AARCH64_JUMP26:
>> >>               case R_AARCH64_CALL26:
>> >> +                     if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
>> >> +                             break;
>> >> +
>> >>                       /*
>> >>                        * We only have to consider branch targets that resolve
>> >>                        * to symbols that are defined in a different section.
>> >> @@ -109,6 +154,31 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
>> >>                       if (rela[i].r_addend != 0 || !duplicate_rel(rela, i))
>> >>                               ret++;
>> >>                       break;
>> >> +             case R_AARCH64_ADR_PREL_PG_HI21_NC:
>> >> +             case R_AARCH64_ADR_PREL_PG_HI21:
>> >> +                     if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419))
>> >> +                             break;
>> >> +
>> >> +                     /*
>> >> +                      * Determine the minimal safe alignment for this ADRP
>> >> +                      * instruction: the section alignment at which it is
>> >> +                      * guaranteed not to appear at a vulnerable offset.
>> >> +                      */
>> >> +                     min_align = 2 << ffz(rela[i].r_offset | 0x7);
>> >
>> > I'm struggling to decipher this, can you give me a hint please? Why 0x7? Is
>> > the "2" because of the two vulnerable offsets?
>> >
>>
>> What it basically does is preserve 0 bits in the address of the instruction.
>>
>> Given that ADRP instructions at addresses ending in 0xff8 or 0xffc
>> (modulo 4 KiB) may be vulnerable, if any of bits [11:3] is zero, we can
>> increase the alignment of this section to ensure that bit remains zero,
>> guaranteeing that the resulting address won't end in 0xff8 or 0xffc.
>>
>> Bits 0 .. 2 don't count, hence the 0x7. If bit 3 is zero, we can align
>> to '1 << (3 + 1)' == '2 << 3', and bit 3 will remain zero.
>
> Please put this in a comment ;)
>

OK
Will Deacon March 5, 2018, 5:42 p.m. UTC | #10
On Mon, Mar 05, 2018 at 05:41:17PM +0000, Ard Biesheuvel wrote:
> On 5 March 2018 at 17:34, Will Deacon <will.deacon@arm.com> wrote:
> > On Mon, Mar 05, 2018 at 05:26:35PM +0000, Ard Biesheuvel wrote:
> >> On 5 March 2018 at 17:18, Will Deacon <will.deacon@arm.com> wrote:
> >> > On Wed, Feb 14, 2018 at 11:36:44AM +0000, Ard Biesheuvel wrote:
> >> >> Working around Cortex-A53 erratum #843419 involves special handling of
> >> >> ADRP instructions that end up in the last two instruction slots of a
> >> >> 4k page, or whose output register gets overwritten without having been
> >> >> read. (Note that the latter instruction sequence is never emitted by
> >> >> a properly functioning compiler)
> >> >
> >> > Does the workaround currently implemented in the linker also make this
> >> > same assumption? If not, I'm a little wary that we're making an assumption
> >> > about compiler behaviour with no way to detect whether it's been violated or
> >> > not.
> >> >
> >>
> >> I can check, but I don't see how a compiler would ever choose 'adrp'
> >> when it emits what amounts to a nop instruction.
> >
> > Agreed, but it wouldn't be the first time we've seen compilers do weird
> > things.
> >
> 
> I just checked ld.bfd: it only checks for sequence 1, which is the one
> that affects ADRP instructions at offsets 0xff8/0xffc modulo 4 KB.
> It actually checks more elaborately whether the sequence is in fact
> vulnerable rather than assuming any ADRP instruction at such an offset
> may be vulnerable, but in my testing of the current patches, I only
> get a handful of them anyway of which the majority are resolved by
> patching ADRP->ADR.
> 
> Sequence 2 is described as follows in the docs:
> 
> """
> Sequence 2:
> 1) An ADRP instruction, which writes to a register Rn.
> 2) Another instruction which writes to Rn.
> o This cannot be a branch or an ADRP.
> o This cannot read Rn.
> 3) Another instruction.
> o This cannot be a branch.
> o This cannot write Rn.
> o This may optionally read Rn.
> 4) A load or store instruction from the "Load/store register (unsigned
> immediate)" encoding class, using Rn as the base address register.
> """
> 
> and ld.bfd does not check for it at all.

In which case, the code you're proposing for modules is just as good as
what we're doing for the kernel text itself. Might be worth mentioning that
somewhere, but I think it's fine.

Thanks for looking,

Will

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index ae7d3d4c0bbe..1b11285cba36 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -455,12 +455,12 @@  config ARM64_ERRATUM_845719
 config ARM64_ERRATUM_843419
 	bool "Cortex-A53: 843419: A load or store might access an incorrect address"
 	default y
-	select ARM64_MODULE_CMODEL_LARGE if MODULES
+	select ARM64_MODULE_PLTS if MODULES
 	help
 	  This option links the kernel with '--fix-cortex-a53-843419' and
-	  builds modules using the large memory model in order to avoid the use
-	  of the ADRP instruction, which can cause a subsequent memory access
-	  to use an incorrect address on Cortex-A53 parts up to r0p4.
+	  enables PLT support to replace certain ADRP instructions, which can
+	  cause subsequent memory accesses to use an incorrect address on
+	  Cortex-A53 parts up to r0p4.
 
 	  If unsure, say Y.
 
@@ -1104,9 +1104,6 @@  config ARM64_SVE
 
 	  To enable use of this extension on CPUs that implement it, say Y.
 
-config ARM64_MODULE_CMODEL_LARGE
-	bool
-
 config ARM64_MODULE_PLTS
 	bool
 	select HAVE_MOD_ARCH_SPECIFIC
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b481b4a7c011..811ad08c531d 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -51,7 +51,6 @@  endif
 
 KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr) $(brokengasinst)
 KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
-KBUILD_CFLAGS	+= $(call cc-option, -mpc-relative-literal-loads)
 KBUILD_AFLAGS	+= $(lseinstr) $(brokengasinst)
 
 KBUILD_CFLAGS	+= $(call cc-option,-mabi=lp64)
@@ -77,10 +76,6 @@  endif
 
 CHECKFLAGS	+= -D__aarch64__ -m64
 
-ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y)
-KBUILD_CFLAGS_MODULE	+= -mcmodel=large
-endif
-
 ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
 KBUILD_LDFLAGS_MODULE	+= -T $(srctree)/arch/arm64/kernel/module.lds
 endif
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 4f766178fa6f..b6dbbe3123a9 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -39,6 +39,8 @@  struct mod_arch_specific {
 u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
 			  Elf64_Sym *sym);
 
+u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val);
+
 #ifdef CONFIG_RANDOMIZE_BASE
 extern u64 module_alloc_base;
 #else
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index ea640f92fe5a..93b808056cb4 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -41,6 +41,46 @@  u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
 	return (u64)&plt[i];
 }
 
+#ifdef CONFIG_ARM64_ERRATUM_843419
+u64 module_emit_adrp_veneer(struct module *mod, void *loc, u64 val)
+{
+	struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
+							  &mod->arch.init;
+	struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr;
+	int i = pltsec->plt_num_entries++;
+	u32 mov0, mov1, mov2, br;
+	int rd;
+
+	BUG_ON(pltsec->plt_num_entries > pltsec->plt_max_entries);
+
+	/* get the destination register of the ADRP instruction */
+	rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD,
+					  le32_to_cpup((__le32 *)loc));
+
+	/* generate the veneer instructions */
+	mov0 = aarch64_insn_gen_movewide(rd, (u16)~val, 0,
+					 AARCH64_INSN_VARIANT_64BIT,
+					 AARCH64_INSN_MOVEWIDE_INVERSE);
+	mov1 = aarch64_insn_gen_movewide(rd, (u16)(val >> 16), 16,
+					 AARCH64_INSN_VARIANT_64BIT,
+					 AARCH64_INSN_MOVEWIDE_KEEP);
+	mov2 = aarch64_insn_gen_movewide(rd, (u16)(val >> 32), 32,
+					 AARCH64_INSN_VARIANT_64BIT,
+					 AARCH64_INSN_MOVEWIDE_KEEP);
+	br = aarch64_insn_gen_branch_imm((u64)&plt[i].br, (u64)loc + 4,
+					 AARCH64_INSN_BRANCH_NOLINK);
+
+	plt[i] = (struct plt_entry){
+			cpu_to_le32(mov0),
+			cpu_to_le32(mov1),
+			cpu_to_le32(mov2),
+			cpu_to_le32(br)
+		};
+
+	return (u64)&plt[i];
+}
+#endif
+
 #define cmp_3way(a,b)	((a) < (b) ? -1 : (a) > (b))
 
 static int cmp_rela(const void *a, const void *b)
@@ -68,16 +108,21 @@  static bool duplicate_rel(const Elf64_Rela *rela, int num)
 }
 
 static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
-			       Elf64_Word dstidx)
+			       Elf64_Word dstidx, Elf_Shdr *dstsec)
 {
 	unsigned int ret = 0;
 	Elf64_Sym *s;
 	int i;
 
 	for (i = 0; i < num; i++) {
+		u64 min_align;
+
 		switch (ELF64_R_TYPE(rela[i].r_info)) {
 		case R_AARCH64_JUMP26:
 		case R_AARCH64_CALL26:
+			if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+				break;
+
 			/*
 			 * We only have to consider branch targets that resolve
 			 * to symbols that are defined in a different section.
@@ -109,6 +154,31 @@  static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
 			if (rela[i].r_addend != 0 || !duplicate_rel(rela, i))
 				ret++;
 			break;
+		case R_AARCH64_ADR_PREL_PG_HI21_NC:
+		case R_AARCH64_ADR_PREL_PG_HI21:
+			if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419))
+				break;
+
+			/*
+			 * Determine the minimal safe alignment for this ADRP
+			 * instruction: the section alignment at which it is
+			 * guaranteed not to appear at a vulnerable offset.
+			 */
+			min_align = 2 << ffz(rela[i].r_offset | 0x7);
+
+			/*
+			 * Allocate veneer space for each ADRP that may appear
+			 * at a vulnerable offset nonetheless. At relocation
+			 * time, some of these will remain unused since some
+			 * ADRP instructions can be patched to ADR instructions
+			 * instead.
+			 */
+			if (min_align > SZ_4K)
+				ret++;
+			else
+				dstsec->sh_addralign = max(dstsec->sh_addralign,
+							   min_align);
+			break;
 		}
 	}
 	return ret;
@@ -166,10 +236,10 @@  int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 
 		if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0)
 			core_plts += count_plts(syms, rels, numrels,
-						sechdrs[i].sh_info);
+						sechdrs[i].sh_info, dstsec);
 		else
 			init_plts += count_plts(syms, rels, numrels,
-						sechdrs[i].sh_info);
+						sechdrs[i].sh_info, dstsec);
 	}
 
 	mod->arch.core.plt->sh_type = SHT_NOBITS;
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 10c6ab9534e8..d1bbdffd1e6c 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -198,6 +198,32 @@  static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
 	return 0;
 }
 
+static bool reloc_adrp_erratum_843419(struct module *mod, __le32 *place,
+				      u64 val)
+{
+	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419))
+		return false;
+
+	/* only ADRP instructions at the end of a 4k page are affected */
+	if (((u64)place & 0xfff) < 0xff8)
+		return false;
+
+	/* patch ADRP to ADR if it is in range */
+	if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21,
+			    AARCH64_INSN_IMM_ADR)) {
+		((u8 *)place)[3] &= 0x7f; /* clear opcode bit 31 */
+	} else {
+		u32 insn;
+
+		/* out of range for ADR -> emit a veneer */
+		val = module_emit_adrp_veneer(mod, place, val & ~0xfff);
+		insn = aarch64_insn_gen_branch_imm((u64)place, val,
+						   AARCH64_INSN_BRANCH_NOLINK);
+		*place = cpu_to_le32(insn);
+	}
+	return true;
+}
+
 int apply_relocate_add(Elf64_Shdr *sechdrs,
 		       const char *strtab,
 		       unsigned int symindex,
@@ -337,14 +363,16 @@  int apply_relocate_add(Elf64_Shdr *sechdrs,
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
 					     AARCH64_INSN_IMM_ADR);
 			break;
-#ifndef CONFIG_ARM64_ERRATUM_843419
 		case R_AARCH64_ADR_PREL_PG_HI21_NC:
 			overflow_check = false;
 		case R_AARCH64_ADR_PREL_PG_HI21:
+			ovf = 0;
+			if (reloc_adrp_erratum_843419(me, loc, val))
+				break;
+
 			ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21,
 					     AARCH64_INSN_IMM_ADR);
 			break;
-#endif
 		case R_AARCH64_ADD_ABS_LO12_NC:
 		case R_AARCH64_LDST8_ABS_LO12_NC:
 			overflow_check = false;
diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c
index c124752a8bd3..a70489c584c7 100644
--- a/arch/arm64/kernel/reloc_test_core.c
+++ b/arch/arm64/kernel/reloc_test_core.c
@@ -28,6 +28,7 @@  asmlinkage u64 absolute_data16(void);
 asmlinkage u64 signed_movw(void);
 asmlinkage u64 unsigned_movw(void);
 asmlinkage u64 relative_adrp(void);
+asmlinkage u64 relative_adrp_far(void);
 asmlinkage u64 relative_adr(void);
 asmlinkage u64 relative_data64(void);
 asmlinkage u64 relative_data32(void);
@@ -43,9 +44,8 @@  static struct {
 	{ "R_AARCH64_ABS16",		absolute_data16, UL(SYM16_ABS_VAL) },
 	{ "R_AARCH64_MOVW_SABS_Gn",	signed_movw, UL(SYM64_ABS_VAL) },
 	{ "R_AARCH64_MOVW_UABS_Gn",	unsigned_movw, UL(SYM64_ABS_VAL) },
-#ifndef CONFIG_ARM64_ERRATUM_843419
 	{ "R_AARCH64_ADR_PREL_PG_HI21",	relative_adrp, (u64)&sym64_rel },
-#endif
+	{ "R_AARCH64_ADR_PREL_PG_HI21",	relative_adrp_far, (u64)&printk },
 	{ "R_AARCH64_ADR_PREL_LO21",	relative_adr, (u64)&sym64_rel },
 	{ "R_AARCH64_PREL64",		relative_data64, (u64)&sym64_rel },
 	{ "R_AARCH64_PREL32",		relative_data32, (u64)&sym64_rel },
diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S
index e1edcefeb02d..f333b4b7880d 100644
--- a/arch/arm64/kernel/reloc_test_syms.S
+++ b/arch/arm64/kernel/reloc_test_syms.S
@@ -43,15 +43,21 @@  ENTRY(unsigned_movw)
 	ret
 ENDPROC(unsigned_movw)
 
-#ifndef CONFIG_ARM64_ERRATUM_843419
-
+	.align	12
+	.space	0xff8
 ENTRY(relative_adrp)
 	adrp	x0, sym64_rel
 	add	x0, x0, #:lo12:sym64_rel
 	ret
 ENDPROC(relative_adrp)
 
-#endif
+	.align	12
+	.space	0xffc
+ENTRY(relative_adrp_far)
+	adrp	x0, printk
+	add	x0, x0, #:lo12:printk
+	ret
+ENDPROC(relative_adrp_far)
 
 ENTRY(relative_adr)
 	adr	x0, sym64_rel