[v6,3/6] arm64: head: record the MMU state at primary entry

Message ID: 20221129161418.1968319-4-ardb@kernel.org
State: New, archived
Series: arm64: Permit EFI boot with MMU and caches on

Commit Message

Ard Biesheuvel Nov. 29, 2022, 4:14 p.m. UTC
Prepare for being able to deal with primary entry with the MMU and
caches enabled, by recording whether or not we entered with the MMU on,
both in register x19 and in a global variable. (Note that neither
writing to this variable nor storing the bootargs requires cache
invalidation in that case, so the cache maintenance is omitted.)

Since boot with the MMU enabled is not permitted by the bare metal boot
protocol, ensure that a diagnostic is emitted and a taint bit set if
the MMU was found to be enabled on a non-EFI boot. We will make an
exception for EFI boot later, which has strict requirements for the
mapping of system memory, permitting us to relax the boot protocol and
hand over from the EFI stub to the core kernel with MMU and caches left
enabled. To reduce the likelihood that bare metal boot will violate this
requirement, introduce a separate entry point for EFI boot, which is
different from the one that is invoked by branching to offset #0 in the
image.

While at it, add 'pre_disable_mmu_workaround' macro invocations to
init_kernel_el, as its manipulation of SCTLR_ELx may amount to
disabling the MMU once subsequent patches are applied.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/kernel/head.S       | 28 ++++++++++++++++++--
 arch/arm64/kernel/image-vars.h |  2 +-
 arch/arm64/kernel/setup.c      |  9 +++++--
 3 files changed, 34 insertions(+), 5 deletions(-)
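
[Editor's note: for illustration, the value that the new
record_mmu_state helper (in the patch below) leaves in x19 amounts to
the following, expressed as a C sketch rather than kernel code; the
bit masks follow the architectural layout of SCTLR_ELx:]

#define SCTLR_ELx_M	(1UL << 0)	/* MMU enable */
#define SCTLR_ELx_C	(1UL << 2)	/* data cacheability */

/* Record the MMU as enabled only if the caches are enabled too:
 * isolate the M bit, but clear the result if the C bit is unset. */
static unsigned long boot_mmu_state(unsigned long sctlr)
{
	unsigned long m = sctlr & SCTLR_ELx_M;

	return (sctlr & SCTLR_ELx_C) ? m : 0;
}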

Comments

Ard Biesheuvel Dec. 1, 2022, 9:18 a.m. UTC | #1
On Tue, 29 Nov 2022 at 17:14, Ard Biesheuvel <ardb@kernel.org> wrote:
>
> Prepare for being able to deal with primary entry with the MMU and
> caches enabled, by recording whether or not we entered with the MMU on,
> both in register x19 and in a global variable. (Note that neither
> writing to this variable nor storing the bootargs requires cache
> invalidation in that case, so the cache maintenance is omitted.)
>
> Since boot with the MMU enabled is not permitted by the bare metal boot
> protocol, ensure that a diagnostic is emitted and a taint bit set if
> the MMU was found to be enabled on a non-EFI boot. We will make an
> exception for EFI boot later, which has strict requirements for the
> mapping of system memory, permitting us to relax the boot protocol and
> hand over from the EFI stub to the core kernel with MMU and caches left
> enabled. To reduce the likelihood that bare metal boot will violate this
> requirement, introduce a separate entry point for EFI boot, which is
> different from the one that is invoked by branching to offset #0 in the
> image.
>

This is actually much more fiddly than I thought:
- the existing arm64 stub can link to any symbol, so there it does not
make any difference
- the zboot stub needs to find the right entry point into the image,
as the one pointed to by the header is no longer correct.

I confused myself into thinking that we can just grab it from the
PE/COFF header, but that is the entry point *into* the EFI stub, not
the one that the stub calls on its way out.

Grabbing stuff from the PE/COFF header is needed anyway, and can be
done in a generic manner.
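
[Editor's note: a minimal sketch of that generic approach, with field
offsets per the PE/COFF spec; as noted above, this yields the entry
point into the stub, not the EFI-only kernel entry point:]

#include <stdint.h>

/* Sketch: return the AddressOfEntryPoint RVA of a PE/COFF image.
 * Assumes a little-endian host that tolerates unaligned loads. */
static uint32_t pe_entry_point_rva(const uint8_t *image)
{
	uint32_t pe_off = *(const uint32_t *)(image + 0x3c); /* e_lfanew */

	/* The 4-byte "PE\0\0" signature and the 20-byte COFF file header
	 * precede the optional header; AddressOfEntryPoint sits at
	 * offset 16 into the optional header. */
	return *(const uint32_t *)(image + pe_off + 4 + 20 + 16);
}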
Grabbing a special EFI-only entry point means mangling the ELF to find
the relative offset from the start of the binary representation of the
image, and passing that information along with the compressed blob to
the decompressor.

So the options are:
- go back to the old approach (and add a panic() in the right place)
- add a branch to the EFI entrypoint at a known offset (in the image header?)
- other ideas?

Will Deacon Dec. 1, 2022, 5:51 p.m. UTC | #2
On Thu, Dec 01, 2022 at 10:18:50AM +0100, Ard Biesheuvel wrote:
> [...]
> So the options are:
> - go back to the old approach (and add a panic() in the right place)

I'm ok with that, but I'd like an Ack from Mark as well.

Cheers,

Will

Patch

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index bec97aad092c2b43..c3b97f4ae6d769f7 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -77,6 +77,7 @@ 
 	 * primary lowlevel boot path:
 	 *
 	 *  Register   Scope                      Purpose
+	 *  x19        primary_entry() .. start_kernel()        whether we entered with the MMU on
 	 *  x20        primary_entry() .. __primary_switch()    CPU boot mode
 	 *  x21        primary_entry() .. start_kernel()        FDT pointer passed at boot in x0
 	 *  x22        create_idmap() .. start_kernel()         ID map VA of the DT blob
@@ -85,8 +86,13 @@ 
 	 *  x25        primary_entry() .. start_kernel()        supported VA size
 	 *  x28        create_idmap()                           callee preserved temp register
 	 */
-SYM_CODE_START(primary_entry)
-	bl	preserve_boot_args
+SYM_CODE_START(efi_primary_entry)
+	bl	record_mmu_state
+	b	0f
+
+SYM_INNER_LABEL(primary_entry, SYM_L_LOCAL)
+	mov	x19, xzr			// MMU must be off on bare metal boot
+0:	bl	preserve_boot_args
 	bl	init_kernel_el			// w0=cpu_boot_mode
 	mov	x20, x0
 	bl	create_idmap
@@ -109,6 +115,18 @@  SYM_CODE_START(primary_entry)
 	b	__primary_switch
 SYM_CODE_END(primary_entry)
 
+SYM_CODE_START_LOCAL(record_mmu_state)
+	mrs	x19, CurrentEL
+	cmp	x19, #CurrentEL_EL2
+	mrs	x19, sctlr_el1
+	b.ne	0f
+	mrs	x19, sctlr_el2
+0:	tst	x19, #SCTLR_ELx_C		// Z := (C == 0)
+	and	x19, x19, #SCTLR_ELx_M		// isolate M bit
+	csel	x19, xzr, x19, eq		// clear x19 if Z
+	ret
+SYM_CODE_END(record_mmu_state)
+
 /*
  * Preserve the arguments passed by the bootloader in x0 .. x3
  */
@@ -119,11 +137,14 @@  SYM_CODE_START_LOCAL(preserve_boot_args)
 	stp	x21, x1, [x0]			// x0 .. x3 at kernel entry
 	stp	x2, x3, [x0, #16]
 
+	cbnz	x19, 0f				// skip cache invalidation if MMU is on
 	dmb	sy				// needed before dc ivac with
 						// MMU off
 
 	add	x1, x0, #0x20			// 4 x 8 bytes
 	b	dcache_inval_poc		// tail call
+0:	str_l   x19, mmu_enabled_at_boot, x0
+	ret
 SYM_CODE_END(preserve_boot_args)
 
 SYM_FUNC_START_LOCAL(clear_page_tables)
@@ -497,6 +518,7 @@  SYM_FUNC_START(init_kernel_el)
 
 SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
 	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF
+	pre_disable_mmu_workaround
 	msr	sctlr_el1, x0
 	isb
 	mov_q	x0, INIT_PSTATE_EL1
@@ -529,11 +551,13 @@  SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
 	cbz	x0, 1f
 
 	/* Set a sane SCTLR_EL1, the VHE way */
+	pre_disable_mmu_workaround
 	msr_s	SYS_SCTLR_EL12, x1
 	mov	x2, #BOOT_CPU_FLAG_E2H
 	b	2f
 
 1:
+	pre_disable_mmu_workaround
 	msr	sctlr_el1, x1
 	mov	x2, xzr
 2:
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 8151412653de209c..b925094692983734 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -11,7 +11,7 @@ 
 #endif
 
 PROVIDE(__efistub_kernel_size		= _edata - _text);
-PROVIDE(__efistub_primary_entry_offset	= primary_entry - _text);
+PROVIDE(__efistub_primary_entry_offset	= efi_primary_entry - _text);
 
 /*
  * The EFI stub has its own symbol namespace prefixed by __efistub_, to
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 12cfe9d0d3fac10d..0ac8605a8efe00a5 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -58,6 +58,7 @@  static int num_standard_resources;
 static struct resource *standard_resources;
 
 phys_addr_t __fdt_pointer __initdata;
+u64 mmu_enabled_at_boot __initdata;
 
 /*
  * Standard memory resources
@@ -332,8 +333,12 @@  void __init __no_sanitize_address setup_arch(char **cmdline_p)
 	xen_early_init();
 	efi_init();
 
-	if (!efi_enabled(EFI_BOOT) && ((u64)_text % MIN_KIMG_ALIGN) != 0)
-	     pr_warn(FW_BUG "Kernel image misaligned at boot, please fix your bootloader!");
+	if (!efi_enabled(EFI_BOOT)) {
+		if ((u64)_text % MIN_KIMG_ALIGN)
+			pr_warn(FW_BUG "Kernel image misaligned at boot, please fix your bootloader!");
+		WARN_TAINT(mmu_enabled_at_boot, TAINT_FIRMWARE_WORKAROUND,
+			   FW_BUG "Booted with MMU enabled!");
+	}
 
 	arm64_memblock_init();