| Message ID | 20221129161418.1968319-4-ardb@kernel.org (mailing list archive) |
|---|---|
| State | New, archived |
| Series | arm64: Permit EFI boot with MMU and caches on |
On Tue, 29 Nov 2022 at 17:14, Ard Biesheuvel <ardb@kernel.org> wrote:
>
> Prepare for being able to deal with primary entry with the MMU and
> caches enabled, by recording whether or not we entered with the MMU on
> in register x19 and in a global variable. (Note that writing to this
> variable does not require cache invalidation, nor is it required for
> storing the bootargs in that case, so omit the cache maintenance).
>
> Since boot with the MMU enabled is not permitted by the bare metal boot
> protocol, ensure that a diagnostic is emitted and a taint bit set if
> the MMU was found to be enabled on a non-EFI boot. We will make an
> exception for EFI boot later, which has strict requirements for the
> mapping of system memory, permitting us to relax the boot protocol and
> hand over from the EFI stub to the core kernel with MMU and caches left
> enabled. To reduce the likelihood that bare metal boot will violate this
> requirement, introduce a separate entry point for EFI boot, which is
> different from the one that is invoked by branching to offset #0 in the
> image.
>

This is actually much more fiddly than I thought:
- the existing arm64 stub can link to any symbol, so there it does not
  make any difference
- the zboot stub needs to find the right entry point into the image,
  as the one pointed to by the header is no longer correct.

I confused myself into thinking that we can just grab it from the
PE/COFF header, but that is the entry point *into* the EFI stub, not
the one that the stub calls on its way out.

Grabbing stuff from the PE/COFF header is needed anyway, and can be
done in a generic manner. Grabbing a special EFI-only entry point means
mangling the ELF to find the relative offset from the start of the
binary representation of the image, and passing that information along
with the compressed blob to the decompressor.

So the options are:
- go back to the old approach (and add a panic() in the right place)
- add a branch to the EFI entry point at a known offset (in the image
  header?)
- other ideas?

> While at it, add 'pre_disable_mmu_workaround' macro invocations to
> init_kernel_el, as its manipulation of SCTLR_ELx may amount to disabling
> of the MMU after subsequent patches.
>
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
>  arch/arm64/kernel/head.S       | 28 ++++++++++++++++++--
>  arch/arm64/kernel/image-vars.h |  2 +-
>  arch/arm64/kernel/setup.c      |  9 +++++--
>  3 files changed, 34 insertions(+), 5 deletions(-)
>
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index bec97aad092c2b43..c3b97f4ae6d769f7 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -77,6 +77,7 @@
>   * primary lowlevel boot path:
>   *
>   * Register       Scope                                Purpose
> + * x19            primary_entry() .. start_kernel()    whether we entered with the MMU on
>   * x20            primary_entry() .. __primary_switch() CPU boot mode
>   * x21            primary_entry() .. start_kernel()    FDT pointer passed at boot in x0
>   * x22            create_idmap() .. start_kernel()     ID map VA of the DT blob
> @@ -85,8 +86,13 @@
>   * x25            primary_entry() .. start_kernel()    supported VA size
>   * x28            create_idmap()                       callee preserved temp register
>   */
> -SYM_CODE_START(primary_entry)
> -	bl	preserve_boot_args
> +SYM_CODE_START(efi_primary_entry)
> +	bl	record_mmu_state
> +	b	0f
> +
> +SYM_INNER_LABEL(primary_entry, SYM_L_LOCAL)
> +	mov	x19, xzr			// MMU must be off on bare metal boot
> +0:	bl	preserve_boot_args
>  	bl	init_kernel_el			// w0=cpu_boot_mode
>  	mov	x20, x0
>  	bl	create_idmap
> @@ -109,6 +115,18 @@ SYM_CODE_START(primary_entry)
>  	b	__primary_switch
>  SYM_CODE_END(primary_entry)
>
> +SYM_CODE_START_LOCAL(record_mmu_state)
> +	mrs	x19, CurrentEL
> +	cmp	x19, #CurrentEL_EL2
> +	mrs	x19, sctlr_el1
> +	b.ne	0f
> +	mrs	x19, sctlr_el2
> +0:	tst	x19, #SCTLR_ELx_C		// Z := (C == 0)
> +	and	x19, x19, #SCTLR_ELx_M		// isolate M bit
> +	csel	x19, xzr, x19, eq		// clear x19 if Z
> +	ret
> +SYM_CODE_END(record_mmu_state)
> +
>  /*
>   * Preserve the arguments passed by the bootloader in x0 .. x3
>   */
> @@ -119,11 +137,14 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
>  	stp	x21, x1, [x0]			// x0 .. x3 at kernel entry
>  	stp	x2, x3, [x0, #16]
>
> +	cbnz	x19, 0f				// skip cache invalidation if MMU is on
>  	dmb	sy				// needed before dc ivac with
>  						// MMU off
>
>  	add	x1, x0, #0x20			// 4 x 8 bytes
>  	b	dcache_inval_poc		// tail call
> +0:	str_l	x19, mmu_enabled_at_boot, x0
> +	ret
>  SYM_CODE_END(preserve_boot_args)
>
>  SYM_FUNC_START_LOCAL(clear_page_tables)
> @@ -497,6 +518,7 @@ SYM_FUNC_START(init_kernel_el)
>
>  SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
>  	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF
> +	pre_disable_mmu_workaround
>  	msr	sctlr_el1, x0
>  	isb
>  	mov_q	x0, INIT_PSTATE_EL1
> @@ -529,11 +551,13 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
>  	cbz	x0, 1f
>
>  	/* Set a sane SCTLR_EL1, the VHE way */
> +	pre_disable_mmu_workaround
>  	msr_s	SYS_SCTLR_EL12, x1
>  	mov	x2, #BOOT_CPU_FLAG_E2H
>  	b	2f
>
>  1:
> +	pre_disable_mmu_workaround
>  	msr	sctlr_el1, x1
>  	mov	x2, xzr
>  2:
> diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
> index 8151412653de209c..b925094692983734 100644
> --- a/arch/arm64/kernel/image-vars.h
> +++ b/arch/arm64/kernel/image-vars.h
> @@ -11,7 +11,7 @@
>  #endif
>
>  PROVIDE(__efistub_kernel_size		= _edata - _text);
> -PROVIDE(__efistub_primary_entry_offset	= primary_entry - _text);
> +PROVIDE(__efistub_primary_entry_offset	= efi_primary_entry - _text);
>
>  /*
>   * The EFI stub has its own symbol namespace prefixed by __efistub_, to
> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
> index 12cfe9d0d3fac10d..0ac8605a8efe00a5 100644
> --- a/arch/arm64/kernel/setup.c
> +++ b/arch/arm64/kernel/setup.c
> @@ -58,6 +58,7 @@ static int num_standard_resources;
>  static struct resource *standard_resources;
>
>  phys_addr_t __fdt_pointer __initdata;
> +u64 mmu_enabled_at_boot __initdata;
>
>  /*
>   * Standard memory resources
> @@ -332,8 +333,12 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
>  	xen_early_init();
>  	efi_init();
>
> -	if (!efi_enabled(EFI_BOOT) && ((u64)_text % MIN_KIMG_ALIGN) != 0)
> -		pr_warn(FW_BUG "Kernel image misaligned at boot, please fix your bootloader!");
> +	if (!efi_enabled(EFI_BOOT)) {
> +		if ((u64)_text % MIN_KIMG_ALIGN)
> +			pr_warn(FW_BUG "Kernel image misaligned at boot, please fix your bootloader!");
> +		WARN_TAINT(mmu_enabled_at_boot, TAINT_FIRMWARE_WORKAROUND,
> +			   FW_BUG "Booted with MMU enabled!");
> +	}
>
>  	arm64_memblock_init();
>
> --
> 2.35.1
>
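For reference, the PE/COFF detail tripped over above is that the header's AddressOfEntryPoint field names the point at which the firmware enters the EFI stub, not the point the stub branches to when it hands over to the kernel proper. A minimal C sketch of reading that field from a flat PE32+ image follows; the helper name and the flat-image assumption are illustrative only, not code from this series:

```c
#include <stdint.h>
#include <string.h>

/*
 * Sketch (hypothetical helper): fetch AddressOfEntryPoint from a
 * well-formed PE32+ image mapped contiguously at `image`. The RVA
 * returned is the firmware's entry *into* the EFI stub -- it says
 * nothing about the entry point the stub calls on its way out.
 */
static uint32_t pe_entry_point_rva(const uint8_t *image)
{
	uint32_t pe_off, entry;

	memcpy(&pe_off, image + 0x3c, sizeof(pe_off));	/* e_lfanew */
	/*
	 * Skip the "PE\0\0" signature (4 bytes) and the COFF file
	 * header (20 bytes); AddressOfEntryPoint sits 16 bytes into
	 * the optional header.
	 */
	memcpy(&entry, image + pe_off + 4 + 20 + 16, sizeof(entry));
	return entry;
}
```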
On Thu, Dec 01, 2022 at 10:18:50AM +0100, Ard Biesheuvel wrote:
> On Tue, 29 Nov 2022 at 17:14, Ard Biesheuvel <ardb@kernel.org> wrote:
> >
> > Prepare for being able to deal with primary entry with the MMU and
> > caches enabled, by recording whether or not we entered with the MMU on
> > in register x19 and in a global variable. (Note that writing to this
> > variable does not require cache invalidation, nor is it required for
> > storing the bootargs in that case, so omit the cache maintenance).
> >
> > Since boot with the MMU enabled is not permitted by the bare metal boot
> > protocol, ensure that a diagnostic is emitted and a taint bit set if
> > the MMU was found to be enabled on a non-EFI boot. We will make an
> > exception for EFI boot later, which has strict requirements for the
> > mapping of system memory, permitting us to relax the boot protocol and
> > hand over from the EFI stub to the core kernel with MMU and caches left
> > enabled. To reduce the likelihood that bare metal boot will violate this
> > requirement, introduce a separate entry point for EFI boot, which is
> > different from the one that is invoked by branching to offset #0 in the
> > image.
> >
>
> This is actually much more fiddly than I thought:
> - the existing arm64 stub can link to any symbol, so there it does not
>   make any difference
> - the zboot stub needs to find the right entry point into the image,
>   as the one pointed to by the header is no longer correct.
>
> I confused myself into thinking that we can just grab it from the
> PE/COFF header, but that is the entry point *into* the EFI stub, not
> the one that the stub calls on its way out.
>
> Grabbing stuff from the PE/COFF header is needed anyway, and can be
> done in a generic manner. Grabbing a special EFI-only entry point means
> mangling the ELF to find the relative offset from the start of the
> binary representation of the image, and passing that information along
> with the compressed blob to the decompressor.
>
> So the options are:
> - go back to the old approach (and add a panic() in the right place)

I'm ok with that, but I'd like an Ack from Mark as well.

Cheers,

Will
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index bec97aad092c2b43..c3b97f4ae6d769f7 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -77,6 +77,7 @@
  * primary lowlevel boot path:
  *
  * Register       Scope                                Purpose
+ * x19            primary_entry() .. start_kernel()    whether we entered with the MMU on
  * x20            primary_entry() .. __primary_switch() CPU boot mode
  * x21            primary_entry() .. start_kernel()    FDT pointer passed at boot in x0
  * x22            create_idmap() .. start_kernel()     ID map VA of the DT blob
@@ -85,8 +86,13 @@
  * x25            primary_entry() .. start_kernel()    supported VA size
  * x28            create_idmap()                       callee preserved temp register
  */
-SYM_CODE_START(primary_entry)
-	bl	preserve_boot_args
+SYM_CODE_START(efi_primary_entry)
+	bl	record_mmu_state
+	b	0f
+
+SYM_INNER_LABEL(primary_entry, SYM_L_LOCAL)
+	mov	x19, xzr			// MMU must be off on bare metal boot
+0:	bl	preserve_boot_args
 	bl	init_kernel_el			// w0=cpu_boot_mode
 	mov	x20, x0
 	bl	create_idmap
@@ -109,6 +115,18 @@ SYM_CODE_START(primary_entry)
 	b	__primary_switch
 SYM_CODE_END(primary_entry)

+SYM_CODE_START_LOCAL(record_mmu_state)
+	mrs	x19, CurrentEL
+	cmp	x19, #CurrentEL_EL2
+	mrs	x19, sctlr_el1
+	b.ne	0f
+	mrs	x19, sctlr_el2
+0:	tst	x19, #SCTLR_ELx_C		// Z := (C == 0)
+	and	x19, x19, #SCTLR_ELx_M		// isolate M bit
+	csel	x19, xzr, x19, eq		// clear x19 if Z
+	ret
+SYM_CODE_END(record_mmu_state)
+
 /*
  * Preserve the arguments passed by the bootloader in x0 .. x3
  */
@@ -119,11 +137,14 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
 	stp	x21, x1, [x0]			// x0 .. x3 at kernel entry
 	stp	x2, x3, [x0, #16]

+	cbnz	x19, 0f				// skip cache invalidation if MMU is on
 	dmb	sy				// needed before dc ivac with
 						// MMU off

 	add	x1, x0, #0x20			// 4 x 8 bytes
 	b	dcache_inval_poc		// tail call
+0:	str_l	x19, mmu_enabled_at_boot, x0
+	ret
 SYM_CODE_END(preserve_boot_args)

 SYM_FUNC_START_LOCAL(clear_page_tables)
@@ -497,6 +518,7 @@ SYM_FUNC_START(init_kernel_el)

 SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
 	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF
+	pre_disable_mmu_workaround
 	msr	sctlr_el1, x0
 	isb
 	mov_q	x0, INIT_PSTATE_EL1
@@ -529,11 +551,13 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
 	cbz	x0, 1f

 	/* Set a sane SCTLR_EL1, the VHE way */
+	pre_disable_mmu_workaround
 	msr_s	SYS_SCTLR_EL12, x1
 	mov	x2, #BOOT_CPU_FLAG_E2H
 	b	2f

 1:
+	pre_disable_mmu_workaround
 	msr	sctlr_el1, x1
 	mov	x2, xzr
 2:
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 8151412653de209c..b925094692983734 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -11,7 +11,7 @@
 #endif

 PROVIDE(__efistub_kernel_size		= _edata - _text);
-PROVIDE(__efistub_primary_entry_offset	= primary_entry - _text);
+PROVIDE(__efistub_primary_entry_offset	= efi_primary_entry - _text);

 /*
  * The EFI stub has its own symbol namespace prefixed by __efistub_, to
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 12cfe9d0d3fac10d..0ac8605a8efe00a5 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -58,6 +58,7 @@ static int num_standard_resources;
 static struct resource *standard_resources;

 phys_addr_t __fdt_pointer __initdata;
+u64 mmu_enabled_at_boot __initdata;

 /*
  * Standard memory resources
@@ -332,8 +333,12 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
 	xen_early_init();
 	efi_init();

-	if (!efi_enabled(EFI_BOOT) && ((u64)_text % MIN_KIMG_ALIGN) != 0)
-		pr_warn(FW_BUG "Kernel image misaligned at boot, please fix your bootloader!");
+	if (!efi_enabled(EFI_BOOT)) {
+		if ((u64)_text % MIN_KIMG_ALIGN)
+			pr_warn(FW_BUG "Kernel image misaligned at boot, please fix your bootloader!");
+		WARN_TAINT(mmu_enabled_at_boot, TAINT_FIRMWARE_WORKAROUND,
+			   FW_BUG "Booted with MMU enabled!");
+	}

 	arm64_memblock_init();
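As an aside, the record_mmu_state routine in the hunk above leaves x19 nonzero only when both the C (cacheability) and M (MMU enable) bits of the relevant SCTLR are set; a boot with the MMU on but caches off is deliberately treated as MMU-off. A C rendering of that bit logic, as a sketch only (the function is hypothetical; the mask values follow the architectural bit positions):

```c
#include <stdint.h>

#define SCTLR_ELx_M	(UINT64_C(1) << 0)	/* MMU enable */
#define SCTLR_ELx_C	(UINT64_C(1) << 2)	/* data cache enable */

/*
 * Sketch of record_mmu_state's logic (not kernel code): nonzero iff
 * both caches and MMU were enabled at entry.
 */
static uint64_t record_mmu_state_c(uint64_t sctlr)
{
	if (!(sctlr & SCTLR_ELx_C))	/* tst + csel: clear result if C == 0 */
		return 0;
	return sctlr & SCTLR_ELx_M;	/* and: isolate the M bit */
}
```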
Prepare for being able to deal with primary entry with the MMU and
caches enabled, by recording whether or not we entered with the MMU on
in register x19 and in a global variable. (Note that writing to this
variable does not require cache invalidation, nor is it required for
storing the bootargs in that case, so omit the cache maintenance).

Since boot with the MMU enabled is not permitted by the bare metal boot
protocol, ensure that a diagnostic is emitted and a taint bit set if
the MMU was found to be enabled on a non-EFI boot. We will make an
exception for EFI boot later, which has strict requirements for the
mapping of system memory, permitting us to relax the boot protocol and
hand over from the EFI stub to the core kernel with MMU and caches left
enabled. To reduce the likelihood that bare metal boot will violate this
requirement, introduce a separate entry point for EFI boot, which is
different from the one that is invoked by branching to offset #0 in the
image.

While at it, add 'pre_disable_mmu_workaround' macro invocations to
init_kernel_el, as its manipulation of SCTLR_ELx may amount to disabling
of the MMU after subsequent patches.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/kernel/head.S       | 28 ++++++++++++++++++--
 arch/arm64/kernel/image-vars.h |  2 +-
 arch/arm64/kernel/setup.c      |  9 +++++--
 3 files changed, 34 insertions(+), 5 deletions(-)
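The remark above about omitting cache maintenance comes down to one branch in preserve_boot_args: with the MMU and caches on, the stores of x0..x3 are coherent and only the global flag needs recording; with them off, the freshly written lines must be invalidated to the point of coherency so the MMU-on kernel later reads what was written. A C sketch of that decision under those assumptions (the names are stand-ins, not kernel API; the invalidation stub only marks where the dmb + dc ivac sequence would go):

```c
#include <stdbool.h>
#include <stdint.h>

static uint64_t boot_args[4];		/* mirrors the kernel's boot_args[] */
static uint64_t mmu_enabled_at_boot;	/* mirrors the kernel variable */

/* stand-in for the kernel's dcache_inval_poc assembly routine */
static void dcache_inval_poc_stub(void *start, void *end)
{
	/* would issue dmb sy, then dc ivac over [start, end) */
	(void)start;
	(void)end;
}

/* Sketch of preserve_boot_args' decision (not kernel code). */
static void preserve_boot_args_c(bool mmu_on, uint64_t x0, uint64_t x1,
				 uint64_t x2, uint64_t x3)
{
	boot_args[0] = x0;
	boot_args[1] = x1;
	boot_args[2] = x2;
	boot_args[3] = x3;

	if (mmu_on) {
		/* coherent stores: record the flag, skip maintenance */
		mmu_enabled_at_boot = 1;
		return;
	}
	/* MMU off: invalidate the lines to the point of coherency */
	dcache_inval_poc_stub(boot_args, boot_args + 4);
}
```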