diff mbox series

[v4,17/26] arm64: head: populate kernel page tables with MMU and caches on

Message ID 20220613144550.3760857-18-ardb@kernel.org (mailing list archive)
State New, archived
Headers show
Series arm64: refactor boot flow and add support for WXN | expand

Commit Message

Ard Biesheuvel June 13, 2022, 2:45 p.m. UTC
Now that we can access the entire kernel image via the ID map, we can
execute the page table population code with the MMU and caches enabled.
The only thing we need to ensure is that translations via TTBR1 remain
disabled while we are updating the page tables the second time around,
which happens when KASLR requires the current kernel mapping to be
discarded and recreated at a randomized address.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/kernel/head.S | 62 +++++---------------
 1 file changed, 16 insertions(+), 46 deletions(-)

Comments

Will Deacon June 24, 2022, 12:56 p.m. UTC | #1
On Mon, Jun 13, 2022 at 04:45:41PM +0200, Ard Biesheuvel wrote:
> Now that we can access the entire kernel image via the ID map, we can
> execute the page table population code with the MMU and caches enabled.
> The only thing we need to ensure is that translations via TTBR1 remain
> disabled while we are updating the page tables the second time around,
> in case KASLR wants them to be randomized.
> 
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
>  arch/arm64/kernel/head.S | 62 +++++---------------
>  1 file changed, 16 insertions(+), 46 deletions(-)
> 
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index d704d0bd8ffc..583cbea865e1 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -85,8 +85,6 @@
>  	 *  x21        primary_entry() .. start_kernel()        FDT pointer passed at boot in x0
>  	 *  x22        create_idmap() .. start_kernel()         ID map VA of the DT blob
>  	 *  x23        primary_entry() .. start_kernel()        physical misalignment/KASLR offset
> -	 *  x28        clear_page_tables()                      callee preserved temp register
> -	 *  x19/x20    __primary_switch()                       callee preserved temp registers
>  	 *  x24        __primary_switch() .. relocate_kernel()  current RELR displacement
>  	 *  x28        create_idmap()                           callee preserved temp register
>  	 */
> @@ -96,9 +94,7 @@ SYM_CODE_START(primary_entry)
>  	adrp	x23, __PHYS_OFFSET
>  	and	x23, x23, MIN_KIMG_ALIGN - 1	// KASLR offset, defaults to 0
>  	bl	set_cpu_boot_mode_flag
> -	bl	clear_page_tables
>  	bl	create_idmap
> -	bl	create_kernel_mapping
>  
>  	/*
>  	 * The following calls CPU setup code, see arch/arm64/mm/proc.S for
> @@ -128,32 +124,14 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
>  SYM_CODE_END(preserve_boot_args)
>  
>  SYM_FUNC_START_LOCAL(clear_page_tables)
> -	mov	x28, lr
> -
> -	/*
> -	 * Invalidate the init page tables to avoid potential dirty cache lines
> -	 * being evicted. Other page tables are allocated in rodata as part of
> -	 * the kernel image, and thus are clean to the PoC per the boot
> -	 * protocol.
> -	 */
> -	adrp	x0, init_pg_dir
> -	adrp	x1, init_pg_end
> -	bl	dcache_inval_poc
> -
>  	/*
>  	 * Clear the init page tables.
>  	 */
>  	adrp	x0, init_pg_dir
>  	adrp	x1, init_pg_end
> -	sub	x1, x1, x0
> -1:	stp	xzr, xzr, [x0], #16
> -	stp	xzr, xzr, [x0], #16
> -	stp	xzr, xzr, [x0], #16
> -	stp	xzr, xzr, [x0], #16
> -	subs	x1, x1, #64
> -	b.ne	1b
> -
> -	ret	x28
> +	sub	x2, x1, x0
> +	mov	x1, xzr
> +	b	__pi_memset			// tail call
>  SYM_FUNC_END(clear_page_tables)
>  
>  /*
> @@ -399,16 +377,8 @@ SYM_FUNC_START_LOCAL(create_kernel_mapping)
>  
>  	map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
>  
> -	/*
> -	 * Since the page tables have been populated with non-cacheable
> -	 * accesses (MMU disabled), invalidate those tables again to
> -	 * remove any speculatively loaded cache lines.
> -	 */
> -	dmb	sy
> -
> -	adrp	x0, init_pg_dir
> -	adrp	x1, init_pg_end
> -	b	dcache_inval_poc		// tail call
> +	dsb	ishst				// sync with page table walker
> +	ret
>  SYM_FUNC_END(create_kernel_mapping)
>  
>  	/*
> @@ -863,14 +833,15 @@ SYM_FUNC_END(__relocate_kernel)
>  #endif
>  
>  SYM_FUNC_START_LOCAL(__primary_switch)
> -#ifdef CONFIG_RANDOMIZE_BASE
> -	mov	x19, x0				// preserve new SCTLR_EL1 value
> -	mrs	x20, sctlr_el1			// preserve old SCTLR_EL1 value
> -#endif
> -
> -	adrp	x1, init_pg_dir
> +	adrp	x1, reserved_pg_dir
>  	adrp	x2, init_idmap_pg_dir
>  	bl	__enable_mmu
> +
> +	bl	clear_page_tables
> +	bl	create_kernel_mapping
> +
> +	adrp	x1, init_pg_dir
> +	load_ttbr1 x1, x1, x2
>  #ifdef CONFIG_RELOCATABLE
>  #ifdef CONFIG_RELR
>  	mov	x24, #0				// no RELR displacement yet
> @@ -886,9 +857,8 @@ SYM_FUNC_START_LOCAL(__primary_switch)
>  	 * to take into account by discarding the current kernel mapping and
>  	 * creating a new one.
>  	 */
> -	pre_disable_mmu_workaround
> -	msr	sctlr_el1, x20			// disable the MMU
> -	isb
> +	adrp	x1, reserved_pg_dir		// Disable translations via TTBR1
> +	load_ttbr1 x1, x1, x2

I'd have thought we'd need some TLB maintenance here... is that not the
case?

Also, it might be a tiny bit easier to set TCR_EL1.EPD1 (which disables
table walks via TTBR1) instead of using the reserved_pg_dir.

Will
Ard Biesheuvel June 24, 2022, 1:07 p.m. UTC | #2
On Fri, 24 Jun 2022 at 14:56, Will Deacon <will@kernel.org> wrote:
>
> On Mon, Jun 13, 2022 at 04:45:41PM +0200, Ard Biesheuvel wrote:
> > Now that we can access the entire kernel image via the ID map, we can
> > execute the page table population code with the MMU and caches enabled.
> > The only thing we need to ensure is that translations via TTBR1 remain
> > disabled while we are updating the page tables the second time around,
> > in case KASLR wants them to be randomized.
> >
> > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > ---
> >  arch/arm64/kernel/head.S | 62 +++++---------------
> >  1 file changed, 16 insertions(+), 46 deletions(-)
> >
> > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> > index d704d0bd8ffc..583cbea865e1 100644
> > --- a/arch/arm64/kernel/head.S
> > +++ b/arch/arm64/kernel/head.S
> > @@ -85,8 +85,6 @@
> >        *  x21        primary_entry() .. start_kernel()        FDT pointer passed at boot in x0
> >        *  x22        create_idmap() .. start_kernel()         ID map VA of the DT blob
> >        *  x23        primary_entry() .. start_kernel()        physical misalignment/KASLR offset
> > -      *  x28        clear_page_tables()                      callee preserved temp register
> > -      *  x19/x20    __primary_switch()                       callee preserved temp registers
> >        *  x24        __primary_switch() .. relocate_kernel()  current RELR displacement
> >        *  x28        create_idmap()                           callee preserved temp register
> >        */
> > @@ -96,9 +94,7 @@ SYM_CODE_START(primary_entry)
> >       adrp    x23, __PHYS_OFFSET
> >       and     x23, x23, MIN_KIMG_ALIGN - 1    // KASLR offset, defaults to 0
> >       bl      set_cpu_boot_mode_flag
> > -     bl      clear_page_tables
> >       bl      create_idmap
> > -     bl      create_kernel_mapping
> >
> >       /*
> >        * The following calls CPU setup code, see arch/arm64/mm/proc.S for
> > @@ -128,32 +124,14 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
> >  SYM_CODE_END(preserve_boot_args)
> >
> >  SYM_FUNC_START_LOCAL(clear_page_tables)
> > -     mov     x28, lr
> > -
> > -     /*
> > -      * Invalidate the init page tables to avoid potential dirty cache lines
> > -      * being evicted. Other page tables are allocated in rodata as part of
> > -      * the kernel image, and thus are clean to the PoC per the boot
> > -      * protocol.
> > -      */
> > -     adrp    x0, init_pg_dir
> > -     adrp    x1, init_pg_end
> > -     bl      dcache_inval_poc
> > -
> >       /*
> >        * Clear the init page tables.
> >        */
> >       adrp    x0, init_pg_dir
> >       adrp    x1, init_pg_end
> > -     sub     x1, x1, x0
> > -1:   stp     xzr, xzr, [x0], #16
> > -     stp     xzr, xzr, [x0], #16
> > -     stp     xzr, xzr, [x0], #16
> > -     stp     xzr, xzr, [x0], #16
> > -     subs    x1, x1, #64
> > -     b.ne    1b
> > -
> > -     ret     x28
> > +     sub     x2, x1, x0
> > +     mov     x1, xzr
> > +     b       __pi_memset                     // tail call
> >  SYM_FUNC_END(clear_page_tables)
> >
> >  /*
> > @@ -399,16 +377,8 @@ SYM_FUNC_START_LOCAL(create_kernel_mapping)
> >
> >       map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
> >
> > -     /*
> > -      * Since the page tables have been populated with non-cacheable
> > -      * accesses (MMU disabled), invalidate those tables again to
> > -      * remove any speculatively loaded cache lines.
> > -      */
> > -     dmb     sy
> > -
> > -     adrp    x0, init_pg_dir
> > -     adrp    x1, init_pg_end
> > -     b       dcache_inval_poc                // tail call
> > +     dsb     ishst                           // sync with page table walker
> > +     ret
> >  SYM_FUNC_END(create_kernel_mapping)
> >
> >       /*
> > @@ -863,14 +833,15 @@ SYM_FUNC_END(__relocate_kernel)
> >  #endif
> >
> >  SYM_FUNC_START_LOCAL(__primary_switch)
> > -#ifdef CONFIG_RANDOMIZE_BASE
> > -     mov     x19, x0                         // preserve new SCTLR_EL1 value
> > -     mrs     x20, sctlr_el1                  // preserve old SCTLR_EL1 value
> > -#endif
> > -
> > -     adrp    x1, init_pg_dir
> > +     adrp    x1, reserved_pg_dir
> >       adrp    x2, init_idmap_pg_dir
> >       bl      __enable_mmu
> > +
> > +     bl      clear_page_tables
> > +     bl      create_kernel_mapping
> > +
> > +     adrp    x1, init_pg_dir
> > +     load_ttbr1 x1, x1, x2
> >  #ifdef CONFIG_RELOCATABLE
> >  #ifdef CONFIG_RELR
> >       mov     x24, #0                         // no RELR displacement yet
> > @@ -886,9 +857,8 @@ SYM_FUNC_START_LOCAL(__primary_switch)
> >        * to take into account by discarding the current kernel mapping and
> >        * creating a new one.
> >        */
> > -     pre_disable_mmu_workaround
> > -     msr     sctlr_el1, x20                  // disable the MMU
> > -     isb
> > +     adrp    x1, reserved_pg_dir             // Disable translations via TTBR1
> > +     load_ttbr1 x1, x1, x2
>
> I'd have thought we'd need some TLB maintenance here... is that not the
> case?
>

You mean at this particular point? We are running from the ID map with
TTBR1 translations disabled. We clear the page tables, repopulate
them, and perform a TLBI VMALLE1.

So are you saying repopulating the page tables while translations are
disabled needs to occur only after doing TLB maintenance?

> Also, it might be a tiny bit easier to clear EPD1 instead of using the
> reserved_pg_dir.
>

Right. So is there any reason in particular why it would be
appropriate here but not anywhere else? IOW, why do we have
reserved_pg_dir in the first place if we can just flick EPD1 on and
off?
Will Deacon June 24, 2022, 1:29 p.m. UTC | #3
On Fri, Jun 24, 2022 at 03:07:44PM +0200, Ard Biesheuvel wrote:
> On Fri, 24 Jun 2022 at 14:56, Will Deacon <will@kernel.org> wrote:
> >
> > On Mon, Jun 13, 2022 at 04:45:41PM +0200, Ard Biesheuvel wrote:
> > > Now that we can access the entire kernel image via the ID map, we can
> > > execute the page table population code with the MMU and caches enabled.
> > > The only thing we need to ensure is that translations via TTBR1 remain
> > > disabled while we are updating the page tables the second time around,
> > > in case KASLR wants them to be randomized.
> > >
> > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > > ---
> > >  arch/arm64/kernel/head.S | 62 +++++---------------
> > >  1 file changed, 16 insertions(+), 46 deletions(-)

[...]

> > > @@ -886,9 +857,8 @@ SYM_FUNC_START_LOCAL(__primary_switch)
> > >        * to take into account by discarding the current kernel mapping and
> > >        * creating a new one.
> > >        */
> > > -     pre_disable_mmu_workaround
> > > -     msr     sctlr_el1, x20                  // disable the MMU
> > > -     isb
> > > +     adrp    x1, reserved_pg_dir             // Disable translations via TTBR1
> > > +     load_ttbr1 x1, x1, x2
> >
> > I'd have thought we'd need some TLB maintenance here... is that not the
> > case?
> >
> 
> You mean at this particular point? We are running from the ID map with
> TTBR1 translations disabled. We clear the page tables, repopulate
> them, and perform a TLBI VMALLE1.
> 
> So are you saying repopulating the page tables while translations are
> disabled needs to occur only after doing TLB maintenance?

I'm thinking about walk cache entries from the previous page-table, which
would make the reserved_pg_dir ineffective. However, if we're clearing the
page-table anyway, I'm not even sure why we need reserved_pg_dir at all!

> > Also, it might be a tiny bit easier to clear EPD1 instead of using the
> > reserved_pg_dir.
> >
> 
> Right. So is there any reason in particular why it would be
> appropriate here but not anywhere else? IOW, why do we have
> reserved_pg_dir in the first place if we can just flick EPD1 on and
> off?

I think using a reserved (all zeroes) page-table makes sense when it
has its own ASID, as you can switch to/from it without TLB invalidation,
but that doesn't seem to be the case here. Anyway, no strong preference,
I just thought it might simplify things a bit.

Will
Ard Biesheuvel June 24, 2022, 2:07 p.m. UTC | #4
On Fri, 24 Jun 2022 at 15:29, Will Deacon <will@kernel.org> wrote:
>
> On Fri, Jun 24, 2022 at 03:07:44PM +0200, Ard Biesheuvel wrote:
> > On Fri, 24 Jun 2022 at 14:56, Will Deacon <will@kernel.org> wrote:
> > >
> > > On Mon, Jun 13, 2022 at 04:45:41PM +0200, Ard Biesheuvel wrote:
> > > > Now that we can access the entire kernel image via the ID map, we can
> > > > execute the page table population code with the MMU and caches enabled.
> > > > The only thing we need to ensure is that translations via TTBR1 remain
> > > > disabled while we are updating the page tables the second time around,
> > > > in case KASLR wants them to be randomized.
> > > >
> > > > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > > > ---
> > > >  arch/arm64/kernel/head.S | 62 +++++---------------
> > > >  1 file changed, 16 insertions(+), 46 deletions(-)
>
> [...]
>
> > > > @@ -886,9 +857,8 @@ SYM_FUNC_START_LOCAL(__primary_switch)
> > > >        * to take into account by discarding the current kernel mapping and
> > > >        * creating a new one.
> > > >        */
> > > > -     pre_disable_mmu_workaround
> > > > -     msr     sctlr_el1, x20                  // disable the MMU
> > > > -     isb
> > > > +     adrp    x1, reserved_pg_dir             // Disable translations via TTBR1
> > > > +     load_ttbr1 x1, x1, x2
> > >
> > > I'd have thought we'd need some TLB maintenance here... is that not the
> > > case?
> > >
> >
> > You mean at this particular point? We are running from the ID map with
> > TTBR1 translations disabled. We clear the page tables, repopulate
> > them, and perform a TLBI VMALLE1.
> >
> > So are you saying repopulating the page tables while translations are
> > disabled needs to occur only after doing TLB maintenance?
>
> I'm thinking about walk cache entries from the previous page-table, which
> would make the reserved_pg_dir ineffective. However, if we're clearing the
> page-table anyway, I'm not even sure why we need reserved_pg_dir at all!
>

Perhaps not. But this code is removed again two patches later so it
doesn't matter that much to begin with.

> > > Also, it might be a tiny bit easier to clear EPD1 instead of using the
> > > reserved_pg_dir.
> > >
> >
> > Right. So is there any reason in particular why it would be
> > appropriate here but not anywhere else? IOW, why do we have
> > reserved_pg_dir in the first place if we can just flick EPD1 on and
> > off?
>
> I think using a reserved (all zeroes) page-table makes sense when it
> has its own ASID, as you can switch to/from it without TLB invalidation,
> but that doesn't seem to be the case here. Anyway, no strong preference,
> I just thought it might simplify things a bit.
>

Ah right, I hadn't considered ASIDs.
diff mbox series

Patch

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index d704d0bd8ffc..583cbea865e1 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -85,8 +85,6 @@ 
 	 *  x21        primary_entry() .. start_kernel()        FDT pointer passed at boot in x0
 	 *  x22        create_idmap() .. start_kernel()         ID map VA of the DT blob
 	 *  x23        primary_entry() .. start_kernel()        physical misalignment/KASLR offset
-	 *  x28        clear_page_tables()                      callee preserved temp register
-	 *  x19/x20    __primary_switch()                       callee preserved temp registers
 	 *  x24        __primary_switch() .. relocate_kernel()  current RELR displacement
 	 *  x28        create_idmap()                           callee preserved temp register
 	 */
@@ -96,9 +94,7 @@  SYM_CODE_START(primary_entry)
 	adrp	x23, __PHYS_OFFSET
 	and	x23, x23, MIN_KIMG_ALIGN - 1	// KASLR offset, defaults to 0
 	bl	set_cpu_boot_mode_flag
-	bl	clear_page_tables
 	bl	create_idmap
-	bl	create_kernel_mapping
 
 	/*
 	 * The following calls CPU setup code, see arch/arm64/mm/proc.S for
@@ -128,32 +124,14 @@  SYM_CODE_START_LOCAL(preserve_boot_args)
 SYM_CODE_END(preserve_boot_args)
 
 SYM_FUNC_START_LOCAL(clear_page_tables)
-	mov	x28, lr
-
-	/*
-	 * Invalidate the init page tables to avoid potential dirty cache lines
-	 * being evicted. Other page tables are allocated in rodata as part of
-	 * the kernel image, and thus are clean to the PoC per the boot
-	 * protocol.
-	 */
-	adrp	x0, init_pg_dir
-	adrp	x1, init_pg_end
-	bl	dcache_inval_poc
-
 	/*
 	 * Clear the init page tables.
 	 */
 	adrp	x0, init_pg_dir
 	adrp	x1, init_pg_end
-	sub	x1, x1, x0
-1:	stp	xzr, xzr, [x0], #16
-	stp	xzr, xzr, [x0], #16
-	stp	xzr, xzr, [x0], #16
-	stp	xzr, xzr, [x0], #16
-	subs	x1, x1, #64
-	b.ne	1b
-
-	ret	x28
+	sub	x2, x1, x0
+	mov	x1, xzr
+	b	__pi_memset			// tail call
 SYM_FUNC_END(clear_page_tables)
 
 /*
@@ -399,16 +377,8 @@  SYM_FUNC_START_LOCAL(create_kernel_mapping)
 
 	map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
 
-	/*
-	 * Since the page tables have been populated with non-cacheable
-	 * accesses (MMU disabled), invalidate those tables again to
-	 * remove any speculatively loaded cache lines.
-	 */
-	dmb	sy
-
-	adrp	x0, init_pg_dir
-	adrp	x1, init_pg_end
-	b	dcache_inval_poc		// tail call
+	dsb	ishst				// sync with page table walker
+	ret
 SYM_FUNC_END(create_kernel_mapping)
 
 	/*
@@ -863,14 +833,15 @@  SYM_FUNC_END(__relocate_kernel)
 #endif
 
 SYM_FUNC_START_LOCAL(__primary_switch)
-#ifdef CONFIG_RANDOMIZE_BASE
-	mov	x19, x0				// preserve new SCTLR_EL1 value
-	mrs	x20, sctlr_el1			// preserve old SCTLR_EL1 value
-#endif
-
-	adrp	x1, init_pg_dir
+	adrp	x1, reserved_pg_dir
 	adrp	x2, init_idmap_pg_dir
 	bl	__enable_mmu
+
+	bl	clear_page_tables
+	bl	create_kernel_mapping
+
+	adrp	x1, init_pg_dir
+	load_ttbr1 x1, x1, x2
 #ifdef CONFIG_RELOCATABLE
 #ifdef CONFIG_RELR
 	mov	x24, #0				// no RELR displacement yet
@@ -886,9 +857,8 @@  SYM_FUNC_START_LOCAL(__primary_switch)
 	 * to take into account by discarding the current kernel mapping and
 	 * creating a new one.
 	 */
-	pre_disable_mmu_workaround
-	msr	sctlr_el1, x20			// disable the MMU
-	isb
+	adrp	x1, reserved_pg_dir		// Disable translations via TTBR1
+	load_ttbr1 x1, x1, x2
 	bl	clear_page_tables
 	bl	create_kernel_mapping		// Recreate kernel mapping
 
@@ -896,8 +866,8 @@  SYM_FUNC_START_LOCAL(__primary_switch)
 	dsb	nsh
 	isb
 
-	set_sctlr_el1	x19			// re-enable the MMU
-
+	adrp	x1, init_pg_dir			// Re-enable translations via TTBR1
+	load_ttbr1 x1, x1, x2
 	bl	__relocate_kernel
 #endif
 #endif