[v6,5/9] arm64: head: move early kernel mapping and relocation code to C code

Message ID 20220701130444.2945106-6-ardb@kernel.org (mailing list archive)
State New, archived
Series arm64: add support for WXN

Commit Message

Ard Biesheuvel July 1, 2022, 1:04 p.m. UTC
The asm version of the kernel mapping code works fine for creating a
coarse-grained identity map, but it is not suitable for mapping the
kernel down to its exact boundaries with the right attributes. This is
why we create a preliminary RWX kernel mapping first, and then rebuild
it from scratch later on.

So let's reimplement this, along with the relocation routines, in C, in
a way that makes it unnecessary to create the kernel page tables yet
another time in paging_init().

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/include/asm/pgtable-prot.h   |   2 +
 arch/arm64/kernel/Makefile              |   4 +-
 arch/arm64/kernel/head.S                | 157 +--------
 arch/arm64/kernel/image-vars.h          |  16 +
 arch/arm64/kernel/pi/Makefile           |   2 +-
 arch/arm64/kernel/pi/early_map_kernel.c | 368 ++++++++++++++++++++
 arch/arm64/kernel/pi/kaslr_early.c      | 112 ------
 arch/arm64/kernel/vmlinux.lds.S         |  13 +-
 arch/arm64/mm/proc.S                    |   1 +
 9 files changed, 410 insertions(+), 265 deletions(-)
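
One detail worth keeping in mind while reading the new early_map_kernel()
below: the virtual KASLR displacement keeps its low bits equal to the
physical misalignment of the image, so the 2 MiB block mappings still line
up, and only the high bits come from the seed. The following standalone
sketch (not part of the patch; MIN_KIMG_ALIGN is taken to be 2 MiB and the
sample addresses are made up) walks through that arithmetic on the host:

/*
 * Standalone illustration of the KASLR offset composition used by
 * early_map_kernel() in the patch below. Not kernel code: the value of
 * MIN_KIMG_ALIGN and the sample inputs are stand-ins for this example.
 */
#include <stdint.h>
#include <stdio.h>

#define MIN_KIMG_ALIGN	0x200000UL	/* 2 MiB, assumed */

static uint64_t compose_kaslr_offset(uint64_t pa_base, uint64_t seed)
{
	/* low bits follow the physical load address ... */
	uint64_t offset = pa_base & (MIN_KIMG_ALIGN - 1);

	/* ... high bits come from the randomness source */
	offset |= seed & ~(MIN_KIMG_ALIGN - 1);
	return offset;
}

int main(void)
{
	uint64_t pa_base = 0x40210000;		/* image loaded 64 KiB above a 2 MiB boundary */
	uint64_t seed    = 0x1234567890;	/* value as returned by kaslr_early_init() */
	uint64_t offset  = compose_kaslr_offset(pa_base, seed);

	/*
	 * Virtual and physical addresses stay congruent modulo 2 MiB, so
	 * the kernel text can still be mapped with block descriptors.
	 */
	printf("kaslr_offset = 0x%llx\n", (unsigned long long)offset);
	printf("offset %% 2M  = 0x%llx (matches pa %% 2M = 0x%llx)\n",
	       (unsigned long long)(offset & (MIN_KIMG_ALIGN - 1)),
	       (unsigned long long)(pa_base & (MIN_KIMG_ALIGN - 1)));
	return 0;
}
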

Patch

diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 62e0ebeed720..dd38f8e80fac 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -32,7 +32,9 @@ 
 #include <asm/cpufeature.h>
 #include <asm/pgtable-types.h>
 
+#ifndef arm64_use_ng_mappings
 extern bool arm64_use_ng_mappings;
+#endif
 
 #define _PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define _PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 88a96511580e..802de025bbea 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -29,7 +29,7 @@  obj-y			:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   cpufeature.o alternative.o cacheinfo.o		\
 			   smp.o smp_spin_table.o topology.o smccc-call.o	\
 			   syscall.o proton-pack.o idreg-override.o idle.o	\
-			   patching.o
+			   patching.o pi/
 
 targets			+= efi-entry.o
 
@@ -59,7 +59,7 @@  obj-$(CONFIG_ACPI)			+= acpi.o
 obj-$(CONFIG_ACPI_NUMA)			+= acpi_numa.o
 obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
 obj-$(CONFIG_PARAVIRT)			+= paravirt.o
-obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o pi/
+obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o
 obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
 obj-$(CONFIG_ELF_CORE)			+= elfcore.o
 obj-$(CONFIG_KEXEC_CORE)		+= machine_kexec.o relocate_kernel.o	\
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index f80127df5846..b7f1bd07a647 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -81,8 +81,6 @@ 
 	 *  x20        primary_entry() .. __primary_switch()    CPU boot mode
 	 *  x21        primary_entry() .. start_kernel()        FDT pointer passed at boot in x0
 	 *  x22        create_idmap() .. start_kernel()         ID map VA of the DT blob
-	 *  x23        primary_entry() .. start_kernel()        physical misalignment/KASLR offset
-	 *  x24        __primary_switch()                       linear map KASLR seed
 	 *  x25        primary_entry() .. start_kernel()        supported VA size
 	 *  x28        create_idmap()                           callee preserved temp register
 	 */
@@ -153,17 +151,6 @@  SYM_CODE_START_LOCAL(preserve_boot_args)
 0:	ret
 SYM_CODE_END(preserve_boot_args)
 
-SYM_FUNC_START_LOCAL(clear_page_tables)
-	/*
-	 * Clear the init page tables.
-	 */
-	adrp	x0, init_pg_dir
-	adrp	x1, init_pg_end
-	sub	x2, x1, x0
-	mov	x1, xzr
-	b	__pi_memset			// tail call
-SYM_FUNC_END(clear_page_tables)
-
 /*
  * Macro to populate page table entries, these entries can be pointers to the next level
  * or last level entries pointing to physical memory.
@@ -365,7 +352,7 @@  SYM_FUNC_START_LOCAL(create_idmap)
 	/* Remap the kernel page tables r/w in the ID map */
 	adrp	x1, _text
 	adrp	x2, init_pg_dir
-	adrp	x3, init_pg_end
+	adrp	x3, _end
 	bic	x4, x2, #SWAPPER_BLOCK_SIZE - 1
 	mov	x5, SWAPPER_RW_MMUFLAGS
 	mov	x6, #SWAPPER_BLOCK_SHIFT
@@ -396,22 +383,6 @@  SYM_FUNC_START_LOCAL(create_idmap)
 0:	ret	x28
 SYM_FUNC_END(create_idmap)
 
-SYM_FUNC_START_LOCAL(create_kernel_mapping)
-	adrp	x0, init_pg_dir
-	mov_q	x5, KIMAGE_VADDR		// compile time __va(_text)
-	add	x5, x5, x23			// add KASLR displacement
-	adrp	x6, _end			// runtime __pa(_end)
-	adrp	x3, _text			// runtime __pa(_text)
-	sub	x6, x6, x3			// _end - _text
-	add	x6, x6, x5			// runtime __va(_end)
-	mov	x7, SWAPPER_RW_MMUFLAGS
-
-	map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
-
-	dsb	ishst				// sync with page table walker
-	ret
-SYM_FUNC_END(create_kernel_mapping)
-
 	/*
 	 * Initialize CPU registers with task-specific and cpu-specific context.
 	 *
@@ -445,6 +416,7 @@  SYM_FUNC_END(create_kernel_mapping)
  * The following fragment of code is executed with the MMU enabled.
  *
  *   x0 = __pa(KERNEL_START)
+ *   w1 = memstart_offset_seed
  */
 SYM_FUNC_START_LOCAL(__primary_switched)
 	adr_l	x4, init_task
@@ -454,6 +426,11 @@  SYM_FUNC_START_LOCAL(__primary_switched)
 	msr	vbar_el1, x8			// vector table address
 	isb
 
+#ifdef CONFIG_RANDOMIZE_BASE
+	adrp	x5, memstart_offset_seed	// Save KASLR linear map seed
+	strh	w1, [x5, :lo12:memstart_offset_seed]
+#endif
+
 	stp	x29, x30, [sp, #-16]!
 	mov	x29, sp
 
@@ -479,11 +456,6 @@  SYM_FUNC_START_LOCAL(__primary_switched)
 	str	x25, [x8]			// ... observes the correct value
 	dc	civac, x8			// Make visible to booting secondaries
 #endif
-
-#ifdef CONFIG_RANDOMIZE_BASE
-	adrp	x5, memstart_offset_seed	// Save KASLR linear map seed
-	strh	w24, [x5, :lo12:memstart_offset_seed]
-#endif
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 	bl	kasan_early_init
 #endif
@@ -747,123 +719,18 @@  SYM_FUNC_START_LOCAL(__no_granule_support)
 	b	1b
 SYM_FUNC_END(__no_granule_support)
 
-#ifdef CONFIG_RELOCATABLE
-SYM_FUNC_START_LOCAL(__relocate_kernel)
-	/*
-	 * Iterate over each entry in the relocation table, and apply the
-	 * relocations in place.
-	 */
-	adr_l	x9, __rela_start
-	adr_l	x10, __rela_end
-	mov_q	x11, KIMAGE_VADDR		// default virtual offset
-	add	x11, x11, x23			// actual virtual offset
-
-0:	cmp	x9, x10
-	b.hs	1f
-	ldp	x12, x13, [x9], #24
-	ldr	x14, [x9, #-8]
-	cmp	w13, #R_AARCH64_RELATIVE
-	b.ne	0b
-	add	x14, x14, x23			// relocate
-	str	x14, [x12, x23]
-	b	0b
-
-1:
-#ifdef CONFIG_RELR
-	/*
-	 * Apply RELR relocations.
-	 *
-	 * RELR is a compressed format for storing relative relocations. The
-	 * encoded sequence of entries looks like:
-	 * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
-	 *
-	 * i.e. start with an address, followed by any number of bitmaps. The
-	 * address entry encodes 1 relocation. The subsequent bitmap entries
-	 * encode up to 63 relocations each, at subsequent offsets following
-	 * the last address entry.
-	 *
-	 * The bitmap entries must have 1 in the least significant bit. The
-	 * assumption here is that an address cannot have 1 in lsb. Odd
-	 * addresses are not supported. Any odd addresses are stored in the RELA
-	 * section, which is handled above.
-	 *
-	 * Excluding the least significant bit in the bitmap, each non-zero
-	 * bit in the bitmap represents a relocation to be applied to
-	 * a corresponding machine word that follows the base address
-	 * word. The second least significant bit represents the machine
-	 * word immediately following the initial address, and each bit
-	 * that follows represents the next word, in linear order. As such,
-	 * a single bitmap can encode up to 63 relocations in a 64-bit object.
-	 *
-	 * In this implementation we store the address of the next RELR table
-	 * entry in x9, the address being relocated by the current address or
-	 * bitmap entry in x13 and the address being relocated by the current
-	 * bit in x14.
-	 */
-	adr_l	x9, __relr_start
-	adr_l	x10, __relr_end
-
-2:	cmp	x9, x10
-	b.hs	7f
-	ldr	x11, [x9], #8
-	tbnz	x11, #0, 3f			// branch to handle bitmaps
-	add	x13, x11, x23
-	ldr	x12, [x13]			// relocate address entry
-	add	x12, x12, x23
-	str	x12, [x13], #8			// adjust to start of bitmap
-	b	2b
-
-3:	mov	x14, x13
-4:	lsr	x11, x11, #1
-	cbz	x11, 6f
-	tbz	x11, #0, 5f			// skip bit if not set
-	ldr	x12, [x14]			// relocate bit
-	add	x12, x12, x23
-	str	x12, [x14]
-
-5:	add	x14, x14, #8			// move to next bit's address
-	b	4b
-
-6:	/*
-	 * Move to the next bitmap's address. 8 is the word size, and 63 is the
-	 * number of significant bits in a bitmap entry.
-	 */
-	add	x13, x13, #(8 * 63)
-	b	2b
-
-7:
-#endif
-	ret
-
-SYM_FUNC_END(__relocate_kernel)
-#endif
-
 SYM_FUNC_START_LOCAL(__primary_switch)
 	adrp	x1, reserved_pg_dir
 	adrp	x2, init_idmap_pg_dir
 	bl	__enable_mmu
-#ifdef CONFIG_RELOCATABLE
-	adrp	x23, KERNEL_START
-	and	x23, x23, MIN_KIMG_ALIGN - 1
-#ifdef CONFIG_RANDOMIZE_BASE
-	mov	x0, x22
-	adrp	x1, init_pg_end
+
+	adrp	x1, primary_init_stack
 	mov	sp, x1
 	mov	x29, xzr
-	bl	__pi_kaslr_early_init
-	and	x24, x0, #SZ_2M - 1		// capture memstart offset seed
-	bic	x0, x0, #SZ_2M - 1
-	orr	x23, x23, x0			// record kernel offset
-#endif
-#endif
-	bl	clear_page_tables
-	bl	create_kernel_mapping
+	mov	x0, x22				// pass FDT pointer
+	bl	__pi_early_map_kernel
+	mov	w1, w0				// capture memstart offset seed
 
-	adrp	x1, init_pg_dir
-	load_ttbr1 x1, x1, x2
-#ifdef CONFIG_RELOCATABLE
-	bl	__relocate_kernel
-#endif
 	ldr	x8, =__primary_switched
 	adrp	x0, KERNEL_START		// __pa(KERNEL_START)
 	br	x8
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index afa69e04e75e..032c3e1aff20 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -54,6 +54,22 @@  PROVIDE(__pi___memcpy			= __pi_memcpy);
 PROVIDE(__pi___memmove			= __pi_memmove);
 PROVIDE(__pi___memset			= __pi_memset);
 
+/*
+ * The symbols below are used by the early C kernel mapping code.
+ */
+PROVIDE(__pi_init_pg_dir		= init_pg_dir);
+PROVIDE(__pi_init_pg_end		= init_pg_end);
+
+PROVIDE(__pi__text			= _text);
+PROVIDE(__pi__stext               	= _stext);
+PROVIDE(__pi__etext               	= _etext);
+PROVIDE(__pi___start_rodata       	= __start_rodata);
+PROVIDE(__pi___inittext_begin     	= __inittext_begin);
+PROVIDE(__pi___inittext_end       	= __inittext_end);
+PROVIDE(__pi___initdata_begin     	= __initdata_begin);
+PROVIDE(__pi___initdata_end       	= __initdata_end);
+PROVIDE(__pi__data                	= _data);
+
 #ifdef CONFIG_KVM
 
 /*
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
index 839291430cb3..b88612eab1bc 100644
--- a/arch/arm64/kernel/pi/Makefile
+++ b/arch/arm64/kernel/pi/Makefile
@@ -29,5 +29,5 @@  $(obj)/%.pi.o: $(obj)/%.o FORCE
 $(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
-obj-y		:= kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
+obj-y		:= early_map_kernel.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
 extra-y		:= $(patsubst %.pi.o,%.o,$(obj-y))
diff --git a/arch/arm64/kernel/pi/early_map_kernel.c b/arch/arm64/kernel/pi/early_map_kernel.c
new file mode 100644
index 000000000000..4199548584fb
--- /dev/null
+++ b/arch/arm64/kernel/pi/early_map_kernel.c
@@ -0,0 +1,368 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2022 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+// NOTE: code in this file runs *very* early, and is not permitted to use
+// global variables or anything that relies on absolute addressing.
+
+#define arm64_use_ng_mappings 0
+
+#include <linux/elf.h>
+#include <linux/libfdt.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+
+#include <asm/archrandom.h>
+#include <asm/memory.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+/* taken from lib/string.c */
+static char *__strstr(const char *s1, const char *s2)
+{
+	size_t l1, l2;
+
+	l2 = strlen(s2);
+	if (!l2)
+		return (char *)s1;
+	l1 = strlen(s1);
+	while (l1 >= l2) {
+		l1--;
+		if (!memcmp(s1, s2, l2))
+			return (char *)s1;
+		s1++;
+	}
+	return NULL;
+}
+
+/*
+ * Returns whether @pfx appears in @string, either at the very start, or
+ * elsewhere but preceded by a space character.
+ */
+static bool string_contains_prefix(const u8 *string, const u8 *pfx)
+{
+	const u8 *str;
+
+	str = __strstr(string, pfx);
+	return str == string || (str > string && *(str - 1) == ' ');
+}
+
+static bool cmdline_has(void *fdt, const u8 *word)
+{
+	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+		int node;
+		const u8 *prop;
+
+		node = fdt_path_offset(fdt, "/chosen");
+		if (node < 0)
+			goto out;
+
+		prop = fdt_getprop(fdt, node, "bootargs", NULL);
+		if (!prop)
+			goto out;
+
+		if (string_contains_prefix(prop, word))
+			return true;
+
+		if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
+			goto out;
+
+		return false;
+	}
+out:
+	return string_contains_prefix(CONFIG_CMDLINE, word);
+}
+
+static u64 get_kaslr_seed(void *fdt)
+{
+	int node, len;
+	fdt64_t *prop;
+	u64 ret;
+
+	node = fdt_path_offset(fdt, "/chosen");
+	if (node < 0)
+		return 0;
+
+	prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+	if (!prop || len != sizeof(u64))
+		return 0;
+
+	ret = fdt64_to_cpu(*prop);
+	*prop = 0;
+	return ret;
+}
+
+static u64 kaslr_early_init(void *fdt)
+{
+	u64 seed;
+
+	if (cmdline_has(fdt, "nokaslr"))
+		return 0;
+
+	seed = get_kaslr_seed(fdt);
+	if (!seed) {
+#ifdef CONFIG_ARCH_RANDOM
+		 if (!__early_cpu_has_rndr() ||
+		     !__arm64_rndr((unsigned long *)&seed))
+#endif
+		return 0;
+	}
+
+	/*
+	 * OK, so we are proceeding with KASLR enabled. Calculate a suitable
+	 * kernel image offset from the seed. Let's place the kernel in the
+	 * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
+	 * the lower and upper quarters to avoid colliding with other
+	 * allocations.
+	 */
+	return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
+}
+
+extern const Elf64_Rela rela_start[], rela_end[];
+extern const u64 relr_start[], relr_end[];
+
+static void relocate_kernel(u64 offset)
+{
+	const Elf64_Rela *rela;
+	const u64 *relr;
+	u64 *place;
+
+	for (rela = rela_start; rela < rela_end; rela++) {
+		if (ELF64_R_TYPE(rela->r_info) != R_AARCH64_RELATIVE)
+			continue;
+		place = (u64 *)(rela->r_offset + offset);
+		*place = rela->r_addend + offset;
+	}
+
+	if (!IS_ENABLED(CONFIG_RELR) || !offset)
+		return;
+
+	/*
+	 * Apply RELR relocations.
+	 *
+	 * RELR is a compressed format for storing relative relocations. The
+	 * encoded sequence of entries looks like:
+	 * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
+	 *
+	 * i.e. start with an address, followed by any number of bitmaps. The
+	 * address entry encodes 1 relocation. The subsequent bitmap entries
+	 * encode up to 63 relocations each, at subsequent offsets following
+	 * the last address entry.
+	 *
+	 * The bitmap entries must have 1 in the least significant bit. The
+	 * assumption here is that an address cannot have 1 in lsb. Odd
+	 * addresses are not supported. Any odd addresses are stored in the
+	 * RELA section, which is handled above.
+	 *
+	 * Excluding the least significant bit in the bitmap, each non-zero bit
+	 * in the bitmap represents a relocation to be applied to a
+	 * corresponding machine word that follows the base address word. The
+	 * second least significant bit represents the machine word immediately
+	 * following the initial address, and each bit that follows represents
+	 * the next word, in linear order. As such, a single bitmap can encode
+	 * up to 63 relocations in a 64-bit object.
+	 */
+	for (relr = relr_start; relr < relr_end; relr++) {
+		u64 *p, r = *relr;
+
+		if ((r & 1) == 0) {
+			place = (u64 *)(r + offset);
+			*place++ += offset;
+		} else {
+			for (p = place; r; p++) {
+				r >>= 1;
+				if (r & 1)
+					*p += offset;
+			}
+			place += 63;
+		}
+	}
+}
+
+extern void idmap_cpu_replace_ttbr1(void *pgdir);
+
+static void map_range(pgd_t **pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
+		      int level, pte_t *tbl, bool may_use_cont)
+{
+	u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
+	u64 protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
+	int lshift = (3 - level) * (PAGE_SHIFT - 3);
+	u64 lmask = (PAGE_SIZE << lshift) - 1;
+
+	// Advance tbl to the entry that covers start
+	tbl += (start >> (lshift + PAGE_SHIFT)) % BIT(PAGE_SHIFT - 3);
+
+	// Set the right block/page bits for this level unless we are
+	// clearing the mapping
+	if (protval)
+		protval |= (level < 3) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
+
+	while (start < end) {
+		u64 next = min((start | lmask) + 1, end);
+
+		if (level < 3 &&
+		    (start & lmask || next & lmask || pa & lmask)) {
+			// This chunk needs a finer grained mapping
+			// Put down a table mapping if necessary and recurse
+			if (pte_none(*tbl)) {
+				*tbl = __pte(__phys_to_pte_val((u64)*pgd) |
+					     PMD_TYPE_TABLE);
+				*pgd += PTRS_PER_PTE;
+			}
+			map_range(pgd, start, next, pa, prot, level + 1,
+				  (pte_t *)__pte_to_phys(*tbl), may_use_cont);
+		} else {
+			// Start a contiguous range if start and pa are
+			// suitably aligned
+			if (((start | pa) & cmask) == 0 && may_use_cont)
+				protval |= PTE_CONT;
+			// Clear the contiguous attribute if the remaining
+			// range does not cover a contiguous block
+			if ((end & ~cmask) <= start)
+				protval &= ~PTE_CONT;
+			// Put down a block or page mapping
+			*tbl = __pte(__phys_to_pte_val(pa) | protval);
+		}
+		pa += next - start;
+		start = next;
+		tbl++;
+	}
+}
+
+static void map_segment(pgd_t **pgd, u64 va_offset, void *start, void *end,
+			pgprot_t prot, bool may_use_cont)
+{
+	map_range(pgd, ((u64)start + va_offset) & ~PAGE_OFFSET,
+		  ((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
+		  prot, 4 - CONFIG_PGTABLE_LEVELS, (pte_t *)init_pg_dir,
+		  may_use_cont);
+}
+
+static void unmap_segment(u64 va_offset, void *start, void *end)
+{
+	map_range(NULL, ((u64)start + va_offset) & ~PAGE_OFFSET,
+		  ((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
+		  __pgprot(0), 4 - CONFIG_PGTABLE_LEVELS, (pte_t *)init_pg_dir,
+		  false);
+}
+
+/*
+ * Open coded check for BTI, only for use to determine configuration
+ * for early mappings for before the cpufeature code has run.
+ */
+static bool arm64_early_this_cpu_has_bti(void)
+{
+	u64 pfr1;
+
+	if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
+		return false;
+
+	pfr1 = read_sysreg_s(SYS_ID_AA64PFR1_EL1);
+	return cpuid_feature_extract_unsigned_field(pfr1,
+						    ID_AA64PFR1_BT_SHIFT);
+}
+
+static bool arm64_early_this_cpu_has_e0pd(void)
+{
+	u64 mmfr2;
+
+	if (!IS_ENABLED(CONFIG_ARM64_E0PD))
+		return false;
+
+	mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+	return cpuid_feature_extract_unsigned_field(mmfr2,
+						    ID_AA64MMFR2_E0PD_SHIFT);
+}
+
+static void map_kernel(void *fdt, u64 kaslr_offset, u64 va_offset)
+{
+	pgd_t *pgdp = (void *)init_pg_dir + PAGE_SIZE;
+	pgprot_t text_prot = PAGE_KERNEL_ROX;
+	pgprot_t data_prot = PAGE_KERNEL;
+	pgprot_t prot;
+
+	if (cmdline_has(fdt, "rodata=off"))
+		text_prot = PAGE_KERNEL_EXEC;
+
+	// If we have a CPU that supports BTI and a kernel built for
+	// BTI then mark the kernel executable text as guarded pages
+	// now so we don't have to rewrite the page tables later.
+	if (arm64_early_this_cpu_has_bti() && !cmdline_has(fdt, "arm64.nobti"))
+		text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
+
+	// Assume that any CPU that does not implement E0PD needs KPTI to
+	// ensure that KASLR randomized addresses will not leak. This means we
+	// need to use non-global mappings for the kernel text and data.
+	if (!arm64_early_this_cpu_has_e0pd() && kaslr_offset >= MIN_KIMG_ALIGN) {
+		text_prot = __pgprot_modify(text_prot, PTE_NG, PTE_NG);
+		data_prot = __pgprot_modify(data_prot, PTE_NG, PTE_NG);
+	}
+
+	// Map all code read-write on the first pass for relocation processing
+	prot = IS_ENABLED(CONFIG_RELOCATABLE) ? data_prot : text_prot;
+
+	map_segment(&pgdp, va_offset, _stext, _etext, prot, true);
+	map_segment(&pgdp, va_offset, __start_rodata, __inittext_begin, data_prot, false);
+	map_segment(&pgdp, va_offset, __inittext_begin, __inittext_end, prot, false);
+	map_segment(&pgdp, va_offset, __initdata_begin, __initdata_end, data_prot, false);
+	map_segment(&pgdp, va_offset, _data, init_pg_dir, data_prot, true);
+	// omit [init_pg_dir, _end] - it doesn't need a kernel mapping
+	dsb(ishst);
+
+	idmap_cpu_replace_ttbr1(init_pg_dir);
+
+	if (IS_ENABLED(CONFIG_RELOCATABLE)) {
+		relocate_kernel(kaslr_offset);
+
+		// Unmap the text region before remapping it, to avoid
+		// potential TLB conflicts on the contiguous descriptors. This
+		// assumes that it is permitted to clear the valid bit on a
+		// live descriptor with the CONT bit set.
+		unmap_segment(va_offset, _stext, _etext);
+		dsb(ishst);
+		isb();
+		__tlbi(vmalle1);
+		isb();
+
+		// Remap these segments with different permissions
+		// No new page table allocations should be needed
+		map_segment(NULL, va_offset, _stext, _etext, text_prot, true);
+		map_segment(NULL, va_offset, __inittext_begin, __inittext_end,
+			    text_prot, false);
+		dsb(ishst);
+	}
+}
+
+asmlinkage u64 early_map_kernel(void *fdt)
+{
+	u64 kaslr_seed = 0, kaslr_offset = 0;
+	u64 va_base = KIMAGE_VADDR;
+	u64 pa_base = (u64)&_text;
+
+	// Clear the initial page tables before populating them
+	memset(init_pg_dir, 0, init_pg_end - init_pg_dir);
+
+	// The virtual KASLR displacement modulo 2MiB is decided by the
+	// physical placement of the image, as otherwise, we might not be able
+	// to create the early kernel mapping using 2 MiB block descriptors. So
+	// take the low bits of the KASLR offset from the physical address, and
+	// fill in the high bits from the seed.
+	if (IS_ENABLED(CONFIG_RELOCATABLE)) {
+		kaslr_offset = pa_base & (MIN_KIMG_ALIGN - 1);
+		if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+			kaslr_seed = kaslr_early_init(fdt);
+			kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
+		}
+		va_base += kaslr_offset;
+	}
+
+	map_kernel(fdt, kaslr_offset, va_base - pa_base);
+
+	// Return the lower 16 bits of the seed - this will be
+	// used to randomize the linear map
+	return kaslr_seed & U16_MAX;
+}
diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c
deleted file mode 100644
index 6c3855e69395..000000000000
--- a/arch/arm64/kernel/pi/kaslr_early.c
+++ /dev/null
@@ -1,112 +0,0 @@ 
-// SPDX-License-Identifier: GPL-2.0-only
-// Copyright 2022 Google LLC
-// Author: Ard Biesheuvel <ardb@google.com>
-
-// NOTE: code in this file runs *very* early, and is not permitted to use
-// global variables or anything that relies on absolute addressing.
-
-#include <linux/libfdt.h>
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <linux/types.h>
-#include <linux/sizes.h>
-#include <linux/string.h>
-
-#include <asm/archrandom.h>
-#include <asm/memory.h>
-
-/* taken from lib/string.c */
-static char *__strstr(const char *s1, const char *s2)
-{
-	size_t l1, l2;
-
-	l2 = strlen(s2);
-	if (!l2)
-		return (char *)s1;
-	l1 = strlen(s1);
-	while (l1 >= l2) {
-		l1--;
-		if (!memcmp(s1, s2, l2))
-			return (char *)s1;
-		s1++;
-	}
-	return NULL;
-}
-static bool cmdline_contains_nokaslr(const u8 *cmdline)
-{
-	const u8 *str;
-
-	str = __strstr(cmdline, "nokaslr");
-	return str == cmdline || (str > cmdline && *(str - 1) == ' ');
-}
-
-static bool is_kaslr_disabled_cmdline(void *fdt)
-{
-	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
-		int node;
-		const u8 *prop;
-
-		node = fdt_path_offset(fdt, "/chosen");
-		if (node < 0)
-			goto out;
-
-		prop = fdt_getprop(fdt, node, "bootargs", NULL);
-		if (!prop)
-			goto out;
-
-		if (cmdline_contains_nokaslr(prop))
-			return true;
-
-		if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
-			goto out;
-
-		return false;
-	}
-out:
-	return cmdline_contains_nokaslr(CONFIG_CMDLINE);
-}
-
-static u64 get_kaslr_seed(void *fdt)
-{
-	int node, len;
-	fdt64_t *prop;
-	u64 ret;
-
-	node = fdt_path_offset(fdt, "/chosen");
-	if (node < 0)
-		return 0;
-
-	prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
-	if (!prop || len != sizeof(u64))
-		return 0;
-
-	ret = fdt64_to_cpu(*prop);
-	*prop = 0;
-	return ret;
-}
-
-asmlinkage u64 kaslr_early_init(void *fdt)
-{
-	u64 seed;
-
-	if (is_kaslr_disabled_cmdline(fdt))
-		return 0;
-
-	seed = get_kaslr_seed(fdt);
-	if (!seed) {
-#ifdef CONFIG_ARCH_RANDOM
-		 if (!__early_cpu_has_rndr() ||
-		     !__arm64_rndr((unsigned long *)&seed))
-#endif
-		return 0;
-	}
-
-	/*
-	 * OK, so we are proceeding with KASLR enabled. Calculate a suitable
-	 * kernel image offset from the seed. Let's place the kernel in the
-	 * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
-	 * the lower and upper quarters to avoid colliding with other
-	 * allocations.
-	 */
-	return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
-}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 5002d869fa7f..60641aa3ac07 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -257,15 +257,15 @@  SECTIONS
 	HYPERVISOR_RELOC_SECTION
 
 	.rela.dyn : ALIGN(8) {
-		__rela_start = .;
+		__pi_rela_start = .;
 		*(.rela .rela*)
-		__rela_end = .;
+		__pi_rela_end = .;
 	}
 
 	.relr.dyn : ALIGN(8) {
-		__relr_start = .;
+		__pi_relr_start = .;
 		*(.relr.dyn)
-		__relr_end = .;
+		__pi_relr_end = .;
 	}
 
 	. = ALIGN(SEGMENT_ALIGN);
@@ -309,11 +309,14 @@  SECTIONS
 
 	BSS_SECTION(SBSS_ALIGN, 0, 0)
 
-	. = ALIGN(PAGE_SIZE);
+	. = ALIGN(SEGMENT_ALIGN);
 	init_pg_dir = .;
 	. += INIT_DIR_SIZE;
 	init_pg_end = .;
 
+	. += SZ_4K;
+	primary_init_stack = .;
+
 	. = ALIGN(SEGMENT_ALIGN);
 	__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
 	_end = .;
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 113a4fedf5b8..4322ddf5e02f 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -196,6 +196,7 @@  SYM_FUNC_START(idmap_cpu_replace_ttbr1)
 
 	ret
 SYM_FUNC_END(idmap_cpu_replace_ttbr1)
+SYM_FUNC_ALIAS(__pi_idmap_cpu_replace_ttbr1, idmap_cpu_replace_ttbr1)
 	.popsection
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
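
For completeness, here is a minimal host-side sketch of the RELR decoding
loop that relocate_kernel() in early_map_kernel.c above implements. The
flat "image" array, its index-based addressing and the table contents are
invented for illustration; the kernel version operates on real virtual
addresses instead.

/*
 * Host-side sketch of RELR decoding, mirroring relocate_kernel() above.
 * All data below is made up for the example.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* eight fake machine words, indexed as if at "addresses" 0, 8, 16, ... */
	uint64_t image[8] = { 100, 101, 102, 103, 104, 105, 106, 107 };
	uint64_t offset = 1000;		/* displacement to add to each relocated word */

	/*
	 * RELR table: an even entry addresses one word to relocate; an odd
	 * entry is a bitmap whose bit 1 covers the word right after the last
	 * one handled, bit 2 the next, and so on (up to 63 words per entry).
	 */
	uint64_t relr[] = {
		0 * 8,				/* relocate image[0]          */
		(1ULL << 1) | (1ULL << 3) | 1,	/* ... and image[1], image[3] */
	};
	uint64_t *place = NULL;

	for (unsigned int i = 0; i < sizeof(relr) / sizeof(relr[0]); i++) {
		uint64_t r = relr[i];

		if ((r & 1) == 0) {
			place = &image[r / 8];		/* address entry */
			*place++ += offset;
		} else {
			uint64_t *p;

			for (p = place; r; p++) {	/* bitmap entry */
				r >>= 1;
				if (r & 1)
					*p += offset;
			}
			place += 63;
		}
	}

	for (unsigned int i = 0; i < 8; i++)
		printf("image[%u] = %llu\n", i, (unsigned long long)image[i]);
	/* expected: words 0, 1 and 3 were displaced by 1000 */
	return 0;
}
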