--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -32,7 +32,9 @@
#include <asm/cpufeature.h>
#include <asm/pgtable-types.h>
+#ifndef arm64_use_ng_mappings
extern bool arm64_use_ng_mappings;
+#endif
#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -29,7 +29,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
syscall.o proton-pack.o idreg-override.o idle.o \
- patching.o
+ patching.o pi/
targets += efi-entry.o
@@ -59,7 +59,7 @@ obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o
obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o pi/
+obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -81,8 +81,6 @@
* x20 primary_entry() .. __primary_switch() CPU boot mode
* x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
* x22 create_idmap() .. start_kernel() ID map VA of the DT blob
- * x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset
- * x24 __primary_switch() linear map KASLR seed
* x25 primary_entry() .. start_kernel() supported VA size
* x28 create_idmap() callee preserved temp register
*/
@@ -153,17 +151,6 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
0: ret
SYM_CODE_END(preserve_boot_args)
-SYM_FUNC_START_LOCAL(clear_page_tables)
- /*
- * Clear the init page tables.
- */
- adrp x0, init_pg_dir
- adrp x1, init_pg_end
- sub x2, x1, x0
- mov x1, xzr
- b __pi_memset // tail call
-SYM_FUNC_END(clear_page_tables)
-
/*
* Macro to populate page table entries, these entries can be pointers to the next level
* or last level entries pointing to physical memory.
@@ -365,7 +352,7 @@ SYM_FUNC_START_LOCAL(create_idmap)
/* Remap the kernel page tables r/w in the ID map */
adrp x1, _text
adrp x2, init_pg_dir
- adrp x3, init_pg_end
+ adrp x3, _end
bic x4, x2, #SWAPPER_BLOCK_SIZE - 1
mov x5, SWAPPER_RW_MMUFLAGS
mov x6, #SWAPPER_BLOCK_SHIFT
@@ -396,22 +383,6 @@ SYM_FUNC_START_LOCAL(create_idmap)
0: ret x28
SYM_FUNC_END(create_idmap)
-SYM_FUNC_START_LOCAL(create_kernel_mapping)
- adrp x0, init_pg_dir
- mov_q x5, KIMAGE_VADDR // compile time __va(_text)
- add x5, x5, x23 // add KASLR displacement
- adrp x6, _end // runtime __pa(_end)
- adrp x3, _text // runtime __pa(_text)
- sub x6, x6, x3 // _end - _text
- add x6, x6, x5 // runtime __va(_end)
- mov x7, SWAPPER_RW_MMUFLAGS
-
- map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
-
- dsb ishst // sync with page table walker
- ret
-SYM_FUNC_END(create_kernel_mapping)
-
/*
* Initialize CPU registers with task-specific and cpu-specific context.
*
@@ -445,6 +416,7 @@ SYM_FUNC_END(create_kernel_mapping)
* The following fragment of code is executed with the MMU enabled.
*
* x0 = __pa(KERNEL_START)
+ * w1 = memstart_offset_seed
*/
SYM_FUNC_START_LOCAL(__primary_switched)
adr_l x4, init_task
@@ -454,6 +426,11 @@ SYM_FUNC_START_LOCAL(__primary_switched)
msr vbar_el1, x8 // vector table address
isb
+#ifdef CONFIG_RANDOMIZE_BASE
+ adrp x5, memstart_offset_seed // Save KASLR linear map seed
+ strh w1, [x5, :lo12:memstart_offset_seed]
+#endif
+
stp x29, x30, [sp, #-16]!
mov x29, sp
@@ -479,11 +456,6 @@ SYM_FUNC_START_LOCAL(__primary_switched)
str x25, [x8] // ... observes the correct value
dc civac, x8 // Make visible to booting secondaries
#endif
-
-#ifdef CONFIG_RANDOMIZE_BASE
- adrp x5, memstart_offset_seed // Save KASLR linear map seed
- strh w24, [x5, :lo12:memstart_offset_seed]
-#endif
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
bl kasan_early_init
#endif
@@ -747,123 +719,18 @@ SYM_FUNC_START_LOCAL(__no_granule_support)
b 1b
SYM_FUNC_END(__no_granule_support)
-#ifdef CONFIG_RELOCATABLE
-SYM_FUNC_START_LOCAL(__relocate_kernel)
- /*
- * Iterate over each entry in the relocation table, and apply the
- * relocations in place.
- */
- adr_l x9, __rela_start
- adr_l x10, __rela_end
- mov_q x11, KIMAGE_VADDR // default virtual offset
- add x11, x11, x23 // actual virtual offset
-
-0: cmp x9, x10
- b.hs 1f
- ldp x12, x13, [x9], #24
- ldr x14, [x9, #-8]
- cmp w13, #R_AARCH64_RELATIVE
- b.ne 0b
- add x14, x14, x23 // relocate
- str x14, [x12, x23]
- b 0b
-
-1:
-#ifdef CONFIG_RELR
- /*
- * Apply RELR relocations.
- *
- * RELR is a compressed format for storing relative relocations. The
- * encoded sequence of entries looks like:
- * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
- *
- * i.e. start with an address, followed by any number of bitmaps. The
- * address entry encodes 1 relocation. The subsequent bitmap entries
- * encode up to 63 relocations each, at subsequent offsets following
- * the last address entry.
- *
- * The bitmap entries must have 1 in the least significant bit. The
- * assumption here is that an address cannot have 1 in lsb. Odd
- * addresses are not supported. Any odd addresses are stored in the RELA
- * section, which is handled above.
- *
- * Excluding the least significant bit in the bitmap, each non-zero
- * bit in the bitmap represents a relocation to be applied to
- * a corresponding machine word that follows the base address
- * word. The second least significant bit represents the machine
- * word immediately following the initial address, and each bit
- * that follows represents the next word, in linear order. As such,
- * a single bitmap can encode up to 63 relocations in a 64-bit object.
- *
- * In this implementation we store the address of the next RELR table
- * entry in x9, the address being relocated by the current address or
- * bitmap entry in x13 and the address being relocated by the current
- * bit in x14.
- */
- adr_l x9, __relr_start
- adr_l x10, __relr_end
-
-2: cmp x9, x10
- b.hs 7f
- ldr x11, [x9], #8
- tbnz x11, #0, 3f // branch to handle bitmaps
- add x13, x11, x23
- ldr x12, [x13] // relocate address entry
- add x12, x12, x23
- str x12, [x13], #8 // adjust to start of bitmap
- b 2b
-
-3: mov x14, x13
-4: lsr x11, x11, #1
- cbz x11, 6f
- tbz x11, #0, 5f // skip bit if not set
- ldr x12, [x14] // relocate bit
- add x12, x12, x23
- str x12, [x14]
-
-5: add x14, x14, #8 // move to next bit's address
- b 4b
-
-6: /*
- * Move to the next bitmap's address. 8 is the word size, and 63 is the
- * number of significant bits in a bitmap entry.
- */
- add x13, x13, #(8 * 63)
- b 2b
-
-7:
-#endif
- ret
-
-SYM_FUNC_END(__relocate_kernel)
-#endif
-
SYM_FUNC_START_LOCAL(__primary_switch)
adrp x1, reserved_pg_dir
adrp x2, init_idmap_pg_dir
bl __enable_mmu
-#ifdef CONFIG_RELOCATABLE
- adrp x23, KERNEL_START
- and x23, x23, MIN_KIMG_ALIGN - 1
-#ifdef CONFIG_RANDOMIZE_BASE
- mov x0, x22
- adrp x1, init_pg_end
+
+ adrp x1, primary_init_stack
mov sp, x1
mov x29, xzr
- bl __pi_kaslr_early_init
- and x24, x0, #SZ_2M - 1 // capture memstart offset seed
- bic x0, x0, #SZ_2M - 1
- orr x23, x23, x0 // record kernel offset
-#endif
-#endif
- bl clear_page_tables
- bl create_kernel_mapping
+ mov x0, x22 // pass FDT pointer
+ bl __pi_early_map_kernel
+ mov w1, w0 // capture memstart offset seed
- adrp x1, init_pg_dir
- load_ttbr1 x1, x1, x2
-#ifdef CONFIG_RELOCATABLE
- bl __relocate_kernel
-#endif
ldr x8, =__primary_switched
adrp x0, KERNEL_START // __pa(KERNEL_START)
br x8
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -54,6 +54,22 @@ PROVIDE(__pi___memcpy = __pi_memcpy);
PROVIDE(__pi___memmove = __pi_memmove);
PROVIDE(__pi___memset = __pi_memset);
+/*
+ * The symbols below are used by the early C kernel mapping code.
+ */
+PROVIDE(__pi_init_pg_dir = init_pg_dir);
+PROVIDE(__pi_init_pg_end = init_pg_end);
+
+PROVIDE(__pi__text = _text);
+PROVIDE(__pi__stext = _stext);
+PROVIDE(__pi__etext = _etext);
+PROVIDE(__pi___start_rodata = __start_rodata);
+PROVIDE(__pi___inittext_begin = __inittext_begin);
+PROVIDE(__pi___inittext_end = __inittext_end);
+PROVIDE(__pi___initdata_begin = __initdata_begin);
+PROVIDE(__pi___initdata_end = __initdata_end);
+PROVIDE(__pi__data = _data);
+
#ifdef CONFIG_KVM
/*
--- a/arch/arm64/kernel/pi/Makefile
+++ b/arch/arm64/kernel/pi/Makefile
@@ -29,5 +29,5 @@ $(obj)/%.pi.o: $(obj)/%.o FORCE
$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
$(call if_changed_rule,cc_o_c)
-obj-y := kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
+obj-y := early_map_kernel.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
new file mode 100644
--- /dev/null
+++ b/arch/arm64/kernel/pi/early_map_kernel.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2022 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+// NOTE: code in this file runs *very* early, and is not permitted to use
+// global variables or anything that relies on absolute addressing.
+
+#define arm64_use_ng_mappings 0
+
+#include <linux/elf.h>
+#include <linux/libfdt.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+
+#include <asm/archrandom.h>
+#include <asm/memory.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+#include <asm/tlbflush.h>
+
+/* taken from lib/string.c */
+static char *__strstr(const char *s1, const char *s2)
+{
+ size_t l1, l2;
+
+ l2 = strlen(s2);
+ if (!l2)
+ return (char *)s1;
+ l1 = strlen(s1);
+ while (l1 >= l2) {
+ l1--;
+ if (!memcmp(s1, s2, l2))
+ return (char *)s1;
+ s1++;
+ }
+ return NULL;
+}
+
+/*
+ * Returns whether @pfx appears in @string, either at the very start, or
+ * elsewhere but preceded by a space character.
+ */
+static bool string_contains_prefix(const u8 *string, const u8 *pfx)
+{
+ const u8 *str;
+
+ str = __strstr(string, pfx);
+ return str == string || (str > string && *(str - 1) == ' ');
+}
+
+static bool cmdline_has(void *fdt, const u8 *word)
+{
+ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+ int node;
+ const u8 *prop;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ goto out;
+
+ prop = fdt_getprop(fdt, node, "bootargs", NULL);
+ if (!prop)
+ goto out;
+
+ if (string_contains_prefix(prop, word))
+ return true;
+
+ if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
+ goto out;
+
+ return false;
+ }
+out:
+ return string_contains_prefix(CONFIG_CMDLINE, word);
+}
+
+static u64 get_kaslr_seed(void *fdt)
+{
+ int node, len;
+ fdt64_t *prop;
+ u64 ret;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ return 0;
+
+ prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+ if (!prop || len != sizeof(u64))
+ return 0;
+
+ ret = fdt64_to_cpu(*prop);
+ *prop = 0;
+ return ret;
+}
+
+static u64 kaslr_early_init(void *fdt)
+{
+ u64 seed;
+
+ if (cmdline_has(fdt, "nokaslr"))
+ return 0;
+
+ seed = get_kaslr_seed(fdt);
+ if (!seed) {
+#ifdef CONFIG_ARCH_RANDOM
+ if (!__early_cpu_has_rndr() ||
+ !__arm64_rndr((unsigned long *)&seed))
+#endif
+ return 0;
+ }
+
+ /*
+ * OK, so we are proceeding with KASLR enabled. Calculate a suitable
+ * kernel image offset from the seed. Let's place the kernel in the
+ * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
+ * the lower and upper quarters to avoid colliding with other
+ * allocations.
+ */
+ return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
+}
+
+extern const Elf64_Rela rela_start[], rela_end[];
+extern const u64 relr_start[], relr_end[];
+
+static void relocate_kernel(u64 offset)
+{
+ const Elf64_Rela *rela;
+ const u64 *relr;
+ u64 *place;
+
+ for (rela = rela_start; rela < rela_end; rela++) {
+ if (ELF64_R_TYPE(rela->r_info) != R_AARCH64_RELATIVE)
+ continue;
+ place = (u64 *)(rela->r_offset + offset);
+ *place = rela->r_addend + offset;
+ }
+
+ if (!IS_ENABLED(CONFIG_RELR) || !offset)
+ return;
+
+ /*
+ * Apply RELR relocations.
+ *
+ * RELR is a compressed format for storing relative relocations. The
+ * encoded sequence of entries looks like:
+ * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
+ *
+ * i.e. start with an address, followed by any number of bitmaps. The
+ * address entry encodes 1 relocation. The subsequent bitmap entries
+ * encode up to 63 relocations each, at subsequent offsets following
+ * the last address entry.
+ *
+ * The bitmap entries must have 1 in the least significant bit. The
+ * assumption here is that an address cannot have 1 in lsb. Odd
+ * addresses are not supported. Any odd addresses are stored in the
+ * RELA section, which is handled above.
+ *
+ * Excluding the least significant bit in the bitmap, each non-zero bit
+ * in the bitmap represents a relocation to be applied to a
+ * corresponding machine word that follows the base address word. The
+ * second least significant bit represents the machine word immediately
+ * following the initial address, and each bit that follows represents
+ * the next word, in linear order. As such, a single bitmap can encode
+ * up to 63 relocations in a 64-bit object.
+ */
+ for (relr = relr_start; relr < relr_end; relr++) {
+ u64 *p, r = *relr;
+
+ if ((r & 1) == 0) {
+ place = (u64 *)(r + offset);
+ *place++ += offset;
+ } else {
+ for (p = place; r; p++) {
+ r >>= 1;
+ if (r & 1)
+ *p += offset;
+ }
+ place += 63;
+ }
+ }
+}
+
+extern void idmap_cpu_replace_ttbr1(void *pgdir);
+
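+/*
+ * map_range - install a mapping for a range of virtual addresses
+ *
+ * @pgd:          in/out page table allocator cursor: points to the next free
+ *                table page, and is advanced whenever a new table has to be
+ *                added for a finer grained mapping
+ * @start:        virtual start address of the range (inclusive)
+ * @end:          virtual end address of the range (exclusive)
+ * @pa:           physical address that @start should map to
+ * @prot:         permission attributes of the mapping; an empty set of
+ *                attributes writes non-present entries, i.e., unmaps the range
+ * @level:        translation level of @tbl
+ * @tbl:          page table at @level that covers @start
+ * @may_use_cont: whether the contiguous attribute may be set on the entries
+ */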
+static void map_range(pgd_t **pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
+ int level, pte_t *tbl, bool may_use_cont)
+{
+ u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
+ u64 protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
+ int lshift = (3 - level) * (PAGE_SHIFT - 3);
+ u64 lmask = (PAGE_SIZE << lshift) - 1;
+
+ // Advance tbl to the entry that covers start
+ tbl += (start >> (lshift + PAGE_SHIFT)) % BIT(PAGE_SHIFT - 3);
+
+ // Set the right block/page bits for this level unless we are
+ // clearing the mapping
+ if (protval)
+ protval |= (level < 3) ? PMD_TYPE_SECT : PTE_TYPE_PAGE;
+
+ while (start < end) {
+ u64 next = min((start | lmask) + 1, end);
+
+ if (level < 3 &&
+ (start & lmask || next & lmask || pa & lmask)) {
+ // This chunk needs a finer grained mapping
+ // Put down a table mapping if necessary and recurse
+ if (pte_none(*tbl)) {
+ *tbl = __pte(__phys_to_pte_val((u64)*pgd) |
+ PMD_TYPE_TABLE);
+ *pgd += PTRS_PER_PTE;
+ }
+ map_range(pgd, start, next, pa, prot, level + 1,
+ (pte_t *)__pte_to_phys(*tbl), may_use_cont);
+ } else {
+ // Start a contiguous range if start and pa are
+ // suitably aligned
+ if (((start | pa) & cmask) == 0 && may_use_cont)
+ protval |= PTE_CONT;
+ // Clear the contiguous attribute if the remaining
+ // range does not cover a contiguous block
+ if ((end & ~cmask) <= start)
+ protval &= ~PTE_CONT;
+ // Put down a block or page mapping
+ *tbl = __pte(__phys_to_pte_val(pa) | protval);
+ }
+ pa += next - start;
+ start = next;
+ tbl++;
+ }
+}
+
+static void map_segment(pgd_t **pgd, u64 va_offset, void *start, void *end,
+ pgprot_t prot, bool may_use_cont)
+{
+ map_range(pgd, ((u64)start + va_offset) & ~PAGE_OFFSET,
+ ((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
+ prot, 4 - CONFIG_PGTABLE_LEVELS, (pte_t *)init_pg_dir,
+ may_use_cont);
+}
+
+static void unmap_segment(u64 va_offset, void *start, void *end)
+{
+ map_range(NULL, ((u64)start + va_offset) & ~PAGE_OFFSET,
+ ((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
+ __pgprot(0), 4 - CONFIG_PGTABLE_LEVELS, (pte_t *)init_pg_dir,
+ false);
+}
+
+/*
+ * Open coded check for BTI, only for use to determine the configuration
+ * of the early mappings before the cpufeature code has run.
+ */
+static bool arm64_early_this_cpu_has_bti(void)
+{
+ u64 pfr1;
+
+ if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
+ return false;
+
+ pfr1 = read_sysreg_s(SYS_ID_AA64PFR1_EL1);
+ return cpuid_feature_extract_unsigned_field(pfr1,
+ ID_AA64PFR1_BT_SHIFT);
+}
+
+static bool arm64_early_this_cpu_has_e0pd(void)
+{
+ u64 mmfr2;
+
+ if (!IS_ENABLED(CONFIG_ARM64_E0PD))
+ return false;
+
+ mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+ return cpuid_feature_extract_unsigned_field(mmfr2,
+ ID_AA64MMFR2_E0PD_SHIFT);
+}
+
+static void map_kernel(void *fdt, u64 kaslr_offset, u64 va_offset)
+{
+ pgd_t *pgdp = (void *)init_pg_dir + PAGE_SIZE;
+ pgprot_t text_prot = PAGE_KERNEL_ROX;
+ pgprot_t data_prot = PAGE_KERNEL;
+ pgprot_t prot;
+
+ if (cmdline_has(fdt, "rodata=off"))
+ text_prot = PAGE_KERNEL_EXEC;
+
+ // If we have a CPU that supports BTI and a kernel built for
+ // BTI then mark the kernel executable text as guarded pages
+ // now so we don't have to rewrite the page tables later.
+ if (arm64_early_this_cpu_has_bti() && !cmdline_has(fdt, "arm64.nobti"))
+ text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP);
+
+ // Assume that any CPU that does not implement E0PD needs KPTI to
+ // ensure that KASLR randomized addresses will not leak. This means we
+ // need to use non-global mappings for the kernel text and data.
+ if (!arm64_early_this_cpu_has_e0pd() && kaslr_offset >= MIN_KIMG_ALIGN) {
+ text_prot = __pgprot_modify(text_prot, PTE_NG, PTE_NG);
+ data_prot = __pgprot_modify(data_prot, PTE_NG, PTE_NG);
+ }
+
+ // Map all code read-write on the first pass for relocation processing
+ prot = IS_ENABLED(CONFIG_RELOCATABLE) ? data_prot : text_prot;
+
+ map_segment(&pgdp, va_offset, _stext, _etext, prot, true);
+ map_segment(&pgdp, va_offset, __start_rodata, __inittext_begin, data_prot, false);
+ map_segment(&pgdp, va_offset, __inittext_begin, __inittext_end, prot, false);
+ map_segment(&pgdp, va_offset, __initdata_begin, __initdata_end, data_prot, false);
+ map_segment(&pgdp, va_offset, _data, init_pg_dir, data_prot, true);
+ // omit [init_pg_dir, _end] - it doesn't need a kernel mapping
+ dsb(ishst);
+
+ idmap_cpu_replace_ttbr1(init_pg_dir);
+
+ if (IS_ENABLED(CONFIG_RELOCATABLE)) {
+ relocate_kernel(kaslr_offset);
+
+ // Unmap the text region before remapping it, to avoid
+ // potential TLB conflicts on the contiguous descriptors. This
+ // assumes that it is permitted to clear the valid bit on a
+ // live descriptor with the CONT bit set.
+ unmap_segment(va_offset, _stext, _etext);
+ dsb(ishst);
+ isb();
+ __tlbi(vmalle1);
+ isb();
+
+ // Remap these segments with different permissions
+ // No new page table allocations should be needed
+ map_segment(NULL, va_offset, _stext, _etext, text_prot, true);
+ map_segment(NULL, va_offset, __inittext_begin, __inittext_end,
+ text_prot, false);
+ dsb(ishst);
+ }
+}
+
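+/*
+ * early_map_kernel() - create the initial kernel mapping and apply relocations
+ *
+ * Called from __primary_switch() with the MMU and caches enabled, running
+ * from the ID map. @fdt is the ID map address of the device tree blob.
+ *
+ * Returns the lower 16 bits of the KASLR displacement, which the caller
+ * passes on to __primary_switched() to be recorded as memstart_offset_seed.
+ */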
+asmlinkage u64 early_map_kernel(void *fdt)
+{
+ u64 kaslr_seed = 0, kaslr_offset = 0;
+ u64 va_base = KIMAGE_VADDR;
+ u64 pa_base = (u64)&_text;
+
+ // Clear the initial page tables before populating them
+ memset(init_pg_dir, 0, (u64)init_pg_end - (u64)init_pg_dir);
+
+ // The virtual KASLR displacement modulo 2MiB is decided by the
+ // physical placement of the image, as otherwise, we might not be able
+ // to create the early kernel mapping using 2 MiB block descriptors. So
+ // take the low bits of the KASLR offset from the physical address, and
+ // fill in the high bits from the seed.
+ if (IS_ENABLED(CONFIG_RELOCATABLE)) {
+ kaslr_offset = pa_base & (MIN_KIMG_ALIGN - 1);
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+ kaslr_seed = kaslr_early_init(fdt);
+ kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
+ }
+ va_base += kaslr_offset;
+ }
+
+ map_kernel(fdt, kaslr_offset, va_base - pa_base);
+
+ // Return the lower 16 bits of the seed - this will be
+ // used to randomize the linear map
+ return kaslr_seed & U16_MAX;
+}
deleted file mode 100644
--- a/arch/arm64/kernel/pi/kaslr_early.c
+++ /dev/null
@@ -1,112 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-// Copyright 2022 Google LLC
-// Author: Ard Biesheuvel <ardb@google.com>
-
-// NOTE: code in this file runs *very* early, and is not permitted to use
-// global variables or anything that relies on absolute addressing.
-
-#include <linux/libfdt.h>
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <linux/types.h>
-#include <linux/sizes.h>
-#include <linux/string.h>
-
-#include <asm/archrandom.h>
-#include <asm/memory.h>
-
-/* taken from lib/string.c */
-static char *__strstr(const char *s1, const char *s2)
-{
- size_t l1, l2;
-
- l2 = strlen(s2);
- if (!l2)
- return (char *)s1;
- l1 = strlen(s1);
- while (l1 >= l2) {
- l1--;
- if (!memcmp(s1, s2, l2))
- return (char *)s1;
- s1++;
- }
- return NULL;
-}
-static bool cmdline_contains_nokaslr(const u8 *cmdline)
-{
- const u8 *str;
-
- str = __strstr(cmdline, "nokaslr");
- return str == cmdline || (str > cmdline && *(str - 1) == ' ');
-}
-
-static bool is_kaslr_disabled_cmdline(void *fdt)
-{
- if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
- int node;
- const u8 *prop;
-
- node = fdt_path_offset(fdt, "/chosen");
- if (node < 0)
- goto out;
-
- prop = fdt_getprop(fdt, node, "bootargs", NULL);
- if (!prop)
- goto out;
-
- if (cmdline_contains_nokaslr(prop))
- return true;
-
- if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
- goto out;
-
- return false;
- }
-out:
- return cmdline_contains_nokaslr(CONFIG_CMDLINE);
-}
-
-static u64 get_kaslr_seed(void *fdt)
-{
- int node, len;
- fdt64_t *prop;
- u64 ret;
-
- node = fdt_path_offset(fdt, "/chosen");
- if (node < 0)
- return 0;
-
- prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
- if (!prop || len != sizeof(u64))
- return 0;
-
- ret = fdt64_to_cpu(*prop);
- *prop = 0;
- return ret;
-}
-
-asmlinkage u64 kaslr_early_init(void *fdt)
-{
- u64 seed;
-
- if (is_kaslr_disabled_cmdline(fdt))
- return 0;
-
- seed = get_kaslr_seed(fdt);
- if (!seed) {
-#ifdef CONFIG_ARCH_RANDOM
- if (!__early_cpu_has_rndr() ||
- !__arm64_rndr((unsigned long *)&seed))
-#endif
- return 0;
- }
-
- /*
- * OK, so we are proceeding with KASLR enabled. Calculate a suitable
- * kernel image offset from the seed. Let's place the kernel in the
- * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
- * the lower and upper quarters to avoid colliding with other
- * allocations.
- */
- return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
-}
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -257,15 +257,15 @@ SECTIONS
HYPERVISOR_RELOC_SECTION
.rela.dyn : ALIGN(8) {
- __rela_start = .;
+ __pi_rela_start = .;
*(.rela .rela*)
- __rela_end = .;
+ __pi_rela_end = .;
}
.relr.dyn : ALIGN(8) {
- __relr_start = .;
+ __pi_relr_start = .;
*(.relr.dyn)
- __relr_end = .;
+ __pi_relr_end = .;
}
. = ALIGN(SEGMENT_ALIGN);
@@ -309,11 +309,14 @@ SECTIONS
BSS_SECTION(SBSS_ALIGN, 0, 0)
- . = ALIGN(PAGE_SIZE);
+ . = ALIGN(SEGMENT_ALIGN);
init_pg_dir = .;
. += INIT_DIR_SIZE;
init_pg_end = .;
+ . += SZ_4K;
+ primary_init_stack = .;
+
. = ALIGN(SEGMENT_ALIGN);
__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
_end = .;
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -196,6 +196,7 @@ SYM_FUNC_START(idmap_cpu_replace_ttbr1)
ret
SYM_FUNC_END(idmap_cpu_replace_ttbr1)
+SYM_FUNC_ALIAS(__pi_idmap_cpu_replace_ttbr1, idmap_cpu_replace_ttbr1)
.popsection
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
The asm version of the kernel mapping code works fine for creating a coarse
grained identity map, but for mapping the kernel down to its exact boundaries
with the right attributes, it is not suitable. This is why we create a
preliminary RWX kernel mapping first, and then rebuild it from scratch later
on.

So let's reimplement this, and along with it the relocation routines, in C,
in a way that will make it unnecessary to create the kernel page tables yet
another time in paging_init().

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/include/asm/pgtable-prot.h   |   2 +
 arch/arm64/kernel/Makefile              |   4 +-
 arch/arm64/kernel/head.S                | 157 +--------
 arch/arm64/kernel/image-vars.h          |  16 +
 arch/arm64/kernel/pi/Makefile           |   2 +-
 arch/arm64/kernel/pi/early_map_kernel.c | 368 ++++++++++++++++++++
 arch/arm64/kernel/pi/kaslr_early.c      | 112 ------
 arch/arm64/kernel/vmlinux.lds.S         |  13 +-
 arch/arm64/mm/proc.S                    |   1 +
 9 files changed, 410 insertions(+), 265 deletions(-)
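
An illustrative aside (not part of the patch): the standalone userspace sketch
below mimics the arithmetic early_map_kernel() uses to compose the randomized
virtual offset - the bits below MIN_KIMG_ALIGN are taken from the physical
placement of the image so that 2 MiB block mappings remain possible, the bits
above it come from the seed - and how the value returned for linear map
randomization is derived. The sample load address and seed are made up, and
MIN_KIMG_ALIGN/VA_BITS_MIN are hard-coded to their usual defaults here.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MIN_KIMG_ALIGN	(UINT64_C(2) << 20)	/* 2 MiB kernel VA alignment */
#define VA_BITS_MIN	48

int main(void)
{
	uint64_t pa_base = UINT64_C(0x40283000);	 /* hypothetical load address */
	uint64_t seed	 = UINT64_C(0x123456789abcdef0); /* hypothetical kaslr-seed   */

	/* kaslr_early_init(): displacement into the middle of the VA space */
	uint64_t va_disp = (UINT64_C(1) << (VA_BITS_MIN - 3)) +
			   (seed & ((UINT64_C(1) << (VA_BITS_MIN - 2)) - 1));

	/*
	 * early_map_kernel(): keep the sub-2 MiB bits of the physical address
	 * and take the bits above MIN_KIMG_ALIGN from the seeded displacement.
	 */
	uint64_t kaslr_offset = (pa_base & (MIN_KIMG_ALIGN - 1)) |
				(va_disp & ~(MIN_KIMG_ALIGN - 1));

	printf("kaslr_offset    = %#" PRIx64 "\n", kaslr_offset);
	printf("linear map seed = %#" PRIx64 "\n", va_disp & 0xffff);
	return 0;
}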