diff mbox series

[4/4] arm64: kexec: Change relocate_kernel to C code

Message ID 20240328115656.24090-5-piliu@redhat.com (mailing list archive)
State New, archived
Headers show
Series arm64: kexec: translate relocate_kernel.S to C language | expand

Commit Message

Pingfan Liu March 28, 2024, 11:56 a.m. UTC
The kexec_relocate.o is a self-contained section, and it should be PIE.

Besides that, a C function call requires a stack, which is built on the idmap
at the rear of kimage->control_code_page.

Signed-off-by: Pingfan Liu <piliu@redhat.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
To: linux-arm-kernel@lists.infradead.org
---
 arch/arm64/kernel/Makefile          |   1 +
 arch/arm64/kernel/asm-offsets.c     |  10 --
 arch/arm64/kernel/machine_kexec.c   |   9 +-
 arch/arm64/kernel/relocate_kernel.S | 100 --------------
 arch/arm64/kernel/relocate_kernel.c | 197 ++++++++++++++++++++++++++++
 arch/arm64/kernel/vmlinux.lds.S     |   1 +
 6 files changed, 206 insertions(+), 112 deletions(-)
 delete mode 100644 arch/arm64/kernel/relocate_kernel.S
 create mode 100644 arch/arm64/kernel/relocate_kernel.c

Comments

Mark Rutland April 2, 2024, 9:55 a.m. UTC | #1
On Thu, Mar 28, 2024 at 07:56:54PM +0800, Pingfan Liu wrote:
> The kexec_relocate.o is a self-contained section, and it should be PIE.
> 
> Beside that, C function call requires stack, which is built on the idmap
> of the rear of kimage->control_code_page.
> 
> Signed-off-by: Pingfan Liu <piliu@redhat.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Ard Biesheuvel <ardb@kernel.org>
> Cc: Kees Cook <keescook@chromium.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
> To: linux-arm-kernel@lists.infradead.org
> ---
>  arch/arm64/kernel/Makefile          |   1 +
>  arch/arm64/kernel/asm-offsets.c     |  10 --
>  arch/arm64/kernel/machine_kexec.c   |   9 +-
>  arch/arm64/kernel/relocate_kernel.S | 100 --------------
>  arch/arm64/kernel/relocate_kernel.c | 197 ++++++++++++++++++++++++++++
>  arch/arm64/kernel/vmlinux.lds.S     |   1 +
>  6 files changed, 206 insertions(+), 112 deletions(-)
>  delete mode 100644 arch/arm64/kernel/relocate_kernel.S
>  create mode 100644 arch/arm64/kernel/relocate_kernel.c

> +static void  __kexec_section turn_mmu_off(void)
> +{
> +	u64 tmp = INIT_SCTLR_EL1_MMU_OFF;
> +
> +	/* pre_disable_mmu_workaround */
> +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
> +	isb();
> +#endif
> +	write_sysreg(tmp, sctlr_el1);
> +	isb();
> +}

Disabling the MMU cannot be done from C; as soon as we write to SCTLR_EL1 (even
before the ISB) we cannot safely access the stack until that has been explicitly
cleaned+invalidated to the PoC (and that has to be done by VA).

I don't think we should bother trying to move this to C; the MMU-off portions
should remain as assembly.

If you want to move the MMU-on portions to C, then *maybe* that's worthwhile, but
given the diffstat I reckon it's better to leave this all as asm for now. We
can make this more legible without converting it to C.

Mark.
diff mbox series

Patch

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 467cb7117273..5fc539c6d094 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -13,6 +13,7 @@  CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
 # checks due to randomize_kstack_offset.
 CFLAGS_REMOVE_syscall.o	 = -fstack-protector -fstack-protector-strong
 CFLAGS_syscall.o	+= -fno-stack-protector
+CFLAGS_relocate_kernel.o	+= -fPIE
 
 # When KASAN is enabled, a stack trace is recorded for every alloc/free, which
 # can significantly impact performance. Avoid instrumenting the stack trace
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 5a7dbbe0ce63..ce3f3bed76a4 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -186,16 +186,6 @@  int main(void)
 #endif
   BLANK();
 #endif
-#ifdef CONFIG_KEXEC_CORE
-  DEFINE(KIMAGE_ARCH_DTB_MEM,		offsetof(struct kimage, arch.dtb_mem));
-  DEFINE(KIMAGE_ARCH_EL2_VECTORS,	offsetof(struct kimage, arch.el2_vectors));
-  DEFINE(KIMAGE_ARCH_ZERO_PAGE,		offsetof(struct kimage, arch.zero_page));
-  DEFINE(KIMAGE_ARCH_PHYS_OFFSET,	offsetof(struct kimage, arch.phys_offset));
-  DEFINE(KIMAGE_ARCH_TTBR1,		offsetof(struct kimage, arch.ttbr1));
-  DEFINE(KIMAGE_HEAD,			offsetof(struct kimage, head));
-  DEFINE(KIMAGE_START,			offsetof(struct kimage, start));
-  BLANK();
-#endif
 #ifdef CONFIG_FUNCTION_TRACER
   DEFINE(FTRACE_OPS_FUNC,		offsetof(struct ftrace_ops, func));
 #endif
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index b4ae24dcac8c..31d96655664b 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -198,13 +198,18 @@  void machine_kexec(struct kimage *kimage)
 		restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem,
 			0, 0);
 	} else {
-		void (*kernel_reloc)(struct kimage *kimage);
+		void (*kernel_reloc)(struct kimage *kimage, unsigned long sp);
+		u64 new_sp = (u64)(page_to_pfn(kimage->control_code_page) << PAGE_SHIFT)
+					+ KEXEC_CONTROL_PAGE_SIZE;
 
 		if (is_hyp_nvhe())
 			__hyp_set_vectors(kimage->arch.el2_vectors);
 		cpu_install_ttbr0(kimage->arch.ttbr0, kimage->arch.t0sz);
 		kernel_reloc = (void *)kimage->arch.kern_reloc;
-		kernel_reloc(kimage);
+		pr_info("jump to relocation at: 0x%llx, with sp:0x%llx\n",
+			(u64)kernel_reloc, new_sp);
+		/* new_sp is accessible through idmap */
+		kernel_reloc(kimage, new_sp);
 	}
 
 	BUG(); /* Should never get here. */
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
deleted file mode 100644
index 413f899e4ac6..000000000000
--- a/arch/arm64/kernel/relocate_kernel.S
+++ /dev/null
@@ -1,100 +0,0 @@ 
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * kexec for arm64
- *
- * Copyright (C) Linaro.
- * Copyright (C) Huawei Futurewei Technologies.
- * Copyright (C) 2021, Microsoft Corporation.
- * Pasha Tatashin <pasha.tatashin@soleen.com>
- */
-
-#include <linux/kexec.h>
-#include <linux/linkage.h>
-
-#include <asm/assembler.h>
-#include <asm/kexec.h>
-#include <asm/page.h>
-#include <asm/sysreg.h>
-#include <asm/virt.h>
-
-.macro turn_off_mmu tmp1, tmp2
-	mov_q   \tmp1, INIT_SCTLR_EL1_MMU_OFF
-	pre_disable_mmu_workaround
-	msr	sctlr_el1, \tmp1
-	isb
-.endm
-
-.section    ".kexec_relocate.text", "ax"
-/*
- * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
- *
- * The memory that the old kernel occupies may be overwritten when copying the
- * new image to its final location.  To assure that the
- * arm64_relocate_new_kernel routine which does that copy is not overwritten,
- * all code and data needed by arm64_relocate_new_kernel must be between the
- * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end.  The
- * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
- * safe memory that has been set up to be preserved during the copy operation.
- */
-SYM_CODE_START(arm64_relocate_new_kernel)
-	/*
-	 * The kimage structure isn't allocated specially and may be clobbered
-	 * during relocation. We must load any values we need from it prior to
-	 * any relocation occurring.
-	 */
-	ldr	x28, [x0, #KIMAGE_START]
-	ldr	x27, [x0, #KIMAGE_ARCH_EL2_VECTORS]
-	ldr	x26, [x0, #KIMAGE_ARCH_DTB_MEM]
-
-	/* Setup the list loop variables. */
-	ldr	x18, [x0, #KIMAGE_ARCH_ZERO_PAGE] /* x18 = zero page for BBM */
-	ldr	x17, [x0, #KIMAGE_ARCH_TTBR1]	/* x17 = linear map copy */
-	ldr	x16, [x0, #KIMAGE_HEAD]		/* x16 = kimage_head */
-	ldr	x22, [x0, #KIMAGE_ARCH_PHYS_OFFSET]	/* x22 phys_offset */
-	raw_dcache_line_size x15, x1		/* x15 = dcache line size */
-	break_before_make_ttbr_switch	x18, x17, x1, x2 /* set linear map */
-.Lloop:
-	and	x12, x16, PAGE_MASK		/* x12 = addr */
-	sub	x12, x12, x22			/* Convert x12 to virt */
-	/* Test the entry flags. */
-.Ltest_source:
-	tbz	x16, IND_SOURCE_BIT, .Ltest_indirection
-
-	/* Invalidate dest page to PoC. */
-	mov	x19, x13
-	copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8
-	add	x1, x19, #PAGE_SIZE
-	dcache_by_myline_op civac, sy, x19, x1, x15, x20
-	b	.Lnext
-.Ltest_indirection:
-	tbz	x16, IND_INDIRECTION_BIT, .Ltest_destination
-	mov	x14, x12			/* ptr = addr */
-	b	.Lnext
-.Ltest_destination:
-	tbz	x16, IND_DESTINATION_BIT, .Lnext
-	mov	x13, x12			/* dest = addr */
-.Lnext:
-	ldr	x16, [x14], #8			/* entry = *ptr++ */
-	tbz	x16, IND_DONE_BIT, .Lloop	/* while (!(entry & DONE)) */
-	/* wait for writes from copy_page to finish */
-	dsb	nsh
-	ic	iallu
-	dsb	nsh
-	isb
-	turn_off_mmu x12, x13
-
-	/* Start new image. */
-	cbz	x27, .Lel1
-	mov	x1, x28				/* kernel entry point */
-	mov	x2, x26				/* dtb address */
-	mov	x3, xzr
-	mov	x4, xzr
-	mov     x0, #HVC_SOFT_RESTART
-	hvc	#0				/* Jumps from el2 */
-.Lel1:
-	mov	x0, x26				/* dtb address */
-	mov	x1, xzr
-	mov	x2, xzr
-	mov	x3, xzr
-	br	x28				/* Jumps from el1 */
-SYM_CODE_END(arm64_relocate_new_kernel)
diff --git a/arch/arm64/kernel/relocate_kernel.c b/arch/arm64/kernel/relocate_kernel.c
new file mode 100644
index 000000000000..348515a0f497
--- /dev/null
+++ b/arch/arm64/kernel/relocate_kernel.c
@@ -0,0 +1,197 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ * Copyright (C) 2021, Microsoft Corporation.
+ * Pasha Tatashin <pasha.tatashin@soleen.com>
+ * Copyright (C) 2024, Red Hat, Inc.
+ */
+
+#include <linux/kconfig.h>
+#include <linux/kexec.h>
+#include <linux/compiler_types.h>
+
+#include <asm/pgtable-hwdef.h>
+#include <asm/cpufeature.h>
+#include <asm/kexec.h>
+#include <asm/page.h>
+#include <asm/sysreg.h>
+#include <asm/virt.h>
+
+#define __kexec_section __noinstr_section(".kexec_relocate.text")
+#define __kexec_entry_section __noinstr_section(".kexec_relocate.entry.text")
+
+static u64 __kexec_section offset_ttbr1(u64 ttbr)
+{
+#ifdef CONFIG_ARM64_VA_BITS_52
+	u64 tmp;
+
+	tmp = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
+	tmp &= (0xf << ID_AA64MMFR2_EL1_VARange_SHIFT);
+	if (!tmp)
+		ttbr |= TTBR1_BADDR_4852_OFFSET;
+#endif
+	return ttbr;
+}
+
+void __kexec_section make_ttbr1_switch(phys_addr_t zero_page,
+		phys_addr_t pgtable)
+{
+	unsigned long zero_ttbr;
+	unsigned long pgtable_ttbr;
+
+	zero_ttbr = phys_to_ttbr(zero_page);
+	pgtable_ttbr = phys_to_ttbr(pgtable);
+	pgtable_ttbr = offset_ttbr1(pgtable_ttbr);
+
+	write_sysreg(zero_ttbr, ttbr1_el1);
+	isb();
+	__tlbi(vmalle1);
+	dsb(nsh);
+
+	write_sysreg(pgtable_ttbr, ttbr1_el1);
+	isb();
+}
+
+static void __kexec_section sync(void)
+{
+	dsb(nsh);
+	asm volatile("ic iallu");
+	dsb(nsh);
+	isb();
+}
+
+static void  __kexec_section turn_mmu_off(void)
+{
+	u64 tmp = INIT_SCTLR_EL1_MMU_OFF;
+
+	/* pre_disable_mmu_workaround */
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
+	isb();
+#endif
+	write_sysreg(tmp, sctlr_el1);
+	isb();
+}
+
+/* The parameter lays out according to the hvc call */
+static void __kexec_section hvc_call(unsigned long vector, unsigned long entry,
+		unsigned long dtb, unsigned long x3, unsigned long x4)
+{
+	asm volatile("hvc #0");
+}
+
+typedef void (*kernel_entry)(u64 dtb, u64 x1, u64 x2, u64 x3);
+
+static __always_inline void relocate_copy_page(void *dst, void *src)
+{
+	int i = PAGE_SIZE >> 3;
+	unsigned long *s, *d;
+
+	s = (unsigned long *)src;
+	d = (unsigned long *)dst;
+	for (int j = 0; j < i; j++, d++, s++)
+		*d = *s;
+}
+
+/* Borrowed from clean_dcache_range_nopatch() in arch/arm64/kernel/alternative.c */
+static __always_inline void clean_dcache_range(u64 d_size, u64 start, u64 end)
+{
+	u64 cur;
+
+	cur = start & ~(d_size - 1);
+	do {
+		/*
+		 * We must clean+invalidate to the PoC in order to avoid
+		 * Cortex-A53 errata 826319, 827319, 824069 and 819472
+		 * (this corresponds to ARM64_WORKAROUND_CLEAN_CACHE)
+		 */
+		asm volatile("dc civac, %0" : : "r" (cur) : "memory");
+	} while (cur += d_size, cur < end);
+}
+
+void __kexec_section __arm64_relocate_new_kernel(struct kimage *kimage)
+{
+	phys_addr_t dtb, el2_vectors, zero_page, ttbr1;
+	u64 start, phys_offset, ctr_el0, d_size;
+	kimage_entry_t *ptr, entry;
+	char *src, *dst;
+
+	zero_page = kimage->arch.zero_page;
+	ttbr1 = kimage->arch.ttbr1;
+	start = kimage->start;
+	dtb = kimage->arch.dtb_mem;
+	el2_vectors = kimage->arch.el2_vectors;
+	phys_offset = kimage->arch.phys_offset;
+	d_size = kimage->arch.d_size;
+
+	make_ttbr1_switch(zero_page, ttbr1);
+
+	/* kimage->head is fetched once */
+	for (ptr = &kimage->head; (entry = *ptr) && !(entry & IND_DONE);
+	     ptr = (entry & IND_INDIRECTION) ?
+		   (void *)((entry & PAGE_MASK) - phys_offset) : ptr + 1) {
+
+		if (entry & IND_INDIRECTION)
+			continue;
+		else if (entry & IND_DESTINATION)
+			dst = (char *)((entry & PAGE_MASK) - phys_offset);
+		else if (entry & IND_SOURCE) {
+			src = (char *)((entry & PAGE_MASK) - phys_offset);
+			relocate_copy_page(dst, src);
+			/* Force all cache line in page to PoC */
+			clean_dcache_range(d_size, (u64)dst, (u64)dst + PAGE_SIZE);
+			dst += PAGE_SIZE;
+		}
+
+	}
+	/* wait for writes from copy_page to finish */
+	sync();
+	turn_mmu_off();
+
+	if (!el2_vectors) {
+		kernel_entry entry = (kernel_entry)start;
+
+		entry(dtb, 0, 0, 0);
+	} else {
+		/* Jumps from el2 */
+		hvc_call(HVC_SOFT_RESTART, start, dtb, 0, 0);
+	}
+
+}
+
+extern void __arm64_relocate_new_kernel(struct kimage *image);
+
+/*
+ * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
+ *
+ * The memory that the old kernel occupies may be overwritten when copying the
+ * new image to its final location.  To assure that the
+ * arm64_relocate_new_kernel routine which does that copy is not overwritten,
+ * all code and data needed by arm64_relocate_new_kernel must be between the
+ * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end.  The
+ * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
+ * safe memory that has been set up to be preserved during the copy operation.
+ *
+ * Come here through ttbr0, and ttbr1 still takes effect.
+ */
+void __kexec_entry_section arm64_relocate_new_kernel(
+				struct kimage *kimage, unsigned long new_sp)
+{
+	/*
+	 * From now on, no local variable so the new sp can be safely prepared.
+	 * The new stack should be on the control page which is safe during copying
+	 */
+	asm volatile(
+		"mov sp, %0;"
+		"mov x0, %1;"
+		"adrp x2, __arm64_relocate_new_kernel;"
+		"add x2, x2, #:lo12:__arm64_relocate_new_kernel;"
+		"br x2;"
+		:
+		: "r" (new_sp), "r" (kimage)
+		:
+	);
+	/* never return */
+}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 51eb382ab3a4..b6781667783c 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -105,6 +105,7 @@  jiffies = jiffies_64;
 #define KEXEC_TEXT					\
 	. = ALIGN(SZ_4K);				\
 	__relocate_new_kernel_start = .;		\
+	*(.kexec_relocate.entry.text)			\
 	*(.kexec_relocate.text)				\
 	__relocate_new_kernel_end = .;
 #else