diff mbox series

[v13,11/40] arm64/gcs: Provide basic EL2 setup to allow GCS usage at EL0 and EL1

Message ID 20241001-arm64-gcs-v13-11-222b78d87eee@kernel.org (mailing list archive)
State Handled Elsewhere
Headers show
Series arm64/gcs: Provide support for GCS in userspace | expand

Commit Message

Mark Brown Oct. 1, 2024, 10:58 p.m. UTC
There is a control HCRX_EL2.GCSEn which must be set to allow GCS
features to take effect at lower ELs and also fine grained traps for GCS
usage at EL0 and EL1.  Configure all these to allow GCS usage by EL0 and
EL1.

We also initialise GCSCR_EL1 and GCSCRE0_EL1 to ensure that we can
execute function call instructions without faulting regardless of the
state when the kernel is started.

Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 arch/arm64/include/asm/el2_setup.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

Comments

Nathan Chancellor Oct. 9, 2024, 8:49 p.m. UTC | #1
Hi Mark,

On Tue, Oct 01, 2024 at 11:58:50PM +0100, Mark Brown wrote:
> There is a control HCRX_EL2.GCSEn which must be set to allow GCS
> features to take effect at lower ELs and also fine grained traps for GCS
> usage at EL0 and EL1.  Configure all these to allow GCS usage by EL0 and
> EL1.
> 
> We also initialise GCSCR_EL1 and GCSCRE0_EL1 to ensure that we can
> execute function call instructions without faulting regardless of the
> state when the kernel is started.
> 
> Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Mark Brown <broonie@kernel.org>

I just bisected a build failure from a failed linker script assertion
that I see with allmodconfig to this change in -next as commit
ff5181d8a2a8 ("arm64/gcs: Provide basic EL2 setup to allow GCS usage at
EL0 and EL1"):

  $ make -skj"$(nproc)" ARCH=arm64 CROSS_COMPILE=aarch64-linux- mrproper allmodconfig vmlinux
  aarch64-linux-ld: HYP init code too big
  make[4]: *** [scripts/Makefile.vmlinux:34: vmlinux] Error 1
  ...

I see this with both GCC 14 and clang 19, in case toolchain version
matters. Bisect log included as well.

Cheers,
Nathan

# bad: [b6270c3bca987530eafc6a15f9d54ecd0033e0e3] Add linux-next specific files for 20241009
# good: [75b607fab38d149f232f01eae5e6392b394dd659] Merge tag 'sched_ext-for-6.12-rc2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
git bisect start 'b6270c3bca987530eafc6a15f9d54ecd0033e0e3' '75b607fab38d149f232f01eae5e6392b394dd659'
# bad: [76d36603db22f0f0774c19147b25f5a0bcac64e6] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git
git bisect bad 76d36603db22f0f0774c19147b25f5a0bcac64e6
# bad: [e90a3e76b4b8080f633a167179f3a76b93077270] Merge branch 'renesas-clk' of git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers.git
git bisect bad e90a3e76b4b8080f633a167179f3a76b93077270
# good: [dd60a5d8b8ac2e7dc6810182a6dbc251a746f09e] Merge branch 'perf-tools-next' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git
git bisect good dd60a5d8b8ac2e7dc6810182a6dbc251a746f09e
# bad: [fa74fd4773673726bfa8f89d15805d8a1b26f855] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/khilman/linux-omap.git
git bisect bad fa74fd4773673726bfa8f89d15805d8a1b26f855
# bad: [a7833d5793f83512f1fb6f36fa7588ea03da6b1b] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/amlogic/linux.git
git bisect bad a7833d5793f83512f1fb6f36fa7588ea03da6b1b
# bad: [c9c0de66c9b5f295f09a116f15401465bdd13263] Merge branch 'for-next/core' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
git bisect bad c9c0de66c9b5f295f09a116f15401465bdd13263
# bad: [506496bcbb4204c9ff5cfe82b1b90e1f14366992] arm64/gcs: Ensure that new threads have a GCS
git bisect bad 506496bcbb4204c9ff5cfe82b1b90e1f14366992
# good: [d0aa2b4351862cc2ce8d97e00c96bffc02ea16af] arm64/gcs: Provide put_user_gcs()
git bisect good d0aa2b4351862cc2ce8d97e00c96bffc02ea16af
# bad: [6497b66ba6945f142902c7e8fce86e47016ead1c] arm64/mm: Map pages for guarded control stack
git bisect bad 6497b66ba6945f142902c7e8fce86e47016ead1c
# bad: [6487c963083c24ede289d4267ffa60a9db668cd4] arm64/cpufeature: Runtime detection of Guarded Control Stack (GCS)
git bisect bad 6487c963083c24ede289d4267ffa60a9db668cd4
# bad: [ff5181d8a2a82c982276a7e035896185c390e856] arm64/gcs: Provide basic EL2 setup to allow GCS usage at EL0 and EL1
git bisect bad ff5181d8a2a82c982276a7e035896185c390e856
# first bad commit: [ff5181d8a2a82c982276a7e035896185c390e856] arm64/gcs: Provide basic EL2 setup to allow GCS usage at EL0 and EL1
Marc Zyngier Oct. 10, 2024, 3:18 p.m. UTC | #2
On Wed, 09 Oct 2024 21:49:03 +0100,
Nathan Chancellor <nathan@kernel.org> wrote:
> 
> Hi Mark,
> 
> On Tue, Oct 01, 2024 at 11:58:50PM +0100, Mark Brown wrote:
> > There is a control HCRX_EL2.GCSEn which must be set to allow GCS
> > features to take effect at lower ELs and also fine grained traps for GCS
> > usage at EL0 and EL1.  Configure all these to allow GCS usage by EL0 and
> > EL1.
> > 
> > We also initialise GCSCR_EL1 and GCSCRE0_EL1 to ensure that we can
> > execute function call instructions without faulting regardless of the
> > state when the kernel is started.
> > 
> > Reviewed-by: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
> > Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> > Signed-off-by: Mark Brown <broonie@kernel.org>
> 
> I just bisected a build failure from a failed linker script assertion
> that I see with allmodconfig to this change in -next as commit
> ff5181d8a2a8 ("arm64/gcs: Provide basic EL2 setup to allow GCS usage at
> EL0 and EL1"):
> 
>   $ make -skj"$(nproc)" ARCH=arm64 CROSS_COMPILE=aarch64-linux- mrproper allmodconfig vmlinux
>   aarch64-linux-ld: HYP init code too big
>   make[4]: *** [scripts/Makefile.vmlinux:34: vmlinux] Error 1
>   ...
> 
> I see this with both GCC 14 and clang 19, in case toolchain version
> matters. Bisect log included as well.

Grmbl... 16 bytes too big. The hack below buys us about ~600 bytes by
removing some duplication, but we're losing half of the space to the
vectors.

Anyway, this is very lightly tested and it may eat your box.

Thanks,

	M.

From 20c98d2647c11db1e40768f92c5998ff5d764a3a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 10 Oct 2024 16:13:26 +0100
Subject: [PATCH] KVM: arm64: Shave a few bytes from the EL2 idmap code

Our idmap is becoming too big, to the point where it doesn't fit in
a 4kB page anymore.

There are some low-hanging fruits though, such as the el2_init_state
horror that is expanded 3 times in the kernel. Let's at least limit
ourselves to two copies, which makes the kernel link again.

At some point, we'll have to have a better way of doing this.

Reported-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20241009204903.GA3353168@thelio-3990X
---
 arch/arm64/include/asm/kvm_asm.h   |  1 +
 arch/arm64/kernel/asm-offsets.c    |  1 +
 arch/arm64/kvm/hyp/nvhe/hyp-init.S | 52 +++++++++++++++++-------------
 3 files changed, 31 insertions(+), 23 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index b36a3b6cc0116..67afac659231e 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -178,6 +178,7 @@ struct kvm_nvhe_init_params {
 	unsigned long hcr_el2;
 	unsigned long vttbr;
 	unsigned long vtcr;
+	unsigned long tmp;
 };
 
 /*
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 27de1dddb0abe..b21dd24b8efc3 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -146,6 +146,7 @@ int main(void)
   DEFINE(NVHE_INIT_HCR_EL2,	offsetof(struct kvm_nvhe_init_params, hcr_el2));
   DEFINE(NVHE_INIT_VTTBR,	offsetof(struct kvm_nvhe_init_params, vttbr));
   DEFINE(NVHE_INIT_VTCR,	offsetof(struct kvm_nvhe_init_params, vtcr));
+  DEFINE(NVHE_INIT_TMP,		offsetof(struct kvm_nvhe_init_params, tmp));
 #endif
 #ifdef CONFIG_CPU_PM
   DEFINE(CPU_CTX_SP,		offsetof(struct cpu_suspend_ctx, sp));
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index 401af1835be6b..fc18662260676 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -24,28 +24,25 @@
 	.align	11
 
 SYM_CODE_START(__kvm_hyp_init)
-	ventry	__invalid		// Synchronous EL2t
-	ventry	__invalid		// IRQ EL2t
-	ventry	__invalid		// FIQ EL2t
-	ventry	__invalid		// Error EL2t
+	ventry	.			// Synchronous EL2t
+	ventry	.			// IRQ EL2t
+	ventry	.			// FIQ EL2t
+	ventry	.			// Error EL2t
 
-	ventry	__invalid		// Synchronous EL2h
-	ventry	__invalid		// IRQ EL2h
-	ventry	__invalid		// FIQ EL2h
-	ventry	__invalid		// Error EL2h
+	ventry	.			// Synchronous EL2h
+	ventry	.			// IRQ EL2h
+	ventry	.			// FIQ EL2h
+	ventry	.			// Error EL2h
 
 	ventry	__do_hyp_init		// Synchronous 64-bit EL1
-	ventry	__invalid		// IRQ 64-bit EL1
-	ventry	__invalid		// FIQ 64-bit EL1
-	ventry	__invalid		// Error 64-bit EL1
+	ventry	.			// IRQ 64-bit EL1
+	ventry	.			// FIQ 64-bit EL1
+	ventry	.			// Error 64-bit EL1
 
-	ventry	__invalid		// Synchronous 32-bit EL1
-	ventry	__invalid		// IRQ 32-bit EL1
-	ventry	__invalid		// FIQ 32-bit EL1
-	ventry	__invalid		// Error 32-bit EL1
-
-__invalid:
-	b	.
+	ventry	.			// Synchronous 32-bit EL1
+	ventry	.			// IRQ 32-bit EL1
+	ventry	.			// FIQ 32-bit EL1
+	ventry	.			// Error 32-bit EL1
 
 	/*
 	 * Only uses x0..x3 so as to not clobber callee-saved SMCCC registers.
@@ -76,6 +73,13 @@ __do_hyp_init:
 	eret
 SYM_CODE_END(__kvm_hyp_init)
 
+SYM_CODE_START_LOCAL(__kvm_init_el2_state)
+	/* Initialize EL2 CPU state to sane values. */
+	init_el2_state				// Clobbers x0..x2
+	finalise_el2_state
+	ret
+SYM_CODE_END(__kvm_init_el2_state)
+
 /*
  * Initialize the hypervisor in EL2.
  *
@@ -102,9 +106,12 @@ SYM_CODE_START_LOCAL(___kvm_hyp_init)
 	// TPIDR_EL2 is used to preserve x0 across the macro maze...
 	isb
 	msr	tpidr_el2, x0
-	init_el2_state
-	finalise_el2_state
+	str	lr, [x0, #NVHE_INIT_TMP]
+
+	bl	__kvm_init_el2_state
+
 	mrs	x0, tpidr_el2
+	ldr	lr, [x0, #NVHE_INIT_TMP]
 
 1:
 	ldr	x1, [x0, #NVHE_INIT_TPIDR_EL2]
@@ -199,9 +206,8 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
 
 2:	msr	SPsel, #1			// We want to use SP_EL{1,2}
 
-	/* Initialize EL2 CPU state to sane values. */
-	init_el2_state				// Clobbers x0..x2
-	finalise_el2_state
+	bl	__kvm_init_el2_state
+
 	__init_el2_nvhe_prepare_eret
 
 	/* Enable MMU, set vectors and stack. */
Catalin Marinas Oct. 10, 2024, 5:16 p.m. UTC | #3
On Thu, Oct 10, 2024 at 04:18:13PM +0100, Marc Zyngier wrote:
> From 20c98d2647c11db1e40768f92c5998ff5d764a3a Mon Sep 17 00:00:00 2001
> From: Marc Zyngier <maz@kernel.org>
> Date: Thu, 10 Oct 2024 16:13:26 +0100
> Subject: [PATCH] KVM: arm64: Shave a few bytes from the EL2 idmap code
> 
> Our idmap is becoming too big, to the point where it doesn't fit in
> a 4kB page anymore.
> 
> There are some low-hanging fruits though, such as the el2_init_state
> horror that is expanded 3 times in the kernel. Let's at least limit
> ourselves to two copies, which makes the kernel link again.
> 
> At some point, we'll have to have a better way of doing this.
> 
> Reported-by: Nathan Chancellor <nathan@kernel.org>
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> Link: https://lore.kernel.org/r/20241009204903.GA3353168@thelio-3990X

Thanks Marc for the quick fix. It looks fine to me, it will keep the
linker quiet for a while. I pushed it to arm64 for-kernelci for the time
being, see if anything falls apart. I'll apply it properly once it gets
a bit more testing.
Marc Zyngier Oct. 11, 2024, 12:55 p.m. UTC | #4
On Thu, 10 Oct 2024 18:16:52 +0100,
Catalin Marinas <catalin.marinas@arm.com> wrote:
> 
> On Thu, Oct 10, 2024 at 04:18:13PM +0100, Marc Zyngier wrote:
> > From 20c98d2647c11db1e40768f92c5998ff5d764a3a Mon Sep 17 00:00:00 2001
> > From: Marc Zyngier <maz@kernel.org>
> > Date: Thu, 10 Oct 2024 16:13:26 +0100
> > Subject: [PATCH] KVM: arm64: Shave a few bytes from the EL2 idmap code
> > 
> > Our idmap is becoming too big, to the point where it doesn't fit in
> > a 4kB page anymore.
> > 
> > There are some low-hanging fruits though, such as the el2_init_state
> > horror that is expanded 3 times in the kernel. Let's at least limit
> > ourselves to two copies, which makes the kernel link again.
> > 
> > At some point, we'll have to have a better way of doing this.
> > 
> > Reported-by: Nathan Chancellor <nathan@kernel.org>
> > Signed-off-by: Marc Zyngier <maz@kernel.org>
> > Link: https://lore.kernel.org/r/20241009204903.GA3353168@thelio-3990X
> 
> Thanks Marc for the quick fix. It looks fine to me, it will keep the
> linker quiet for a while. I pushed it to arm64 for-kernelci for the time
> being, see if anything falls apart. I'll apply it properly once it gets
> a bit more testing.

Works for me. Bu if that helps, I can also queue it as a KVM fix for
6.12, leaving the GCS branch unencumbered. Just let me know.

	M.
Catalin Marinas Oct. 14, 2024, 4:31 p.m. UTC | #5
On Fri, Oct 11, 2024 at 01:55:05PM +0100, Marc Zyngier wrote:
> On Thu, 10 Oct 2024 18:16:52 +0100,
> Catalin Marinas <catalin.marinas@arm.com> wrote:
> > 
> > On Thu, Oct 10, 2024 at 04:18:13PM +0100, Marc Zyngier wrote:
> > > From 20c98d2647c11db1e40768f92c5998ff5d764a3a Mon Sep 17 00:00:00 2001
> > > From: Marc Zyngier <maz@kernel.org>
> > > Date: Thu, 10 Oct 2024 16:13:26 +0100
> > > Subject: [PATCH] KVM: arm64: Shave a few bytes from the EL2 idmap code
> > > 
> > > Our idmap is becoming too big, to the point where it doesn't fit in
> > > a 4kB page anymore.
> > > 
> > > There are some low-hanging fruits though, such as the el2_init_state
> > > horror that is expanded 3 times in the kernel. Let's at least limit
> > > ourselves to two copies, which makes the kernel link again.
> > > 
> > > At some point, we'll have to have a better way of doing this.
> > > 
> > > Reported-by: Nathan Chancellor <nathan@kernel.org>
> > > Signed-off-by: Marc Zyngier <maz@kernel.org>
> > > Link: https://lore.kernel.org/r/20241009204903.GA3353168@thelio-3990X
> > 
> > Thanks Marc for the quick fix. It looks fine to me, it will keep the
> > linker quiet for a while. I pushed it to arm64 for-kernelci for the time
> > being, see if anything falls apart. I'll apply it properly once it gets
> > a bit more testing.
> 
> Works for me. But if that helps, I can also queue it as a KVM fix for
> 6.12, leaving the GCS branch unencumbered. Just let me know.

That would help, thanks. The for-kernelci branch merges the latest mainline
rc anyway, so it will no longer blow up allmodconfig tests.
Catalin Marinas Oct. 15, 2024, 1:05 p.m. UTC | #6
On Thu, Oct 10, 2024 at 04:18:13PM +0100, Marc Zyngier wrote:
> From 20c98d2647c11db1e40768f92c5998ff5d764a3a Mon Sep 17 00:00:00 2001
> From: Marc Zyngier <maz@kernel.org>
> Date: Thu, 10 Oct 2024 16:13:26 +0100
> Subject: [PATCH] KVM: arm64: Shave a few bytes from the EL2 idmap code
> 
> Our idmap is becoming too big, to the point where it doesn't fit in
> a 4kB page anymore.
> 
> There are some low-hanging fruits though, such as the el2_init_state
> horror that is expanded 3 times in the kernel. Let's at least limit
> ourselves to two copies, which makes the kernel link again.
> 
> At some point, we'll have to have a better way of doing this.
> 
> Reported-by: Nathan Chancellor <nathan@kernel.org>
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> Link: https://lore.kernel.org/r/20241009204903.GA3353168@thelio-3990X

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Catalin Marinas <catalin.marinas@arm.com>

Please merge it as a KVM fix if you can. Thanks.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index e0ffdf13a18b..27086a81eae3 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -27,6 +27,14 @@ 
 	ubfx	x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
 	cbz	x0, .Lskip_hcrx_\@
 	mov_q	x0, HCRX_HOST_FLAGS
+
+        /* Enable GCS if supported */
+	mrs_s	x1, SYS_ID_AA64PFR1_EL1
+	ubfx	x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
+	cbz	x1, .Lset_hcrx_\@
+	orr	x0, x0, #HCRX_EL2_GCSEn
+
+.Lset_hcrx_\@:
 	msr_s	SYS_HCRX_EL2, x0
 .Lskip_hcrx_\@:
 .endm
@@ -200,6 +208,16 @@ 
 	orr	x0, x0, #HFGxTR_EL2_nPOR_EL0
 
 .Lskip_poe_fgt_\@:
+	/* GCS depends on PIE so we don't check it if PIE is absent */
+	mrs_s	x1, SYS_ID_AA64PFR1_EL1
+	ubfx	x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
+	cbz	x1, .Lset_fgt_\@
+
+	/* Disable traps of access to GCS registers at EL0 and EL1 */
+	orr	x0, x0, #HFGxTR_EL2_nGCS_EL1_MASK
+	orr	x0, x0, #HFGxTR_EL2_nGCS_EL0_MASK
+
+.Lset_fgt_\@:
 	msr_s	SYS_HFGRTR_EL2, x0
 	msr_s	SYS_HFGWTR_EL2, x0
 	msr_s	SYS_HFGITR_EL2, xzr
@@ -215,6 +233,17 @@ 
 .Lskip_fgt_\@:
 .endm
 
+.macro __init_el2_gcs
+	mrs_s	x1, SYS_ID_AA64PFR1_EL1
+	ubfx	x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
+	cbz	x1, .Lskip_gcs_\@
+
+	/* Ensure GCS is not enabled when we start trying to do BLs */
+	msr_s	SYS_GCSCR_EL1, xzr
+	msr_s	SYS_GCSCRE0_EL1, xzr
+.Lskip_gcs_\@:
+.endm
+
 .macro __init_el2_nvhe_prepare_eret
 	mov	x0, #INIT_PSTATE_EL1
 	msr	spsr_el2, x0
@@ -240,6 +269,7 @@ 
 	__init_el2_nvhe_idregs
 	__init_el2_cptr
 	__init_el2_fgt
+        __init_el2_gcs
 .endm
 
 #ifndef __KVM_NVHE_HYPERVISOR__