[v3,34/36] x86/smpboot: Implement a bit spinlock to protect the realmode stack

Message ID 20230508185219.123719053@linutronix.de
State Awaiting Upstream
Series cpu/hotplug, x86: Reworked parallel CPU bringup

Commit Message

Thomas Gleixner May 8, 2023, 7:44 p.m. UTC
From: Thomas Gleixner <tglx@linutronix.de>

Parallel AP bringup requires that the APs can run fully in parallel through
the early startup code, including the real mode trampoline.

To prepare for this, implement a bit spinlock to serialize access to the
real mode stack so that APs coming up in parallel cannot corrupt each
other's stacks while running through the real mode startup code.

Co-developed-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>


---
 arch/x86/include/asm/realmode.h      |    3 +++
 arch/x86/kernel/head_64.S            |   13 +++++++++++++
 arch/x86/realmode/init.c             |    3 +++
 arch/x86/realmode/rm/trampoline_64.S |   27 ++++++++++++++++++++++-----
 4 files changed, 41 insertions(+), 5 deletions(-)
---
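
For illustration, here is a minimal C sketch of the locking scheme the patch
implements (hypothetical names; the real lock word is tr_lock in the
trampoline header and is taken and released from assembly in
trampoline_64.S and head_64.S):

/*
 * Illustration only -- conceptual C equivalent of the realmode stack
 * bit spinlock.  Function and variable names are made up.
 */
#include <linux/bitops.h>	/* test_and_set_bit(), clear_bit() */
#include <asm/processor.h>	/* cpu_relax() */

static unsigned long rm_stack_lock;	/* bit 0: realmode stack owned */

static void rm_stack_acquire(void)
{
	/* Atomically set bit 0; spin with PAUSE while another AP owns it. */
	while (test_and_set_bit(0, &rm_stack_lock))
		cpu_relax();
}

static void rm_stack_release(void)
{
	/* Dropped once the AP is running on its own kernel stack. */
	clear_bit(0, &rm_stack_lock);
}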

Comments

Peter Zijlstra May 9, 2023, 1:13 p.m. UTC | #1
On Mon, May 08, 2023 at 09:44:22PM +0200, Thomas Gleixner wrote:

> @@ -252,6 +252,17 @@ SYM_INNER_LABEL(secondary_startup_64_no_
>  	movq	TASK_threadsp(%rax), %rsp
>  
>  	/*
> +	 * Now that this CPU is running on its own stack, drop the realmode
> +	 * protection. For the boot CPU the pointer is NULL!
> +	 */
> +	movq	trampoline_lock(%rip), %rax
	movl	$0, (%rax)

> +.Lsetup_gdt:
> +	/*
>  	 * We must switch to a new descriptor in kernel space for the GDT
>  	 * because soon the kernel won't have access anymore to the userspace
>  	 * addresses where we're currently running on. We have to do that here

> --- a/arch/x86/realmode/rm/trampoline_64.S
> +++ b/arch/x86/realmode/rm/trampoline_64.S
> @@ -37,6 +37,24 @@
>  	.text
>  	.code16
>  
> +.macro LOAD_REALMODE_ESP
> +	/*
> +	 * Make sure only one CPU fiddles with the realmode stack
> +	 */
> +.Llock_rm\@:
> +	btl	$0, tr_lock
> +	jnc	2f
> +	pause
> +	jmp	.Llock_rm\@
> +2:
> +	lock
> +	btsl	$0, tr_lock
> +	jc	.Llock_rm\@

Do we really care about performance here; or should we pick the simpler
form? Also, 'lock' is a prefix, not an instruction.

.Llock_rm\@:
	lock btsl	$0, tr_lock;
	jnc		2f
	pause
	jmp		.Llock_rm\@
2:

> +
> +	# Setup stack
> +	movl	$rm_stack_end, %esp
> +.endm
> +
>  	.balign	PAGE_SIZE
Thomas Gleixner May 9, 2023, 1:47 p.m. UTC | #2
On Tue, May 09 2023 at 15:13, Peter Zijlstra wrote:
> On Mon, May 08, 2023 at 09:44:22PM +0200, Thomas Gleixner wrote:
>
> Do we really care about performance here; or should we pick the simpler
> form? Also, 'lock' is a prefix, not an instruction.

Right. KISS is the way to go.
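
For reference, with Peter's simplification applied the macro would read
roughly as follows (sketch only, not necessarily the final committed form):

.macro LOAD_REALMODE_ESP
	/*
	 * Make sure only one CPU fiddles with the realmode stack
	 */
.Llock_rm\@:
	lock btsl	$0, tr_lock	# atomically test and set the lock bit
	jnc	2f			# bit was clear: lock acquired
	pause
	jmp	.Llock_rm\@
2:
	# Setup stack
	movl	$rm_stack_end, %esp
.endm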

Patch

--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -52,6 +52,7 @@  struct trampoline_header {
 	u64 efer;
 	u32 cr4;
 	u32 flags;
+	u32 lock;
 #endif
 };
 
@@ -64,6 +65,8 @@  extern unsigned long initial_stack;
 extern unsigned long initial_vc_handler;
 #endif
 
+extern u32 *trampoline_lock;
+
 extern unsigned char real_mode_blob[];
 extern unsigned char real_mode_relocs[];
 
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -252,6 +252,17 @@  SYM_INNER_LABEL(secondary_startup_64_no_
 	movq	TASK_threadsp(%rax), %rsp
 
 	/*
+	 * Now that this CPU is running on its own stack, drop the realmode
+	 * protection. For the boot CPU the pointer is NULL!
+	 */
+	movq	trampoline_lock(%rip), %rax
+	testq	%rax, %rax
+	jz	.Lsetup_gdt
+	lock
+	btrl	$0, (%rax)
+
+.Lsetup_gdt:
+	/*
 	 * We must switch to a new descriptor in kernel space for the GDT
 	 * because soon the kernel won't have access anymore to the userspace
 	 * addresses where we're currently running on. We have to do that here
@@ -433,6 +444,8 @@  SYM_DATA(initial_code,	.quad x86_64_star
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 SYM_DATA(initial_vc_handler,	.quad handle_vc_boot_ghcb)
 #endif
+
+SYM_DATA(trampoline_lock, .quad 0);
 	__FINITDATA
 
 	__INIT
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -154,6 +154,9 @@  static void __init setup_real_mode(void)
 
 	trampoline_header->flags = 0;
 
+	trampoline_lock = &trampoline_header->lock;
+	*trampoline_lock = 0;
+
 	trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
 
 	/* Map the real mode stub as virtual == physical */
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -37,6 +37,24 @@ 
 	.text
 	.code16
 
+.macro LOAD_REALMODE_ESP
+	/*
+	 * Make sure only one CPU fiddles with the realmode stack
+	 */
+.Llock_rm\@:
+	btl	$0, tr_lock
+	jnc	2f
+	pause
+	jmp	.Llock_rm\@
+2:
+	lock
+	btsl	$0, tr_lock
+	jc	.Llock_rm\@
+
+	# Setup stack
+	movl	$rm_stack_end, %esp
+.endm
+
 	.balign	PAGE_SIZE
 SYM_CODE_START(trampoline_start)
 	cli			# We should be safe anyway
@@ -49,8 +67,7 @@  SYM_CODE_START(trampoline_start)
 	mov	%ax, %es
 	mov	%ax, %ss
 
-	# Setup stack
-	movl	$rm_stack_end, %esp
+	LOAD_REALMODE_ESP
 
 	call	verify_cpu		# Verify the cpu supports long mode
 	testl   %eax, %eax		# Check for return code
@@ -93,8 +110,7 @@  SYM_CODE_START(sev_es_trampoline_start)
 	mov	%ax, %es
 	mov	%ax, %ss
 
-	# Setup stack
-	movl	$rm_stack_end, %esp
+	LOAD_REALMODE_ESP
 
 	jmp	.Lswitch_to_protected
 SYM_CODE_END(sev_es_trampoline_start)
@@ -177,7 +193,7 @@  SYM_CODE_START(pa_trampoline_compat)
 	 * In compatibility mode.  Prep ESP and DX for startup_32, then disable
 	 * paging and complete the switch to legacy 32-bit mode.
 	 */
-	movl	$rm_stack_end, %esp
+	LOAD_REALMODE_ESP
 	movw	$__KERNEL_DS, %dx
 
 	movl	$(CR0_STATE & ~X86_CR0_PG), %eax
@@ -241,6 +257,7 @@  SYM_DATA_START(trampoline_header)
 	SYM_DATA(tr_efer,		.space 8)
 	SYM_DATA(tr_cr4,		.space 4)
 	SYM_DATA(tr_flags,		.space 4)
+	SYM_DATA(tr_lock,		.space 4)
 SYM_DATA_END(trampoline_header)
 
 #include "trampoline_common.S"