
[V4,00/37] cpu/hotplug, x86: Reworked parallel CPU bringup

Message ID 20230512203426.452963764@linutronix.de (mailing list archive)
State Awaiting Upstream

Commit Message

Thomas Gleixner May 12, 2023, 9:06 p.m. UTC
Hi!

This is version 4 of the reworked parallel bringup series. Version 3 can be
found here:

   https://lore.kernel.org/lkml/20230508181633.089804905@linutronix.de

This is just a reiteration to address the following details:

  1) Address review feedback (Peter Zijlstra)

  2) Fix a MIPS related build problem (0day)

Other than that there are no changes and the other details are all the same
as in V3 and V2.

It's also available from git:

    git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git hotplug

Diff to V3 below.

Thanks,

	tglx
---

Comments

Oleksandr Natalenko May 13, 2023, 6:32 p.m. UTC | #1
Hello.

On Friday, May 12, 2023, 23:06:56 CEST Thomas Gleixner wrote:
> Hi!
> 
> This is version 4 of the reworked parallel bringup series. Version 3 can be
> found here:
> 
>    https://lore.kernel.org/lkml/20230508181633.089804905@linutronix.de
> 
> This is just a reiteration to address the following details:
> 
>   1) Address review feedback (Peter Zijlstra)
> 
>   2) Fix a MIPS related build problem (0day)
> 
> Other than that there are no changes and the other details are all the same
> as in V3 and V2.
> 
> It's also available from git:
> 
>     git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git hotplug
> 
> Diff to V3 below.
> 
> Thanks,
> 
> 	tglx

With this patchset:

```

[    0.137719] smpboot: Allowing 32 CPUs, 0 hotplug CPUs
[    0.777312] smpboot: CPU0: AMD Ryzen 9 5950X 16-Core Processor (family: 0x19, model: 0x21, stepping: 0x2)
[    0.777896] smpboot: Parallel CPU startup enabled: 0x80000000
```

Seems to survive a suspend/resume cycle too.

Hence:

Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>

Thanks.

> ---
> diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
> index f5e0f4235746..90c71d800b59 100644
> --- a/arch/mips/kernel/smp.c
> +++ b/arch/mips/kernel/smp.c
> @@ -690,7 +690,7 @@ void flush_tlb_one(unsigned long vaddr)
>  EXPORT_SYMBOL(flush_tlb_page);
>  EXPORT_SYMBOL(flush_tlb_one);
>  
> -#ifdef CONFIG_HOTPLUG_CPU
> +#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
>  void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
>  {
>  	if (mp_ops->cleanup_dead_cpu)
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index 0438802031c3..9cd77d319555 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -290,8 +290,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
>  
>  	/*  APIC ID not found in the table. Drop the trampoline lock and bail. */
>  	movq	trampoline_lock(%rip), %rax
> -	lock
> -	btrl	$0, (%rax)
> +	movl	$0, (%rax)
>  
>  1:	cli
>  	hlt
> @@ -320,8 +319,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
>  	movq	trampoline_lock(%rip), %rax
>  	testq	%rax, %rax
>  	jz	.Lsetup_gdt
> -	lock
> -	btrl	$0, (%rax)
> +	movl	$0, (%rax)
>  
>  .Lsetup_gdt:
>  	/*
> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> index 5caf4897b507..660709e94823 100644
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -161,31 +161,28 @@ static inline void smpboot_restore_warm_reset_vector(void)
>  
>  }
>  
> -/*
> - * Report back to the Boot Processor during boot time or to the caller processor
> - * during CPU online.
> - */
> -static void smp_callin(void)
> +/* Run the next set of setup steps for the upcoming CPU */
> +static void ap_starting(void)
>  {
>  	int cpuid = smp_processor_id();
>  
>  	/*
> -	 * If waken up by an INIT in an 82489DX configuration the alive
> -	 * synchronization guarantees we don't get here before an
> -	 * INIT_deassert IPI reaches our local APIC, so it is now safe to
> -	 * touch our local APIC.
> +	 * If woken up by an INIT in an 82489DX configuration the alive
> +	 * synchronization guarantees that the CPU does not reach this
> +	 * point before an INIT_deassert IPI reaches the local APIC, so it
> +	 * is now safe to touch the local APIC.
>  	 *
>  	 * Set up this CPU, first the APIC, which is probably redundant on
>  	 * most boards.
>  	 */
>  	apic_ap_setup();
>  
> -	/* Save our processor parameters. */
> +	/* Save the processor parameters. */
>  	smp_store_cpu_info(cpuid);
>  
>  	/*
>  	 * The topology information must be up to date before
> -	 * calibrate_delay() and notify_cpu_starting().
> +	 * notify_cpu_starting().
>  	 */
>  	set_cpu_sibling_map(cpuid);
>  
> @@ -197,7 +194,7 @@ static void smp_callin(void)
>  
>  	/*
>  	 * This runs the AP through all the cpuhp states to its target
> -	 * state (CPUHP_ONLINE in the case of serial bringup).
> +	 * state CPUHP_ONLINE.
>  	 */
>  	notify_cpu_starting(cpuid);
>  }
> @@ -274,10 +271,7 @@ static void notrace start_secondary(void *unused)
>  	rcu_cpu_starting(raw_smp_processor_id());
>  	x86_cpuinit.early_percpu_clock_init();
>  
> -	smp_callin();
> -
> -	/* Otherwise gcc will move up smp_processor_id() before cpu_init() */
> -	barrier();
> +	ap_starting();
>  
>  	/* Check TSC synchronization with the control CPU. */
>  	check_tsc_sync_target();
> diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
> index 2dfb1c400167..c6de4deec746 100644
> --- a/arch/x86/realmode/rm/trampoline_64.S
> +++ b/arch/x86/realmode/rm/trampoline_64.S
> @@ -40,17 +40,13 @@
>  .macro LOAD_REALMODE_ESP
>  	/*
>  	 * Make sure only one CPU fiddles with the realmode stack
> -	 */
> +	*/
>  .Llock_rm\@:
> -	btl	$0, tr_lock
> -	jnc	2f
> -	pause
> -	jmp	.Llock_rm\@
> +        lock btsl       $0, tr_lock
> +        jnc             2f
> +        pause
> +        jmp             .Llock_rm\@
>  2:
> -	lock
> -	btsl	$0, tr_lock
> -	jc	.Llock_rm\@
> -
>  	# Setup stack
>  	movl	$rm_stack_end, %esp
>  .endm
> diff --git a/kernel/cpu.c b/kernel/cpu.c
> index 60b4093fae9e..005f863a3d2b 100644
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -294,14 +294,14 @@ enum cpuhp_sync_state {
>   * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
>   * @state:	The synchronization state to set
>   *
> - * No synchronization point. Just update of the synchronization state.
> + * No synchronization point. Just update of the synchronization state, but implies
> + * a full barrier so that the AP changes are visible before the control CPU proceeds.
>   */
>  static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
>  {
>  	atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
> -	int sync = atomic_read(st);
>  
> -	while (!atomic_try_cmpxchg(st, &sync, state));
> +	(void)atomic_xchg(st, state);
>  }
>  
>  void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); }
> @@ -829,7 +829,11 @@ static int bringup_cpu(unsigned int cpu)
>  	/*
>  	 * Some architectures have to walk the irq descriptors to
>  	 * setup the vector space for the cpu which comes online.
> -	 * Prevent irq alloc/free across the bringup.
> +	 *
> +	 * Prevent irq alloc/free across the bringup by acquiring the
> +	 * sparse irq lock. Hold it until the upcoming CPU completes the
> +	 * startup in cpuhp_online_idle() which allows to avoid
> +	 * intermediate synchronization points in the architecture code.
>  	 */
>  	irq_lock_sparse();
>  
> 
> 
>
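
As an aside on the head_64.S and trampoline_64.S hunks quoted above: the acquire
side keeps a locked read-modify-write, while the release side becomes a plain
store, which is sufficient on x86 because an aligned store already has release
semantics. A minimal C sketch of the same pattern, purely illustrative and not
the kernel code (the real lock is a word in the realmode trampoline and is
handled in assembly):

```
#include <stdatomic.h>
#include <immintrin.h>		/* _mm_pause(), the C intrinsic for "pause" */

static atomic_int tr_lock;	/* stand-in for the realmode tr_lock word */

/*
 * Acquire: an atomic read-modify-write, spinning with a pause hint while the
 * lock is held (mirrors "lock btsl; jnc 2f; pause; jmp .Llock_rm").
 */
static void trampoline_lock(void)
{
	while (atomic_exchange_explicit(&tr_lock, 1, memory_order_acquire))
		_mm_pause();
}

/*
 * Release: a plain store suffices on x86, where stores already have release
 * semantics, which is why the patch replaces "lock btrl" with "movl $0".
 */
static void trampoline_unlock(void)
{
	atomic_store_explicit(&tr_lock, 0, memory_order_release);
}
```
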
Helge Deller May 13, 2023, 9 p.m. UTC | #2
Hi Thomas,
> On Friday, May 12, 2023, 23:06:56 CEST Thomas Gleixner wrote:
>> This is version 4 of the reworked parallel bringup series. Version 3 can be
>> found here:
>>
>>     https://lore.kernel.org/lkml/20230508181633.089804905@linutronix.de
>> ...
>>
>> Other than that there are no changes and the other details are all the same
>> as in V3 and V2.
>>
>> It's also available from git:
>>
>>      git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git hotplug

I tested your series on the parisc architecture just to make sure that everything
still works with your patches applied.
On parisc the CPU bringup happens later in the boot process (after the inventory),
so your series won't have a direct impact anyway.
But at least everything still works, incl. manual CPU enable/disable.

So, you may add
Tested-by: Helge Deller <deller@gmx.de> # parisc

Thanks!
Helge
Guilherme G. Piccoli May 14, 2023, 9:48 p.m. UTC | #3
On 12/05/2023 18:06, Thomas Gleixner wrote:
> Hi!
> 
> This is version 4 of the reworked parallel bringup series. Version 3 can be
> found here:
> 
>    https://lore.kernel.org/lkml/20230508181633.089804905@linutronix.de


Hi Thomas, thanks for the series! I was able to test it on the Steam Deck
(on top of 6.4-rc2), and everything is working fine; I also tested S3
suspend/resume, which works as expected.

Some logs from boot time:


Parallel boot
[    0.239764] smp: Bringing up secondary CPUs ...
[...]
[    0.253130] smp: Brought up 1 node, 8 CPUs


Regular boot (with cpuhp.parallel=0)
[    0.240093] smp: Bringing up secondary CPUs ...
[...]
[    0.253475] smp: Brought up 1 node, 8 CPUs


Feel free to add (to the series):

Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com> # Steam Deck

Cheers,


Guilherme
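
The cpuhp.parallel=0 switch used in the comparison above selects the serial
fallback path at boot time. Below is a minimal sketch of how such a switch is
typically wired up as a read-only module parameter in kernel/cpu.c; the
parameter name matches the logs above, but the variable and helper names here
are illustrative and may not match the series:

```
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/types.h>

/*
 * A read-only parameter in the "cpuhp" namespace, i.e. settable on the
 * kernel command line as cpuhp.parallel=0.
 */
static bool __ro_after_init cpuhp_parallel = true;
module_param_named(parallel, cpuhp_parallel, bool, 0444);

static bool __init cpuhp_try_parallel_bringup(void)
{
	if (!cpuhp_parallel)
		return false;	/* fall back to the serial bringup loop */

	/* ... kick all present APs and let them run up to the sync point ... */
	return true;
}
```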

Patch

diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index f5e0f4235746..90c71d800b59 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -690,7 +690,7 @@  void flush_tlb_one(unsigned long vaddr)
 EXPORT_SYMBOL(flush_tlb_page);
 EXPORT_SYMBOL(flush_tlb_one);
 
-#ifdef CONFIG_HOTPLUG_CPU
+#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
 void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
 {
 	if (mp_ops->cleanup_dead_cpu)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0438802031c3..9cd77d319555 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -290,8 +290,7 @@  SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 
 	/*  APIC ID not found in the table. Drop the trampoline lock and bail. */
 	movq	trampoline_lock(%rip), %rax
-	lock
-	btrl	$0, (%rax)
+	movl	$0, (%rax)
 
 1:	cli
 	hlt
@@ -320,8 +319,7 @@  SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	movq	trampoline_lock(%rip), %rax
 	testq	%rax, %rax
 	jz	.Lsetup_gdt
-	lock
-	btrl	$0, (%rax)
+	movl	$0, (%rax)
 
 .Lsetup_gdt:
 	/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5caf4897b507..660709e94823 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -161,31 +161,28 @@  static inline void smpboot_restore_warm_reset_vector(void)
 
 }
 
-/*
- * Report back to the Boot Processor during boot time or to the caller processor
- * during CPU online.
- */
-static void smp_callin(void)
+/* Run the next set of setup steps for the upcoming CPU */
+static void ap_starting(void)
 {
 	int cpuid = smp_processor_id();
 
 	/*
-	 * If waken up by an INIT in an 82489DX configuration the alive
-	 * synchronization guarantees we don't get here before an
-	 * INIT_deassert IPI reaches our local APIC, so it is now safe to
-	 * touch our local APIC.
+	 * If woken up by an INIT in an 82489DX configuration the alive
+	 * synchronization guarantees that the CPU does not reach this
+	 * point before an INIT_deassert IPI reaches the local APIC, so it
+	 * is now safe to touch the local APIC.
 	 *
 	 * Set up this CPU, first the APIC, which is probably redundant on
 	 * most boards.
 	 */
 	apic_ap_setup();
 
-	/* Save our processor parameters. */
+	/* Save the processor parameters. */
 	smp_store_cpu_info(cpuid);
 
 	/*
 	 * The topology information must be up to date before
-	 * calibrate_delay() and notify_cpu_starting().
+	 * notify_cpu_starting().
 	 */
 	set_cpu_sibling_map(cpuid);
 
@@ -197,7 +194,7 @@  static void smp_callin(void)
 
 	/*
 	 * This runs the AP through all the cpuhp states to its target
-	 * state (CPUHP_ONLINE in the case of serial bringup).
+	 * state CPUHP_ONLINE.
 	 */
 	notify_cpu_starting(cpuid);
 }
@@ -274,10 +271,7 @@  static void notrace start_secondary(void *unused)
 	rcu_cpu_starting(raw_smp_processor_id());
 	x86_cpuinit.early_percpu_clock_init();
 
-	smp_callin();
-
-	/* Otherwise gcc will move up smp_processor_id() before cpu_init() */
-	barrier();
+	ap_starting();
 
 	/* Check TSC synchronization with the control CPU. */
 	check_tsc_sync_target();
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index 2dfb1c400167..c6de4deec746 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -40,17 +40,13 @@ 
 .macro LOAD_REALMODE_ESP
 	/*
 	 * Make sure only one CPU fiddles with the realmode stack
-	 */
+	*/
 .Llock_rm\@:
-	btl	$0, tr_lock
-	jnc	2f
-	pause
-	jmp	.Llock_rm\@
+        lock btsl       $0, tr_lock
+        jnc             2f
+        pause
+        jmp             .Llock_rm\@
 2:
-	lock
-	btsl	$0, tr_lock
-	jc	.Llock_rm\@
-
 	# Setup stack
 	movl	$rm_stack_end, %esp
 .endm
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 60b4093fae9e..005f863a3d2b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -294,14 +294,14 @@  enum cpuhp_sync_state {
  * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
  * @state:	The synchronization state to set
  *
- * No synchronization point. Just update of the synchronization state.
+ * No synchronization point. Just update of the synchronization state, but implies
+ * a full barrier so that the AP changes are visible before the control CPU proceeds.
  */
 static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
 {
 	atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
-	int sync = atomic_read(st);
 
-	while (!atomic_try_cmpxchg(st, &sync, state));
+	(void)atomic_xchg(st, state);
 }
 
 void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); }
@@ -829,7 +829,11 @@  static int bringup_cpu(unsigned int cpu)
 	/*
 	 * Some architectures have to walk the irq descriptors to
 	 * setup the vector space for the cpu which comes online.
-	 * Prevent irq alloc/free across the bringup.
+	 *
+	 * Prevent irq alloc/free across the bringup by acquiring the
+	 * sparse irq lock. Hold it until the upcoming CPU completes the
+	 * startup in cpuhp_online_idle() which allows to avoid
+	 * intermediate synchronization points in the architecture code.
 	 */
 	irq_lock_sparse();
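
One of the kernel/cpu.c hunks above replaces the cmpxchg loop in
cpuhp_ap_update_sync_state() with atomic_xchg(). The exchange keeps the
full-barrier property the new comment describes (a plain atomic_set() would
not order the AP's earlier updates against the state store) while dropping the
redundant retry loop. A rough sketch of the resulting publish/poll pairing,
with illustrative names; the real code has more states and uses the
arch_cpuhp_sync_state_poll() hook visible in the hunk context:

```
#include <linux/atomic.h>
#include <asm/processor.h>	/* cpu_relax() */

/*
 * AP side: publish the new bringup/teardown state. atomic_xchg() is a full
 * barrier, so everything the AP wrote before this point is visible to the
 * control CPU once it observes the new state value.
 */
static inline void ap_publish_sync_state(atomic_t *st, int state)
{
	(void)atomic_xchg(st, state);
}

/*
 * Control CPU side: poll until the AP has reached the expected state.
 * Illustrative only; the real wait loop also handles timeouts.
 */
static void bp_wait_for_sync_state(atomic_t *st, int expected)
{
	while (atomic_read(st) != expected)
		cpu_relax();
}
```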