
arm64: initialize per-cpu offsets earlier

Message ID 20200730163806.23053-1-mark.rutland@arm.com (mailing list archive)
State New, archived
Series arm64: initialize per-cpu offsets earlier

Commit Message

Mark Rutland July 30, 2020, 4:38 p.m. UTC
Our contemporary initialization of the per-cpu offset register is
somewhat difficult to follow, and this initialization is not always early
enough for upcoming instrumentation with KCSAN, where the
instrumentation handling functions use the per-cpu offset.

To make it possible to support KCSAN, and to simplify reasoning about
early bringup code, let's initialize the per-cpu offset earlier, before
we run any C code that may use the per-cpu offset. To do so, this patch
adds a new init_this_cpu_offset() helper that's called before the usual
primary/secondary start functions. For consistency, this is also used to
re-initialize the per-cpu offset after the runtime per-cpu areas have
been allocated (which can change CPU0's offset).

So that init_this_cpu_offset() isn't itself subject to instrumentation that
might consume the per-cpu offset, it is marked noinstr.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cpu.h |  2 ++
 arch/arm64/kernel/head.S     |  3 +++
 arch/arm64/kernel/setup.c    | 12 ++++++------
 arch/arm64/kernel/smp.c      | 13 ++++++++-----
 4 files changed, 19 insertions(+), 11 deletions(-)

This is a preparatory cleanup for KCSAN, but given it should make per-cpu more
generally robust I thought it was worth sending on its own now. I've given this
some light SMP testing in a KVM guest.

Mark.
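
For reference, here is a simplified sketch (not part of this patch) of how the
arm64 per-cpu accessors consume this offset: the offset lives in a CPU-local
system register, and every this_cpu_*()/per_cpu() access adds it to the
variable's link-time address, so instrumented C code that runs before the
register is programmed dereferences a junk pointer. The in-tree helpers in
arch/arm64/include/asm/percpu.h additionally select tpidr_el2 via an
alternative when running with VHE; that and other details are omitted below.

/*
 * Simplified sketch of the arm64 per-cpu offset helpers; the real versions
 * in arch/arm64/include/asm/percpu.h use an ALTERNATIVE to pick tpidr_el2
 * on VHE systems.
 */
static inline void set_my_cpu_offset(unsigned long off)
{
	/* Stash the offset where later per-cpu accesses can find it. */
	asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory");
}

static inline unsigned long __my_cpu_offset(void)
{
	unsigned long off;

	/* this_cpu_ptr(&var) is roughly (typeof(&var))((unsigned long)&var + off). */
	asm volatile("mrs %0, tpidr_el1" : "=r" (off));
	return off;
}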

Comments

Mark Rutland Sept. 21, 2020, 4:30 p.m. UTC | #1
Hi Will,

Would you be happy to pick up the below patch, or would you like me to
resend with the commit message typos fixed?

I need to do some more work to enable KCSAN, but taking this would help
to get things out of the way.

Mark.

On Thu, Jul 30, 2020 at 05:38:06PM +0100, Mark Rutland wrote:
> Our contemporary initialization of the per-cpu offset register is
> somewhat difficult to follow, and this initialization is not always early
> enough for upcoming instrumentation with KCSAN, where the
> instrumentation handling functions use the per-cpu offset.
> 
> To make it possible to support KCSAN, and to simplify reasoning about
> early bringup code, let's initialize the per-cpu offset earlier, before
> we run any C code that may use the per-cpu offset. To do so, this patch
> adds a new init_this_cpu_offset() helper that's called before the usual
> primary/secondary start functions. For consistency, this is also used to
> re-initialize the per-cpu offset after the runtime per-cpu areas have
> been allocated (which can change CPU0's offset).
> 
> So that init_this_cpu_offset() isn't itself subject to instrumentation that
> might consume the per-cpu offset, it is marked noinstr.
> 
> Signed-off-by: Mark Rutland <mark.rutland@arm.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: James Morse <james.morse@arm.com>
> Cc: Will Deacon <will@kernel.org>
> ---
>  arch/arm64/include/asm/cpu.h |  2 ++
>  arch/arm64/kernel/head.S     |  3 +++
>  arch/arm64/kernel/setup.c    | 12 ++++++------
>  arch/arm64/kernel/smp.c      | 13 ++++++++-----
>  4 files changed, 19 insertions(+), 11 deletions(-)
> 
> This is a preparatory cleanup for KCSAN, but given it should make per-cpu more
> generally robust I thought it was worth sending on its own now. I've given this
> some light SMP testing in a KVM guest.
> 
> Mark.
> 
> diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
> index 7faae6ff3ab4d..d9d60b18e8116 100644
> --- a/arch/arm64/include/asm/cpu.h
> +++ b/arch/arm64/include/asm/cpu.h
> @@ -68,4 +68,6 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info);
>  void update_cpu_features(int cpu, struct cpuinfo_arm64 *info,
>  				 struct cpuinfo_arm64 *boot);
>  
> +void init_this_cpu_offset(void);
> +
>  #endif /* __ASM_CPU_H */
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 037421c66b147..2720e6ec68140 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -452,6 +452,8 @@ SYM_FUNC_START_LOCAL(__primary_switched)
>  	bl	__pi_memset
>  	dsb	ishst				// Make zero page visible to PTW
>  
> +	bl	init_this_cpu_offset
> +
>  #ifdef CONFIG_KASAN
>  	bl	kasan_early_init
>  #endif
> @@ -758,6 +760,7 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
>  	ptrauth_keys_init_cpu x2, x3, x4, x5
>  #endif
>  
> +	bl	init_this_cpu_offset
>  	b	secondary_start_kernel
>  SYM_FUNC_END(__secondary_switched)
>  
> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
> index c793276ec7ad9..c317293435328 100644
> --- a/arch/arm64/kernel/setup.c
> +++ b/arch/arm64/kernel/setup.c
> @@ -87,12 +87,6 @@ void __init smp_setup_processor_id(void)
>  	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
>  	cpu_logical_map(0) = mpidr;
>  
> -	/*
> -	 * clear __my_cpu_offset on boot CPU to avoid hang caused by
> -	 * using percpu variable early, for example, lockdep will
> -	 * access percpu variable inside lock_release
> -	 */
> -	set_my_cpu_offset(0);
>  	pr_info("Booting Linux on physical CPU 0x%010lx [0x%08x]\n",
>  		(unsigned long)mpidr, read_cpuid_id());
>  }
> @@ -276,6 +270,12 @@ arch_initcall(reserve_memblock_reserved_regions);
>  
>  u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
>  
> +void noinstr init_this_cpu_offset(void)
> +{
> +	unsigned int cpu = task_cpu(current);
> +	set_my_cpu_offset(per_cpu_offset(cpu));
> +}
> +
>  void __init setup_arch(char **cmdline_p)
>  {
>  	init_mm.start_code = (unsigned long) _text;
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index e43a8ff19f0f6..5c507597e4094 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -193,10 +193,7 @@ asmlinkage notrace void secondary_start_kernel(void)
>  	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
>  	struct mm_struct *mm = &init_mm;
>  	const struct cpu_operations *ops;
> -	unsigned int cpu;
> -
> -	cpu = task_cpu(current);
> -	set_my_cpu_offset(per_cpu_offset(cpu));
> +	unsigned int cpu = smp_processor_id();
>  
>  	/*
>  	 * All kernel threads share the same mm context; grab a
> @@ -436,7 +433,13 @@ void __init smp_cpus_done(unsigned int max_cpus)
>  
>  void __init smp_prepare_boot_cpu(void)
>  {
> -	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
> +	/*
> +	 * Now that setup_per_cpu_areas() has allocated the runtime per-cpu
> +	 * areas it is only safe to read the CPU0 boot-time area, and we must
> +	 * reinitialize the offset to point to the runtime area.
> +	 */
> +	init_this_cpu_offset();
> +
>  	cpuinfo_store_boot_cpu();
>  
>  	/*
> -- 
> 2.11.0
>
Will Deacon Sept. 21, 2020, 5:12 p.m. UTC | #2
On Mon, Sep 21, 2020 at 05:30:26PM +0100, Mark Rutland wrote:
> Would you be happy to pick up the below patch, or would you like me to
> resend with the commit message typos fixed?

Please resend with the typos fixed, but also against something a bit more
recent as this has a trivial conflict with eaecca9e7710.

Will

Patch

diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 7faae6ff3ab4d..d9d60b18e8116 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -68,4 +68,6 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info);
 void update_cpu_features(int cpu, struct cpuinfo_arm64 *info,
 				 struct cpuinfo_arm64 *boot);
 
+void init_this_cpu_offset(void);
+
 #endif /* __ASM_CPU_H */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 037421c66b147..2720e6ec68140 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -452,6 +452,8 @@ SYM_FUNC_START_LOCAL(__primary_switched)
 	bl	__pi_memset
 	dsb	ishst				// Make zero page visible to PTW
 
+	bl	init_this_cpu_offset
+
 #ifdef CONFIG_KASAN
 	bl	kasan_early_init
 #endif
@@ -758,6 +760,7 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
 	ptrauth_keys_init_cpu x2, x3, x4, x5
 #endif
 
+	bl	init_this_cpu_offset
 	b	secondary_start_kernel
 SYM_FUNC_END(__secondary_switched)
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index c793276ec7ad9..c317293435328 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -87,12 +87,6 @@ void __init smp_setup_processor_id(void)
 	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
 	cpu_logical_map(0) = mpidr;
 
-	/*
-	 * clear __my_cpu_offset on boot CPU to avoid hang caused by
-	 * using percpu variable early, for example, lockdep will
-	 * access percpu variable inside lock_release
-	 */
-	set_my_cpu_offset(0);
 	pr_info("Booting Linux on physical CPU 0x%010lx [0x%08x]\n",
 		(unsigned long)mpidr, read_cpuid_id());
 }
@@ -276,6 +270,12 @@ arch_initcall(reserve_memblock_reserved_regions);
 
 u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
 
+void noinstr init_this_cpu_offset(void)
+{
+	unsigned int cpu = task_cpu(current);
+	set_my_cpu_offset(per_cpu_offset(cpu));
+}
+
 void __init setup_arch(char **cmdline_p)
 {
 	init_mm.start_code = (unsigned long) _text;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index e43a8ff19f0f6..5c507597e4094 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -193,10 +193,7 @@ asmlinkage notrace void secondary_start_kernel(void)
 	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
 	struct mm_struct *mm = &init_mm;
 	const struct cpu_operations *ops;
-	unsigned int cpu;
-
-	cpu = task_cpu(current);
-	set_my_cpu_offset(per_cpu_offset(cpu));
+	unsigned int cpu = smp_processor_id();
 
 	/*
 	 * All kernel threads share the same mm context; grab a
@@ -436,7 +433,13 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 void __init smp_prepare_boot_cpu(void)
 {
-	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+	/*
+	 * Now that setup_per_cpu_areas() has allocated the runtime per-cpu
+	 * areas it is only safe to read the CPU0 boot-time area, and we must
+	 * reinitialize the offset to point to the runtime area.
+	 */
+	init_this_cpu_offset();
+
 	cpuinfo_store_boot_cpu();
 
 	/*