
[5/7] ARM: Stop using TPIDRPRW to hold per_cpu_offset

Message ID 20210907220038.91021-6-keithpac@amazon.com (mailing list archive)
State New
Series [1/7] ARM: Pass cpu number to secondary_start_kernel

Commit Message

Keith Packard Sept. 7, 2021, 10 p.m. UTC
We're going to store 'current' in TPIDRPRW instead

Signed-off-by: Keith Packard <keithpac@amazon.com>
---
 arch/arm/include/asm/percpu.h | 31 -------------------------------
 arch/arm/kernel/setup.c       |  7 -------
 arch/arm/kernel/smp.c         |  3 ---
 3 files changed, 41 deletions(-)

Comments

Ard Biesheuvel Sept. 9, 2021, 1:54 p.m. UTC | #1
On Wed, 8 Sept 2021 at 00:00, Keith Packard <keithpac@amazon.com> wrote:
>
> We're going to store 'current' in TPIDRPRW instead
>

?

> Signed-off-by: Keith Packard <keithpac@amazon.com>

I'd much prefer to keep using TPIDRPRW for the per-CPU offsets, and
use the user space TLS register for current.

There are several reasons for this:
- arm64 does the same - as someone who still cares about ARM while
many have moved on to arm64 or RISC-V, I am still trying to maintain
parity between ARM and arm64 where possible.
- efficiency: loading the per-CPU offset via a CPU id stored in
memory, which is then used to index the per-CPU offsets array (also
in memory), adds two additional loads to every access of a per-CPU
variable
- 'current' usually does not change value under the code's feet,
whereas per-CPU offsets might change at any time. Given that the
CPU offset load is visible to the compiler as a memory access, I
suppose this should be safe, but I would still prefer per-CPU access
to avoid going via current where possible.
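
A minimal sketch of the two access paths being contrasted here (an
editorial illustration assuming the asm-generic/percpu.h fallback, not
code from the series):

    /* Path 1 (removed by this patch): the offset lives in TPIDRPRW,
     * so reading it is a single coprocessor register read with no
     * memory access. */
    static inline unsigned long my_cpu_offset_tpidrprw(void)
    {
            unsigned long off;

            asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off));
            return off;
    }

    /* Path 2 (the generic fallback that now applies): fetch the CPU
     * id from memory, then index __per_cpu_offset[] in memory -- the
     * two additional loads referred to above. */
    extern unsigned long __per_cpu_offset[];

    static inline unsigned long my_cpu_offset_generic(void)
    {
            return __per_cpu_offset[raw_smp_processor_id()];
    }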

> ---
>  arch/arm/include/asm/percpu.h | 31 -------------------------------
>  arch/arm/kernel/setup.c       |  7 -------
>  arch/arm/kernel/smp.c         |  3 ---
>  3 files changed, 41 deletions(-)
>
> diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
> index e2fcb3cfd3de..eeafcd6a3e01 100644
> --- a/arch/arm/include/asm/percpu.h
> +++ b/arch/arm/include/asm/percpu.h
> @@ -7,37 +7,6 @@
>
>  register unsigned long current_stack_pointer asm ("sp");
>
> -/*
> - * Same as asm-generic/percpu.h, except that we store the per cpu offset
> - * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7
> - */
> -#if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6)
> -static inline void set_my_cpu_offset(unsigned long off)
> -{
> -       /* Set TPIDRPRW */
> -       asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory");
> -}
> -
> -static inline unsigned long __my_cpu_offset(void)
> -{
> -       unsigned long off;
> -
> -       /*
> -        * Read TPIDRPRW.
> -        * We want to allow caching the value, so avoid using volatile and
> -        * instead use a fake stack read to hazard against barrier().
> -        */
> -       asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off)
> -               : "Q" (*(const unsigned long *)current_stack_pointer));
> -
> -       return off;
> -}
> -#define __my_cpu_offset __my_cpu_offset()
> -#else
> -#define set_my_cpu_offset(x)   do {} while(0)
> -
> -#endif /* CONFIG_SMP */
> -
>  #include <asm-generic/percpu.h>
>
>  #endif /* _ASM_ARM_PERCPU_H_ */
> diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
> index ca0201635fac..d0dc60afe54f 100644
> --- a/arch/arm/kernel/setup.c
> +++ b/arch/arm/kernel/setup.c
> @@ -590,13 +590,6 @@ void __init smp_setup_processor_id(void)
>         for (i = 1; i < nr_cpu_ids; ++i)
>                 cpu_logical_map(i) = i == cpu ? 0 : i;
>
> -       /*
> -        * clear __my_cpu_offset on boot CPU to avoid hang caused by
> -        * using percpu variable early, for example, lockdep will
> -        * access percpu variable inside lock_release
> -        */
> -       set_my_cpu_offset(0);
> -
>         pr_info("Booting Linux on physical CPU 0x%x\n", mpidr);
>  }
>
> diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
> index 5e999f1f1aea..8ccf10b34f08 100644
> --- a/arch/arm/kernel/smp.c
> +++ b/arch/arm/kernel/smp.c
> @@ -410,8 +410,6 @@ asmlinkage void secondary_start_kernel(unsigned int cpu, struct task_struct *tas
>  {
>         struct mm_struct *mm = &init_mm;
>
> -       set_my_cpu_offset(per_cpu_offset(cpu));
> -
>         secondary_biglittle_init();
>
>         /*
> @@ -495,7 +493,6 @@ void __init smp_cpus_done(unsigned int max_cpus)
>
>  void __init smp_prepare_boot_cpu(void)
>  {
> -       set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
>  }
>
>  void __init smp_prepare_cpus(unsigned int max_cpus)
> --
> 2.33.0
>

Patch

diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h
index e2fcb3cfd3de..eeafcd6a3e01 100644
--- a/arch/arm/include/asm/percpu.h
+++ b/arch/arm/include/asm/percpu.h
@@ -7,37 +7,6 @@ 
 
 register unsigned long current_stack_pointer asm ("sp");
 
-/*
- * Same as asm-generic/percpu.h, except that we store the per cpu offset
- * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7
- */
-#if defined(CONFIG_SMP) && !defined(CONFIG_CPU_V6)
-static inline void set_my_cpu_offset(unsigned long off)
-{
-	/* Set TPIDRPRW */
-	asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (off) : "memory");
-}
-
-static inline unsigned long __my_cpu_offset(void)
-{
-	unsigned long off;
-
-	/*
-	 * Read TPIDRPRW.
-	 * We want to allow caching the value, so avoid using volatile and
-	 * instead use a fake stack read to hazard against barrier().
-	 */
-	asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (off)
-		: "Q" (*(const unsigned long *)current_stack_pointer));
-
-	return off;
-}
-#define __my_cpu_offset __my_cpu_offset()
-#else
-#define set_my_cpu_offset(x)	do {} while(0)
-
-#endif /* CONFIG_SMP */
-
 #include <asm-generic/percpu.h>
 
 #endif /* _ASM_ARM_PERCPU_H_ */
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index ca0201635fac..d0dc60afe54f 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -590,13 +590,6 @@  void __init smp_setup_processor_id(void)
 	for (i = 1; i < nr_cpu_ids; ++i)
 		cpu_logical_map(i) = i == cpu ? 0 : i;
 
-	/*
-	 * clear __my_cpu_offset on boot CPU to avoid hang caused by
-	 * using percpu variable early, for example, lockdep will
-	 * access percpu variable inside lock_release
-	 */
-	set_my_cpu_offset(0);
-
 	pr_info("Booting Linux on physical CPU 0x%x\n", mpidr);
 }
 
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 5e999f1f1aea..8ccf10b34f08 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -410,8 +410,6 @@  asmlinkage void secondary_start_kernel(unsigned int cpu, struct task_struct *tas
 {
 	struct mm_struct *mm = &init_mm;
 
-	set_my_cpu_offset(per_cpu_offset(cpu));
-
 	secondary_biglittle_init();
 
 	/*
@@ -495,7 +493,6 @@  void __init smp_cpus_done(unsigned int max_cpus)
 
 void __init smp_prepare_boot_cpu(void)
 {
-	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
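
For context, a sketch of what the freed register is intended to hold
after this series, per the commit message (an editorial illustration,
not code from this patch):

    /* TPIDRPRW is repurposed to carry the 'current' task pointer;
     * the register and encoding match the removed per-CPU helpers,
     * only the contents change. */
    static inline void set_current_tpidrprw(struct task_struct *tsk)
    {
            asm volatile("mcr p15, 0, %0, c13, c0, 4" : : "r" (tsk) : "memory");
    }

    static inline struct task_struct *get_current_tpidrprw(void)
    {
            struct task_struct *cur;

            asm("mrc p15, 0, %0, c13, c0, 4" : "=r" (cur));
            return cur;
    }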