diff mbox series

[v2,5/5] ARM: spectre-v2: per-CPU vtables to work around big.Little systems

Message ID E1gK0dU-0007Mp-4h@rmk-PC.armlinux.org.uk (mailing list archive)
State New, archived
Headers show
Series Spectre big.Little updates | expand

Commit Message

Russell King (Oracle) Nov. 6, 2018, 12:39 p.m. UTC
In big.Little systems, some CPUs require the Spectre workarounds in
paths such as the context switch, but other CPUs do not.  In order
to handle these differences, we need per-CPU vtables.

We are unable to use the kernel's per-CPU variables to support this
as per-CPU is not initialised at times when we need access to the
vtables, so we have to use an array indexed by logical CPU number.

We use an array-of-pointers to avoid having function pointers in
the kernel's read/write .data section.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/proc-fns.h | 20 ++++++++++++++++++++
 arch/arm/kernel/setup.c         |  5 +++++
 arch/arm/kernel/smp.c           | 31 +++++++++++++++++++++++++++++++
 arch/arm/mm/proc-v7-bugs.c      | 17 ++---------------
 4 files changed, 58 insertions(+), 15 deletions(-)

Comments

Julien Thierry Nov. 7, 2018, 5:42 p.m. UTC | #1
Hi Russel,

On 06/11/18 12:39, Russell King wrote:
> In big.Little systems, some CPUs require the Spectre workarounds in
> paths such as the context switch, but other CPUs do not.  In order
> to handle these differences, we need per-CPU vtables.
> 
> We are unable to use the kernel's per-CPU variables to support this
> as per-CPU is not initialised at times when we need access to the
> vtables, so we have to use an array indexed by logical CPU number.
> 
> We use an array-of-pointers to avoid having function pointers in
> the kernel's read/write .data section.
> 
> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
> ---
>   arch/arm/include/asm/proc-fns.h | 20 ++++++++++++++++++++
>   arch/arm/kernel/setup.c         |  5 +++++
>   arch/arm/kernel/smp.c           | 31 +++++++++++++++++++++++++++++++
>   arch/arm/mm/proc-v7-bugs.c      | 17 ++---------------
>   4 files changed, 58 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
> index c259cc49c641..fe58fb3c4b4e 100644
> --- a/arch/arm/include/asm/proc-fns.h
> +++ b/arch/arm/include/asm/proc-fns.h
> @@ -104,12 +104,32 @@ extern void cpu_do_resume(void *);
>   #else
>   
>   extern struct processor processor;
> +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
> +#include <linux/smp.h>
> +/*
> + * This can't be a per-cpu variable because we need to access it before
> + * per-cpu has been initialised.
> + */
> +extern struct processor *cpu_vtable[];
> +#define PROC_VTABLE(f)			cpu_vtable[smp_processor_id()]->f
> +#define PROC_TABLE(f)			cpu_vtable[0]->f

I feel it would be worth to have a comment to explain why we need the 
distinction between PROC_TABLE and PROC_VTABLE. I currently understand 
thanks to the commit message (of the previous patch), but just reading 
the code I probably would be very confused.

Otherwise things look good to me:

Reviewed-by: Julien Thierry <julien.thierry@arm.com>

Thanks,

Julien

> +static inline void init_proc_vtable(const struct processor *p)
> +{
> +	unsigned int cpu = smp_processor_id();
> +	*cpu_vtable[cpu] = *p;
> +	WARN_ON_ONCE(cpu_vtable[cpu]->dcache_clean_area !=
> +		     cpu_vtable[0]->dcache_clean_area);
> +	WARN_ON_ONCE(cpu_vtable[cpu]->set_pte_ext !=
> +		     cpu_vtable[0]->set_pte_ext);
> +}
> +#else
>   #define PROC_VTABLE(f)			processor.f
>   #define PROC_TABLE(f)			processor.f
>   static inline void init_proc_vtable(const struct processor *p)
>   {
>   	processor = *p;
>   }
> +#endif
>   
>   #define cpu_proc_init			PROC_VTABLE(_proc_init)
>   #define cpu_check_bugs			PROC_VTABLE(check_bugs)
> diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
> index c214bd14a1fe..cd46a595422c 100644
> --- a/arch/arm/kernel/setup.c
> +++ b/arch/arm/kernel/setup.c
> @@ -115,6 +115,11 @@ EXPORT_SYMBOL(elf_hwcap2);
>   
>   #ifdef MULTI_CPU
>   struct processor processor __ro_after_init;
> +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
> +struct processor *cpu_vtable[NR_CPUS] = {
> +	[0] = &processor,
> +};
> +#endif
>   #endif
>   #ifdef MULTI_TLB
>   struct cpu_tlb_fns cpu_tlb __ro_after_init;
> diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
> index 5ad0b67b9e33..82b879db32ee 100644
> --- a/arch/arm/kernel/smp.c
> +++ b/arch/arm/kernel/smp.c
> @@ -42,6 +42,7 @@
>   #include <asm/mmu_context.h>
>   #include <asm/pgtable.h>
>   #include <asm/pgalloc.h>
> +#include <asm/procinfo.h>
>   #include <asm/processor.h>
>   #include <asm/sections.h>
>   #include <asm/tlbflush.h>
> @@ -102,6 +103,30 @@ static unsigned long get_arch_pgd(pgd_t *pgd)
>   #endif
>   }
>   
> +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
> +static int secondary_biglittle_prepare(unsigned int cpu)
> +{
> +	if (!cpu_vtable[cpu])
> +		cpu_vtable[cpu] = kzalloc(sizeof(*cpu_vtable[cpu]), GFP_KERNEL);
> +
> +	return cpu_vtable[cpu] ? 0 : -ENOMEM;
> +}
> +
> +static void secondary_biglittle_init(void)
> +{
> +	init_proc_vtable(lookup_processor(read_cpuid_id())->proc);
> +}
> +#else
> +static int secondary_biglittle_prepare(unsigned int cpu)
> +{
> +	return 0;
> +}
> +
> +static void secondary_biglittle_init(void)
> +{
> +}
> +#endif
> +
>   int __cpu_up(unsigned int cpu, struct task_struct *idle)
>   {
>   	int ret;
> @@ -109,6 +134,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
>   	if (!smp_ops.smp_boot_secondary)
>   		return -ENOSYS;
>   
> +	ret = secondary_biglittle_prepare(cpu);
> +	if (ret)
> +		return ret;
> +
>   	/*
>   	 * We need to tell the secondary core where to find
>   	 * its stack and the page tables.
> @@ -360,6 +389,8 @@ asmlinkage void secondary_start_kernel(void)
>   	struct mm_struct *mm = &init_mm;
>   	unsigned int cpu;
>   
> +	secondary_biglittle_init();
> +
>   	/*
>   	 * The identity mapping is uncached (strongly ordered), so
>   	 * switch away from it before attempting any exclusive accesses.
> diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
> index 5544b82a2e7a..9a07916af8dd 100644
> --- a/arch/arm/mm/proc-v7-bugs.c
> +++ b/arch/arm/mm/proc-v7-bugs.c
> @@ -52,8 +52,6 @@ static void cpu_v7_spectre_init(void)
>   	case ARM_CPU_PART_CORTEX_A17:
>   	case ARM_CPU_PART_CORTEX_A73:
>   	case ARM_CPU_PART_CORTEX_A75:
> -		if (processor.switch_mm != cpu_v7_bpiall_switch_mm)
> -			goto bl_error;
>   		per_cpu(harden_branch_predictor_fn, cpu) =
>   			harden_branch_predictor_bpiall;
>   		spectre_v2_method = "BPIALL";
> @@ -61,8 +59,6 @@ static void cpu_v7_spectre_init(void)
>   
>   	case ARM_CPU_PART_CORTEX_A15:
>   	case ARM_CPU_PART_BRAHMA_B15:
> -		if (processor.switch_mm != cpu_v7_iciallu_switch_mm)
> -			goto bl_error;
>   		per_cpu(harden_branch_predictor_fn, cpu) =
>   			harden_branch_predictor_iciallu;
>   		spectre_v2_method = "ICIALLU";
> @@ -88,11 +84,9 @@ static void cpu_v7_spectre_init(void)
>   					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
>   			if ((int)res.a0 != 0)
>   				break;
> -			if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu)
> -				goto bl_error;
>   			per_cpu(harden_branch_predictor_fn, cpu) =
>   				call_hvc_arch_workaround_1;
> -			processor.switch_mm = cpu_v7_hvc_switch_mm;
> +			cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
>   			spectre_v2_method = "hypervisor";
>   			break;
>   
> @@ -101,11 +95,9 @@ static void cpu_v7_spectre_init(void)
>   					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
>   			if ((int)res.a0 != 0)
>   				break;
> -			if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu)
> -				goto bl_error;
>   			per_cpu(harden_branch_predictor_fn, cpu) =
>   				call_smc_arch_workaround_1;
> -			processor.switch_mm = cpu_v7_smc_switch_mm;
> +			cpu_do_switch_mm = cpu_v7_smc_switch_mm;
>   			spectre_v2_method = "firmware";
>   			break;
>   
> @@ -119,11 +111,6 @@ static void cpu_v7_spectre_init(void)
>   	if (spectre_v2_method)
>   		pr_info("CPU%u: Spectre v2: using %s workaround\n",
>   			smp_processor_id(), spectre_v2_method);
> -	return;
> -
> -bl_error:
> -	pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n",
> -		cpu);
>   }
>   #else
>   static void cpu_v7_spectre_init(void)
>
Marek Szyprowski Dec. 6, 2018, 9:32 a.m. UTC | #2
Hi All,

On 2018-11-06 13:39, Russell King wrote:
> In big.Little systems, some CPUs require the Spectre workarounds in
> paths such as the context switch, but other CPUs do not.  In order
> to handle these differences, we need per-CPU vtables.
>
> We are unable to use the kernel's per-CPU variables to support this
> as per-CPU is not initialised at times when we need access to the
> vtables, so we have to use an array indexed by logical CPU number.
>
> We use an array-of-pointers to avoid having function pointers in
> the kernel's read/write .data section.
>
> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>

Today I've noticed that this patch (which has landed in v4.20-rc3 as
commit 383fb3ee8024) causes regression in CPU hotplug and suspend/resume
handling on Samsung Exynos4412 SoC (4*CortexA9 SMP). After putting any
non-zero CPU offline, it is no longer possible to get it online again.
Same in system suspend/resume - after s2r cycle, only CPU0 is online.
The regression can be observed on Hardkernel's Odroid U3 and Samsung
Trats2 boards.

Any idea how to fix this issue?

> ---
>  arch/arm/include/asm/proc-fns.h | 20 ++++++++++++++++++++
>  arch/arm/kernel/setup.c         |  5 +++++
>  arch/arm/kernel/smp.c           | 31 +++++++++++++++++++++++++++++++
>  arch/arm/mm/proc-v7-bugs.c      | 17 ++---------------
>  4 files changed, 58 insertions(+), 15 deletions(-)
>
> diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
> index c259cc49c641..fe58fb3c4b4e 100644
> --- a/arch/arm/include/asm/proc-fns.h
> +++ b/arch/arm/include/asm/proc-fns.h
> @@ -104,12 +104,32 @@ extern void cpu_do_resume(void *);
>  #else
>  
>  extern struct processor processor;
> +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
> +#include <linux/smp.h>
> +/*
> + * This can't be a per-cpu variable because we need to access it before
> + * per-cpu has been initialised.
> + */
> +extern struct processor *cpu_vtable[];
> +#define PROC_VTABLE(f)			cpu_vtable[smp_processor_id()]->f
> +#define PROC_TABLE(f)			cpu_vtable[0]->f
> +static inline void init_proc_vtable(const struct processor *p)
> +{
> +	unsigned int cpu = smp_processor_id();
> +	*cpu_vtable[cpu] = *p;
> +	WARN_ON_ONCE(cpu_vtable[cpu]->dcache_clean_area !=
> +		     cpu_vtable[0]->dcache_clean_area);
> +	WARN_ON_ONCE(cpu_vtable[cpu]->set_pte_ext !=
> +		     cpu_vtable[0]->set_pte_ext);
> +}
> +#else
>  #define PROC_VTABLE(f)			processor.f
>  #define PROC_TABLE(f)			processor.f
>  static inline void init_proc_vtable(const struct processor *p)
>  {
>  	processor = *p;
>  }
> +#endif
>  
>  #define cpu_proc_init			PROC_VTABLE(_proc_init)
>  #define cpu_check_bugs			PROC_VTABLE(check_bugs)
> diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
> index c214bd14a1fe..cd46a595422c 100644
> --- a/arch/arm/kernel/setup.c
> +++ b/arch/arm/kernel/setup.c
> @@ -115,6 +115,11 @@ EXPORT_SYMBOL(elf_hwcap2);
>  
>  #ifdef MULTI_CPU
>  struct processor processor __ro_after_init;
> +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
> +struct processor *cpu_vtable[NR_CPUS] = {
> +	[0] = &processor,
> +};
> +#endif
>  #endif
>  #ifdef MULTI_TLB
>  struct cpu_tlb_fns cpu_tlb __ro_after_init;
> diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
> index 5ad0b67b9e33..82b879db32ee 100644
> --- a/arch/arm/kernel/smp.c
> +++ b/arch/arm/kernel/smp.c
> @@ -42,6 +42,7 @@
>  #include <asm/mmu_context.h>
>  #include <asm/pgtable.h>
>  #include <asm/pgalloc.h>
> +#include <asm/procinfo.h>
>  #include <asm/processor.h>
>  #include <asm/sections.h>
>  #include <asm/tlbflush.h>
> @@ -102,6 +103,30 @@ static unsigned long get_arch_pgd(pgd_t *pgd)
>  #endif
>  }
>  
> +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
> +static int secondary_biglittle_prepare(unsigned int cpu)
> +{
> +	if (!cpu_vtable[cpu])
> +		cpu_vtable[cpu] = kzalloc(sizeof(*cpu_vtable[cpu]), GFP_KERNEL);
> +
> +	return cpu_vtable[cpu] ? 0 : -ENOMEM;
> +}
> +
> +static void secondary_biglittle_init(void)
> +{
> +	init_proc_vtable(lookup_processor(read_cpuid_id())->proc);
> +}
> +#else
> +static int secondary_biglittle_prepare(unsigned int cpu)
> +{
> +	return 0;
> +}
> +
> +static void secondary_biglittle_init(void)
> +{
> +}
> +#endif
> +
>  int __cpu_up(unsigned int cpu, struct task_struct *idle)
>  {
>  	int ret;
> @@ -109,6 +134,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
>  	if (!smp_ops.smp_boot_secondary)
>  		return -ENOSYS;
>  
> +	ret = secondary_biglittle_prepare(cpu);
> +	if (ret)
> +		return ret;
> +
>  	/*
>  	 * We need to tell the secondary core where to find
>  	 * its stack and the page tables.
> @@ -360,6 +389,8 @@ asmlinkage void secondary_start_kernel(void)
>  	struct mm_struct *mm = &init_mm;
>  	unsigned int cpu;
>  
> +	secondary_biglittle_init();
> +
>  	/*
>  	 * The identity mapping is uncached (strongly ordered), so
>  	 * switch away from it before attempting any exclusive accesses.
> diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
> index 5544b82a2e7a..9a07916af8dd 100644
> --- a/arch/arm/mm/proc-v7-bugs.c
> +++ b/arch/arm/mm/proc-v7-bugs.c
> @@ -52,8 +52,6 @@ static void cpu_v7_spectre_init(void)
>  	case ARM_CPU_PART_CORTEX_A17:
>  	case ARM_CPU_PART_CORTEX_A73:
>  	case ARM_CPU_PART_CORTEX_A75:
> -		if (processor.switch_mm != cpu_v7_bpiall_switch_mm)
> -			goto bl_error;
>  		per_cpu(harden_branch_predictor_fn, cpu) =
>  			harden_branch_predictor_bpiall;
>  		spectre_v2_method = "BPIALL";
> @@ -61,8 +59,6 @@ static void cpu_v7_spectre_init(void)
>  
>  	case ARM_CPU_PART_CORTEX_A15:
>  	case ARM_CPU_PART_BRAHMA_B15:
> -		if (processor.switch_mm != cpu_v7_iciallu_switch_mm)
> -			goto bl_error;
>  		per_cpu(harden_branch_predictor_fn, cpu) =
>  			harden_branch_predictor_iciallu;
>  		spectre_v2_method = "ICIALLU";
> @@ -88,11 +84,9 @@ static void cpu_v7_spectre_init(void)
>  					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
>  			if ((int)res.a0 != 0)
>  				break;
> -			if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu)
> -				goto bl_error;
>  			per_cpu(harden_branch_predictor_fn, cpu) =
>  				call_hvc_arch_workaround_1;
> -			processor.switch_mm = cpu_v7_hvc_switch_mm;
> +			cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
>  			spectre_v2_method = "hypervisor";
>  			break;
>  
> @@ -101,11 +95,9 @@ static void cpu_v7_spectre_init(void)
>  					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
>  			if ((int)res.a0 != 0)
>  				break;
> -			if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu)
> -				goto bl_error;
>  			per_cpu(harden_branch_predictor_fn, cpu) =
>  				call_smc_arch_workaround_1;
> -			processor.switch_mm = cpu_v7_smc_switch_mm;
> +			cpu_do_switch_mm = cpu_v7_smc_switch_mm;
>  			spectre_v2_method = "firmware";
>  			break;
>  
> @@ -119,11 +111,6 @@ static void cpu_v7_spectre_init(void)
>  	if (spectre_v2_method)
>  		pr_info("CPU%u: Spectre v2: using %s workaround\n",
>  			smp_processor_id(), spectre_v2_method);
> -	return;
> -
> -bl_error:
> -	pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n",
> -		cpu);
>  }
>  #else
>  static void cpu_v7_spectre_init(void)

Best regards
Russell King (Oracle) Dec. 6, 2018, 10 a.m. UTC | #3
On Thu, Dec 06, 2018 at 10:32:56AM +0100, Marek Szyprowski wrote:
> Hi All,
> 
> On 2018-11-06 13:39, Russell King wrote:
> > In big.Little systems, some CPUs require the Spectre workarounds in
> > paths such as the context switch, but other CPUs do not.  In order
> > to handle these differences, we need per-CPU vtables.
> >
> > We are unable to use the kernel's per-CPU variables to support this
> > as per-CPU is not initialised at times when we need access to the
> > vtables, so we have to use an array indexed by logical CPU number.
> >
> > We use an array-of-pointers to avoid having function pointers in
> > the kernel's read/write .data section.
> >
> > Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
> 
> Today I've noticed that this patch (which has landed in v4.20-rc3 as
> commit 383fb3ee8024) causes regression in CPU hotplug and suspend/resume
> handling on Samsung Exynos4412 SoC (4*CortexA9 SMP). After putting any
> non-zero CPU offline, it is no longer possible to get it online again.
> Same in system suspend/resume - after s2r cycle, only CPU0 is online.
> The regression can be observed on Hardkernel's Odroid U3 and Samsung
> Trats2 boards.
> 
> Any idea how to fix this issue?

This is getting _really_ annoying.  I send the patches out, I ask for
people to test, they seem to ignore the patches, and then they later
report problems.  This is the _second_ time that this has happened,
both times with Exynos, the first time it caused the patches to miss
the merge window.  Given that Spectre has to deal with every kernel
entry path, wouldn't it be logical to test every form of entry - boot,
CPU hotplug, suspend/resume etc?

Sorry, but at this point I have little sympathy.

I've no idea based on what you've supplied given that the SoC
maintainers are responsible for writing the code to deal with hotplug
etc, and Exynos's code there is something of a maze.  It's not clear
which bits are being used.  I think you at the very least need to debug
to find out whether the problem is at CPU down or CPU up.

From the ARM architecture point of view, for Cortex A9, all the
processor function instances should be identical.  The only difference
as a result of the patch is that we'll be calling smp_processor_id()
early (which should be fine), and indirecting through the cpu_vtable[]
array rather than merely dereferencing the processor struct.

What about checking dmesg - messages from offline CPUs do not appear
on the console(s) but are still logged in the kernel log.

You could try making PROC_VTABLE() the same as PROC_TABLE() (iow,
always access cpu_vtable[0]) to see whether it's the smp_processor_id()
that's causing your problem or not.  If it is, then try and work out
which of the processor functions is causing it by restoring
PROC_VTABLE() and then switching each from PROC_VTABLE() to
PROC_TABLE() until it does work.
Krzysztof Kozlowski Dec. 6, 2018, 10:24 a.m. UTC | #4
On Thu, 6 Dec 2018 at 11:01, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
>
> On Thu, Dec 06, 2018 at 10:32:56AM +0100, Marek Szyprowski wrote:
> > Hi All,
> >
> > On 2018-11-06 13:39, Russell King wrote:
> > > In big.Little systems, some CPUs require the Spectre workarounds in
> > > paths such as the context switch, but other CPUs do not.  In order
> > > to handle these differences, we need per-CPU vtables.
> > >
> > > We are unable to use the kernel's per-CPU variables to support this
> > > as per-CPU is not initialised at times when we need access to the
> > > vtables, so we have to use an array indexed by logical CPU number.
> > >
> > > We use an array-of-pointers to avoid having function pointers in
> > > the kernel's read/write .data section.
> > >
> > > Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
> >
> > Today I've noticed that this patch (which has landed in v4.20-rc3 as
> > commit 383fb3ee8024) causes regression in CPU hotplug and suspend/resume
> > handling on Samsung Exynos4412 SoC (4*CortexA9 SMP). After putting any
> > non-zero CPU offline, it is no longer possible to get it online again.
> > Same in system suspend/resume - after s2r cycle, only CPU0 is online.
> > The regression can be observed on Hardkernel's Odroid U3 and Samsung
> > Trats2 boards.
> >
> > Any idea how to fix this issue?
>
> This is getting _really_ annoying.  I send the patches out, I ask for
> people to test, they seem to ignore the patches, and then they later
> report problems.  This is the _second_ time that this has happened,
> both times with Exynos, the first time it caused the patches to miss
> the merge window.  Given that Spectre has to deal with every kernel
> entry path, wouldn't it be logical to test every form of entry - boot,
> CPU hotplug, suspend/resume etc?
>
> Sorry, but at this point I have little sympathy.

No one here was accusing anyone so there is really no reason from your
side to respond such.

You sent v2 when I was on holiday. When I returned, I had one day for
response/testing and it ended up sent to Linus for v4.20-rc3. Pretty
late in RC cycle to send fix for something not broken during merge
window... but it happens. We just reported a problem and asked about
help in debugging. Really there is no need to feel blamed for
anything...

We have automated testing setups for linux-next but... linux-next is a
lot of times unbootable. These days - DMA. Before - multiple times NFS
was failing (which my setup uses). Other cases I do not even
remember... and we spend significant time on bisecting these little
breaks all over the kernel so we could test linux-next. The point is
that patch really has to stay in linux-next for some time so the
systems will be able to test it. If you leave it there for two weeks,
then you might hit "unbootable period of linux-next". At least that's
my impression.

> I've no idea based on what you've supplied given that the SoC
> maintainers are responsible for writing the code to deal with hotplug
> etc, and Exynos's code there is something of a maze.  It's not clear
> which bits are being used.  I think you at the very least need to debug
> to find out whether the problem is at CPU down or CPU up.
>
> From the ARM architecture point of view, for Cortex A9, all the
> processor function instances should be identical.  The only difference
> as a result of the patch is that we'll be calling smp_processor_id()
> early (which should be fine), and indirecting through the cpu_vtable[]
> array rather than merely dereferencing the processor struct.
>
> What about checking dmesg - messages from offline CPUs do not appear
> on the console(s) but are still logged in the kernel log.
>
> You could try making PROC_VTABLE() the same as PROC_TABLE() (iow,
> always access cpu_vtable[0]) to see whether it's the smp_processor_id()
> that's causing your problem or not.  If it is, then try and work out
> which of the processor functions is causing it by restoring
> PROC_VTABLE() and then switching each from PROC_VTABLE() to
> PROC_TABLE() until it does work.

Thanks for hints!

Best regards,
Krzysztof
Russell King (Oracle) Dec. 6, 2018, 12:39 p.m. UTC | #5
On Thu, Dec 06, 2018 at 11:24:27AM +0100, Krzysztof Kozlowski wrote:
> On Thu, 6 Dec 2018 at 11:01, Russell King - ARM Linux
> <linux@armlinux.org.uk> wrote:
> > I've no idea based on what you've supplied given that the SoC
> > maintainers are responsible for writing the code to deal with hotplug
> > etc, and Exynos's code there is something of a maze.  It's not clear
> > which bits are being used.  I think you at the very least need to debug
> > to find out whether the problem is at CPU down or CPU up.
> >
> > From the ARM architecture point of view, for Cortex A9, all the
> > processor function instances should be identical.  The only difference
> > as a result of the patch is that we'll be calling smp_processor_id()
> > early (which should be fine), and indirecting through the cpu_vtable[]
> > array rather than merely dereferencing the processor struct.
> >
> > What about checking dmesg - messages from offline CPUs do not appear
> > on the console(s) but are still logged in the kernel log.
> >
> > You could try making PROC_VTABLE() the same as PROC_TABLE() (iow,
> > always access cpu_vtable[0]) to see whether it's the smp_processor_id()
> > that's causing your problem or not.  If it is, then try and work out
> > which of the processor functions is causing it by restoring
> > PROC_VTABLE() and then switching each from PROC_VTABLE() to
> > PROC_TABLE() until it does work.
> 
> Thanks for hints!

So I can plan, how long do you think it will take to get some results
from my suggestions above?
Krzysztof Kozlowski Dec. 6, 2018, 1:54 p.m. UTC | #6
On Thu, 6 Dec 2018 at 13:40, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
>
> On Thu, Dec 06, 2018 at 11:24:27AM +0100, Krzysztof Kozlowski wrote:
> > On Thu, 6 Dec 2018 at 11:01, Russell King - ARM Linux
> > <linux@armlinux.org.uk> wrote:
> > > I've no idea based on what you've supplied given that the SoC
> > > maintainers are responsible for writing the code to deal with hotplug
> > > etc, and Exynos's code there is something of a maze.  It's not clear
> > > which bits are being used.  I think you at the very least need to debug
> > > to find out whether the problem is at CPU down or CPU up.
> > >
> > > From the ARM architecture point of view, for Cortex A9, all the
> > > processor function instances should be identical.  The only difference
> > > as a result of the patch is that we'll be calling smp_processor_id()
> > > early (which should be fine), and indirecting through the cpu_vtable[]
> > > array rather than merely dereferencing the processor struct.
> > >
> > > What about checking dmesg - messages from offline CPUs do not appear
> > > on the console(s) but are still logged in the kernel log.
> > >
> > > You could try making PROC_VTABLE() the same as PROC_TABLE() (iow,
> > > always access cpu_vtable[0]) to see whether it's the smp_processor_id()
> > > that's causing your problem or not.  If it is, then try and work out
> > > which of the processor functions is causing it by restoring
> > > PROC_VTABLE() and then switching each from PROC_VTABLE() to
> > > PROC_TABLE() until it does work.
> >
> > Thanks for hints!
>
> So I can plan, how long do you think it will take to get some results
> from my suggestions above?

For Suspend to RAM, on v4.20-rc3, this warning appears:
[   84.046722] WARNING: CPU: 1 PID: 0 at
../arch/arm/include/asm/proc-fns.h:124
secondary_start_kernel+0x214/0x26c
(difference between dcache_clean_area)
The secondary CPUs bringup fails with ETIMEDOUT.

Replacing all PROC_VTABLE -> PROC_TABLE improves a little bit - one
secondary CPU comes up, other timeout.
[   58.087004] Enabling non-boot CPUs ...
[   58.089116] ------------[ cut here ]------------
[   58.089138] WARNING: CPU: 1 PID: 0 at
../arch/arm/include/asm/proc-fns.h:124
secondary_start_kernel+0x208/0x264
[   58.089143] Modules linked in:
[   59.084017] CPU1: failed to boot: -110
[   59.085709] Error taking CPU1 up: -110
[   59.087667] ------------[ cut here ]------------
[   59.087683] WARNING: CPU: 2 PID: 0 at
../arch/arm/include/asm/proc-fns.h:126
secondary_start_kernel+0x240/0x264
[   59.087688] Modules linked in:
[   60.086049] CPU2: failed to boot: -110
[   60.086818] Error taking CPU2 up: -110
[   60.091198] CPU3 is up

For hotplug, making CPU online either fails with ETIMEDOUT or with segfault:
[   68.402951] WARNING: CPU: 1 PID: 0 at
../arch/arm/include/asm/proc-fns.h:126
secondary_start_kernel+0x24c/0x26c
[   68.402955] Modules linked in: s5p_cec
[   68.402966] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G        W
  4.20.0-rc3 #149
[   68.402970] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
[   68.402981] [<c01122f8>] (unwind_backtrace) from [<c010e094>]
(show_stack+0x10/0x14)
[   68.402990] [<c010e094>] (show_stack) from [<c0a3987c>]
(dump_stack+0x98/0xc4)
[   68.402999] [<c0a3987c>] (dump_stack) from [<c0126ccc>] (__warn+0x10c/0x124)
[   68.403008] [<c0126ccc>] (__warn) from [<c0126dfc>]
(warn_slowpath_null+0x40/0x48)
[   68.403017] [<c0126dfc>] (warn_slowpath_null) from [<c0110c1c>]
(secondary_start_kernel+0x24c/0x26c)
[   68.403027] [<c0110c1c>] (secondary_start_kernel) from [<c01109a0>]
(arch_cpu_idle_dead+0x54/0x84)
[   68.403032] irq event stamp: 1028918
[   68.403039] hardirqs last  enabled at (1028917): [<c0a5b7cc>]
_raw_spin_unlock_irqrestore+0x6c/0x74
[   68.403047] hardirqs last disabled at (1028918): [<c0110964>]
arch_cpu_idle_dead+0x18/0x84
[   68.403054] softirqs last  enabled at (1028898): [<c012ecec>]
irq_enter+0x78/0x80
[   68.403061] softirqs last disabled at (1028897): [<c012ecd8>]
irq_enter+0x64/0x80
[   68.403065] ---[ end trace be591873dbd106ff ]---
[   68.403075] Unable to handle kernel paging request at virtual
address e7fddef0
[   68.403082] pgd = 06aec1fd
[   68.403086] [e7fddef0] *pgd=67e1141e(bad)
[   68.403097] Internal error: Oops: 8000000d [#1] PREEMPT SMP ARM
[   68.403103] Modules linked in: s5p_cec
[   68.403114] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G        W
  4.20.0-rc3 #149
[   68.403118] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
[   68.403124] PC is at 0xe7fddef0
[   68.403131] LR is at secondary_start_kernel+0x98/0x26c
[   68.403137] pc : [<e7fddef0>]    lr : [<c0110a68>]    psr: 200f00d3
[   68.403142] sp : ee925fe8  ip : 00000000  fp : 00000000
[   68.403147] r10: c0d6a088  r9 : c0d6a088  r8 : 4000406a
[   68.403153] r7 : 00000002  r6 : 00000001  r5 : c100c67c  r4 : c1025ab0
[   68.403158] r3 : e7fddef0  r2 : ee8ab440  r1 : c1025ab0  r0 : 40004000
[   68.403165] Flags: nzCv  IRQs off  FIQs off  Mode SVC_32  ISA ARM
Segment none
[   68.403171] Control: 10c5387d  Table: 4000404a  DAC: 00000051
[   68.403177] Process swapper/1 (pid: 0, stack limit = 0xddfe9e0d)
[   68.403183] Stack: (0xee925fe8 to 0xee926000)
[   68.403192] 5fe0:                   00000001 c1007470 c10074b4
c01109a0 00000000 00000000
[   68.403203] [<c0110a68>] (secondary_start_kernel) from [<c01109a0>]
(arch_cpu_idle_dead+0x54/0x84)
[   68.403213] Code: 00004000 04000000 00000000 00000000 (00000001)
[   68.403221] ---[ end trace be591873dbd10700 ]---
[   68.403228] Kernel panic - not syncing: Attempted to kill the idle task!

The c0110a68 from secondary_start_kernel pointed to local_flush_bp_all
arch/arm/include/asm/tlbflush.h:550

I need definitely more time for narrowing this down. It might be that
some other changes are affecting this as well.

Best regards,
Krzysztof
Russell King (Oracle) Dec. 6, 2018, 2:07 p.m. UTC | #7
On Thu, Dec 06, 2018 at 02:54:10PM +0100, Krzysztof Kozlowski wrote:
> On Thu, 6 Dec 2018 at 13:40, Russell King - ARM Linux
> <linux@armlinux.org.uk> wrote:
> >
> > On Thu, Dec 06, 2018 at 11:24:27AM +0100, Krzysztof Kozlowski wrote:
> > > On Thu, 6 Dec 2018 at 11:01, Russell King - ARM Linux
> > > <linux@armlinux.org.uk> wrote:
> > > > I've no idea based on what you've supplied given that the SoC
> > > > maintainers are responsible for writing the code to deal with hotplug
> > > > etc, and Exynos's code there is something of a maze.  It's not clear
> > > > which bits are being used.  I think you at the very least need to debug
> > > > to find out whether the problem is at CPU down or CPU up.
> > > >
> > > > From the ARM architecture point of view, for Cortex A9, all the
> > > > processor function instances should be identical.  The only difference
> > > > as a result of the patch is that we'll be calling smp_processor_id()
> > > > early (which should be fine), and indirecting through the cpu_vtable[]
> > > > array rather than merely dereferencing the processor struct.
> > > >
> > > > What about checking dmesg - messages from offline CPUs do not appear
> > > > on the console(s) but are still logged in the kernel log.
> > > >
> > > > You could try making PROC_VTABLE() the same as PROC_TABLE() (iow,
> > > > always access cpu_vtable[0]) to see whether it's the smp_processor_id()
> > > > that's causing your problem or not.  If it is, then try and work out
> > > > which of the processor functions is causing it by restoring
> > > > PROC_VTABLE() and then switching each from PROC_VTABLE() to
> > > > PROC_TABLE() until it does work.
> > >
> > > Thanks for hints!
> >
> > So I can plan, how long do you think it will take to get some results
> > from my suggestions above?
> 
> For Suspend to RAM, on v4.20-rc3, this warning appears:
> [   84.046722] WARNING: CPU: 1 PID: 0 at
> ../arch/arm/include/asm/proc-fns.h:124
> secondary_start_kernel+0x214/0x26c
> (difference between dcache_clean_area)
> The secondary CPUs bringup fails with ETIMEDOUT.

That basically means that the dcache_clean_area method for CPU1
differs from the dcache_clean_area method for CPU0.  If all your
CPUs are identical, that definitely should not be happening.

Hmm.  Interestingly, OMAP4430 passes hotplug tests just fine.

Please try this patch.

 arch/arm/mm/proc-arm1020.S  | 6 ++----
 arch/arm/mm/proc-arm1020e.S | 5 ++---
 arch/arm/mm/proc-arm1022.S  | 5 ++---
 arch/arm/mm/proc-arm1026.S  | 5 ++---
 arch/arm/mm/proc-arm720.S   | 5 ++---
 arch/arm/mm/proc-arm740.S   | 4 +---
 arch/arm/mm/proc-arm7tdmi.S | 4 +---
 arch/arm/mm/proc-arm920.S   | 5 ++---
 arch/arm/mm/proc-arm922.S   | 5 ++---
 arch/arm/mm/proc-arm925.S   | 5 ++---
 arch/arm/mm/proc-arm926.S   | 4 +---
 arch/arm/mm/proc-arm940.S   | 4 +---
 arch/arm/mm/proc-arm946.S   | 4 +---
 arch/arm/mm/proc-arm9tdmi.S | 4 +---
 arch/arm/mm/proc-fa526.S    | 4 +---
 arch/arm/mm/proc-feroceon.S | 4 +---
 arch/arm/mm/proc-mohawk.S   | 4 +---
 arch/arm/mm/proc-sa110.S    | 4 +---
 arch/arm/mm/proc-sa1100.S   | 5 +----
 arch/arm/mm/proc-v6.S       | 4 +---
 arch/arm/mm/proc-v7.S       | 4 +---
 arch/arm/mm/proc-v7m.S      | 3 ++-
 arch/arm/mm/proc-xsc3.S     | 4 +---
 arch/arm/mm/proc-xscale.S   | 4 +---
 24 files changed, 33 insertions(+), 72 deletions(-)

diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index 774ef1323554..48e72b1a6351 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -470,13 +470,11 @@ ENTRY(cpu_arm1020_set_pte_ext)
 arm1020_crval:
 	crval	clear=0x0000593f, mmuset=0x00003935, ucset=0x00001930
 
-	__INITDATA
+	.section ".rodata"
+
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions arm1020, dabort=v4t_early_abort, pabort=legacy_pabort
 
-
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv5t"
 	string	cpu_elf_name, "v5"
 
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index ae3c27b71594..1efc99b158b4 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -451,12 +451,11 @@ ENTRY(cpu_arm1020e_set_pte_ext)
 arm1020e_crval:
 	crval	clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930
 
-	__INITDATA
+	.section ".rodata"
+
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions arm1020e, dabort=v4t_early_abort, pabort=legacy_pabort
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv5te"
 	string	cpu_elf_name, "v5"
 	string	cpu_arm1020e_name, "ARM1020E"
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index dbb2413fe04d..84b911236d9c 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -436,12 +436,11 @@ ENTRY(cpu_arm1022_set_pte_ext)
 arm1022_crval:
 	crval	clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930
 
-	__INITDATA
+	.section ".rodata"
+
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions arm1022, dabort=v4t_early_abort, pabort=legacy_pabort
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv5te"
 	string	cpu_elf_name, "v5"
 	string	cpu_arm1022_name, "ARM1022"
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index 0b37b2cef9d3..802a54128041 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -430,12 +430,11 @@ ENTRY(cpu_arm1026_set_pte_ext)
 arm1026_crval:
 	crval	clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001934
 
-	__INITDATA
+	.section .rodata
+
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions arm1026, dabort=v5t_early_abort, pabort=legacy_pabort
 
-	.section .rodata
-
 	string	cpu_arch_name, "armv5tej"
 	string	cpu_elf_name, "v5"
 	.align
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
index 3651cd70e418..6b2215ea8b25 100644
--- a/arch/arm/mm/proc-arm720.S
+++ b/arch/arm/mm/proc-arm720.S
@@ -169,12 +169,11 @@ ENDPROC(cpu_arm720_reset)
 arm720_crval:
 	crval	clear=0x00002f3f, mmuset=0x0000213d, ucset=0x00000130
 
-		__INITDATA
+		.section ".rodata"
+
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions arm720, dabort=v4t_late_abort, pabort=legacy_pabort
 
-		.section ".rodata"
-
 	string	cpu_arch_name, "armv4t"
 	string	cpu_elf_name, "v4"
 	string	cpu_arm710_name, "ARM710T"
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
index 024fb7732407..95a3a2ea0076 100644
--- a/arch/arm/mm/proc-arm740.S
+++ b/arch/arm/mm/proc-arm740.S
@@ -119,13 +119,11 @@ ENDPROC(cpu_arm740_reset)
 
 	.size	__arm740_setup, . - __arm740_setup
 
-	__INITDATA
+	.section ".rodata"
 
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions arm740, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv4"
 	string	cpu_elf_name, "v4"
 	string	cpu_arm740_name, "ARM740T"
diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S
index 25472d94426d..32e55e146ce1 100644
--- a/arch/arm/mm/proc-arm7tdmi.S
+++ b/arch/arm/mm/proc-arm7tdmi.S
@@ -56,13 +56,11 @@ ENDPROC(cpu_arm7tdmi_reset)
 		ret	lr
 		.size	__arm7tdmi_setup, . - __arm7tdmi_setup
 
-		__INITDATA
+		.section ".rodata"
 
 		@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 		define_processor_functions arm7tdmi, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1
 
-		.section ".rodata"
-
 		string	cpu_arch_name, "armv4t"
 		string	cpu_elf_name, "v4"
 		string	cpu_arm7tdmi_name, "ARM7TDMI"
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 7a14bd4414c9..68ffbdafe3bd 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -436,12 +436,11 @@ ENDPROC(cpu_arm920_do_resume)
 arm920_crval:
 	crval	clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130
 
-	__INITDATA
+	.section ".rodata"
+
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions arm920, dabort=v4t_early_abort, pabort=legacy_pabort, suspend=1
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv4t"
 	string	cpu_elf_name, "v4"
 	string	cpu_arm920_name, "ARM920T"
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index edccfcdcd551..dddff4955e63 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -414,12 +414,11 @@ ENTRY(cpu_arm922_set_pte_ext)
 arm922_crval:
 	crval	clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130
 
-	__INITDATA
+	.section ".rodata"
+
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions arm922, dabort=v4t_early_abort, pabort=legacy_pabort
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv4t"
 	string	cpu_elf_name, "v4"
 	string	cpu_arm922_name, "ARM922T"
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index 32a47cc19076..0a5006a04fb9 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -479,12 +479,11 @@ ENTRY(cpu_arm925_set_pte_ext)
 arm925_crval:
 	crval	clear=0x00007f3f, mmuset=0x0000313d, ucset=0x00001130
 
-	__INITDATA
+	.section ".rodata"
+
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions arm925, dabort=v4t_early_abort, pabort=legacy_pabort
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv4t"
 	string	cpu_elf_name, "v4"
 	string	cpu_arm925_name, "ARM925T"
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index fb827c633693..e6582a96c11f 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -461,13 +461,11 @@ ENDPROC(cpu_arm926_do_resume)
 arm926_crval:
 	crval	clear=0x00007f3f, mmuset=0x00003135, ucset=0x00001134
 
-	__INITDATA
+	.section ".rodata"
 
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions arm926, dabort=v5tj_early_abort, pabort=legacy_pabort, suspend=1
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv5tej"
 	string	cpu_elf_name, "v5"
 	string	cpu_arm926_name, "ARM926EJ-S"
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index ee5b66f847c4..afb2d8018bbf 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -331,13 +331,11 @@ ENDPROC(arm940_dma_unmap_area)
 
 	.size	__arm940_setup, . - __arm940_setup
 
-	__INITDATA
+	.section ".rodata"
 
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions arm940, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv4t"
 	string	cpu_elf_name, "v4"
 	string	cpu_arm940_name, "ARM940T"
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index 7361837edc31..bff88a20c37d 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -386,13 +386,11 @@ ENTRY(cpu_arm946_dcache_clean_area)
 
 	.size	__arm946_setup, . - __arm946_setup
 
-	__INITDATA
+	.section ".rodata"
 
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions arm946, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv5te"
 	string	cpu_elf_name, "v5t"
 	string	cpu_arm946_name, "ARM946E-S"
diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S
index 7fac8c612134..06b3503b5513 100644
--- a/arch/arm/mm/proc-arm9tdmi.S
+++ b/arch/arm/mm/proc-arm9tdmi.S
@@ -56,13 +56,11 @@ ENDPROC(cpu_arm9tdmi_reset)
 		ret	lr
 		.size	__arm9tdmi_setup, . - __arm9tdmi_setup
 
-		__INITDATA
+		.section ".rodata"
 
 		@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 		define_processor_functions arm9tdmi, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1
 
-		.section ".rodata"
-
 		string	cpu_arch_name, "armv4t"
 		string	cpu_elf_name, "v4"
 		string	cpu_arm9tdmi_name, "ARM9TDMI"
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
index 4001b73af4ee..c54d26708961 100644
--- a/arch/arm/mm/proc-fa526.S
+++ b/arch/arm/mm/proc-fa526.S
@@ -177,13 +177,11 @@ ENTRY(cpu_fa526_set_pte_ext)
 fa526_cr1_set:
 	.word	0x397D
 
-	__INITDATA
+	.section ".rodata"
 
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions fa526, dabort=v4_early_abort, pabort=legacy_pabort
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv4"
 	string	cpu_elf_name, "v4"
 	string	cpu_fa526_name, "FA526"
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index 92e08bf37aad..f2e4ee0de8ba 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -568,13 +568,11 @@ ENDPROC(cpu_feroceon_do_resume)
 feroceon_crval:
 	crval	clear=0x0000773f, mmuset=0x00003135, ucset=0x00001134
 
-	__INITDATA
+	.section ".rodata"
 
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions feroceon, dabort=v5t_early_abort, pabort=legacy_pabort
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv5te"
 	string	cpu_elf_name, "v5"
 	string	cpu_feroceon_name, "Feroceon"
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
index 6f07d2ef4ff2..6de3bb48ede4 100644
--- a/arch/arm/mm/proc-mohawk.S
+++ b/arch/arm/mm/proc-mohawk.S
@@ -416,13 +416,11 @@ ENDPROC(cpu_mohawk_do_resume)
 mohawk_crval:
 	crval	clear=0x00007f3f, mmuset=0x00003905, ucset=0x00001134
 
-	__INITDATA
+	.section ".rodata"
 
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions mohawk, dabort=v5t_early_abort, pabort=legacy_pabort
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv5te"
 	string	cpu_elf_name, "v5"
 	string	cpu_mohawk_name, "Marvell 88SV331x"
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index ee2ce496239f..73818d7c19d2 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -186,13 +186,11 @@ ENTRY(cpu_sa110_set_pte_ext)
 sa110_crval:
 	crval	clear=0x00003f3f, mmuset=0x0000113d, ucset=0x00001130
 
-	__INITDATA
+	.section ".rodata"
 
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions sa110, dabort=v4_early_abort, pabort=legacy_pabort
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv4"
 	string	cpu_elf_name, "v4"
 	string	cpu_sa110_name, "StrongARM-110"
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index 222d5836f666..377b06831b4c 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -224,17 +224,14 @@ ENDPROC(cpu_sa1100_do_resume)
 sa1100_crval:
 	crval	clear=0x00003f3f, mmuset=0x0000313d, ucset=0x00001130
 
-	__INITDATA
-
 /*
  * SA1100 and SA1110 share the same function calls
  */
+	.section ".rodata"
 
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions sa1100, dabort=v4_early_abort, pabort=legacy_pabort, suspend=1
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv4"
 	string	cpu_elf_name, "v4"
 	string	cpu_sa1100_name, "StrongARM-1100"
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 06d890a2342b..7d18cda06b35 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -253,13 +253,11 @@ ENDPROC(cpu_v6_do_resume)
 v6_crval:
 	crval	clear=0x01e0fb7f, mmuset=0x00c0387d, ucset=0x00c0187c
 
-	__INITDATA
+	.section ".rodata"
 
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions v6, dabort=v6_early_abort, pabort=v6_pabort, suspend=1
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv6"
 	string	cpu_elf_name, "v6"
 	.align
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 339eb17c9808..bc028c5f262a 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -552,7 +552,7 @@ ENDPROC(__v7_setup)
 __v7_setup_stack:
 	.space	4 * 7				@ 7 registers
 
-	__INITDATA
+	.section ".rodata"
 
 	.weak cpu_v7_bugs_init
 
@@ -631,8 +631,6 @@ ENDPROC(__v7_setup)
 	define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
 #endif
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv7"
 	string	cpu_elf_name, "v7"
 	.align
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index 47a5acc64433..8daf1b320fb4 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -166,6 +166,8 @@ ENDPROC(__v7m_setup)
 /*
  * Cortex-M7 processor functions
  */
+	.section ".rodata"
+
 	globl_equ	cpu_cm7_proc_init,	cpu_v7m_proc_init
 	globl_equ	cpu_cm7_reset,		cpu_v7m_reset
 	globl_equ	cpu_cm7_do_idle,	cpu_v7m_do_idle
@@ -174,7 +176,6 @@ ENDPROC(__v7m_setup)
 	define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1
 	define_processor_functions cm7, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1
 
-	.section ".rodata"
 	string cpu_arch_name, "armv7m"
 	string cpu_elf_name "v7m"
 	string cpu_v7m_name "ARMv7-M"
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index 293dcc2c441f..ceffd974dd30 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -486,13 +486,11 @@ ENDPROC(cpu_xsc3_do_resume)
 xsc3_crval:
 	crval	clear=0x04002202, mmuset=0x00003905, ucset=0x00001900
 
-	__INITDATA
+	.section ".rodata"
 
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions xsc3, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv5te"
 	string	cpu_elf_name, "v5"
 	string	cpu_xsc3_name, "XScale-V3 based processor"
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 3d75b7972fd1..27e1a9df561e 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -586,13 +586,11 @@ ENDPROC(cpu_xscale_do_resume)
 xscale_crval:
 	crval	clear=0x00003b07, mmuset=0x00003905, ucset=0x00001900
 
-	__INITDATA
+	.section ".rodata"
 
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
 	define_processor_functions xscale, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1
 
-	.section ".rodata"
-
 	string	cpu_arch_name, "armv5te"
 	string	cpu_elf_name, "v5"
Krzysztof Kozlowski Dec. 6, 2018, 2:30 p.m. UTC | #8
On Thu, 6 Dec 2018 at 15:07, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
>
> On Thu, Dec 06, 2018 at 02:54:10PM +0100, Krzysztof Kozlowski wrote:
> > On Thu, 6 Dec 2018 at 13:40, Russell King - ARM Linux
> > <linux@armlinux.org.uk> wrote:
> > >
> > > On Thu, Dec 06, 2018 at 11:24:27AM +0100, Krzysztof Kozlowski wrote:
> > > > On Thu, 6 Dec 2018 at 11:01, Russell King - ARM Linux
> > > > <linux@armlinux.org.uk> wrote:
> > > > > I've no idea based on what you've supplied given that the SoC
> > > > > maintainers are responsible for writing the code to deal with hotplug
> > > > > etc, and Exynos's code there is something of a maze.  It's not clear
> > > > > which bits are being used.  I think you at the very least need to debug
> > > > > to find out whether the problem is at CPU down or CPU up.
> > > > >
> > > > > From the ARM architecture point of view, for Cortex A9, all the
> > > > > processor function instances should be identical.  The only difference
> > > > > as a result of the patch is that we'll be calling smp_processor_id()
> > > > > early (which should be fine), and indirecting through the cpu_vtable[]
> > > > > array rather than merely dereferencing the processor struct.
> > > > >
> > > > > What about checking dmesg - messages from offline CPUs do not appear
> > > > > on the console(s) but are still logged in the kernel log.
> > > > >
> > > > > You could try making PROC_VTABLE() the same as PROC_TABLE() (iow,
> > > > > always access cpu_vtable[0]) to see whether it's the smp_processor_id()
> > > > > that's causing your problem or not.  If it is, then try and work out
> > > > > which of the processor functions is causing it by restoring
> > > > > PROC_VTABLE() and then switching each from PROC_VTABLE() to
> > > > > PROC_TABLE() until it does work.
> > > >
> > > > Thanks for hints!
> > >
> > > So I can plan, how long do you think it will take to get some results
> > > from my suggestions above?
> >
> > For Suspend to RAM, on v4.20-rc3, this warning appears:
> > [   84.046722] WARNING: CPU: 1 PID: 0 at
> > ../arch/arm/include/asm/proc-fns.h:124
> > secondary_start_kernel+0x214/0x26c
> > (difference between dcache_clean_area)
> > The secondary CPUs bringup fails with ETIMEDOUT.
>
> That basically means that the dcache_clean_area method for CPU1
> differs from the dcache_clean_area method for CPU0.  If all your
> CPUs are identical, that definitely should not be happening.
>
> Hmm.  Interestingly, OMAP4430 passes hotplug tests just fine.
>
> Please try this patch.

This fixes both hotplug and suspend to RAM. I was trying to narrow why
the pointers to all processor functions differ. During first boot they
were OK but it seems they were changed just before suspend.

Best regards,
Krzysztof
Russell King (Oracle) Dec. 6, 2018, 2:37 p.m. UTC | #9
On Thu, Dec 06, 2018 at 03:30:22PM +0100, Krzysztof Kozlowski wrote:
> On Thu, 6 Dec 2018 at 15:07, Russell King - ARM Linux
> <linux@armlinux.org.uk> wrote:
> >
> > On Thu, Dec 06, 2018 at 02:54:10PM +0100, Krzysztof Kozlowski wrote:
> > > On Thu, 6 Dec 2018 at 13:40, Russell King - ARM Linux
> > > <linux@armlinux.org.uk> wrote:
> > > >
> > > > On Thu, Dec 06, 2018 at 11:24:27AM +0100, Krzysztof Kozlowski wrote:
> > > > > On Thu, 6 Dec 2018 at 11:01, Russell King - ARM Linux
> > > > > <linux@armlinux.org.uk> wrote:
> > > > > > I've no idea based on what you've supplied given that the SoC
> > > > > > maintainers are responsible for writing the code to deal with hotplug
> > > > > > etc, and Exynos's code there is something of a maze.  It's not clear
> > > > > > which bits are being used.  I think you at the very least need to debug
> > > > > > to find out whether the problem is at CPU down or CPU up.
> > > > > >
> > > > > > From the ARM architecture point of view, for Cortex A9, all the
> > > > > > processor function instances should be identical.  The only difference
> > > > > > as a result of the patch is that we'll be calling smp_processor_id()
> > > > > > early (which should be fine), and indirecting through the cpu_vtable[]
> > > > > > array rather than merely dereferencing the processor struct.
> > > > > >
> > > > > > What about checking dmesg - messages from offline CPUs do not appear
> > > > > > on the console(s) but are still logged in the kernel log.
> > > > > >
> > > > > > You could try making PROC_VTABLE() the same as PROC_TABLE() (iow,
> > > > > > always access cpu_vtable[0]) to see whether it's the smp_processor_id()
> > > > > > that's causing your problem or not.  If it is, then try and work out
> > > > > > which of the processor functions is causing it by restoring
> > > > > > PROC_VTABLE() and then switching each from PROC_VTABLE() to
> > > > > > PROC_TABLE() until it does work.
> > > > >
> > > > > Thanks for hints!
> > > >
> > > > So I can plan, how long do you think it will take to get some results
> > > > from my suggestions above?
> > >
> > > For Suspend to RAM, on v4.20-rc3, this warning appears:
> > > [   84.046722] WARNING: CPU: 1 PID: 0 at
> > > ../arch/arm/include/asm/proc-fns.h:124
> > > secondary_start_kernel+0x214/0x26c
> > > (difference between dcache_clean_area)
> > > The secondary CPUs bringup fails with ETIMEDOUT.
> >
> > That basically means that the dcache_clean_area method for CPU1
> > differs from the dcache_clean_area method for CPU0.  If all your
> > CPUs are identical, that definitely should not be happening.
> >
> > Hmm.  Interestingly, OMAP4430 passes hotplug tests just fine.
> >
> > Please try this patch.
> 
> This fixes both hotplug and suspend to RAM. I was trying to narrow why
> the pointers to all processor functions differ. During first boot they
> were OK but it seems they were changed just before suspend.

Thanks for testing.  I think this is probably a better patch which
should end up with the same result.

I suspect no one else has noticed because most people have big.Little
support disabled - that'd explain why it doesn't show up on OMAP4.

diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 81d0efb055c6..44f9776139a8 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -274,6 +274,13 @@
 	.endm
 
 .macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0
+/*
+ * If we are building for big.Little with branch predictor hardening,
+ * we need the processor function tables to remain available after boot.
+ */
+#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+	.rodata
+#endif
 	.type	\name\()_processor_functions, #object
 	.align 2
 ENTRY(\name\()_processor_functions)
@@ -309,6 +316,9 @@ ENTRY(\name\()_processor_functions)
 	.endif
 
 	.size	\name\()_processor_functions, . - \name\()_processor_functions
+#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+	.previous
+#endif
 .endm
 
 .macro define_cache_functions name:req
Krzysztof Kozlowski Dec. 6, 2018, 3:03 p.m. UTC | #10
On Thu, 6 Dec 2018 at 15:37, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
>
> On Thu, Dec 06, 2018 at 03:30:22PM +0100, Krzysztof Kozlowski wrote:
> > On Thu, 6 Dec 2018 at 15:07, Russell King - ARM Linux
> > > That basically means that the dcache_clean_area method for CPU1
> > > differs from the dcache_clean_area method for CPU0.  If all your
> > > CPUs are identical, that definitely should not be happening.
> > >
> > > Hmm.  Interestingly, OMAP4430 passes hotplug tests just fine.
> > >
> > > Please try this patch.
> >
> > This fixes both hotplug and suspend to RAM. I was trying to narrow why
> > the pointers to all processor functions differ. During first boot they
> > were OK but it seems they were changed just before suspend.
>
> Thanks for testing.  I think this is probably a better patch which
> should end up with the same result.
>
> I suspect no one else has noticed because most people have big.Little
> support disabled - that'd explain why it doesn't show up on OMAP4.
>
> diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
> index 81d0efb055c6..44f9776139a8 100644
> --- a/arch/arm/mm/proc-macros.S
> +++ b/arch/arm/mm/proc-macros.S
> @@ -274,6 +274,13 @@
>         .endm
>
>  .macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0
> +/*
> + * If we are building for big.Little with branch predictor hardening,
> + * we need the processor function tables to remain available after boot.
> + */
> +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
> +       .rodata
> +#endif
>         .type   \name\()_processor_functions, #object
>         .align 2
>  ENTRY(\name\()_processor_functions)
> @@ -309,6 +316,9 @@ ENTRY(\name\()_processor_functions)
>         .endif
>
>         .size   \name\()_processor_functions, . - \name\()_processor_functions
> +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
> +       .previous
> +#endif
>  .endm
>
>  .macro define_cache_functions name:req

Does not compile:
../arch/arm/mm/proc-v7.S: Assembler messages:
../arch/arm/mm/proc-v7.S:560: Error: unknown pseudo-op: `.rodata'
../arch/arm/mm/proc-v7.S:575: Error: unknown pseudo-op: `.rodata'
../arch/arm/mm/proc-v7.S:596: Error: unknown pseudo-op: `.rodata'
../arch/arm/mm/proc-v7.S:611: Error: unknown pseudo-op: `.rodata'
../arch/arm/mm/proc-v7.S:629: Error: unknown pseudo-op: `.rodata'

I am building with exynos_defconfig.

Best regards,
Krzysztof
Russell King (Oracle) Dec. 6, 2018, 3:31 p.m. UTC | #11
On Thu, Dec 06, 2018 at 04:03:27PM +0100, Krzysztof Kozlowski wrote:
> On Thu, 6 Dec 2018 at 15:37, Russell King - ARM Linux
> <linux@armlinux.org.uk> wrote:
> >
> > On Thu, Dec 06, 2018 at 03:30:22PM +0100, Krzysztof Kozlowski wrote:
> > > On Thu, 6 Dec 2018 at 15:07, Russell King - ARM Linux
> > > > That basically means that the dcache_clean_area method for CPU1
> > > > differs from the dcache_clean_area method for CPU0.  If all your
> > > > CPUs are identical, that definitely should not be happening.
> > > >
> > > > Hmm.  Interestingly, OMAP4430 passes hotplug tests just fine.
> > > >
> > > > Please try this patch.
> > >
> > > This fixes both hotplug and suspend to RAM. I was trying to narrow why
> > > the pointers to all processor functions differ. During first boot they
> > > were OK but it seems they were changed just before suspend.
> >
> > Thanks for testing.  I think this is probably a better patch which
> > should end up with the same result.
> >
> > I suspect no one else has noticed because most people have big.Little
> > support disabled - that'd explain why it doesn't show up on OMAP4.
> >
> > diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
> > index 81d0efb055c6..44f9776139a8 100644
> > --- a/arch/arm/mm/proc-macros.S
> > +++ b/arch/arm/mm/proc-macros.S
> > @@ -274,6 +274,13 @@
> >         .endm
> >
> >  .macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0
> > +/*
> > + * If we are building for big.Little with branch predictor hardening,
> > + * we need the processor function tables to remain available after boot.
> > + */
> > +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
> > +       .rodata
> > +#endif
> >         .type   \name\()_processor_functions, #object
> >         .align 2
> >  ENTRY(\name\()_processor_functions)
> > @@ -309,6 +316,9 @@ ENTRY(\name\()_processor_functions)
> >         .endif
> >
> >         .size   \name\()_processor_functions, . - \name\()_processor_functions
> > +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
> > +       .previous
> > +#endif
> >  .endm
> >
> >  .macro define_cache_functions name:req
> 
> Does not compile:
> ../arch/arm/mm/proc-v7.S: Assembler messages:
> ../arch/arm/mm/proc-v7.S:560: Error: unknown pseudo-op: `.rodata'
> ../arch/arm/mm/proc-v7.S:575: Error: unknown pseudo-op: `.rodata'
> ../arch/arm/mm/proc-v7.S:596: Error: unknown pseudo-op: `.rodata'
> ../arch/arm/mm/proc-v7.S:611: Error: unknown pseudo-op: `.rodata'
> ../arch/arm/mm/proc-v7.S:629: Error: unknown pseudo-op: `.rodata'

It should be .section ".rodata", sorry.
Marek Szyprowski Dec. 6, 2018, 3:54 p.m. UTC | #12
On 2018-12-06 16:31, Russell King - ARM Linux wrote:
> On Thu, Dec 06, 2018 at 04:03:27PM +0100, Krzysztof Kozlowski wrote:
>> On Thu, 6 Dec 2018 at 15:37, Russell King - ARM Linux
>> <linux@armlinux.org.uk> wrote:
>>> On Thu, Dec 06, 2018 at 03:30:22PM +0100, Krzysztof Kozlowski wrote:
>>>> On Thu, 6 Dec 2018 at 15:07, Russell King - ARM Linux
>>>>> That basically means that the dcache_clean_area method for CPU1
>>>>> differs from the dcache_clean_area method for CPU0.  If all your
>>>>> CPUs are identical, that definitely should not be happening.
>>>>>
>>>>> Hmm.  Interestingly, OMAP4430 passes hotplug tests just fine.
>>>>>
>>>>> Please try this patch.
>>>> This fixes both hotplug and suspend to RAM. I was trying to narrow why
>>>> the pointers to all processor functions differ. During first boot they
>>>> were OK but it seems they were changed just before suspend.
>>> Thanks for testing.  I think this is probably a better patch which
>>> should end up with the same result.
>>>
>>> I suspect no one else has noticed because most people have big.Little
>>> support disabled - that'd explain why it doesn't show up on OMAP4.
>>>
>>> diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
>>> index 81d0efb055c6..44f9776139a8 100644
>>> --- a/arch/arm/mm/proc-macros.S
>>> +++ b/arch/arm/mm/proc-macros.S
>>> @@ -274,6 +274,13 @@
>>>         .endm
>>>
>>>  .macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0
>>> +/*
>>> + * If we are building for big.Little with branch predictor hardening,
>>> + * we need the processor function tables to remain available after boot.
>>> + */
>>> +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
>>> +       .rodata
>>> +#endif
>>>         .type   \name\()_processor_functions, #object
>>>         .align 2
>>>  ENTRY(\name\()_processor_functions)
>>> @@ -309,6 +316,9 @@ ENTRY(\name\()_processor_functions)
>>>         .endif
>>>
>>>         .size   \name\()_processor_functions, . - \name\()_processor_functions
>>> +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
>>> +       .previous
>>> +#endif
>>>  .endm
>>>
>>>  .macro define_cache_functions name:req
>> Does not compile:
>> ../arch/arm/mm/proc-v7.S: Assembler messages:
>> ../arch/arm/mm/proc-v7.S:560: Error: unknown pseudo-op: `.rodata'
>> ../arch/arm/mm/proc-v7.S:575: Error: unknown pseudo-op: `.rodata'
>> ../arch/arm/mm/proc-v7.S:596: Error: unknown pseudo-op: `.rodata'
>> ../arch/arm/mm/proc-v7.S:611: Error: unknown pseudo-op: `.rodata'
>> ../arch/arm/mm/proc-v7.S:629: Error: unknown pseudo-op: `.rodata'
> It should be .section ".rodata", sorry.


With the above change, the issue is fixed, both for hotplug and s2r.


Best regards
Krzysztof Kozlowski Dec. 6, 2018, 3:58 p.m. UTC | #13
On Thu, 6 Dec 2018 at 16:31, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
>
> On Thu, Dec 06, 2018 at 04:03:27PM +0100, Krzysztof Kozlowski wrote:
> > On Thu, 6 Dec 2018 at 15:37, Russell King - ARM Linux
> > <linux@armlinux.org.uk> wrote:
> > >
> > > On Thu, Dec 06, 2018 at 03:30:22PM +0100, Krzysztof Kozlowski wrote:
> > > > On Thu, 6 Dec 2018 at 15:07, Russell King - ARM Linux
> > > > > That basically means that the dcache_clean_area method for CPU1
> > > > > differs from the dcache_clean_area method for CPU0.  If all your
> > > > > CPUs are identical, that definitely should not be happening.
> > > > >
> > > > > Hmm.  Interestingly, OMAP4430 passes hotplug tests just fine.
> > > > >
> > > > > Please try this patch.
> > > >
> > > > This fixes both hotplug and suspend to RAM. I was trying to narrow why
> > > > the pointers to all processor functions differ. During first boot they
> > > > were OK but it seems they were changed just before suspend.
> > >
> > > Thanks for testing.  I think this is probably a better patch which
> > > should end up with the same result.
> > >
> > > I suspect no one else has noticed because most people have big.Little
> > > support disabled - that'd explain why it doesn't show up on OMAP4.
> > >
> > > diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
> > > index 81d0efb055c6..44f9776139a8 100644
> > > --- a/arch/arm/mm/proc-macros.S
> > > +++ b/arch/arm/mm/proc-macros.S
> > > @@ -274,6 +274,13 @@
> > >         .endm
> > >
> > >  .macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0
> > > +/*
> > > + * If we are building for big.Little with branch predictor hardening,
> > > + * we need the processor function tables to remain available after boot.
> > > + */
> > > +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
> > > +       .rodata
> > > +#endif
> > >         .type   \name\()_processor_functions, #object
> > >         .align 2
> > >  ENTRY(\name\()_processor_functions)
> > > @@ -309,6 +316,9 @@ ENTRY(\name\()_processor_functions)
> > >         .endif
> > >
> > >         .size   \name\()_processor_functions, . - \name\()_processor_functions
> > > +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
> > > +       .previous
> > > +#endif
> > >  .endm
> > >
> > >  .macro define_cache_functions name:req
> >
> > Does not compile:
> > ../arch/arm/mm/proc-v7.S: Assembler messages:
> > ../arch/arm/mm/proc-v7.S:560: Error: unknown pseudo-op: `.rodata'
> > ../arch/arm/mm/proc-v7.S:575: Error: unknown pseudo-op: `.rodata'
> > ../arch/arm/mm/proc-v7.S:596: Error: unknown pseudo-op: `.rodata'
> > ../arch/arm/mm/proc-v7.S:611: Error: unknown pseudo-op: `.rodata'
> > ../arch/arm/mm/proc-v7.S:629: Error: unknown pseudo-op: `.rodata'
>
> It should be .section ".rodata", sorry.

Tested hotplug and suspend on Odroid U3 (Exynos4412), hotplug on
Odroid XU3 and HC1 (Exynos5422).
Tested-by: Krzysztof Kozlowski <krzk@kernel.org>

On Odroid U3 and HC1 the cpuhotplug_04 from Linaro's PM-QA fails:
cpuhotplug_04.0/cpu1: checking affinity is set...                           Err
v4.19 also fails here.

On HC1 the board hang once during cpuhotplug_05 test
(https://wiki.linaro.org/WorkingGroups/PowerManagement/Resources/TestSuite/PmQaSpecification#cpuhotplug_05).

Both errors above seems to be unrelated to your fixup. If needed I
could do some more testing during the weekend but in general looks
fine for me.

Thanks!

Best regards,
Krzysztof
Ard Biesheuvel Dec. 7, 2018, 9:11 a.m. UTC | #14
On Thu, 6 Dec 2018 at 16:58, Krzysztof Kozlowski <krzk@kernel.org> wrote:
>
> On Thu, 6 Dec 2018 at 16:31, Russell King - ARM Linux
> <linux@armlinux.org.uk> wrote:
> >
> > On Thu, Dec 06, 2018 at 04:03:27PM +0100, Krzysztof Kozlowski wrote:
> > > On Thu, 6 Dec 2018 at 15:37, Russell King - ARM Linux
> > > <linux@armlinux.org.uk> wrote:
> > > >
> > > > On Thu, Dec 06, 2018 at 03:30:22PM +0100, Krzysztof Kozlowski wrote:
> > > > > On Thu, 6 Dec 2018 at 15:07, Russell King - ARM Linux
> > > > > > That basically means that the dcache_clean_area method for CPU1
> > > > > > differs from the dcache_clean_area method for CPU0.  If all your
> > > > > > CPUs are identical, that definitely should not be happening.
> > > > > >
> > > > > > Hmm.  Interestingly, OMAP4430 passes hotplug tests just fine.
> > > > > >
> > > > > > Please try this patch.
> > > > >
> > > > > This fixes both hotplug and suspend to RAM. I was trying to narrow why
> > > > > the pointers to all processor functions differ. During first boot they
> > > > > were OK but it seems they were changed just before suspend.
> > > >
> > > > Thanks for testing.  I think this is probably a better patch which
> > > > should end up with the same result.
> > > >
> > > > I suspect no one else has noticed because most people have big.Little
> > > > support disabled - that'd explain why it doesn't show up on OMAP4.
> > > >
> > > > diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
> > > > index 81d0efb055c6..44f9776139a8 100644
> > > > --- a/arch/arm/mm/proc-macros.S
> > > > +++ b/arch/arm/mm/proc-macros.S
> > > > @@ -274,6 +274,13 @@
> > > >         .endm
> > > >
> > > >  .macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0
> > > > +/*
> > > > + * If we are building for big.Little with branch predictor hardening,
> > > > + * we need the processor function tables to remain available after boot.
> > > > + */
> > > > +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
> > > > +       .rodata
> > > > +#endif
> > > >         .type   \name\()_processor_functions, #object
> > > >         .align 2
> > > >  ENTRY(\name\()_processor_functions)
> > > > @@ -309,6 +316,9 @@ ENTRY(\name\()_processor_functions)
> > > >         .endif
> > > >
> > > >         .size   \name\()_processor_functions, . - \name\()_processor_functions
> > > > +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
> > > > +       .previous
> > > > +#endif
> > > >  .endm
> > > >
> > > >  .macro define_cache_functions name:req

Russell,

I noticed that the patch merged by Linus has

#if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)

Was that what you intended?
Russell King (Oracle) Dec. 7, 2018, 10:37 a.m. UTC | #15
On Fri, Dec 07, 2018 at 10:11:42AM +0100, Ard Biesheuvel wrote:
> Russell,
> 
> I noticed that the patch merged by Linus has
> 
> #if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
> 
> Was that what you intended?

Of course not - but it'll be harmless.  That's what happens when you
end up hacking the patch to test it, have a cold and get distracted
with OMAP4 bug.  Since it's harmless, I won't be intending to fix it
with any urgency, but thanks for pointing it out.
diff mbox series

Patch

diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index c259cc49c641..fe58fb3c4b4e 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -104,12 +104,32 @@  extern void cpu_do_resume(void *);
 #else
 
 extern struct processor processor;
+#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+#include <linux/smp.h>
+/*
+ * This can't be a per-cpu variable because we need to access it before
+ * per-cpu has been initialised.
+ */
+extern struct processor *cpu_vtable[];
+#define PROC_VTABLE(f)			cpu_vtable[smp_processor_id()]->f
+#define PROC_TABLE(f)			cpu_vtable[0]->f
+static inline void init_proc_vtable(const struct processor *p)
+{
+	unsigned int cpu = smp_processor_id();
+	*cpu_vtable[cpu] = *p;
+	WARN_ON_ONCE(cpu_vtable[cpu]->dcache_clean_area !=
+		     cpu_vtable[0]->dcache_clean_area);
+	WARN_ON_ONCE(cpu_vtable[cpu]->set_pte_ext !=
+		     cpu_vtable[0]->set_pte_ext);
+}
+#else
 #define PROC_VTABLE(f)			processor.f
 #define PROC_TABLE(f)			processor.f
 static inline void init_proc_vtable(const struct processor *p)
 {
 	processor = *p;
 }
+#endif
 
 #define cpu_proc_init			PROC_VTABLE(_proc_init)
 #define cpu_check_bugs			PROC_VTABLE(check_bugs)
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index c214bd14a1fe..cd46a595422c 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -115,6 +115,11 @@  EXPORT_SYMBOL(elf_hwcap2);
 
 #ifdef MULTI_CPU
 struct processor processor __ro_after_init;
+#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+struct processor *cpu_vtable[NR_CPUS] = {
+	[0] = &processor,
+};
+#endif
 #endif
 #ifdef MULTI_TLB
 struct cpu_tlb_fns cpu_tlb __ro_after_init;
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 5ad0b67b9e33..82b879db32ee 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -42,6 +42,7 @@ 
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
+#include <asm/procinfo.h>
 #include <asm/processor.h>
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -102,6 +103,30 @@  static unsigned long get_arch_pgd(pgd_t *pgd)
 #endif
 }
 
+#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+static int secondary_biglittle_prepare(unsigned int cpu)
+{
+	if (!cpu_vtable[cpu])
+		cpu_vtable[cpu] = kzalloc(sizeof(*cpu_vtable[cpu]), GFP_KERNEL);
+
+	return cpu_vtable[cpu] ? 0 : -ENOMEM;
+}
+
+static void secondary_biglittle_init(void)
+{
+	init_proc_vtable(lookup_processor(read_cpuid_id())->proc);
+}
+#else
+static int secondary_biglittle_prepare(unsigned int cpu)
+{
+	return 0;
+}
+
+static void secondary_biglittle_init(void)
+{
+}
+#endif
+
 int __cpu_up(unsigned int cpu, struct task_struct *idle)
 {
 	int ret;
@@ -109,6 +134,10 @@  int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	if (!smp_ops.smp_boot_secondary)
 		return -ENOSYS;
 
+	ret = secondary_biglittle_prepare(cpu);
+	if (ret)
+		return ret;
+
 	/*
 	 * We need to tell the secondary core where to find
 	 * its stack and the page tables.
@@ -360,6 +389,8 @@  asmlinkage void secondary_start_kernel(void)
 	struct mm_struct *mm = &init_mm;
 	unsigned int cpu;
 
+	secondary_biglittle_init();
+
 	/*
 	 * The identity mapping is uncached (strongly ordered), so
 	 * switch away from it before attempting any exclusive accesses.
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index 5544b82a2e7a..9a07916af8dd 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -52,8 +52,6 @@  static void cpu_v7_spectre_init(void)
 	case ARM_CPU_PART_CORTEX_A17:
 	case ARM_CPU_PART_CORTEX_A73:
 	case ARM_CPU_PART_CORTEX_A75:
-		if (processor.switch_mm != cpu_v7_bpiall_switch_mm)
-			goto bl_error;
 		per_cpu(harden_branch_predictor_fn, cpu) =
 			harden_branch_predictor_bpiall;
 		spectre_v2_method = "BPIALL";
@@ -61,8 +59,6 @@  static void cpu_v7_spectre_init(void)
 
 	case ARM_CPU_PART_CORTEX_A15:
 	case ARM_CPU_PART_BRAHMA_B15:
-		if (processor.switch_mm != cpu_v7_iciallu_switch_mm)
-			goto bl_error;
 		per_cpu(harden_branch_predictor_fn, cpu) =
 			harden_branch_predictor_iciallu;
 		spectre_v2_method = "ICIALLU";
@@ -88,11 +84,9 @@  static void cpu_v7_spectre_init(void)
 					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
 			if ((int)res.a0 != 0)
 				break;
-			if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu)
-				goto bl_error;
 			per_cpu(harden_branch_predictor_fn, cpu) =
 				call_hvc_arch_workaround_1;
-			processor.switch_mm = cpu_v7_hvc_switch_mm;
+			cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
 			spectre_v2_method = "hypervisor";
 			break;
 
@@ -101,11 +95,9 @@  static void cpu_v7_spectre_init(void)
 					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
 			if ((int)res.a0 != 0)
 				break;
-			if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu)
-				goto bl_error;
 			per_cpu(harden_branch_predictor_fn, cpu) =
 				call_smc_arch_workaround_1;
-			processor.switch_mm = cpu_v7_smc_switch_mm;
+			cpu_do_switch_mm = cpu_v7_smc_switch_mm;
 			spectre_v2_method = "firmware";
 			break;
 
@@ -119,11 +111,6 @@  static void cpu_v7_spectre_init(void)
 	if (spectre_v2_method)
 		pr_info("CPU%u: Spectre v2: using %s workaround\n",
 			smp_processor_id(), spectre_v2_method);
-	return;
-
-bl_error:
-	pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n",
-		cpu);
 }
 #else
 static void cpu_v7_spectre_init(void)