diff mbox series

[kvm-unit-tests,v4,1/2] arm/psci: Test that CPU 1 has been successfully brought online

Message ID 20230118144912.32049-2-alexandru.elisei@arm.com (mailing list archive)
State New, archived
Headers show
Series arm: Add PSCI CPU_OFF test | expand

Commit Message

Alexandru Elisei Jan. 18, 2023, 2:49 p.m. UTC
For the PSCI CPU_ON function test, all other CPUs perform a CPU_ON call
that target CPU 1. The test is considered a success if CPU_ON returns PSCI
SUCCESS exactly once, and for the rest of the calls PSCI ALREADY_ON.

Enhance the test by checking that CPU 1 is actually online and able to
execute code. Also make the test more robust by checking that the CPU_ON
call returns, instead of assuming that it will always succeed and
hanging indefinitely if it doesn't.

Since the CPU 1 thread is now being set up properly by kvm-unit-tests
when being brought online, it becomes possible to add other tests in the
future that require all CPUs.

The include header order in arm/psci.c has been changed to be in
alphabetic order. This means moving the errata.h include before
libcflat.h, which causes compilation to fail because of missing includes
in errata.h. Fix that also by including the needed header in errata.h.

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
---
 arm/psci.c        | 60 ++++++++++++++++++++++++++++++++++++-----------
 lib/arm/asm/smp.h |  1 +
 lib/arm/smp.c     | 12 +++++++---
 lib/errata.h      |  2 ++
 4 files changed, 58 insertions(+), 17 deletions(-)

Comments

Andrew Jones Jan. 18, 2023, 6:35 p.m. UTC | #1
On Wed, Jan 18, 2023 at 02:49:11PM +0000, Alexandru Elisei wrote:
> For the PSCI CPU_ON function test, all other CPUs perform a CPU_ON call
> that target CPU 1. The test is considered a success if CPU_ON returns PSCI
> SUCCESS exactly once, and for the rest of the calls PSCI ALREADY_ON.
> 
> Enhance the test by checking that CPU 1 is actually online and able to
> execute code. Also make the test more robust by checking that the CPU_ON
> call returns, instead of assuming that it will always succeed and
> hanging indefinitely if it doesn't.
> 
> Since the CPU 1 thread is now being set up properly by kvm-unit-tests
> when being brought online, it becomes possible to add other tests in the
> future that require all CPUs.
> 
> The include header order in arm/psci.c has been changed to be in
> alphabetic order. This means moving the errata.h include before
> libcflat.h, which causes compilation to fail because of missing includes
> in errata.h. Fix that also by including the needed header in errata.h.
> 
> Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
> ---
>  arm/psci.c        | 60 ++++++++++++++++++++++++++++++++++++-----------
>  lib/arm/asm/smp.h |  1 +
>  lib/arm/smp.c     | 12 +++++++---
>  lib/errata.h      |  2 ++
>  4 files changed, 58 insertions(+), 17 deletions(-)
> 
> diff --git a/arm/psci.c b/arm/psci.c
> index efa0722c0566..e96be941953b 100644
> --- a/arm/psci.c
> +++ b/arm/psci.c
> @@ -7,11 +7,13 @@
>   *
>   * This work is licensed under the terms of the GNU LGPL, version 2.
>   */
> -#include <libcflat.h>
>  #include <errata.h>
> +#include <libcflat.h>
> +
> +#include <asm/delay.h>
>  #include <asm/processor.h>
> -#include <asm/smp.h>
>  #include <asm/psci.h>
> +#include <asm/smp.h>
>  
>  static bool invalid_function_exception;
>  
> @@ -72,14 +74,23 @@ static int cpu_on_ret[NR_CPUS];
>  static cpumask_t cpu_on_ready, cpu_on_done;
>  static volatile int cpu_on_start;
>  
> -static void cpu_on_secondary_entry(void)
> +extern void secondary_entry(void);
> +static void cpu_on_do_wake_target(void)
>  {
>  	int cpu = smp_processor_id();
>  
>  	cpumask_set_cpu(cpu, &cpu_on_ready);
>  	while (!cpu_on_start)
>  		cpu_relax();
> -	cpu_on_ret[cpu] = psci_cpu_on(cpus[1], __pa(halt));
> +	cpu_on_ret[cpu] = psci_cpu_on(cpus[1], __pa(secondary_entry));
> +	cpumask_set_cpu(cpu, &cpu_on_done);
> +}
> +
> +static void cpu_on_target(void)
> +{
> +	int cpu = smp_processor_id();
> +
> +	cpu_on_ret[cpu] = PSCI_RET_ALREADY_ON;

I'm not sure this is better than just skipping cpu1 in the check loop, as
is done now, but OK. 

>  	cpumask_set_cpu(cpu, &cpu_on_done);
>  }
>  
> @@ -87,33 +98,53 @@ static bool psci_cpu_on_test(void)
>  {
>  	bool failed = false;
>  	int ret_success = 0;
> -	int cpu;
> -
> -	cpumask_set_cpu(1, &cpu_on_ready);
> -	cpumask_set_cpu(1, &cpu_on_done);
> +	int i, cpu;
>  
>  	for_each_present_cpu(cpu) {
>  		if (cpu < 2)
>  			continue;
> -		smp_boot_secondary(cpu, cpu_on_secondary_entry);
> +		smp_boot_secondary(cpu, cpu_on_do_wake_target);
>  	}
>  
>  	cpumask_set_cpu(0, &cpu_on_ready);
> +	cpumask_set_cpu(1, &cpu_on_ready);
>  	while (!cpumask_full(&cpu_on_ready))
>  		cpu_relax();
>  
> +	/*
> +	 * Wait for all other CPUs to be online before configuring the thread
> +	 * for the target CPU, as all secondaries are set up using the same
> +	 * global variable.
> +	 */

This comment says "Wait", so I'd move the while loop under it.

> +	smp_prepare_secondary(1, cpu_on_target);

This new smp_prepare_secondary() function should be local to this unit
test, please see my justification below.

> +
>  	cpu_on_start = 1;
>  	smp_mb();
>  
> -	cpu_on_ret[0] = psci_cpu_on(cpus[1], __pa(halt));
> +	cpu_on_ret[0] = psci_cpu_on(cpus[1], __pa(secondary_entry));
>  	cpumask_set_cpu(0, &cpu_on_done);
>  
> -	while (!cpumask_full(&cpu_on_done))
> -		cpu_relax();
> +	report_info("waiting for CPU1 to come online...");
> +	for (i = 0; i < 10; i++) {
> +		mdelay(100);
> +		if (cpumask_full(&cpu_on_done))
> +			break;
> +	}
> +
> +	if (!cpumask_full(&cpu_on_done)) {
> +		for_each_present_cpu(cpu) {
> +			if (!cpumask_test_cpu(cpu, &cpu_on_done)) {
> +				if (cpu == 1)
> +					report_info("CPU1 failed to come online");
> +				else
> +					report_info("CPU%d failed to online CPU1", cpu);
> +			}
> +		}
> +		failed = true;
> +		goto out;

We could still run the other checks below, perhaps guarded with
cpumask_test_cpu(cpu, &cpu_on_done), rather than bail early. I'm
also OK with bailing early though. But, for that, I'd just return
false right here rather than introduce the goto.

> +	}
>  
>  	for_each_present_cpu(cpu) {
> -		if (cpu == 1)
> -			continue;
>  		if (cpu_on_ret[cpu] == PSCI_RET_SUCCESS) {
>  			ret_success++;
>  		} else if (cpu_on_ret[cpu] != PSCI_RET_ALREADY_ON) {
> @@ -127,6 +158,7 @@ static bool psci_cpu_on_test(void)
>  		failed = true;
>  	}
>  
> +out:
>  	return !failed;
>  }
>  
> diff --git a/lib/arm/asm/smp.h b/lib/arm/asm/smp.h
> index 077afde85520..ff2ef8f88247 100644
> --- a/lib/arm/asm/smp.h
> +++ b/lib/arm/asm/smp.h
> @@ -49,6 +49,7 @@ static inline void set_cpu_idle(int cpu, bool idle)
>  }
>  
>  typedef void (*secondary_entry_fn)(void);
> +extern void smp_prepare_secondary(int cpu, secondary_entry_fn entry);
>  extern void smp_boot_secondary(int cpu, secondary_entry_fn entry);
>  extern void on_cpu_async(int cpu, void (*func)(void *data), void *data);
>  extern void on_cpu(int cpu, void (*func)(void *data), void *data);
> diff --git a/lib/arm/smp.c b/lib/arm/smp.c
> index 98a5054e039b..947f417f4aea 100644
> --- a/lib/arm/smp.c
> +++ b/lib/arm/smp.c
> @@ -58,13 +58,19 @@ secondary_entry_fn secondary_cinit(void)
>  	return entry;
>  }
>  
> -static void __smp_boot_secondary(int cpu, secondary_entry_fn entry)
> +void smp_prepare_secondary(int cpu, secondary_entry_fn entry)

I'd rather not create an unsafe library function, especially one named
without the leading underscores. It's OK for a unit test to duplicate
__smp_boot_secondary() (secondary_data is available), but then the unit
test also needs to ensure it does its own synchronization, as you do
with the wait on cpu_on_ready already.

>  {
> -	int ret;
> -
>  	secondary_data.stack = thread_stack_alloc();
>  	secondary_data.entry = entry;
>  	mmu_mark_disabled(cpu);
> +}
> +
> +static void __smp_boot_secondary(int cpu, secondary_entry_fn entry)
> +{
> +	int ret;
> +
> +	smp_prepare_secondary(cpu, entry);
> +
>  	ret = cpu_psci_cpu_boot(cpu);
>  	assert(ret == 0);
>  
> diff --git a/lib/errata.h b/lib/errata.h
> index 5af0eb3bf8e2..de8205d8b370 100644
> --- a/lib/errata.h
> +++ b/lib/errata.h
> @@ -6,6 +6,8 @@
>   */
>  #ifndef _ERRATA_H_
>  #define _ERRATA_H_
> +#include <libcflat.h>
> +
>  #include "config.h"
>  
>  #ifndef CONFIG_ERRATA_FORCE
> -- 
> 2.25.1
>

Thanks,
drew
Alexandru Elisei Jan. 19, 2023, 10:32 a.m. UTC | #2
Hi Drew,

Thanks for the quick review!

On Wed, Jan 18, 2023 at 07:35:12PM +0100, Andrew Jones wrote:
> On Wed, Jan 18, 2023 at 02:49:11PM +0000, Alexandru Elisei wrote:
> > For the PSCI CPU_ON function test, all other CPUs perform a CPU_ON call
> > that target CPU 1. The test is considered a success if CPU_ON returns PSCI
> > SUCCESS exactly once, and for the rest of the calls PSCI ALREADY_ON.
> > 
> > Enhance the test by checking that CPU 1 is actually online and able to
> > execute code. Also make the test more robust by checking that the CPU_ON
> > call returns, instead of assuming that it will always succeed and
> > hanging indefinitely if it doesn't.
> > 
> > Since the CPU 1 thread is now being set up properly by kvm-unit-tests
> > when being brought online, it becomes possible to add other tests in the
> > future that require all CPUs.
> > 
> > The include header order in arm/psci.c has been changed to be in
> > alphabetic order. This means moving the errata.h include before
> > libcflat.h, which causes compilation to fail because of missing includes
> > in errata.h. Fix that also by including the needed header in errata.h.
> > 
> > Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
> > ---
> >  arm/psci.c        | 60 ++++++++++++++++++++++++++++++++++++-----------
> >  lib/arm/asm/smp.h |  1 +
> >  lib/arm/smp.c     | 12 +++++++---
> >  lib/errata.h      |  2 ++
> >  4 files changed, 58 insertions(+), 17 deletions(-)
> > 
> > diff --git a/arm/psci.c b/arm/psci.c
> > index efa0722c0566..e96be941953b 100644
> > --- a/arm/psci.c
> > +++ b/arm/psci.c
> > @@ -7,11 +7,13 @@
> >   *
> >   * This work is licensed under the terms of the GNU LGPL, version 2.
> >   */
> > -#include <libcflat.h>
> >  #include <errata.h>
> > +#include <libcflat.h>
> > +
> > +#include <asm/delay.h>
> >  #include <asm/processor.h>
> > -#include <asm/smp.h>
> >  #include <asm/psci.h>
> > +#include <asm/smp.h>
> >  
> >  static bool invalid_function_exception;
> >  
> > @@ -72,14 +74,23 @@ static int cpu_on_ret[NR_CPUS];
> >  static cpumask_t cpu_on_ready, cpu_on_done;
> >  static volatile int cpu_on_start;
> >  
> > -static void cpu_on_secondary_entry(void)
> > +extern void secondary_entry(void);
> > +static void cpu_on_do_wake_target(void)
> >  {
> >  	int cpu = smp_processor_id();
> >  
> >  	cpumask_set_cpu(cpu, &cpu_on_ready);
> >  	while (!cpu_on_start)
> >  		cpu_relax();
> > -	cpu_on_ret[cpu] = psci_cpu_on(cpus[1], __pa(halt));
> > +	cpu_on_ret[cpu] = psci_cpu_on(cpus[1], __pa(secondary_entry));
> > +	cpumask_set_cpu(cpu, &cpu_on_done);
> > +}
> > +
> > +static void cpu_on_target(void)
> > +{
> > +	int cpu = smp_processor_id();
> > +
> > +	cpu_on_ret[cpu] = PSCI_RET_ALREADY_ON;
> 
> I'm not sure this is better than just skipping cpu1 in the check loop, as
> is done now, but OK. 

I think that's a good idea, I'll remove this and skip CPU 1 in the check
loop, because CPU 1 never invokes psci_cpu_on for itself.

> 
> >  	cpumask_set_cpu(cpu, &cpu_on_done);
> >  }
> >  
> > @@ -87,33 +98,53 @@ static bool psci_cpu_on_test(void)
> >  {
> >  	bool failed = false;
> >  	int ret_success = 0;
> > -	int cpu;
> > -
> > -	cpumask_set_cpu(1, &cpu_on_ready);
> > -	cpumask_set_cpu(1, &cpu_on_done);
> > +	int i, cpu;
> >  
> >  	for_each_present_cpu(cpu) {
> >  		if (cpu < 2)
> >  			continue;
> > -		smp_boot_secondary(cpu, cpu_on_secondary_entry);
> > +		smp_boot_secondary(cpu, cpu_on_do_wake_target);
> >  	}
> >  
> >  	cpumask_set_cpu(0, &cpu_on_ready);
> > +	cpumask_set_cpu(1, &cpu_on_ready);
> >  	while (!cpumask_full(&cpu_on_ready))
> >  		cpu_relax();
> >  
> > +	/*
> > +	 * Wait for all other CPUs to be online before configuring the thread
> > +	 * for the target CPU, as all secondaries are set up using the same
> > +	 * global variable.
> > +	 */
> 
> This comment says "Wait", so I'd move the while loop under it.

Can I reword it to keep it here?

	/*
	 * Configure CPU 1 after all the secondaries are online to avoid
	 * secondary_data being overwritten.
	 */

What do you think?

> 
> > +	smp_prepare_secondary(1, cpu_on_target);
> 
> This new smp_prepare_secondary() function should be local to this unit
> test, please see my justification below.

Reply below.

> 
> > +
> >  	cpu_on_start = 1;
> >  	smp_mb();
> >  
> > -	cpu_on_ret[0] = psci_cpu_on(cpus[1], __pa(halt));
> > +	cpu_on_ret[0] = psci_cpu_on(cpus[1], __pa(secondary_entry));
> >  	cpumask_set_cpu(0, &cpu_on_done);
> >  
> > -	while (!cpumask_full(&cpu_on_done))
> > -		cpu_relax();
> > +	report_info("waiting for CPU1 to come online...");
> > +	for (i = 0; i < 10; i++) {
> > +		mdelay(100);
> > +		if (cpumask_full(&cpu_on_done))
> > +			break;
> > +	}
> > +
> > +	if (!cpumask_full(&cpu_on_done)) {
> > +		for_each_present_cpu(cpu) {
> > +			if (!cpumask_test_cpu(cpu, &cpu_on_done)) {
> > +				if (cpu == 1)
> > +					report_info("CPU1 failed to come online");
> > +				else
> > +					report_info("CPU%d failed to online CPU1", cpu);
> > +			}
> > +		}
> > +		failed = true;
> > +		goto out;
> 
> We could still run the other checks below, perhaps guarded with
> cpumask_test_cpu(cpu, &cpu_on_done), rather than bail early. I'm
> also OK with bailing early though. But, for that, I'd just return
> false right here rather than introduce the goto.

I like the idea of checking the return value for the CPU_ON calls that have
returned (I'll use for_each_cpu(cpu, &cpu_on _done) in the for loop below).

> 
> > +	}
> >  
> >  	for_each_present_cpu(cpu) {
> > -		if (cpu == 1)
> > -			continue;
> >  		if (cpu_on_ret[cpu] == PSCI_RET_SUCCESS) {
> >  			ret_success++;
> >  		} else if (cpu_on_ret[cpu] != PSCI_RET_ALREADY_ON) {
> > @@ -127,6 +158,7 @@ static bool psci_cpu_on_test(void)
> >  		failed = true;
> >  	}
> >  
> > +out:
> >  	return !failed;
> >  }
> >  
> > diff --git a/lib/arm/asm/smp.h b/lib/arm/asm/smp.h
> > index 077afde85520..ff2ef8f88247 100644
> > --- a/lib/arm/asm/smp.h
> > +++ b/lib/arm/asm/smp.h
> > @@ -49,6 +49,7 @@ static inline void set_cpu_idle(int cpu, bool idle)
> >  }
> >  
> >  typedef void (*secondary_entry_fn)(void);
> > +extern void smp_prepare_secondary(int cpu, secondary_entry_fn entry);
> >  extern void smp_boot_secondary(int cpu, secondary_entry_fn entry);
> >  extern void on_cpu_async(int cpu, void (*func)(void *data), void *data);
> >  extern void on_cpu(int cpu, void (*func)(void *data), void *data);
> > diff --git a/lib/arm/smp.c b/lib/arm/smp.c
> > index 98a5054e039b..947f417f4aea 100644
> > --- a/lib/arm/smp.c
> > +++ b/lib/arm/smp.c
> > @@ -58,13 +58,19 @@ secondary_entry_fn secondary_cinit(void)
> >  	return entry;
> >  }
> >  
> > -static void __smp_boot_secondary(int cpu, secondary_entry_fn entry)
> > +void smp_prepare_secondary(int cpu, secondary_entry_fn entry)
> 
> I'd rather not create an unsafe library function, especially one named
> without the leading underscores. It's OK for a unit test to duplicate
> __smp_boot_secondary() (secondary_data is available), but then the unit
> test also needs to ensure it does its own synchronization, as you do
> with the wait on cpu_on_ready already.

I created the function to share the code between __smp_boot_secondary and
the PSCI test. Cache maintenance would have to be added here (one CPU
writes to secondary_data, another CPU reads from it, possible with the MMU
off), and I would prefer to keep it in one place, in the library code,
rather than expose the boot cache maintenance requirements to tests.

If I rename it to __smp_boot_secondary(), would you find that acceptable?

Thanks,
Alex

> 
> >  {
> > -	int ret;
> > -
> >  	secondary_data.stack = thread_stack_alloc();
> >  	secondary_data.entry = entry;
> >  	mmu_mark_disabled(cpu);
> > +}
> > +
> > +static void __smp_boot_secondary(int cpu, secondary_entry_fn entry)
> > +{
> > +	int ret;
> > +
> > +	smp_prepare_secondary(cpu, entry);
> > +
> >  	ret = cpu_psci_cpu_boot(cpu);
> >  	assert(ret == 0);
> >  
> > diff --git a/lib/errata.h b/lib/errata.h
> > index 5af0eb3bf8e2..de8205d8b370 100644
> > --- a/lib/errata.h
> > +++ b/lib/errata.h
> > @@ -6,6 +6,8 @@
> >   */
> >  #ifndef _ERRATA_H_
> >  #define _ERRATA_H_
> > +#include <libcflat.h>
> > +
> >  #include "config.h"
> >  
> >  #ifndef CONFIG_ERRATA_FORCE
> > -- 
> > 2.25.1
> >
> 
> Thanks,
> drew
Andrew Jones Jan. 19, 2023, 11:01 a.m. UTC | #3
On Thu, Jan 19, 2023 at 10:32:07AM +0000, Alexandru Elisei wrote:
> Hi Drew,
> 
> Thanks for the quick review!
> 
> On Wed, Jan 18, 2023 at 07:35:12PM +0100, Andrew Jones wrote:
> > On Wed, Jan 18, 2023 at 02:49:11PM +0000, Alexandru Elisei wrote:
> > > For the PSCI CPU_ON function test, all other CPUs perform a CPU_ON call
> > > that target CPU 1. The test is considered a success if CPU_ON returns PSCI
> > > SUCCESS exactly once, and for the rest of the calls PSCI ALREADY_ON.
> > > 
> > > Enhance the test by checking that CPU 1 is actually online and able to
> > > execute code. Also make the test more robust by checking that the CPU_ON
> > > call returns, instead of assuming that it will always succeed and
> > > hanging indefinitely if it doesn't.
> > > 
> > > Since the CPU 1 thread is now being set up properly by kvm-unit-tests
> > > when being brought online, it becomes possible to add other tests in the
> > > future that require all CPUs.
> > > 
> > > The include header order in arm/psci.c has been changed to be in
> > > alphabetic order. This means moving the errata.h include before
> > > libcflat.h, which causes compilation to fail because of missing includes
> > > in errata.h. Fix that also by including the needed header in errata.h.
> > > 
> > > Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
> > > ---
> > >  arm/psci.c        | 60 ++++++++++++++++++++++++++++++++++++-----------
> > >  lib/arm/asm/smp.h |  1 +
> > >  lib/arm/smp.c     | 12 +++++++---
> > >  lib/errata.h      |  2 ++
> > >  4 files changed, 58 insertions(+), 17 deletions(-)
> > > 
> > > diff --git a/arm/psci.c b/arm/psci.c
> > > index efa0722c0566..e96be941953b 100644
> > > --- a/arm/psci.c
> > > +++ b/arm/psci.c
> > > @@ -7,11 +7,13 @@
> > >   *
> > >   * This work is licensed under the terms of the GNU LGPL, version 2.
> > >   */
> > > -#include <libcflat.h>
> > >  #include <errata.h>
> > > +#include <libcflat.h>
> > > +
> > > +#include <asm/delay.h>
> > >  #include <asm/processor.h>
> > > -#include <asm/smp.h>
> > >  #include <asm/psci.h>
> > > +#include <asm/smp.h>
> > >  
> > >  static bool invalid_function_exception;
> > >  
> > > @@ -72,14 +74,23 @@ static int cpu_on_ret[NR_CPUS];
> > >  static cpumask_t cpu_on_ready, cpu_on_done;
> > >  static volatile int cpu_on_start;
> > >  
> > > -static void cpu_on_secondary_entry(void)
> > > +extern void secondary_entry(void);
> > > +static void cpu_on_do_wake_target(void)
> > >  {
> > >  	int cpu = smp_processor_id();
> > >  
> > >  	cpumask_set_cpu(cpu, &cpu_on_ready);
> > >  	while (!cpu_on_start)
> > >  		cpu_relax();
> > > -	cpu_on_ret[cpu] = psci_cpu_on(cpus[1], __pa(halt));
> > > +	cpu_on_ret[cpu] = psci_cpu_on(cpus[1], __pa(secondary_entry));
> > > +	cpumask_set_cpu(cpu, &cpu_on_done);
> > > +}
> > > +
> > > +static void cpu_on_target(void)
> > > +{
> > > +	int cpu = smp_processor_id();
> > > +
> > > +	cpu_on_ret[cpu] = PSCI_RET_ALREADY_ON;
> > 
> > I'm not sure this is better than just skipping cpu1 in the check loop, as
> > is done now, but OK. 
> 
> I think that's a good idea, I'll remove this and skip CPU 1 in the check
> loop, because CPU 1 never invokes psci_cpu_on for itself.
> 
> > 
> > >  	cpumask_set_cpu(cpu, &cpu_on_done);
> > >  }
> > >  
> > > @@ -87,33 +98,53 @@ static bool psci_cpu_on_test(void)
> > >  {
> > >  	bool failed = false;
> > >  	int ret_success = 0;
> > > -	int cpu;
> > > -
> > > -	cpumask_set_cpu(1, &cpu_on_ready);
> > > -	cpumask_set_cpu(1, &cpu_on_done);
> > > +	int i, cpu;
> > >  
> > >  	for_each_present_cpu(cpu) {
> > >  		if (cpu < 2)
> > >  			continue;
> > > -		smp_boot_secondary(cpu, cpu_on_secondary_entry);
> > > +		smp_boot_secondary(cpu, cpu_on_do_wake_target);
> > >  	}
> > >  
> > >  	cpumask_set_cpu(0, &cpu_on_ready);
> > > +	cpumask_set_cpu(1, &cpu_on_ready);
> > >  	while (!cpumask_full(&cpu_on_ready))
> > >  		cpu_relax();
> > >  
> > > +	/*
> > > +	 * Wait for all other CPUs to be online before configuring the thread
> > > +	 * for the target CPU, as all secondaries are set up using the same
> > > +	 * global variable.
> > > +	 */
> > 
> > This comment says "Wait", so I'd move the while loop under it.
> 
> Can I reword it to keep it here?
> 
> 	/*
> 	 * Configure CPU 1 after all the secondaries are online to avoid
> 	 * secondary_data being overwritten.
> 	 */
> 
> What do you think?

Works for me.

> 
> > 
> > > +	smp_prepare_secondary(1, cpu_on_target);
> > 
> > This new smp_prepare_secondary() function should be local to this unit
> > test, please see my justification below.
> 
> Reply below.
> 
> > 
> > > +
> > >  	cpu_on_start = 1;
> > >  	smp_mb();
> > >  
> > > -	cpu_on_ret[0] = psci_cpu_on(cpus[1], __pa(halt));
> > > +	cpu_on_ret[0] = psci_cpu_on(cpus[1], __pa(secondary_entry));
> > >  	cpumask_set_cpu(0, &cpu_on_done);
> > >  
> > > -	while (!cpumask_full(&cpu_on_done))
> > > -		cpu_relax();
> > > +	report_info("waiting for CPU1 to come online...");
> > > +	for (i = 0; i < 10; i++) {
> > > +		mdelay(100);
> > > +		if (cpumask_full(&cpu_on_done))
> > > +			break;
> > > +	}
> > > +
> > > +	if (!cpumask_full(&cpu_on_done)) {
> > > +		for_each_present_cpu(cpu) {
> > > +			if (!cpumask_test_cpu(cpu, &cpu_on_done)) {
> > > +				if (cpu == 1)
> > > +					report_info("CPU1 failed to come online");
> > > +				else
> > > +					report_info("CPU%d failed to online CPU1", cpu);
> > > +			}
> > > +		}
> > > +		failed = true;
> > > +		goto out;
> > 
> > We could still run the other checks below, perhaps guarded with
> > cpumask_test_cpu(cpu, &cpu_on_done), rather than bail early. I'm
> > also OK with bailing early though. But, for that, I'd just return
> > false right here rather than introduce the goto.
> 
> I like the idea of checking the return value for the CPU_ON calls that have
> returned (I'll use for_each_cpu(cpu, &cpu_on _done) in the for loop below).
> 
> > 
> > > +	}
> > >  
> > >  	for_each_present_cpu(cpu) {
> > > -		if (cpu == 1)
> > > -			continue;
> > >  		if (cpu_on_ret[cpu] == PSCI_RET_SUCCESS) {
> > >  			ret_success++;
> > >  		} else if (cpu_on_ret[cpu] != PSCI_RET_ALREADY_ON) {
> > > @@ -127,6 +158,7 @@ static bool psci_cpu_on_test(void)
> > >  		failed = true;
> > >  	}
> > >  
> > > +out:
> > >  	return !failed;
> > >  }
> > >  
> > > diff --git a/lib/arm/asm/smp.h b/lib/arm/asm/smp.h
> > > index 077afde85520..ff2ef8f88247 100644
> > > --- a/lib/arm/asm/smp.h
> > > +++ b/lib/arm/asm/smp.h
> > > @@ -49,6 +49,7 @@ static inline void set_cpu_idle(int cpu, bool idle)
> > >  }
> > >  
> > >  typedef void (*secondary_entry_fn)(void);
> > > +extern void smp_prepare_secondary(int cpu, secondary_entry_fn entry);
> > >  extern void smp_boot_secondary(int cpu, secondary_entry_fn entry);
> > >  extern void on_cpu_async(int cpu, void (*func)(void *data), void *data);
> > >  extern void on_cpu(int cpu, void (*func)(void *data), void *data);
> > > diff --git a/lib/arm/smp.c b/lib/arm/smp.c
> > > index 98a5054e039b..947f417f4aea 100644
> > > --- a/lib/arm/smp.c
> > > +++ b/lib/arm/smp.c
> > > @@ -58,13 +58,19 @@ secondary_entry_fn secondary_cinit(void)
> > >  	return entry;
> > >  }
> > >  
> > > -static void __smp_boot_secondary(int cpu, secondary_entry_fn entry)
> > > +void smp_prepare_secondary(int cpu, secondary_entry_fn entry)
> > 
> > I'd rather not create an unsafe library function, especially one named
> > without the leading underscores. It's OK for a unit test to duplicate
> > __smp_boot_secondary() (secondary_data is available), but then the unit
> > test also needs to ensure it does its own synchronization, as you do
> > with the wait on cpu_on_ready already.
> 
> I created the function to share the code between __smp_boot_secondary and
> the PSCI test. Cache maintenance would have to be added here (one CPU
> writes to secondary_data, another CPU reads from it, possible with the MMU
> off), and I would prefer to keep it in one place, in the library code,
> rather than expose the boot cache maintenance requirements to tests.
> 
> If I rename it to __smp_boot_secondary(), would you find that acceptable?

I guess you mean __smp_prepare_secondary(). I suppose, but at this time
we're only avoiding duplicating 3 lines, so it looks like premature
refactoring. We can do it now though, since I hope to see the cache
maint patches soon. Please add a comment above the new __ function warning
users about its risks.

Another option is to make an smp_prepare_secondary() that is safe to
use without synchronization. We'd just need to make secondary_data per
cpu.

Thanks,
drew


> 
> Thanks,
> Alex
> 
> > 
> > >  {
> > > -	int ret;
> > > -
> > >  	secondary_data.stack = thread_stack_alloc();
> > >  	secondary_data.entry = entry;
> > >  	mmu_mark_disabled(cpu);
> > > +}
> > > +
> > > +static void __smp_boot_secondary(int cpu, secondary_entry_fn entry)
> > > +{
> > > +	int ret;
> > > +
> > > +	smp_prepare_secondary(cpu, entry);
> > > +
> > >  	ret = cpu_psci_cpu_boot(cpu);
> > >  	assert(ret == 0);
> > >  
> > > diff --git a/lib/errata.h b/lib/errata.h
> > > index 5af0eb3bf8e2..de8205d8b370 100644
> > > --- a/lib/errata.h
> > > +++ b/lib/errata.h
> > > @@ -6,6 +6,8 @@
> > >   */
> > >  #ifndef _ERRATA_H_
> > >  #define _ERRATA_H_
> > > +#include <libcflat.h>
> > > +
> > >  #include "config.h"
> > >  
> > >  #ifndef CONFIG_ERRATA_FORCE
> > > -- 
> > > 2.25.1
> > >
> > 
> > Thanks,
> > drew
Andrew Jones Jan. 19, 2023, 11:46 a.m. UTC | #4
On Wed, Jan 18, 2023 at 02:49:11PM +0000, Alexandru Elisei wrote:
> For the PSCI CPU_ON function test, all other CPUs perform a CPU_ON call
> that target CPU 1. The test is considered a success if CPU_ON returns PSCI
> SUCCESS exactly once, and for the rest of the calls PSCI ALREADY_ON.
> 
> Enhance the test by checking that CPU 1 is actually online and able to
> execute code. Also make the test more robust by checking that the CPU_ON
> call returns, instead of assuming that it will always succeed and
> hanging indefinitely if it doesn't.
> 
> Since the CPU 1 thread is now being set up properly by kvm-unit-tests
> when being brought online, it becomes possible to add other tests in the
> future that require all CPUs.
> 
> The include header order in arm/psci.c has been changed to be in
> alphabetic order. This means moving the errata.h include before
> libcflat.h, which causes compilation to fail because of missing includes
> in errata.h. Fix that also by including the needed header in errata.h.
> 
> Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
> ---
>  arm/psci.c        | 60 ++++++++++++++++++++++++++++++++++++-----------
>  lib/arm/asm/smp.h |  1 +
>  lib/arm/smp.c     | 12 +++++++---
>  lib/errata.h      |  2 ++
>  4 files changed, 58 insertions(+), 17 deletions(-)
> 
> diff --git a/arm/psci.c b/arm/psci.c
> index efa0722c0566..e96be941953b 100644
> --- a/arm/psci.c
> +++ b/arm/psci.c
> @@ -7,11 +7,13 @@
>   *
>   * This work is licensed under the terms of the GNU LGPL, version 2.
>   */
> -#include <libcflat.h>
>  #include <errata.h>
> +#include <libcflat.h>
> +
> +#include <asm/delay.h>
>  #include <asm/processor.h>
> -#include <asm/smp.h>
>  #include <asm/psci.h>
> +#include <asm/smp.h>
>  
>  static bool invalid_function_exception;
>  
> @@ -72,14 +74,23 @@ static int cpu_on_ret[NR_CPUS];
>  static cpumask_t cpu_on_ready, cpu_on_done;
>  static volatile int cpu_on_start;
>  
> -static void cpu_on_secondary_entry(void)
> +extern void secondary_entry(void);
> +static void cpu_on_do_wake_target(void)
>  {
>  	int cpu = smp_processor_id();
>  
>  	cpumask_set_cpu(cpu, &cpu_on_ready);
>  	while (!cpu_on_start)
>  		cpu_relax();
> -	cpu_on_ret[cpu] = psci_cpu_on(cpus[1], __pa(halt));
> +	cpu_on_ret[cpu] = psci_cpu_on(cpus[1], __pa(secondary_entry));
> +	cpumask_set_cpu(cpu, &cpu_on_done);
> +}
> +
> +static void cpu_on_target(void)
> +{
> +	int cpu = smp_processor_id();
> +
> +	cpu_on_ret[cpu] = PSCI_RET_ALREADY_ON;
>  	cpumask_set_cpu(cpu, &cpu_on_done);
>  }
>  
> @@ -87,33 +98,53 @@ static bool psci_cpu_on_test(void)
>  {
>  	bool failed = false;
>  	int ret_success = 0;
> -	int cpu;
> -
> -	cpumask_set_cpu(1, &cpu_on_ready);
> -	cpumask_set_cpu(1, &cpu_on_done);
> +	int i, cpu;
>  
>  	for_each_present_cpu(cpu) {
>  		if (cpu < 2)
>  			continue;
> -		smp_boot_secondary(cpu, cpu_on_secondary_entry);
> +		smp_boot_secondary(cpu, cpu_on_do_wake_target);
>  	}
>  
>  	cpumask_set_cpu(0, &cpu_on_ready);
> +	cpumask_set_cpu(1, &cpu_on_ready);
>  	while (!cpumask_full(&cpu_on_ready))
>  		cpu_relax();
>  
> +	/*
> +	 * Wait for all other CPUs to be online before configuring the thread
> +	 * for the target CPU, as all secondaries are set up using the same
> +	 * global variable.
> +	 */
> +	smp_prepare_secondary(1, cpu_on_target);
> +
>  	cpu_on_start = 1;
>  	smp_mb();
>  
> -	cpu_on_ret[0] = psci_cpu_on(cpus[1], __pa(halt));
> +	cpu_on_ret[0] = psci_cpu_on(cpus[1], __pa(secondary_entry));
>  	cpumask_set_cpu(0, &cpu_on_done);
>  
> -	while (!cpumask_full(&cpu_on_done))
> -		cpu_relax();
> +	report_info("waiting for CPU1 to come online...");
> +	for (i = 0; i < 10; i++) {
> +		mdelay(100);

Changing this to

     for (i = 0; i < 100; i++) {
             mdelay(10);

may speed it up a bit.

Thanks,
drew

> +		if (cpumask_full(&cpu_on_done))
> +			break;
> +	}
> +
> +	if (!cpumask_full(&cpu_on_done)) {
> +		for_each_present_cpu(cpu) {
> +			if (!cpumask_test_cpu(cpu, &cpu_on_done)) {
> +				if (cpu == 1)
> +					report_info("CPU1 failed to come online");
> +				else
> +					report_info("CPU%d failed to online CPU1", cpu);
> +			}
> +		}
> +		failed = true;
> +		goto out;
> +	}
>  
>  	for_each_present_cpu(cpu) {
> -		if (cpu == 1)
> -			continue;
>  		if (cpu_on_ret[cpu] == PSCI_RET_SUCCESS) {
>  			ret_success++;
>  		} else if (cpu_on_ret[cpu] != PSCI_RET_ALREADY_ON) {
> @@ -127,6 +158,7 @@ static bool psci_cpu_on_test(void)
>  		failed = true;
>  	}
>  
> +out:
>  	return !failed;
>  }
>  
> diff --git a/lib/arm/asm/smp.h b/lib/arm/asm/smp.h
> index 077afde85520..ff2ef8f88247 100644
> --- a/lib/arm/asm/smp.h
> +++ b/lib/arm/asm/smp.h
> @@ -49,6 +49,7 @@ static inline void set_cpu_idle(int cpu, bool idle)
>  }
>  
>  typedef void (*secondary_entry_fn)(void);
> +extern void smp_prepare_secondary(int cpu, secondary_entry_fn entry);
>  extern void smp_boot_secondary(int cpu, secondary_entry_fn entry);
>  extern void on_cpu_async(int cpu, void (*func)(void *data), void *data);
>  extern void on_cpu(int cpu, void (*func)(void *data), void *data);
> diff --git a/lib/arm/smp.c b/lib/arm/smp.c
> index 98a5054e039b..947f417f4aea 100644
> --- a/lib/arm/smp.c
> +++ b/lib/arm/smp.c
> @@ -58,13 +58,19 @@ secondary_entry_fn secondary_cinit(void)
>  	return entry;
>  }
>  
> -static void __smp_boot_secondary(int cpu, secondary_entry_fn entry)
> +void smp_prepare_secondary(int cpu, secondary_entry_fn entry)
>  {
> -	int ret;
> -
>  	secondary_data.stack = thread_stack_alloc();
>  	secondary_data.entry = entry;
>  	mmu_mark_disabled(cpu);
> +}
> +
> +static void __smp_boot_secondary(int cpu, secondary_entry_fn entry)
> +{
> +	int ret;
> +
> +	smp_prepare_secondary(cpu, entry);
> +
>  	ret = cpu_psci_cpu_boot(cpu);
>  	assert(ret == 0);
>  
> diff --git a/lib/errata.h b/lib/errata.h
> index 5af0eb3bf8e2..de8205d8b370 100644
> --- a/lib/errata.h
> +++ b/lib/errata.h
> @@ -6,6 +6,8 @@
>   */
>  #ifndef _ERRATA_H_
>  #define _ERRATA_H_
> +#include <libcflat.h>
> +
>  #include "config.h"
>  
>  #ifndef CONFIG_ERRATA_FORCE
> -- 
> 2.25.1
>
diff mbox series

Patch

diff --git a/arm/psci.c b/arm/psci.c
index efa0722c0566..e96be941953b 100644
--- a/arm/psci.c
+++ b/arm/psci.c
@@ -7,11 +7,13 @@ 
  *
  * This work is licensed under the terms of the GNU LGPL, version 2.
  */
-#include <libcflat.h>
 #include <errata.h>
+#include <libcflat.h>
+
+#include <asm/delay.h>
 #include <asm/processor.h>
-#include <asm/smp.h>
 #include <asm/psci.h>
+#include <asm/smp.h>
 
 static bool invalid_function_exception;
 
@@ -72,14 +74,23 @@  static int cpu_on_ret[NR_CPUS];
 static cpumask_t cpu_on_ready, cpu_on_done;
 static volatile int cpu_on_start;
 
-static void cpu_on_secondary_entry(void)
+extern void secondary_entry(void);
+static void cpu_on_do_wake_target(void)
 {
 	int cpu = smp_processor_id();
 
 	cpumask_set_cpu(cpu, &cpu_on_ready);
 	while (!cpu_on_start)
 		cpu_relax();
-	cpu_on_ret[cpu] = psci_cpu_on(cpus[1], __pa(halt));
+	cpu_on_ret[cpu] = psci_cpu_on(cpus[1], __pa(secondary_entry));
+	cpumask_set_cpu(cpu, &cpu_on_done);
+}
+
+static void cpu_on_target(void)
+{
+	int cpu = smp_processor_id();
+
+	cpu_on_ret[cpu] = PSCI_RET_ALREADY_ON;
 	cpumask_set_cpu(cpu, &cpu_on_done);
 }
 
@@ -87,33 +98,53 @@  static bool psci_cpu_on_test(void)
 {
 	bool failed = false;
 	int ret_success = 0;
-	int cpu;
-
-	cpumask_set_cpu(1, &cpu_on_ready);
-	cpumask_set_cpu(1, &cpu_on_done);
+	int i, cpu;
 
 	for_each_present_cpu(cpu) {
 		if (cpu < 2)
 			continue;
-		smp_boot_secondary(cpu, cpu_on_secondary_entry);
+		smp_boot_secondary(cpu, cpu_on_do_wake_target);
 	}
 
 	cpumask_set_cpu(0, &cpu_on_ready);
+	cpumask_set_cpu(1, &cpu_on_ready);
 	while (!cpumask_full(&cpu_on_ready))
 		cpu_relax();
 
+	/*
+	 * Wait for all other CPUs to be online before configuring the thread
+	 * for the target CPU, as all secondaries are set up using the same
+	 * global variable.
+	 */
+	smp_prepare_secondary(1, cpu_on_target);
+
 	cpu_on_start = 1;
 	smp_mb();
 
-	cpu_on_ret[0] = psci_cpu_on(cpus[1], __pa(halt));
+	cpu_on_ret[0] = psci_cpu_on(cpus[1], __pa(secondary_entry));
 	cpumask_set_cpu(0, &cpu_on_done);
 
-	while (!cpumask_full(&cpu_on_done))
-		cpu_relax();
+	report_info("waiting for CPU1 to come online...");
+	for (i = 0; i < 10; i++) {
+		mdelay(100);
+		if (cpumask_full(&cpu_on_done))
+			break;
+	}
+
+	if (!cpumask_full(&cpu_on_done)) {
+		for_each_present_cpu(cpu) {
+			if (!cpumask_test_cpu(cpu, &cpu_on_done)) {
+				if (cpu == 1)
+					report_info("CPU1 failed to come online");
+				else
+					report_info("CPU%d failed to online CPU1", cpu);
+			}
+		}
+		failed = true;
+		goto out;
+	}
 
 	for_each_present_cpu(cpu) {
-		if (cpu == 1)
-			continue;
 		if (cpu_on_ret[cpu] == PSCI_RET_SUCCESS) {
 			ret_success++;
 		} else if (cpu_on_ret[cpu] != PSCI_RET_ALREADY_ON) {
@@ -127,6 +158,7 @@  static bool psci_cpu_on_test(void)
 		failed = true;
 	}
 
+out:
 	return !failed;
 }
 
diff --git a/lib/arm/asm/smp.h b/lib/arm/asm/smp.h
index 077afde85520..ff2ef8f88247 100644
--- a/lib/arm/asm/smp.h
+++ b/lib/arm/asm/smp.h
@@ -49,6 +49,7 @@  static inline void set_cpu_idle(int cpu, bool idle)
 }
 
 typedef void (*secondary_entry_fn)(void);
+extern void smp_prepare_secondary(int cpu, secondary_entry_fn entry);
 extern void smp_boot_secondary(int cpu, secondary_entry_fn entry);
 extern void on_cpu_async(int cpu, void (*func)(void *data), void *data);
 extern void on_cpu(int cpu, void (*func)(void *data), void *data);
diff --git a/lib/arm/smp.c b/lib/arm/smp.c
index 98a5054e039b..947f417f4aea 100644
--- a/lib/arm/smp.c
+++ b/lib/arm/smp.c
@@ -58,13 +58,19 @@  secondary_entry_fn secondary_cinit(void)
 	return entry;
 }
 
-static void __smp_boot_secondary(int cpu, secondary_entry_fn entry)
+void smp_prepare_secondary(int cpu, secondary_entry_fn entry)
 {
-	int ret;
-
 	secondary_data.stack = thread_stack_alloc();
 	secondary_data.entry = entry;
 	mmu_mark_disabled(cpu);
+}
+
+static void __smp_boot_secondary(int cpu, secondary_entry_fn entry)
+{
+	int ret;
+
+	smp_prepare_secondary(cpu, entry);
+
 	ret = cpu_psci_cpu_boot(cpu);
 	assert(ret == 0);
 
diff --git a/lib/errata.h b/lib/errata.h
index 5af0eb3bf8e2..de8205d8b370 100644
--- a/lib/errata.h
+++ b/lib/errata.h
@@ -6,6 +6,8 @@ 
  */
 #ifndef _ERRATA_H_
 #define _ERRATA_H_
+#include <libcflat.h>
+
 #include "config.h"
 
 #ifndef CONFIG_ERRATA_FORCE