diff mbox series

[v2,1/8] RISC-V: Do not wait indefinitely in __cpu_up

Message ID 1546940318-9752-2-git-send-email-atish.patra@wdc.com (mailing list archive)
State New, archived
Headers show
Series Various SMP related fixes | expand

Commit Message

Atish Patra Jan. 8, 2019, 9:38 a.m. UTC
In the SMP path, __cpu_up waits indefinitely for the other CPU to come
online. This is wrong, as the other CPU might be disabled in machine
mode while the possible CPU mask is set to the CPUs present in the
DT.

Introduce a completion variable and wait for only one second.

Signed-off-by: Atish Patra <atish.patra@wdc.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
---
 arch/riscv/kernel/smpboot.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

Comments

Christoph Hellwig Jan. 15, 2019, 1:51 p.m. UTC | #1
>  
>  void *__cpu_up_stack_pointer[NR_CPUS];
>  void *__cpu_up_task_pointer[NR_CPUS];
> +static DECLARE_COMPLETION(cpu_running);
>  
>  void __init smp_prepare_boot_cpu(void)
>  {
> @@ -81,6 +82,7 @@ void __init setup_smp(void)
>  
>  int __cpu_up(unsigned int cpu, struct task_struct *tidle)
>  {
> +	int ret = 0;
>  	int hartid = cpuid_to_hartid_map(cpu);
>  	tidle->thread_info.cpu = cpu;
>  
> @@ -96,10 +98,15 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
>  		  task_stack_page(tidle) + THREAD_SIZE);
>  	WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle);
>  
> -	while (!cpu_online(cpu))
> -		cpu_relax();
> +	wait_for_completion_timeout(&cpu_running,
> +					    msecs_to_jiffies(1000));

Having a global completion here worries me.  I bet we have some higher
level serialization, but can we comment or even better lockdep assert on
that?

Also please use up your available lines (72 in commit logs, 80 in source
files) instead of adding spurious line wraps.
Atish Patra Jan. 18, 2019, 2:35 a.m. UTC | #2
On 1/15/19 5:51 AM, Christoph Hellwig wrote:
>>   
>>   void *__cpu_up_stack_pointer[NR_CPUS];
>>   void *__cpu_up_task_pointer[NR_CPUS];
>> +static DECLARE_COMPLETION(cpu_running);
>>   
>>   void __init smp_prepare_boot_cpu(void)
>>   {
>> @@ -81,6 +82,7 @@ void __init setup_smp(void)
>>   
>>   int __cpu_up(unsigned int cpu, struct task_struct *tidle)
>>   {
>> +	int ret = 0;
>>   	int hartid = cpuid_to_hartid_map(cpu);
>>   	tidle->thread_info.cpu = cpu;
>>   
>> @@ -96,10 +98,15 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
>>   		  task_stack_page(tidle) + THREAD_SIZE);
>>   	WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle);
>>   
>> -	while (!cpu_online(cpu))
>> -		cpu_relax();
>> +	wait_for_completion_timeout(&cpu_running,
>> +					    msecs_to_jiffies(1000));
> 
> Having a global completion here worries me.  I bet we have some higher
> level serialization, but can we comment or even better lockdep assert on
> that?
> 

Yes. It is serialized from smp.c in smp_init(). It brings one CPU online
at a time from the present CPU mask.

Do we still need a lockdep assert?

Regards,
Atish
> Also please use up your available lines (72 in commit logs, 80 in source
> files) instead of adding spurious line wraps.
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
>
Christoph Hellwig Jan. 18, 2019, 7:20 a.m. UTC | #3
On Thu, Jan 17, 2019 at 06:35:39PM -0800, Atish Patra wrote:
> On 1/15/19 5:51 AM, Christoph Hellwig wrote:
> > >   void *__cpu_up_stack_pointer[NR_CPUS];
> > >   void *__cpu_up_task_pointer[NR_CPUS];
> > > +static DECLARE_COMPLETION(cpu_running);
> > >   void __init smp_prepare_boot_cpu(void)
> > >   {
> > > @@ -81,6 +82,7 @@ void __init setup_smp(void)
> > >   int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> > >   {
> > > +	int ret = 0;
> > >   	int hartid = cpuid_to_hartid_map(cpu);
> > >   	tidle->thread_info.cpu = cpu;
> > > @@ -96,10 +98,15 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> > >   		  task_stack_page(tidle) + THREAD_SIZE);
> > >   	WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle);
> > > -	while (!cpu_online(cpu))
> > > -		cpu_relax();
> > > +	wait_for_completion_timeout(&cpu_running,
> > > +					    msecs_to_jiffies(1000));
> > 
> > Having a global completion here worries me.  I bet we have some higher
> > level serialization, but can we comment or even better lockdep assert on
> > that?
> > 
> 
> Yes. It is serialized from smp.c in smp_init(). It brings one CPU online
> at a time from the present CPU mask.
> 
> Do we still need a lockdep assert ?

I guess the real lock is through cpu_hotplug_lock.  And yes, a comment
or even better lockdep assert would be good.
diff mbox series

Patch

diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index fc185eca..32e14572 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -39,6 +39,7 @@ 
 
 void *__cpu_up_stack_pointer[NR_CPUS];
 void *__cpu_up_task_pointer[NR_CPUS];
+static DECLARE_COMPLETION(cpu_running);
 
 void __init smp_prepare_boot_cpu(void)
 {
@@ -81,6 +82,7 @@  void __init setup_smp(void)
 
 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
+	int ret = 0;
 	int hartid = cpuid_to_hartid_map(cpu);
 	tidle->thread_info.cpu = cpu;
 
@@ -96,10 +98,15 @@  int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 		  task_stack_page(tidle) + THREAD_SIZE);
 	WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle);
 
-	while (!cpu_online(cpu))
-		cpu_relax();
+	wait_for_completion_timeout(&cpu_running,
+					    msecs_to_jiffies(1000));
 
-	return 0;
+	if (!cpu_online(cpu)) {
+		pr_crit("CPU%u: failed to come online\n", cpu);
+		ret = -EIO;
+	}
+
+	return ret;
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
@@ -125,6 +132,7 @@  asmlinkage void __init smp_callin(void)
 	 * a local TLB flush right now just in case.
 	 */
 	local_flush_tlb_all();
+	complete(&cpu_running);
 	/*
 	 * Disable preemption before enabling interrupts, so we don't try to
 	 * schedule a CPU that hasn't actually started yet.