diff mbox series

[v4,2/9] riscv: smp: fail booting up smp if inconsistent vlen is detected

Message ID 20240412-zve-detection-v4-2-e0c45bb6b253@sifive.com (mailing list archive)
State New
Headers show
Series Support Zve32[xf] and Zve64[xfd] Vector subextensions | expand

Commit Message

Andy Chiu April 12, 2024, 6:48 a.m. UTC
Currently we only support Vector for SMP platforms, that is, all SMP
cores have the same vlenb. If we happen to detect a mismatching vlen, it
is better to just fail bootting it up to prevent further race/scheduling
issues.

Also, move .Lsecondary_park forward and chage `tail smp_callin` into a
regular call in the early assembly. So a core would be parked right
after a return from smp_callin. Note that a successful smp_callin
does not return.

Fixes: 7017858eb2d7 ("riscv: Introduce riscv_v_vsize to record size of Vector context")
Reported-by: Conor Dooley <conor.dooley@microchip.com>
Closes: https://lore.kernel.org/linux-riscv/20240228-vicinity-cornstalk-4b8eb5fe5730@spud/
Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
---
Changelog v4:
 - update comment also in the assembly code (Yunhui)
Changelog v2:
 - update commit message to explain asm code change (Conor)
---
 arch/riscv/kernel/head.S    | 19 ++++++++++++-------
 arch/riscv/kernel/smpboot.c | 14 +++++++++-----
 2 files changed, 21 insertions(+), 12 deletions(-)

Comments

Conor Dooley April 18, 2024, 10:17 a.m. UTC | #1
On Fri, Apr 12, 2024 at 02:48:58PM +0800, Andy Chiu wrote:
> Currently we only support Vector for SMP platforms, that is, all SMP
> cores have the same vlenb. If we happen to detect a mismatching vlen, it
> is better to just fail bootting it up to prevent further race/scheduling
> issues.
> 
> Also, move .Lsecondary_park forward and chage `tail smp_callin` into a
> regular call in the early assembly. So a core would be parked right
> after a return from smp_callin. Note that a successful smp_callin
> does not return.
> 
> Fixes: 7017858eb2d7 ("riscv: Introduce riscv_v_vsize to record size of Vector context")
> Reported-by: Conor Dooley <conor.dooley@microchip.com>
> Closes: https://lore.kernel.org/linux-riscv/20240228-vicinity-cornstalk-4b8eb5fe5730@spud/
> Signed-off-by: Andy Chiu <andy.chiu@sifive.com>

Reviewed-by: Conor Dooley <conor.dooley@microchip.com>

Cheers,
Conor.
yunhui cui April 19, 2024, 6:09 a.m. UTC | #2
Hi Andy,

On Fri, Apr 12, 2024 at 2:50 PM Andy Chiu <andy.chiu@sifive.com> wrote:
>
> Currently we only support Vector for SMP platforms, that is, all SMP
> cores have the same vlenb. If we happen to detect a mismatching vlen, it
> is better to just fail bootting it up to prevent further race/scheduling
> issues.
>
> Also, move .Lsecondary_park forward and chage `tail smp_callin` into a
> regular call in the early assembly. So a core would be parked right
> after a return from smp_callin. Note that a successful smp_callin
> does not return.
>
> Fixes: 7017858eb2d7 ("riscv: Introduce riscv_v_vsize to record size of Vector context")
> Reported-by: Conor Dooley <conor.dooley@microchip.com>
> Closes: https://lore.kernel.org/linux-riscv/20240228-vicinity-cornstalk-4b8eb5fe5730@spud/
> Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
> ---
> Changelog v4:
>  - update comment also in the assembly code (Yunhui)
> Changelog v2:
>  - update commit message to explain asm code change (Conor)
> ---
>  arch/riscv/kernel/head.S    | 19 ++++++++++++-------
>  arch/riscv/kernel/smpboot.c | 14 +++++++++-----
>  2 files changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
> index 4236a69c35cb..a00f7523cb91 100644
> --- a/arch/riscv/kernel/head.S
> +++ b/arch/riscv/kernel/head.S
> @@ -165,9 +165,20 @@ secondary_start_sbi:
>  #endif
>         call .Lsetup_trap_vector
>         scs_load_current
> -       tail smp_callin
> +       call smp_callin
>  #endif /* CONFIG_SMP */
>
> +.align 2
> +.Lsecondary_park:
> +       /*
> +        * Park this hart if we:
> +        *  - have too many harts on CONFIG_RISCV_BOOT_SPINWAIT
> +        *  - receive an early trap, before setup_trap_vector finished
> +        *  - fail in smp_callin(), as a successful one wouldn't return
> +        */
> +       wfi
> +       j .Lsecondary_park
> +
>  .align 2
>  .Lsetup_trap_vector:
>         /* Set trap vector to exception handler */
> @@ -181,12 +192,6 @@ secondary_start_sbi:
>         csrw CSR_SCRATCH, zero
>         ret
>
> -.align 2
> -.Lsecondary_park:
> -       /* We lack SMP support or have too many harts, so park this hart */
> -       wfi
> -       j .Lsecondary_park
> -
>  SYM_CODE_END(_start)
>
>  SYM_CODE_START(_start_kernel)
> diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
> index d41090fc3203..673437ccc13d 100644
> --- a/arch/riscv/kernel/smpboot.c
> +++ b/arch/riscv/kernel/smpboot.c
> @@ -214,6 +214,15 @@ asmlinkage __visible void smp_callin(void)
>         struct mm_struct *mm = &init_mm;
>         unsigned int curr_cpuid = smp_processor_id();
>
> +       if (has_vector()) {
> +               /*
> +                * Return as early as possible so the hart with a mismatching
> +                * vlen won't boot.
> +                */
> +               if (riscv_v_setup_vsize())
> +                       return;
> +       }
> +
>         /* All kernel threads share the same mm context.  */
>         mmgrab(mm);
>         current->active_mm = mm;
> @@ -226,11 +235,6 @@ asmlinkage __visible void smp_callin(void)
>         numa_add_cpu(curr_cpuid);
>         set_cpu_online(curr_cpuid, 1);
>
> -       if (has_vector()) {
> -               if (riscv_v_setup_vsize())
> -                       elf_hwcap &= ~COMPAT_HWCAP_ISA_V;
> -       }
> -
>         riscv_user_isa_enable();
>
>         /*
>
> --
> 2.44.0.rc2
>
>

Reviewed-by: Yunhui Cui <cuiyunhui@bytedance.com>


Thanks,
Yunhui
Alexandre Ghiti April 24, 2024, 8:01 p.m. UTC | #3
Hi Andy,

On 12/04/2024 08:48, Andy Chiu wrote:
> Currently we only support Vector for SMP platforms, that is, all SMP
> cores have the same vlenb. If we happen to detect a mismatching vlen, it
> is better to just fail bootting it up to prevent further race/scheduling
> issues.
>
> Also, move .Lsecondary_park forward and chage `tail smp_callin` into a
> regular call in the early assembly. So a core would be parked right
> after a return from smp_callin. Note that a successful smp_callin
> does not return.
>
> Fixes: 7017858eb2d7 ("riscv: Introduce riscv_v_vsize to record size of Vector context")
> Reported-by: Conor Dooley <conor.dooley@microchip.com>
> Closes: https://lore.kernel.org/linux-riscv/20240228-vicinity-cornstalk-4b8eb5fe5730@spud/
> Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
> ---
> Changelog v4:
>   - update comment also in the assembly code (Yunhui)
> Changelog v2:
>   - update commit message to explain asm code change (Conor)
> ---
>   arch/riscv/kernel/head.S    | 19 ++++++++++++-------
>   arch/riscv/kernel/smpboot.c | 14 +++++++++-----
>   2 files changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
> index 4236a69c35cb..a00f7523cb91 100644
> --- a/arch/riscv/kernel/head.S
> +++ b/arch/riscv/kernel/head.S
> @@ -165,9 +165,20 @@ secondary_start_sbi:
>   #endif
>   	call .Lsetup_trap_vector
>   	scs_load_current
> -	tail smp_callin
> +	call smp_callin
>   #endif /* CONFIG_SMP */
>   
> +.align 2
> +.Lsecondary_park:
> +	/*
> +	 * Park this hart if we:
> +	 *  - have too many harts on CONFIG_RISCV_BOOT_SPINWAIT
> +	 *  - receive an early trap, before setup_trap_vector finished
> +	 *  - fail in smp_callin(), as a successful one wouldn't return
> +	 */
> +	wfi
> +	j .Lsecondary_park
> +
>   .align 2
>   .Lsetup_trap_vector:
>   	/* Set trap vector to exception handler */
> @@ -181,12 +192,6 @@ secondary_start_sbi:
>   	csrw CSR_SCRATCH, zero
>   	ret
>   
> -.align 2
> -.Lsecondary_park:
> -	/* We lack SMP support or have too many harts, so park this hart */
> -	wfi
> -	j .Lsecondary_park
> -
>   SYM_CODE_END(_start)
>   
>   SYM_CODE_START(_start_kernel)
> diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
> index d41090fc3203..673437ccc13d 100644
> --- a/arch/riscv/kernel/smpboot.c
> +++ b/arch/riscv/kernel/smpboot.c
> @@ -214,6 +214,15 @@ asmlinkage __visible void smp_callin(void)
>   	struct mm_struct *mm = &init_mm;
>   	unsigned int curr_cpuid = smp_processor_id();
>   
> +	if (has_vector()) {
> +		/*
> +		 * Return as early as possible so the hart with a mismatching
> +		 * vlen won't boot.
> +		 */
> +		if (riscv_v_setup_vsize())
> +			return;
> +	}
> +
>   	/* All kernel threads share the same mm context.  */
>   	mmgrab(mm);
>   	current->active_mm = mm;
> @@ -226,11 +235,6 @@ asmlinkage __visible void smp_callin(void)
>   	numa_add_cpu(curr_cpuid);
>   	set_cpu_online(curr_cpuid, 1);
>   
> -	if (has_vector()) {
> -		if (riscv_v_setup_vsize())
> -			elf_hwcap &= ~COMPAT_HWCAP_ISA_V;
> -	}
> -
>   	riscv_user_isa_enable();
>   
>   	/*
>

So this should go into -fixes, would you mind sending a single patch for 
this fix?

Your patch 8 is actually already fixed by Clement's patch 
https://lore.kernel.org/linux-riscv/20240409143839.558784-1-cleger@rivosinc.com/ 
and I already mentioned this one to Palmer.

Thanks,

Alex
Andy Chiu May 8, 2024, 8:21 a.m. UTC | #4
On Thu, Apr 25, 2024 at 4:01 AM Alexandre Ghiti <alex@ghiti.fr> wrote:
>
> Hi Andy,
>
> On 12/04/2024 08:48, Andy Chiu wrote:
> > Currently we only support Vector for SMP platforms, that is, all SMP
> > cores have the same vlenb. If we happen to detect a mismatching vlen, it
> > is better to just fail bootting it up to prevent further race/scheduling
> > issues.
> >
> > Also, move .Lsecondary_park forward and chage `tail smp_callin` into a
> > regular call in the early assembly. So a core would be parked right
> > after a return from smp_callin. Note that a successful smp_callin
> > does not return.
> >
> > Fixes: 7017858eb2d7 ("riscv: Introduce riscv_v_vsize to record size of Vector context")
> > Reported-by: Conor Dooley <conor.dooley@microchip.com>
> > Closes: https://lore.kernel.org/linux-riscv/20240228-vicinity-cornstalk-4b8eb5fe5730@spud/
> > Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
> > ---
> > Changelog v4:
> >   - update comment also in the assembly code (Yunhui)
> > Changelog v2:
> >   - update commit message to explain asm code change (Conor)
> > ---
> >   arch/riscv/kernel/head.S    | 19 ++++++++++++-------
> >   arch/riscv/kernel/smpboot.c | 14 +++++++++-----
> >   2 files changed, 21 insertions(+), 12 deletions(-)
> >
> > diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
> > index 4236a69c35cb..a00f7523cb91 100644
> > --- a/arch/riscv/kernel/head.S
> > +++ b/arch/riscv/kernel/head.S
> > @@ -165,9 +165,20 @@ secondary_start_sbi:
> >   #endif
> >       call .Lsetup_trap_vector
> >       scs_load_current
> > -     tail smp_callin
> > +     call smp_callin
> >   #endif /* CONFIG_SMP */
> >
> > +.align 2
> > +.Lsecondary_park:
> > +     /*
> > +      * Park this hart if we:
> > +      *  - have too many harts on CONFIG_RISCV_BOOT_SPINWAIT
> > +      *  - receive an early trap, before setup_trap_vector finished
> > +      *  - fail in smp_callin(), as a successful one wouldn't return
> > +      */
> > +     wfi
> > +     j .Lsecondary_park
> > +
> >   .align 2
> >   .Lsetup_trap_vector:
> >       /* Set trap vector to exception handler */
> > @@ -181,12 +192,6 @@ secondary_start_sbi:
> >       csrw CSR_SCRATCH, zero
> >       ret
> >
> > -.align 2
> > -.Lsecondary_park:
> > -     /* We lack SMP support or have too many harts, so park this hart */
> > -     wfi
> > -     j .Lsecondary_park
> > -
> >   SYM_CODE_END(_start)
> >
> >   SYM_CODE_START(_start_kernel)
> > diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
> > index d41090fc3203..673437ccc13d 100644
> > --- a/arch/riscv/kernel/smpboot.c
> > +++ b/arch/riscv/kernel/smpboot.c
> > @@ -214,6 +214,15 @@ asmlinkage __visible void smp_callin(void)
> >       struct mm_struct *mm = &init_mm;
> >       unsigned int curr_cpuid = smp_processor_id();
> >
> > +     if (has_vector()) {
> > +             /*
> > +              * Return as early as possible so the hart with a mismatching
> > +              * vlen won't boot.
> > +              */
> > +             if (riscv_v_setup_vsize())
> > +                     return;
> > +     }
> > +
> >       /* All kernel threads share the same mm context.  */
> >       mmgrab(mm);
> >       current->active_mm = mm;
> > @@ -226,11 +235,6 @@ asmlinkage __visible void smp_callin(void)
> >       numa_add_cpu(curr_cpuid);
> >       set_cpu_online(curr_cpuid, 1);
> >
> > -     if (has_vector()) {
> > -             if (riscv_v_setup_vsize())
> > -                     elf_hwcap &= ~COMPAT_HWCAP_ISA_V;
> > -     }
> > -
> >       riscv_user_isa_enable();
> >
> >       /*
> >
>
> So this should go into -fixes, would you mind sending a single patch for
> this fix?

I thought it would be magically picked up by a bot as long as we have
a fix tag. Am I assuming something wrong?

>
> Your patch 8 is actually already fixed by Clement's patch
> https://lore.kernel.org/linux-riscv/20240409143839.558784-1-cleger@rivosinc.com/

Okay, I will drop it at the next revision.

> and I already mentioned this one to Palmer.
>
> Thanks,
>
> Alex
>

Thanks,
Andy
Alexandre Ghiti May 8, 2024, 10:43 a.m. UTC | #5
Hi Andy,

On 08/05/2024 10:21, Andy Chiu wrote:
> On Thu, Apr 25, 2024 at 4:01 AM Alexandre Ghiti <alex@ghiti.fr> wrote:
>> Hi Andy,
>>
>> On 12/04/2024 08:48, Andy Chiu wrote:
>>> Currently we only support Vector for SMP platforms, that is, all SMP
>>> cores have the same vlenb. If we happen to detect a mismatching vlen, it
>>> is better to just fail bootting it up to prevent further race/scheduling
>>> issues.
>>>
>>> Also, move .Lsecondary_park forward and chage `tail smp_callin` into a
>>> regular call in the early assembly. So a core would be parked right
>>> after a return from smp_callin. Note that a successful smp_callin
>>> does not return.
>>>
>>> Fixes: 7017858eb2d7 ("riscv: Introduce riscv_v_vsize to record size of Vector context")
>>> Reported-by: Conor Dooley <conor.dooley@microchip.com>
>>> Closes: https://lore.kernel.org/linux-riscv/20240228-vicinity-cornstalk-4b8eb5fe5730@spud/
>>> Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
>>> ---
>>> Changelog v4:
>>>    - update comment also in the assembly code (Yunhui)
>>> Changelog v2:
>>>    - update commit message to explain asm code change (Conor)
>>> ---
>>>    arch/riscv/kernel/head.S    | 19 ++++++++++++-------
>>>    arch/riscv/kernel/smpboot.c | 14 +++++++++-----
>>>    2 files changed, 21 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
>>> index 4236a69c35cb..a00f7523cb91 100644
>>> --- a/arch/riscv/kernel/head.S
>>> +++ b/arch/riscv/kernel/head.S
>>> @@ -165,9 +165,20 @@ secondary_start_sbi:
>>>    #endif
>>>        call .Lsetup_trap_vector
>>>        scs_load_current
>>> -     tail smp_callin
>>> +     call smp_callin
>>>    #endif /* CONFIG_SMP */
>>>
>>> +.align 2
>>> +.Lsecondary_park:
>>> +     /*
>>> +      * Park this hart if we:
>>> +      *  - have too many harts on CONFIG_RISCV_BOOT_SPINWAIT
>>> +      *  - receive an early trap, before setup_trap_vector finished
>>> +      *  - fail in smp_callin(), as a successful one wouldn't return
>>> +      */
>>> +     wfi
>>> +     j .Lsecondary_park
>>> +
>>>    .align 2
>>>    .Lsetup_trap_vector:
>>>        /* Set trap vector to exception handler */
>>> @@ -181,12 +192,6 @@ secondary_start_sbi:
>>>        csrw CSR_SCRATCH, zero
>>>        ret
>>>
>>> -.align 2
>>> -.Lsecondary_park:
>>> -     /* We lack SMP support or have too many harts, so park this hart */
>>> -     wfi
>>> -     j .Lsecondary_park
>>> -
>>>    SYM_CODE_END(_start)
>>>
>>>    SYM_CODE_START(_start_kernel)
>>> diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
>>> index d41090fc3203..673437ccc13d 100644
>>> --- a/arch/riscv/kernel/smpboot.c
>>> +++ b/arch/riscv/kernel/smpboot.c
>>> @@ -214,6 +214,15 @@ asmlinkage __visible void smp_callin(void)
>>>        struct mm_struct *mm = &init_mm;
>>>        unsigned int curr_cpuid = smp_processor_id();
>>>
>>> +     if (has_vector()) {
>>> +             /*
>>> +              * Return as early as possible so the hart with a mismatching
>>> +              * vlen won't boot.
>>> +              */
>>> +             if (riscv_v_setup_vsize())
>>> +                     return;
>>> +     }
>>> +
>>>        /* All kernel threads share the same mm context.  */
>>>        mmgrab(mm);
>>>        current->active_mm = mm;
>>> @@ -226,11 +235,6 @@ asmlinkage __visible void smp_callin(void)
>>>        numa_add_cpu(curr_cpuid);
>>>        set_cpu_online(curr_cpuid, 1);
>>>
>>> -     if (has_vector()) {
>>> -             if (riscv_v_setup_vsize())
>>> -                     elf_hwcap &= ~COMPAT_HWCAP_ISA_V;
>>> -     }
>>> -
>>>        riscv_user_isa_enable();
>>>
>>>        /*
>>>
>> So this should go into -fixes, would you mind sending a single patch for
>> this fix?
> I thought it would be magically picked up by a bot as long as we have
> a fix tag. Am I assuming something wrong?


It gets backported to stable when it is merged, but then it is missing 
from the first stable releases (as long as it is not merged).

But anyway, 6.9 fixes are all out, so let's hope this series makes it to 
6.10.

Thanks,

Alex


>
>> Your patch 8 is actually already fixed by Clement's patch
>> https://lore.kernel.org/linux-riscv/20240409143839.558784-1-cleger@rivosinc.com/
> Okay, I will drop it at the next revision.
>
>> and I already mentioned this one to Palmer.
>>
>> Thanks,
>>
>> Alex
>>
> Thanks,
> Andy
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
diff mbox series

Patch

diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 4236a69c35cb..a00f7523cb91 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -165,9 +165,20 @@  secondary_start_sbi:
 #endif
 	call .Lsetup_trap_vector
 	scs_load_current
-	tail smp_callin
+	call smp_callin
 #endif /* CONFIG_SMP */
 
+.align 2
+.Lsecondary_park:
+	/*
+	 * Park this hart if we:
+	 *  - have too many harts on CONFIG_RISCV_BOOT_SPINWAIT
+	 *  - receive an early trap, before setup_trap_vector finished
+	 *  - fail in smp_callin(), as a successful one wouldn't return
+	 */
+	wfi
+	j .Lsecondary_park
+
 .align 2
 .Lsetup_trap_vector:
 	/* Set trap vector to exception handler */
@@ -181,12 +192,6 @@  secondary_start_sbi:
 	csrw CSR_SCRATCH, zero
 	ret
 
-.align 2
-.Lsecondary_park:
-	/* We lack SMP support or have too many harts, so park this hart */
-	wfi
-	j .Lsecondary_park
-
 SYM_CODE_END(_start)
 
 SYM_CODE_START(_start_kernel)
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index d41090fc3203..673437ccc13d 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -214,6 +214,15 @@  asmlinkage __visible void smp_callin(void)
 	struct mm_struct *mm = &init_mm;
 	unsigned int curr_cpuid = smp_processor_id();
 
+	if (has_vector()) {
+		/*
+		 * Return as early as possible so the hart with a mismatching
+		 * vlen won't boot.
+		 */
+		if (riscv_v_setup_vsize())
+			return;
+	}
+
 	/* All kernel threads share the same mm context.  */
 	mmgrab(mm);
 	current->active_mm = mm;
@@ -226,11 +235,6 @@  asmlinkage __visible void smp_callin(void)
 	numa_add_cpu(curr_cpuid);
 	set_cpu_online(curr_cpuid, 1);
 
-	if (has_vector()) {
-		if (riscv_v_setup_vsize())
-			elf_hwcap &= ~COMPAT_HWCAP_ISA_V;
-	}
-
 	riscv_user_isa_enable();
 
 	/*