riscv: fix race when vmap stack overflow

Message ID 20221019154727.2395-1-jszhang@kernel.org (mailing list archive)
State Superseded
Series riscv: fix race when vmap stack overflow

Commit Message

Jisheng Zhang Oct. 19, 2022, 3:47 p.m. UTC
Currently, when a vmap stack overflow is detected, riscv first switches
to the so-called shadow stack, then uses this shadow stack to call
get_overflow_stack() to pick the per-CPU overflow stack. However, there
is a race if two or more harts use the same shadow stack at the same
time.

To solve this race, introduce a spin_shadow_stack atomic variable which
serializes use of the shadow stack.
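
As an illustration of the pattern (a minimal C11 sketch using
hypothetical names; the real acquire side must run in assembly, since
no usable stack exists at that point):

  #include <stdatomic.h>

  static atomic_uint shadow_stack_busy;   /* 0 = free, nonzero = taken */

  static void acquire_shadow_stack(void)
  {
          /* spin until the swap reads back 0 (free) */
          while (atomic_exchange_explicit(&shadow_stack_busy, 1,
                                          memory_order_relaxed))
                  ;
  }

  static void release_shadow_stack(void)
  {
          /* release ordering: all earlier shadow stack accesses must
           * complete before another hart can claim the stack */
          atomic_store_explicit(&shadow_stack_busy, 0,
                                memory_order_release);
  }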

Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection")
Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Suggested-by: Guo Ren <guoren@kernel.org>
---
 arch/riscv/kernel/entry.S | 4 ++++
 arch/riscv/kernel/traps.c | 4 ++++
 2 files changed, 8 insertions(+)

Comments

Guo Ren Oct. 20, 2022, 1:02 a.m. UTC | #1
Reviewed-by: Guo Ren <guoren@kernel.org>

On Wed, Oct 19, 2022 at 11:57 PM Jisheng Zhang <jszhang@kernel.org> wrote:
>
> Currently, when a vmap stack overflow is detected, riscv first switches
> to the so-called shadow stack, then uses this shadow stack to call
> get_overflow_stack() to pick the per-CPU overflow stack. However, there
> is a race if two or more harts use the same shadow stack at the same
> time.
>
> To solve this race, introduce a spin_shadow_stack atomic variable which
> serializes use of the shadow stack.
>
> Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection")
> Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> Suggested-by: Guo Ren <guoren@kernel.org>
> ---
>  arch/riscv/kernel/entry.S | 4 ++++
>  arch/riscv/kernel/traps.c | 4 ++++
>  2 files changed, 8 insertions(+)
>
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index b9eda3fcbd6d..7b924b16792b 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -404,6 +404,10 @@ handle_syscall_trace_exit:
>
>  #ifdef CONFIG_VMAP_STACK
>  handle_kernel_stack_overflow:
> +1:     la sp, spin_shadow_stack
> +       amoswap.w sp, sp, (sp)
> +       bnez sp, 1b
> +
>         la sp, shadow_stack
>         addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
>
> diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
> index f3e96d60a2ff..88a54947dffb 100644
> --- a/arch/riscv/kernel/traps.c
> +++ b/arch/riscv/kernel/traps.c
> @@ -221,11 +221,15 @@ asmlinkage unsigned long get_overflow_stack(void)
>                 OVERFLOW_STACK_SIZE;
>  }
>
> +atomic_t spin_shadow_stack;
> +
>  asmlinkage void handle_bad_stack(struct pt_regs *regs)
>  {
>         unsigned long tsk_stk = (unsigned long)current->stack;
>         unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
>
> +       atomic_set_release(&spin_shadow_stack, 0);
> +
>         console_verbose();
>
>         pr_emerg("Insufficient stack space to handle exception!\n");
> --
> 2.37.2
>
Guo Ren Oct. 20, 2022, 2:16 a.m. UTC | #2
On Wed, Oct 19, 2022 at 11:57 PM Jisheng Zhang <jszhang@kernel.org> wrote:
>
> Currently, when a vmap stack overflow is detected, riscv first switches
> to the so-called shadow stack, then uses this shadow stack to call
> get_overflow_stack() to pick the per-CPU overflow stack. However, there
> is a race if two or more harts use the same shadow stack at the same
> time.
>
> To solve this race, introduce a spin_shadow_stack atomic variable which
> serializes use of the shadow stack.
>
> Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection")
> Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> Suggested-by: Guo Ren <guoren@kernel.org>
> ---
>  arch/riscv/kernel/entry.S | 4 ++++
>  arch/riscv/kernel/traps.c | 4 ++++
>  2 files changed, 8 insertions(+)
>
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index b9eda3fcbd6d..7b924b16792b 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -404,6 +404,10 @@ handle_syscall_trace_exit:
>
>  #ifdef CONFIG_VMAP_STACK
>  handle_kernel_stack_overflow:
> +1:     la sp, spin_shadow_stack
> +       amoswap.w sp, sp, (sp)
If CONFIG_64BIT=y, it would be broken, because we would only hold the
low 32 bits in sp, and the next loop iteration would get a wrong sp
value for &spin_shadow_stack.
Here is a correction.
-----
diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index 1b471ff73178..acf563072b8b 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -23,6 +23,7 @@
 #define REG_L          __REG_SEL(ld, lw)
 #define REG_S          __REG_SEL(sd, sw)
 #define REG_SC         __REG_SEL(sc.d, sc.w)
+#define REG_AMOSWAP    __REG_SEL(amoswap.d, amoswap.w)
 #define REG_ASM                __REG_SEL(.dword, .word)
 #define SZREG          __REG_SEL(8, 4)
 #define LGREG          __REG_SEL(3, 2)
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index b9eda3fcbd6d..ea6b78dac739 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -404,6 +404,10 @@ handle_syscall_trace_exit:

 #ifdef CONFIG_VMAP_STACK
 handle_kernel_stack_overflow:
+1:     la sp, spin_shadow_stack
+       /* Reuse the address as the spin value, so both must be XLEN wide. */
+       REG_AMOSWAP sp, sp, (sp)
+       bnez sp, 1b
+
        la sp, shadow_stack
        addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE

diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index f3e96d60a2ff..9e6cc0d63833 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -221,11 +221,15 @@ asmlinkage unsigned long get_overflow_stack(void)
                OVERFLOW_STACK_SIZE;
 }

+unsigned long spin_shadow_stack = 0;
+
 asmlinkage void handle_bad_stack(struct pt_regs *regs)
 {
        unsigned long tsk_stk = (unsigned long)current->stack;
        unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);

+       smp_store_release(&spin_shadow_stack, 0);
+
        console_verbose();

        pr_emerg("Insufficient stack space to handle exception!\n");

> +       bnez sp, 1b
> +
>         la sp, shadow_stack
>         addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
>
> diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
> index f3e96d60a2ff..88a54947dffb 100644
> --- a/arch/riscv/kernel/traps.c
> +++ b/arch/riscv/kernel/traps.c
> @@ -221,11 +221,15 @@ asmlinkage unsigned long get_overflow_stack(void)
>                 OVERFLOW_STACK_SIZE;
>  }
>
> +atomic_t spin_shadow_stack;
> +
>  asmlinkage void handle_bad_stack(struct pt_regs *regs)
>  {
>         unsigned long tsk_stk = (unsigned long)current->stack;
>         unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
>
> +       atomic_set_release(&spin_shadow_stack, 0);
> +
>         console_verbose();
>
>         pr_emerg("Insufficient stack space to handle exception!\n");
> --
> 2.37.2
>
--
Best Regards
 Guo Ren
Jisheng Zhang Oct. 20, 2022, 2:38 p.m. UTC | #3
On Thu, Oct 20, 2022 at 10:16:47AM +0800, Guo Ren wrote:
> On Wed, Oct 19, 2022 at 11:57 PM Jisheng Zhang <jszhang@kernel.org> wrote:
> >
> > Currently, when a vmap stack overflow is detected, riscv first switches
> > to the so-called shadow stack, then uses this shadow stack to call
> > get_overflow_stack() to pick the per-CPU overflow stack. However, there
> > is a race if two or more harts use the same shadow stack at the same
> > time.
> >
> > To solve this race, introduce a spin_shadow_stack atomic variable which
> > serializes use of the shadow stack.
> >
> > Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection")
> > Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> > Suggested-by: Guo Ren <guoren@kernel.org>
> > ---
> >  arch/riscv/kernel/entry.S | 4 ++++
> >  arch/riscv/kernel/traps.c | 4 ++++
> >  2 files changed, 8 insertions(+)
> >
> > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > index b9eda3fcbd6d..7b924b16792b 100644
> > --- a/arch/riscv/kernel/entry.S
> > +++ b/arch/riscv/kernel/entry.S
> > @@ -404,6 +404,10 @@ handle_syscall_trace_exit:
> >
> >  #ifdef CONFIG_VMAP_STACK
> >  handle_kernel_stack_overflow:
> > +1:     la sp, spin_shadow_stack
> > +       amoswap.w sp, sp, (sp)
> If CONFIG_64BIT=y, it would be broken, because we would only hold the
> low 32 bits in sp, and the next loop iteration would get a wrong sp
> value for &spin_shadow_stack.

Hi Guo,

Don't worry about it. spin_shadow_stack is just a flag used for
"spin": the value swapped into sp never needs to survive the loop,
because once a hart is allowed to use the shadow_stack, we reload the
full address in the next instruction with "la sp, shadow_stack".
But I agree with using unsigned int instead of atomic_t, and with
using smp_store_release() directly. V2 has been sent out; could you
please review it?
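
For illustration, each iteration of the loop behaves like this C model
(hypothetical names; the real code keeps everything in sp because no
stack is usable at that point):

  #include <stdatomic.h>
  #include <stdint.h>

  static atomic_uint spin_flag;   /* models spin_shadow_stack */

  static void spin_acquire(void)
  {
          /* Swap in a truncated-but-nonzero token (the low 32 bits of
           * a kernel symbol address are nonzero in practice) and
           * re-derive the full address on every iteration, just as the
           * loop re-executes "la sp, spin_shadow_stack" at label 1. */
          while (atomic_exchange_explicit(&spin_flag,
                          (uint32_t)(uintptr_t)&spin_flag,
                          memory_order_relaxed) != 0)
                  ;
  }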

Thanks
Andrea Parri Oct. 20, 2022, 11:26 p.m. UTC | #4
Hi Jisheng,

On Wed, Oct 19, 2022 at 11:47:27PM +0800, Jisheng Zhang wrote:
> Currently, when a vmap stack overflow is detected, riscv first switches
> to the so-called shadow stack, then uses this shadow stack to call
> get_overflow_stack() to pick the per-CPU overflow stack. However, there
> is a race if two or more harts use the same shadow stack at the same
> time.
>
> To solve this race, introduce a spin_shadow_stack atomic variable which
> serializes use of the shadow stack.
> 
> Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection")
> Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> Suggested-by: Guo Ren <guoren@kernel.org>
> ---
>  arch/riscv/kernel/entry.S | 4 ++++
>  arch/riscv/kernel/traps.c | 4 ++++
>  2 files changed, 8 insertions(+)
> 
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index b9eda3fcbd6d..7b924b16792b 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -404,6 +404,10 @@ handle_syscall_trace_exit:
>  
>  #ifdef CONFIG_VMAP_STACK
>  handle_kernel_stack_overflow:
> +1:	la sp, spin_shadow_stack
> +	amoswap.w sp, sp, (sp)
> +	bnez sp, 1b
> +
>  	la sp, shadow_stack
>  	addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
>  
> diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
> index f3e96d60a2ff..88a54947dffb 100644
> --- a/arch/riscv/kernel/traps.c
> +++ b/arch/riscv/kernel/traps.c
> @@ -221,11 +221,15 @@ asmlinkage unsigned long get_overflow_stack(void)
>  		OVERFLOW_STACK_SIZE;
>  }
>  
> +atomic_t spin_shadow_stack;
> +
>  asmlinkage void handle_bad_stack(struct pt_regs *regs)
>  {
>  	unsigned long tsk_stk = (unsigned long)current->stack;
>  	unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
>  
> +	atomic_set_release(&spin_shadow_stack, 0);
> +

Have not really looked at the details: should there be a matching acquire?

  Andrea


>  	console_verbose();
>  
>  	pr_emerg("Insufficient stack space to handle exception!\n");
> -- 
> 2.37.2
>
Guo Ren Oct. 21, 2022, 12:35 a.m. UTC | #5
On Thu, Oct 20, 2022 at 10:47 PM Jisheng Zhang <jszhang@kernel.org> wrote:
>
> On Thu, Oct 20, 2022 at 10:16:47AM +0800, Guo Ren wrote:
> > On Wed, Oct 19, 2022 at 11:57 PM Jisheng Zhang <jszhang@kernel.org> wrote:
> > >
> > > Currently, when a vmap stack overflow is detected, riscv first switches
> > > to the so-called shadow stack, then uses this shadow stack to call
> > > get_overflow_stack() to pick the per-CPU overflow stack. However, there
> > > is a race if two or more harts use the same shadow stack at the same
> > > time.
> > >
> > > To solve this race, introduce a spin_shadow_stack atomic variable which
> > > serializes use of the shadow stack.
> > >
> > > Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection")
> > > Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> > > Suggested-by: Guo Ren <guoren@kernel.org>
> > > ---
> > >  arch/riscv/kernel/entry.S | 4 ++++
> > >  arch/riscv/kernel/traps.c | 4 ++++
> > >  2 files changed, 8 insertions(+)
> > >
> > > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > > index b9eda3fcbd6d..7b924b16792b 100644
> > > --- a/arch/riscv/kernel/entry.S
> > > +++ b/arch/riscv/kernel/entry.S
> > > @@ -404,6 +404,10 @@ handle_syscall_trace_exit:
> > >
> > >  #ifdef CONFIG_VMAP_STACK
> > >  handle_kernel_stack_overflow:
> > > +1:     la sp, spin_shadow_stack
> > > +       amoswap.w sp, sp, (sp)
> > If CONFIG_64BIT=y, it would be broken, because we would only hold the
> > low 32 bits in sp, and the next loop iteration would get a wrong sp
> > value for &spin_shadow_stack.
>
> Hi Guo,
>
> Don't worry about it. spin_shadow_stack is just a flag used for
> "spin": the value swapped into sp never needs to survive the loop,
> because once a hart is allowed to use the shadow_stack, we reload
> the full address in the next instruction with "la sp, shadow_stack".
Haha, yes, my brain is at fault :)

> But I agree with using unsigned int instead of atomic_t, and with
> using smp_store_release() directly. V2 has been sent out; could you
> please review it?
Okay

>
> Thanks
Guo Ren Oct. 21, 2022, 1:11 a.m. UTC | #6
On Fri, Oct 21, 2022 at 7:26 AM Andrea Parri <parri.andrea@gmail.com> wrote:
>
> Hi Jisheng,
>
> On Wed, Oct 19, 2022 at 11:47:27PM +0800, Jisheng Zhang wrote:
> > Currently, when a vmap stack overflow is detected, riscv first switches
> > to the so-called shadow stack, then uses this shadow stack to call
> > get_overflow_stack() to pick the per-CPU overflow stack. However, there
> > is a race if two or more harts use the same shadow stack at the same
> > time.
> >
> > To solve this race, introduce a spin_shadow_stack atomic variable which
> > serializes use of the shadow stack.
> >
> > Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection")
> > Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> > Suggested-by: Guo Ren <guoren@kernel.org>
> > ---
> >  arch/riscv/kernel/entry.S | 4 ++++
> >  arch/riscv/kernel/traps.c | 4 ++++
> >  2 files changed, 8 insertions(+)
> >
> > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > index b9eda3fcbd6d..7b924b16792b 100644
> > --- a/arch/riscv/kernel/entry.S
> > +++ b/arch/riscv/kernel/entry.S
> > @@ -404,6 +404,10 @@ handle_syscall_trace_exit:
> >
> >  #ifdef CONFIG_VMAP_STACK
> >  handle_kernel_stack_overflow:
> > +1:   la sp, spin_shadow_stack
> > +     amoswap.w sp, sp, (sp)
> > +     bnez sp, 1b
> > +
> >       la sp, shadow_stack
> >       addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
> >
> > diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
> > index f3e96d60a2ff..88a54947dffb 100644
> > --- a/arch/riscv/kernel/traps.c
> > +++ b/arch/riscv/kernel/traps.c
> > @@ -221,11 +221,15 @@ asmlinkage unsigned long get_overflow_stack(void)
> >               OVERFLOW_STACK_SIZE;
> >  }
> >
> > +atomic_t spin_shadow_stack;
> > +
> >  asmlinkage void handle_bad_stack(struct pt_regs *regs)
> >  {
> >       unsigned long tsk_stk = (unsigned long)current->stack;
> >       unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
> >
> > +     atomic_set_release(&spin_shadow_stack, 0);
> > +
>
> Have not really looked at the details: should there be a matching acquire?

I use atomic_set_release() here because I need the earlier memory
operations to finish, making sure sp is ready, before setting the
spin flag.

The order of the following memory operations is not important,
because we only care about the sp value.

Also, we use a relaxed amoswap before, because sp carries a natural
address dependency. But giving them RCsc semantics would also be okay
here, because we don't care about performance on this path, e.g.:
 handle_kernel_stack_overflow:
+1:     la sp, spin_shadow_stack
+       amoswap.w.aqrl sp, sp, (sp)
+       bnez sp, 1b
+
....
+     smp_store_release(&spin_shadow_stack, 0);
+     smp_mb();

>
>   Andrea
>
>
> >       console_verbose();
> >
> >       pr_emerg("Insufficient stack space to handle exception!\n");
> > --
> > 2.37.2
> >



--
Best Regards
 Guo Ren
Andrea Parri Oct. 21, 2022, 8:36 a.m. UTC | #7
> > > +     atomic_set_release(&spin_shadow_stack, 0);
> >
> > Have not really looked at the details: should there be a matching acquire?
> 
> I use atomic_set_release() here because I need the earlier memory
> operations to finish, making sure sp is ready, before setting the
> spin flag.
>
> The order of the following memory operations is not important,
> because we only care about the sp value.
>
> Also, we use a relaxed amoswap before, because sp carries a natural
> address dependency. But giving them RCsc semantics would also be okay
> here, because we don't care about performance on this path.

Thanks for the clarification.

I'm not really suggesting to add unneeded synchronization, even more
so in local/private constructs as in this case.  It just felt odd to
see the release without a pairing acquire, so I asked.  ;-)

Thanks,
  Andrea


> eg:
>  handle_kernel_stack_overflow:
> +1:     la sp, spin_shadow_stack
> +       amoswap.w.aqrl sp, sp, (sp)
> +       bnez sp, 1b
> +
> ....
> +     smp_store_release(&spin_shadow_stack, 0);
> +     smp_mb();
Tong Tiangen Oct. 21, 2022, 12:08 p.m. UTC | #8
On 2022/10/21 16:36, Andrea Parri wrote:
>>>> +     atomic_set_release(&spin_shadow_stack, 0);
>>>
>>> Have not really looked at the details: should there be a matching acquire?
>>
>> I use atomic_set_release() here because I need the earlier memory
>> operations to finish, making sure sp is ready, before setting the
>> spin flag.
>>
>> The order of the following memory operations is not important,
>> because we only care about the sp value.
>>
>> Also, we use a relaxed amoswap before, because sp carries a natural
>> address dependency. But giving them RCsc semantics would also be okay
>> here, because we don't care about performance on this path.
> 
> Thanks for the clarification.
> 
> I'm not really suggesting to add unneeded synchronization, even more
> so in local/private constructs as in this case.  It just felt odd to
> see the release without a pairing acquire, so I asked.  ;-)
> 
> Thanks,
>    Andrea
> 
> 
>> eg:
>>   handle_kernel_stack_overflow:
>> +1:     la sp, spin_shadow_stack
>> +       amoswap.w.aqrl sp, sp, (sp)
>> +       bnez sp, 1b
>> +
>> ....
>> +     smp_store_release(&spin_shadow_stack, 0);
>> +     smp_mb();
> 

Hi Andrea, Zhang, Guo:
	Consider this implementation:)

	smp_store_mb(&spin_shadow_stack, 0);

Thanks,
Tong.
Andrea Parri Oct. 21, 2022, 1:22 p.m. UTC | #9
Hi Tong,
 
> > > I use atomic_set_release() here because I need the earlier memory
> > > operations to finish, making sure sp is ready, before setting the
> > > spin flag.

> 	Consider this implementation:)
> 
> 	smp_store_mb(&spin_shadow_stack, 0);

smp_store_mb() has "WRITE_ONCE(); smp_mb()" semantics; so it doesn't
guarantee that the store to spin_shadow_stack is ordered after program
-order earlier memory accesses.
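
For illustration, the difference in C11 terms is roughly (a sketch of
the ordering direction only, not of the kernel implementation):

  #include <stdatomic.h>

  atomic_ulong flag;
  unsigned long data;

  void like_smp_store_release(void)
  {
          data = 1;   /* ordered before the store below */
          atomic_store_explicit(&flag, 0, memory_order_release);
  }

  void like_smp_store_mb(void)
  {
          data = 1;   /* may still be reordered past the store below */
          atomic_store_explicit(&flag, 0, memory_order_relaxed);
          /* the trailing fence only orders the store against later
           * accesses in this thread */
          atomic_thread_fence(memory_order_seq_cst);
  }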

  Andrea
Tong Tiangen Oct. 21, 2022, 1:46 p.m. UTC | #10
On 2022/10/21 21:22, Andrea Parri wrote:
> Hi Tong,
>   
>>>> I use atomic_set_release() here because I need the earlier memory
>>>> operations to finish, making sure sp is ready, before setting the
>>>> spin flag.
> 
>> 	Consider this implementation:)
>>
>> 	smp_store_mb(&spin_shadow_stack, 0);
> 
> smp_store_mb() has "WRITE_ONCE(); smp_mb()" semantics; so it doesn't
> guarantee that the store to spin_shadow_stack is ordered after program
> -order earlier memory accesses.
> 
>    Andrea
> .

Hi Andrea:

IIUC, the earlier memory access is the amoswap.aqrl, and the .aqrl
guarantees the ordering. But anyway, given that we don't care about
performance here, using smp_store_release() (which adds the barrier)
is surely right.

Thanks,
Tong.
Guo Ren Oct. 21, 2022, 2:35 p.m. UTC | #11
On Fri, Oct 21, 2022 at 4:36 PM Andrea Parri <parri.andrea@gmail.com> wrote:
>
> > > > +     atomic_set_release(&spin_shadow_stack, 0);
> > >
> > > Have not really looked at the details: should there be a matching acquire?
> >
> > I use atomic_set_release() here because I need the earlier memory
> > operations to finish, making sure sp is ready, before setting the
> > spin flag.
> >
> > The order of the following memory operations is not important,
> > because we only care about the sp value.
> >
> > Also, we use a relaxed amoswap before, because sp carries a natural
> > address dependency. But giving them RCsc semantics would also be okay
> > here, because we don't care about performance on this path.
>
> Thanks for the clarification.
>
> I'm not really suggesting to add unneeded synchronization, even more
> so in local/private constructs as in this case.  It just felt odd to
> see the release without a pairing acquire, so I asked.  ;-)
Okay, let's keep:

 handle_kernel_stack_overflow:
+1:     la sp, spin_shadow_stack
+       amoswap.w sp, sp, (sp)
+       bnez sp, 1b
+
....
+     smp_store_release(&spin_shadow_stack, 0);

>
> Thanks,
>   Andrea
>
>
> > eg:
> >  handle_kernel_stack_overflow:
> > +1:     la sp, spin_shadow_stack
> > +       amoswap.w.aqrl sp, sp, (sp)
> > +       bnez sp, 1b
> > +
> > ....
> > +     smp_store_release(&spin_shadow_stack, 0);
> > +     smp_mb();
Guo Ren Oct. 21, 2022, 2:41 p.m. UTC | #12
On Fri, Oct 21, 2022 at 9:46 PM Tong Tiangen <tongtiangen@huawei.com> wrote:
>
>
>
> On 2022/10/21 21:22, Andrea Parri wrote:
> > Hi Tong,
> >
> >>>> I use atomic_set_release() here because I need the earlier memory
> >>>> operations to finish, making sure sp is ready, before setting the
> >>>> spin flag.
> >
> >>      Consider this implementation:)
> >>
> >>      smp_store_mb(&spin_shadow_stack, 0);
> >
> > smp_store_mb() has "WRITE_ONCE(); smp_mb()" semantics; so it doesn't
> > guarantee that the store to spin_shadow_stack is ordered after program
> > -order earlier memory accesses.
> >
> >    Andrea
> > .
>
> Hi Andrea:
>
> IIUC, the earlier memory access is the amoswap.aqrl, and the .aqrl
> guarantees the ordering. But anyway, given that we don't care about
> performance here, using smp_store_release() (which adds the barrier)
> is surely right.
We use smp_store_release() for:
        //load per-cpu overflow stack
        REG_L sp, -8(sp)

not for the amoswap.

Actually, amoswap.aqrl guarantees nothing extra here, because all the
following instructions already depend on the sp register.

>
> Thanks,
> Tong.
Tong Tiangen Oct. 21, 2022, 3:17 p.m. UTC | #13
On 2022/10/21 22:41, Guo Ren wrote:
> On Fri, Oct 21, 2022 at 9:46 PM Tong Tiangen <tongtiangen@huawei.com> wrote:
>>
>>
>>
>> On 2022/10/21 21:22, Andrea Parri wrote:
>>> Hi Tong,
>>>
>>>>>> I use atomic_set_release() here because I need the earlier memory
>>>>>> operations to finish, making sure sp is ready, before setting the
>>>>>> spin flag.
>>>
>>>>       Consider this implementation:)
>>>>
>>>>       smp_store_mb(&spin_shadow_stack, 0);
>>>
>>> smp_store_mb() has "WRITE_ONCE(); smp_mb()" semantics; so it doesn't
>>> guarantee that the store to spin_shadow_stack is ordered after program
>>> -order earlier memory accesses.
>>>
>>>     Andrea
>>> .
>>
>> Hi Andrea:
>>
>> IIUC, the earlier memory access is the amoswap.aqrl, and the .aqrl
>> guarantees the ordering. But anyway, given that we don't care about
>> performance here, using smp_store_release() (which adds the barrier)
>> is surely right.
> We use smp_store_release() for:
>          //load per-cpu overflow stack
>          REG_L sp, -8(sp)

Oh, I missed that, you're right. This guarantees the spin flag is set
after sp is used.

> 
> not for the amoswap.
> 
> Actually, amoswap.aqrl guarantees nothing extra here, because all the
> following instructions already depend on the sp register.
right.

Thanks,
Tong.

> 
>>
>> Thanks,
>> Tong.
> 
> 
>

Patch

diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index b9eda3fcbd6d..7b924b16792b 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -404,6 +404,10 @@  handle_syscall_trace_exit:
 
 #ifdef CONFIG_VMAP_STACK
 handle_kernel_stack_overflow:
+1:	la sp, spin_shadow_stack
+	amoswap.w sp, sp, (sp)
+	bnez sp, 1b
+
 	la sp, shadow_stack
 	addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
 
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index f3e96d60a2ff..88a54947dffb 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -221,11 +221,15 @@  asmlinkage unsigned long get_overflow_stack(void)
 		OVERFLOW_STACK_SIZE;
 }
 
+atomic_t spin_shadow_stack;
+
 asmlinkage void handle_bad_stack(struct pt_regs *regs)
 {
 	unsigned long tsk_stk = (unsigned long)current->stack;
 	unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
 
+	atomic_set_release(&spin_shadow_stack, 0);
+
 	console_verbose();
 
 	pr_emerg("Insufficient stack space to handle exception!\n");