diff mbox series

[1/4] riscv: entry: Convert ret_from_fork() to C

Message ID 20250122-riscv_optimize_entry-v1-1-4ee95559cfd0@rivosinc.com (mailing list archive)
State Superseded
Headers show
Series entry: Move ret_from_fork() to C and inline syscall_exit_to_user_mode() | expand

Checks

Context Check Description
conchuod/vmtest-for-next-PR success PR summary
conchuod/patch-1-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh took 120.81s
conchuod/patch-1-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh took 1137.03s
conchuod/patch-1-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh took 1398.62s
conchuod/patch-1-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh took 21.22s
conchuod/patch-1-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh took 22.83s
conchuod/patch-1-test-6 success .github/scripts/patches/tests/checkpatch.sh took 0.97s
conchuod/patch-1-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh took 42.98s
conchuod/patch-1-test-8 success .github/scripts/patches/tests/header_inline.sh took 0.01s
conchuod/patch-1-test-9 success .github/scripts/patches/tests/kdoc.sh took 0.61s
conchuod/patch-1-test-10 success .github/scripts/patches/tests/module_param.sh took 0.02s
conchuod/patch-1-test-11 success .github/scripts/patches/tests/verify_fixes.sh took 0.01s
conchuod/patch-1-test-12 success .github/scripts/patches/tests/verify_signedoff.sh took 0.04s

Commit Message

Charlie Jenkins Jan. 22, 2025, 10:56 p.m. UTC
Move the main section of ret_from_fork() to C to allow inlining of
syscall_exit_to_user_mode().

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
---
 arch/riscv/include/asm/asm-prototypes.h |  1 +
 arch/riscv/kernel/entry.S               | 15 ++++++---------
 arch/riscv/kernel/process.c             | 14 ++++++++++++--
 3 files changed, 19 insertions(+), 11 deletions(-)

Comments

Alexandre Ghiti Jan. 24, 2025, 7:09 a.m. UTC | #1
Hi Charlie,

On 22/01/2025 23:56, Charlie Jenkins wrote:
> Move the main section of ret_from_fork() to C to allow inlining of
> syscall_exit_to_user_mode().
>
> Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> ---
>   arch/riscv/include/asm/asm-prototypes.h |  1 +
>   arch/riscv/kernel/entry.S               | 15 ++++++---------
>   arch/riscv/kernel/process.c             | 14 ++++++++++++--
>   3 files changed, 19 insertions(+), 11 deletions(-)
>
> diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
> index cd627ec289f163a630b73dd03dd52a6b28692997..733ff609778797001006c33bba9e3cc5b1f15387 100644
> --- a/arch/riscv/include/asm/asm-prototypes.h
> +++ b/arch/riscv/include/asm/asm-prototypes.h
> @@ -52,6 +52,7 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s);
>   DECLARE_DO_ERROR_INFO(do_trap_ecall_m);
>   DECLARE_DO_ERROR_INFO(do_trap_break);
>   
> +asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs);
>   asmlinkage void handle_bad_stack(struct pt_regs *regs);
>   asmlinkage void do_page_fault(struct pt_regs *regs);
>   asmlinkage void do_irq(struct pt_regs *regs);
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index 33a5a9f2a0d4e1eeccfb3621b9e518b88e1b0704..9225c322279aa90e737b1d7144db084319cf8103 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -319,17 +319,14 @@ SYM_CODE_END(handle_kernel_stack_overflow)
>   ASM_NOKPROBE(handle_kernel_stack_overflow)
>   #endif
>   
> -SYM_CODE_START(ret_from_fork)
> +SYM_CODE_START(ret_from_fork_asm)
>   	call schedule_tail
> -	beqz s0, 1f	/* not from kernel thread */
> -	/* Call fn(arg) */
> -	move a0, s1
> -	jalr s0
> -1:
> -	move a0, sp /* pt_regs */
> -	call syscall_exit_to_user_mode
> +	move a0, s1 /* fn */
> +	move a1, s0 /* fn_arg */


You pass the arguments in the right order but the comments are wrong (s1 
contains fn_arg and s0 fn).


> +	move a2, sp /* pt_regs */
> +	call ret_from_fork
>   	j ret_from_exception
> -SYM_CODE_END(ret_from_fork)
> +SYM_CODE_END(ret_from_fork_asm)
>   
>   #ifdef CONFIG_IRQ_STACKS
>   /*
> diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
> index 58b6482c2bf662bf5224ca50c8e21a68760a6b41..0d07e6d8f6b57beba438dbba5e8c74a014582bee 100644
> --- a/arch/riscv/kernel/process.c
> +++ b/arch/riscv/kernel/process.c
> @@ -17,7 +17,9 @@
>   #include <linux/ptrace.h>
>   #include <linux/uaccess.h>
>   #include <linux/personality.h>
> +#include <linux/entry-common.h>
>   
> +#include <asm/asm-prototypes.h>
>   #include <asm/unistd.h>
>   #include <asm/processor.h>
>   #include <asm/csr.h>
> @@ -36,7 +38,7 @@ unsigned long __stack_chk_guard __read_mostly;
>   EXPORT_SYMBOL(__stack_chk_guard);
>   #endif
>   
> -extern asmlinkage void ret_from_fork(void);
> +extern asmlinkage void ret_from_fork_asm(void);
>   
>   void noinstr arch_cpu_idle(void)
>   {
> @@ -206,6 +208,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
>   	return 0;
>   }
>   
> +asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs)
> +{
> +	if (unlikely(fn))
> +		fn(fn_arg);
> +
> +	syscall_exit_to_user_mode(regs);
> +}
> +
>   int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
>   {
>   	unsigned long clone_flags = args->flags;
> @@ -242,7 +252,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
>   	p->thread.riscv_v_flags = 0;
>   	if (has_vector())
>   		riscv_v_thread_alloc(p);
> -	p->thread.ra = (unsigned long)ret_from_fork;
> +	p->thread.ra = (unsigned long)ret_from_fork_asm;
>   	p->thread.sp = (unsigned long)childregs; /* kernel sp */
>   	return 0;
>   }
>

With the comment fix, you can add:

Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>

Thanks,

Alex
Charlie Jenkins Jan. 24, 2025, 7:56 a.m. UTC | #2
On Fri, Jan 24, 2025 at 08:09:16AM +0100, Alexandre Ghiti wrote:
> Hi Charlie,
> 
> On 22/01/2025 23:56, Charlie Jenkins wrote:
> > Move the main section of ret_from_fork() to C to allow inlining of
> > syscall_exit_to_user_mode().
> > 
> > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > ---
> >   arch/riscv/include/asm/asm-prototypes.h |  1 +
> >   arch/riscv/kernel/entry.S               | 15 ++++++---------
> >   arch/riscv/kernel/process.c             | 14 ++++++++++++--
> >   3 files changed, 19 insertions(+), 11 deletions(-)
> > 
> > diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
> > index cd627ec289f163a630b73dd03dd52a6b28692997..733ff609778797001006c33bba9e3cc5b1f15387 100644
> > --- a/arch/riscv/include/asm/asm-prototypes.h
> > +++ b/arch/riscv/include/asm/asm-prototypes.h
> > @@ -52,6 +52,7 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s);
> >   DECLARE_DO_ERROR_INFO(do_trap_ecall_m);
> >   DECLARE_DO_ERROR_INFO(do_trap_break);
> > +asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs);
> >   asmlinkage void handle_bad_stack(struct pt_regs *regs);
> >   asmlinkage void do_page_fault(struct pt_regs *regs);
> >   asmlinkage void do_irq(struct pt_regs *regs);
> > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > index 33a5a9f2a0d4e1eeccfb3621b9e518b88e1b0704..9225c322279aa90e737b1d7144db084319cf8103 100644
> > --- a/arch/riscv/kernel/entry.S
> > +++ b/arch/riscv/kernel/entry.S
> > @@ -319,17 +319,14 @@ SYM_CODE_END(handle_kernel_stack_overflow)
> >   ASM_NOKPROBE(handle_kernel_stack_overflow)
> >   #endif
> > -SYM_CODE_START(ret_from_fork)
> > +SYM_CODE_START(ret_from_fork_asm)
> >   	call schedule_tail
> > -	beqz s0, 1f	/* not from kernel thread */
> > -	/* Call fn(arg) */
> > -	move a0, s1
> > -	jalr s0
> > -1:
> > -	move a0, sp /* pt_regs */
> > -	call syscall_exit_to_user_mode
> > +	move a0, s1 /* fn */
> > +	move a1, s0 /* fn_arg */
> 
> 
> You pass the arguments in the right order but the comments are wrong (s1
> contains fn_arg and s0 fn).

Oh thank you! I originally had it in this order but then moved it around
to reduce the amount of register shuffling that is needed on the C side,
I will fix in the next version.

- Charlie

> 
> 
> > +	move a2, sp /* pt_regs */
> > +	call ret_from_fork
> >   	j ret_from_exception
> > -SYM_CODE_END(ret_from_fork)
> > +SYM_CODE_END(ret_from_fork_asm)
> >   #ifdef CONFIG_IRQ_STACKS
> >   /*
> > diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
> > index 58b6482c2bf662bf5224ca50c8e21a68760a6b41..0d07e6d8f6b57beba438dbba5e8c74a014582bee 100644
> > --- a/arch/riscv/kernel/process.c
> > +++ b/arch/riscv/kernel/process.c
> > @@ -17,7 +17,9 @@
> >   #include <linux/ptrace.h>
> >   #include <linux/uaccess.h>
> >   #include <linux/personality.h>
> > +#include <linux/entry-common.h>
> > +#include <asm/asm-prototypes.h>
> >   #include <asm/unistd.h>
> >   #include <asm/processor.h>
> >   #include <asm/csr.h>
> > @@ -36,7 +38,7 @@ unsigned long __stack_chk_guard __read_mostly;
> >   EXPORT_SYMBOL(__stack_chk_guard);
> >   #endif
> > -extern asmlinkage void ret_from_fork(void);
> > +extern asmlinkage void ret_from_fork_asm(void);
> >   void noinstr arch_cpu_idle(void)
> >   {
> > @@ -206,6 +208,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
> >   	return 0;
> >   }
> > +asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs)
> > +{
> > +	if (unlikely(fn))
> > +		fn(fn_arg);
> > +
> > +	syscall_exit_to_user_mode(regs);
> > +}
> > +
> >   int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> >   {
> >   	unsigned long clone_flags = args->flags;
> > @@ -242,7 +252,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> >   	p->thread.riscv_v_flags = 0;
> >   	if (has_vector())
> >   		riscv_v_thread_alloc(p);
> > -	p->thread.ra = (unsigned long)ret_from_fork;
> > +	p->thread.ra = (unsigned long)ret_from_fork_asm;
> >   	p->thread.sp = (unsigned long)childregs; /* kernel sp */
> >   	return 0;
> >   }
> > 
> 
> With the comment fix, you can add:
> 
> Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> 
> Thanks,
> 
> Alex
>
Maciej W. Rozycki Jan. 25, 2025, 11:30 p.m. UTC | #3
On Wed, 22 Jan 2025, Charlie Jenkins wrote:

> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index 33a5a9f2a0d4e1eeccfb3621b9e518b88e1b0704..9225c322279aa90e737b1d7144db084319cf8103 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -319,17 +319,14 @@ SYM_CODE_END(handle_kernel_stack_overflow)
>  ASM_NOKPROBE(handle_kernel_stack_overflow)
>  #endif
>  
> -SYM_CODE_START(ret_from_fork)
> +SYM_CODE_START(ret_from_fork_asm)
>  	call schedule_tail
> -	beqz s0, 1f	/* not from kernel thread */
> -	/* Call fn(arg) */
> -	move a0, s1
> -	jalr s0
> -1:
> -	move a0, sp /* pt_regs */
> -	call syscall_exit_to_user_mode
> +	move a0, s1 /* fn */
> +	move a1, s0 /* fn_arg */
> +	move a2, sp /* pt_regs */
> +	call ret_from_fork
>  	j ret_from_exception

 Wouldn't it make sense to save a jump here and make it a tail call, i.e.:

	la ra, ret_from_exception
	tail ret_from_fork

?

  Maciej
Charlie Jenkins Jan. 26, 2025, 7:51 p.m. UTC | #4
On Sat, Jan 25, 2025 at 11:30:09PM +0000, Maciej W. Rozycki wrote:
> On Wed, 22 Jan 2025, Charlie Jenkins wrote:
> 
> > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > index 33a5a9f2a0d4e1eeccfb3621b9e518b88e1b0704..9225c322279aa90e737b1d7144db084319cf8103 100644
> > --- a/arch/riscv/kernel/entry.S
> > +++ b/arch/riscv/kernel/entry.S
> > @@ -319,17 +319,14 @@ SYM_CODE_END(handle_kernel_stack_overflow)
> >  ASM_NOKPROBE(handle_kernel_stack_overflow)
> >  #endif
> >  
> > -SYM_CODE_START(ret_from_fork)
> > +SYM_CODE_START(ret_from_fork_asm)
> >  	call schedule_tail
> > -	beqz s0, 1f	/* not from kernel thread */
> > -	/* Call fn(arg) */
> > -	move a0, s1
> > -	jalr s0
> > -1:
> > -	move a0, sp /* pt_regs */
> > -	call syscall_exit_to_user_mode
> > +	move a0, s1 /* fn */
> > +	move a1, s0 /* fn_arg */
> > +	move a2, sp /* pt_regs */
> > +	call ret_from_fork
> >  	j ret_from_exception
> 
>  Wouldn't it make sense to save a jump here and make it a tail call, i.e.:
> 
> 	la ra, ret_from_exception
> 	tail ret_from_fork
> 

I don't believe so due to the return address stack. It was shown in this
patch [1] that a 7% performance improvement can be seen on existing
riscv hardware by performing the extra jump. Doing tail calls should be
avoided on riscv since the hardware can be expected to predict the
return address incorrectly every time if the return address is manually
changed.

- Charlie

> ?
> 
>   Maciej
Maciej W. Rozycki Jan. 27, 2025, 8:20 p.m. UTC | #5
On Sun, 26 Jan 2025, Charlie Jenkins wrote:

> >  Wouldn't it make sense to save a jump here and make it a tail call, i.e.:
> > 
> > 	la ra, ret_from_exception
> > 	tail ret_from_fork
> > 
> 
> I don't believe so due to the return address stack. It was shown in this
> patch [1] that a 7% performance improvement can be seen on existing
> riscv hardware by performing the extra jump. Doing tail calls should be
> avoided on riscv since the hardware can be expected to predict the
> return address incorrectly every time if the return address is manually
> changed.

 Fair enough, thanks.  Though no link to said patch given.

  Maciej
Charlie Jenkins Jan. 27, 2025, 8:55 p.m. UTC | #6
On Mon, Jan 27, 2025 at 08:20:56PM +0000, Maciej W. Rozycki wrote:
> On Sun, 26 Jan 2025, Charlie Jenkins wrote:
> 
> > >  Wouldn't it make sense to save a jump here and make it a tail call, i.e.:
> > > 
> > > 	la ra, ret_from_exception
> > > 	tail ret_from_fork
> > > 
> > 
> > I don't believe so due to the return address stack. It was shown in this
> > patch [1] that a 7% performance improvement can be seen on existing
> > riscv hardware by performing the extra jump. Doing tail calls should be
> > avoided on riscv since the hardware can be expected to predict the
> > return address incorrectly every time if the return address is manually
> > changed.
> 
>  Fair enough, thanks.  Though no link to said patch given.

Oops! Here it is:
[1] https://lore.kernel.org/linux-riscv/20240607061335.2197383-1-cyrilbur@tenstorrent.com/

> 
>   Maciej
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
index cd627ec289f163a630b73dd03dd52a6b28692997..733ff609778797001006c33bba9e3cc5b1f15387 100644
--- a/arch/riscv/include/asm/asm-prototypes.h
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -52,6 +52,7 @@  DECLARE_DO_ERROR_INFO(do_trap_ecall_s);
 DECLARE_DO_ERROR_INFO(do_trap_ecall_m);
 DECLARE_DO_ERROR_INFO(do_trap_break);
 
+asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs);
 asmlinkage void handle_bad_stack(struct pt_regs *regs);
 asmlinkage void do_page_fault(struct pt_regs *regs);
 asmlinkage void do_irq(struct pt_regs *regs);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 33a5a9f2a0d4e1eeccfb3621b9e518b88e1b0704..9225c322279aa90e737b1d7144db084319cf8103 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -319,17 +319,14 @@  SYM_CODE_END(handle_kernel_stack_overflow)
 ASM_NOKPROBE(handle_kernel_stack_overflow)
 #endif
 
-SYM_CODE_START(ret_from_fork)
+SYM_CODE_START(ret_from_fork_asm)
 	call schedule_tail
-	beqz s0, 1f	/* not from kernel thread */
-	/* Call fn(arg) */
-	move a0, s1
-	jalr s0
-1:
-	move a0, sp /* pt_regs */
-	call syscall_exit_to_user_mode
+	move a0, s1 /* fn_arg */
+	move a1, s0 /* fn */
+	move a2, sp /* pt_regs */
+	call ret_from_fork
 	j ret_from_exception
-SYM_CODE_END(ret_from_fork)
+SYM_CODE_END(ret_from_fork_asm)
 
 #ifdef CONFIG_IRQ_STACKS
 /*
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 58b6482c2bf662bf5224ca50c8e21a68760a6b41..0d07e6d8f6b57beba438dbba5e8c74a014582bee 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -17,7 +17,9 @@ 
 #include <linux/ptrace.h>
 #include <linux/uaccess.h>
 #include <linux/personality.h>
+#include <linux/entry-common.h>
 
+#include <asm/asm-prototypes.h>
 #include <asm/unistd.h>
 #include <asm/processor.h>
 #include <asm/csr.h>
@@ -36,7 +38,7 @@  unsigned long __stack_chk_guard __read_mostly;
 EXPORT_SYMBOL(__stack_chk_guard);
 #endif
 
-extern asmlinkage void ret_from_fork(void);
+extern asmlinkage void ret_from_fork_asm(void);
 
 void noinstr arch_cpu_idle(void)
 {
@@ -206,6 +208,14 @@  int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	return 0;
 }
 
+asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs)
+{
+	if (unlikely(fn))
+		fn(fn_arg);
+
+	syscall_exit_to_user_mode(regs);
+}
+
 int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 {
 	unsigned long clone_flags = args->flags;
@@ -242,7 +252,7 @@  int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	p->thread.riscv_v_flags = 0;
 	if (has_vector())
 		riscv_v_thread_alloc(p);
-	p->thread.ra = (unsigned long)ret_from_fork;
+	p->thread.ra = (unsigned long)ret_from_fork_asm;
 	p->thread.sp = (unsigned long)childregs; /* kernel sp */
 	return 0;
 }