Message ID | 20250305-riscv_optimize_entry-v5-2-6507b5dff3ce@rivosinc.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | entry: Move ret_from_fork() to C and inline syscall_exit_to_user_mode() | expand |
Context | Check | Description |
---|---|---|
bjorn/pre-ci_am | success | Success |
bjorn/build-rv32-defconfig | success | build-rv32-defconfig |
bjorn/build-rv64-clang-allmodconfig | success | build-rv64-clang-allmodconfig |
bjorn/build-rv64-gcc-allmodconfig | success | build-rv64-gcc-allmodconfig |
bjorn/build-rv64-nommu-k210-defconfig | success | build-rv64-nommu-k210-defconfig |
bjorn/build-rv64-nommu-k210-virt | success | build-rv64-nommu-k210-virt |
bjorn/checkpatch | warning | checkpatch |
bjorn/dtb-warn-rv64 | success | dtb-warn-rv64 |
bjorn/header-inline | success | header-inline |
bjorn/kdoc | success | kdoc |
bjorn/module-param | success | module-param |
bjorn/verify-fixes | success | verify-fixes |
bjorn/verify-signedoff | success | verify-signedoff |
Hi Charlie, On 06/03/2025 01:43, Charlie Jenkins wrote: > This function was unified into a single function in commit ab9164dae273 > ("riscv: entry: Consolidate ret_from_kernel_thread into ret_from_fork"). > However that imposed a performance degradation. Partially reverting this > commit to have ret_from_fork() split again results in a 1% increase on > the number of times fork is able to be called per second. > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com> > --- > arch/riscv/include/asm/asm-prototypes.h | 3 ++- > arch/riscv/kernel/entry.S | 13 ++++++++++--- > arch/riscv/kernel/process.c | 17 +++++++++++------ > 3 files changed, 23 insertions(+), 10 deletions(-) > > diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h > index 733ff609778797001006c33bba9e3cc5b1f15387..bfc8ea5f9319b19449ec59493b45b926df888832 100644 > --- a/arch/riscv/include/asm/asm-prototypes.h > +++ b/arch/riscv/include/asm/asm-prototypes.h > @@ -52,7 +52,8 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s); > DECLARE_DO_ERROR_INFO(do_trap_ecall_m); > DECLARE_DO_ERROR_INFO(do_trap_break); > > -asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs); > +asmlinkage void ret_from_fork_kernel(void *fn_arg, int (*fn)(void *), struct pt_regs *regs); > +asmlinkage void ret_from_fork_user(struct pt_regs *regs); > asmlinkage void handle_bad_stack(struct pt_regs *regs); > asmlinkage void do_page_fault(struct pt_regs *regs); > asmlinkage void do_irq(struct pt_regs *regs); > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > index b2dc5e7c7b3a843fa4aa02eba2a911eb3ce31d1f..0fb338000c6dc0358742cd03497fa54b9e9d1aec 100644 > --- a/arch/riscv/kernel/entry.S > +++ b/arch/riscv/kernel/entry.S > @@ -319,14 +319,21 @@ SYM_CODE_END(handle_kernel_stack_overflow) > ASM_NOKPROBE(handle_kernel_stack_overflow) > #endif > > -SYM_CODE_START(ret_from_fork_asm) > +SYM_CODE_START(ret_from_fork_kernel_asm) > call schedule_tail > move a0, s1 /* fn_arg */ > move a1, s0 /* fn */ > move a2, sp /* pt_regs */ > - call ret_from_fork > + call ret_from_fork_kernel > j ret_from_exception > -SYM_CODE_END(ret_from_fork_asm) > +SYM_CODE_END(ret_from_fork_kernel_asm) > + > +SYM_CODE_START(ret_from_fork_user_asm) > + call schedule_tail > + move a0, sp /* pt_regs */ > + call ret_from_fork_user > + j ret_from_exception > +SYM_CODE_END(ret_from_fork_user_asm) > > #ifdef CONFIG_IRQ_STACKS > /* > diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c > index 7b0a0bfe29aec896c2bdd8976d855dd390de88d7..485ec7a80a56097e8905cd6395af29633846b5c8 100644 > --- a/arch/riscv/kernel/process.c > +++ b/arch/riscv/kernel/process.c > @@ -38,7 +38,8 @@ unsigned long __stack_chk_guard __read_mostly; > EXPORT_SYMBOL(__stack_chk_guard); > #endif > > -extern asmlinkage void ret_from_fork_asm(void); > +extern asmlinkage void ret_from_fork_kernel_asm(void); > +extern asmlinkage void ret_from_fork_user_asm(void); > > void noinstr arch_cpu_idle(void) > { > @@ -208,14 +209,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) > return 0; > } > > -asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs) > +asmlinkage void ret_from_fork_kernel(void *fn_arg, int (*fn)(void *), struct pt_regs *regs) > { > - if (unlikely(fn)) > - fn(fn_arg); > + fn(fn_arg); > > syscall_exit_to_user_mode(regs); > } > > +asmlinkage void ret_from_fork_user(struct pt_regs *regs) > +{ > + syscall_exit_to_user_mode(regs); > +} > + > int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) > { > unsigned long clone_flags = args->flags; > @@ -238,6 +243,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) > > p->thread.s[0] = (unsigned long)args->fn; > p->thread.s[1] = (unsigned long)args->fn_arg; > + p->thread.ra = (unsigned long)ret_from_fork_kernel_asm; > } else { > *childregs = *(current_pt_regs()); > /* Turn off status.VS */ > @@ -247,12 +253,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) > if (clone_flags & CLONE_SETTLS) > childregs->tp = tls; > childregs->a0 = 0; /* Return value of fork() */ > - p->thread.s[0] = 0; > + p->thread.ra = (unsigned long)ret_from_fork_user_asm; > } > p->thread.riscv_v_flags = 0; > if (has_vector() || has_xtheadvector()) > riscv_v_thread_alloc(p); > - p->thread.ra = (unsigned long)ret_from_fork_asm; > p->thread.sp = (unsigned long)childregs; /* kernel sp */ > return 0; > } > As I had noted in v2, this won't bring any performance improvement on real hardware, so I'm not convinced we need this patch. IMO, we should merge micro-optimizations like this if they really bring something. Thanks, Alex
diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index 733ff609778797001006c33bba9e3cc5b1f15387..bfc8ea5f9319b19449ec59493b45b926df888832 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -52,7 +52,8 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s); DECLARE_DO_ERROR_INFO(do_trap_ecall_m); DECLARE_DO_ERROR_INFO(do_trap_break); -asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs); +asmlinkage void ret_from_fork_kernel(void *fn_arg, int (*fn)(void *), struct pt_regs *regs); +asmlinkage void ret_from_fork_user(struct pt_regs *regs); asmlinkage void handle_bad_stack(struct pt_regs *regs); asmlinkage void do_page_fault(struct pt_regs *regs); asmlinkage void do_irq(struct pt_regs *regs); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index b2dc5e7c7b3a843fa4aa02eba2a911eb3ce31d1f..0fb338000c6dc0358742cd03497fa54b9e9d1aec 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -319,14 +319,21 @@ SYM_CODE_END(handle_kernel_stack_overflow) ASM_NOKPROBE(handle_kernel_stack_overflow) #endif -SYM_CODE_START(ret_from_fork_asm) +SYM_CODE_START(ret_from_fork_kernel_asm) call schedule_tail move a0, s1 /* fn_arg */ move a1, s0 /* fn */ move a2, sp /* pt_regs */ - call ret_from_fork + call ret_from_fork_kernel j ret_from_exception -SYM_CODE_END(ret_from_fork_asm) +SYM_CODE_END(ret_from_fork_kernel_asm) + +SYM_CODE_START(ret_from_fork_user_asm) + call schedule_tail + move a0, sp /* pt_regs */ + call ret_from_fork_user + j ret_from_exception +SYM_CODE_END(ret_from_fork_user_asm) #ifdef CONFIG_IRQ_STACKS /* diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 7b0a0bfe29aec896c2bdd8976d855dd390de88d7..485ec7a80a56097e8905cd6395af29633846b5c8 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -38,7 +38,8 @@ unsigned long __stack_chk_guard __read_mostly; EXPORT_SYMBOL(__stack_chk_guard); #endif -extern asmlinkage void ret_from_fork_asm(void); +extern asmlinkage void ret_from_fork_kernel_asm(void); +extern asmlinkage void ret_from_fork_user_asm(void); void noinstr arch_cpu_idle(void) { @@ -208,14 +209,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } -asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs) +asmlinkage void ret_from_fork_kernel(void *fn_arg, int (*fn)(void *), struct pt_regs *regs) { - if (unlikely(fn)) - fn(fn_arg); + fn(fn_arg); syscall_exit_to_user_mode(regs); } +asmlinkage void ret_from_fork_user(struct pt_regs *regs) +{ + syscall_exit_to_user_mode(regs); +} + int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; @@ -238,6 +243,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.s[0] = (unsigned long)args->fn; p->thread.s[1] = (unsigned long)args->fn_arg; + p->thread.ra = (unsigned long)ret_from_fork_kernel_asm; } else { *childregs = *(current_pt_regs()); /* Turn off status.VS */ @@ -247,12 +253,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (clone_flags & CLONE_SETTLS) childregs->tp = tls; childregs->a0 = 0; /* Return value of fork() */ - p->thread.s[0] = 0; + p->thread.ra = (unsigned long)ret_from_fork_user_asm; } p->thread.riscv_v_flags = 0; if (has_vector() || has_xtheadvector()) riscv_v_thread_alloc(p); - p->thread.ra = (unsigned long)ret_from_fork_asm; p->thread.sp = (unsigned long)childregs; /* kernel sp */ return 0; }
This function was unified into a single function in commit ab9164dae273 ("riscv: entry: Consolidate ret_from_kernel_thread into ret_from_fork"). However that imposed a performance degradation. Partially reverting this commit to have ret_from_fork() split again results in a 1% increase on the number of times fork is able to be called per second. Signed-off-by: Charlie Jenkins <charlie@rivosinc.com> --- arch/riscv/include/asm/asm-prototypes.h | 3 ++- arch/riscv/kernel/entry.S | 13 ++++++++++--- arch/riscv/kernel/process.c | 17 +++++++++++------ 3 files changed, 23 insertions(+), 10 deletions(-)