diff mbox series

[v2,3/4] loongarch: entry: Migrate ret_from_fork() to C

Message ID 20250123-riscv_optimize_entry-v2-3-7c259492d508@rivosinc.com (mailing list archive)
State Superseded
Headers show
Series entry: Move ret_from_fork() to C and inline syscall_exit_to_user_mode() | expand

Checks

Context Check Description
conchuod/vmtest-for-next-PR success PR summary
conchuod/patch-3-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh took 128.30s
conchuod/patch-3-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh took 1142.25s
conchuod/patch-3-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh took 1384.84s
conchuod/patch-3-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh took 22.69s
conchuod/patch-3-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh took 24.89s
conchuod/patch-3-test-6 warning .github/scripts/patches/tests/checkpatch.sh took 1.27s
conchuod/patch-3-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh took 46.43s
conchuod/patch-3-test-8 success .github/scripts/patches/tests/header_inline.sh took 0.01s
conchuod/patch-3-test-9 success .github/scripts/patches/tests/kdoc.sh took 0.68s
conchuod/patch-3-test-10 success .github/scripts/patches/tests/module_param.sh took 0.02s
conchuod/patch-3-test-11 success .github/scripts/patches/tests/verify_fixes.sh took 0.00s
conchuod/patch-3-test-12 success .github/scripts/patches/tests/verify_signedoff.sh took 0.04s

Commit Message

Charlie Jenkins Jan. 23, 2025, 7:14 p.m. UTC
LoongArch is the only architecture that calls
syscall_exit_to_user_mode() from asm. Move the call into C so that this
function can be inlined across all architectures.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
---
 arch/loongarch/include/asm/asm-prototypes.h |  5 +++++
 arch/loongarch/include/asm/switch_to.h      |  8 +++++++
 arch/loongarch/kernel/entry.S               | 22 +++++++++----------
 arch/loongarch/kernel/process.c             | 34 ++++++++++++++++++++++++-----
 4 files changed, 51 insertions(+), 18 deletions(-)

Comments

Huacai Chen Jan. 24, 2025, 9:05 a.m. UTC | #1
Hi, Charlie,

On Fri, Jan 24, 2025 at 3:15 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>
> Loongarch is the only architecture that calls
We usually use "LoongArch" instead of "loongarch" or "Loongarch".

> syscall_exit_to_user_mode() from asm. Move the call into C so that this
> function can be inlined across all architectures.
>
> Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> ---
>  arch/loongarch/include/asm/asm-prototypes.h |  5 +++++
>  arch/loongarch/include/asm/switch_to.h      |  8 +++++++
>  arch/loongarch/kernel/entry.S               | 22 +++++++++----------
>  arch/loongarch/kernel/process.c             | 34 ++++++++++++++++++++++++-----
>  4 files changed, 51 insertions(+), 18 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
> index 51f224bcfc654228ae423e9a066b25b35102a5b9..0195d4309fd29f94664d5f34247198c769033b1b 100644
> --- a/arch/loongarch/include/asm/asm-prototypes.h
> +++ b/arch/loongarch/include/asm/asm-prototypes.h
> @@ -12,3 +12,8 @@ __int128_t __ashlti3(__int128_t a, int b);
>  __int128_t __ashrti3(__int128_t a, int b);
>  __int128_t __lshrti3(__int128_t a, int b);
>  #endif
> +
> +asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
> +                                                                   struct pt_regs *regs,
> +                                                                   int (*fn)(void *),
> +                                                                   void *fn_arg);
It is a little strange that we only need to declare
ret_from_kernel_thread() but not ret_from_fork().

> diff --git a/arch/loongarch/include/asm/switch_to.h b/arch/loongarch/include/asm/switch_to.h
> index 5b225aff3ba21aa06d0713bc8e73e1b941389630..a1c5576f1fd145670e13038bec6dd390486099ab 100644
> --- a/arch/loongarch/include/asm/switch_to.h
> +++ b/arch/loongarch/include/asm/switch_to.h
> @@ -26,6 +26,14 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev,
>                         struct task_struct *next, struct thread_info *next_ti,
>                         void *sched_ra, void *sched_cfa);
>
> +void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
> +                                                        struct pt_regs *regs,
> +                                                        int (*fn)(void *),
> +                                                        void *fn_arg);
> +
> +void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev,
> +                                               struct pt_regs *regs);
> +
I prefer alphabetical order, which means putting ret_from_fork() before
ret_from_kernel_thread().

>  /*
>   * For newly created kernel threads switch_to() will return to
>   * ret_from_kernel_thread, newly created user threads to ret_from_fork.
> diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
> index 48e7e34e355e83eae8165957ba2eac05a8bf17df..2abc29e573810e000f2fef4646ddca0dbb80eabe 100644
> --- a/arch/loongarch/kernel/entry.S
> +++ b/arch/loongarch/kernel/entry.S
> @@ -77,24 +77,22 @@ SYM_CODE_START(handle_syscall)
>  SYM_CODE_END(handle_syscall)
>  _ASM_NOKPROBE(handle_syscall)
>
> -SYM_CODE_START(ret_from_fork)
> +SYM_CODE_START(ret_from_fork_asm)
>         UNWIND_HINT_REGS
> -       bl              schedule_tail           # a0 = struct task_struct *prev
> -       move            a0, sp
> -       bl              syscall_exit_to_user_mode
> +       move            a1, sp
> +       bl              ret_from_fork
>         RESTORE_STATIC
>         RESTORE_SOME
>         RESTORE_SP_AND_RET
> -SYM_CODE_END(ret_from_fork)
> +SYM_CODE_END(ret_from_fork_asm)
>
> -SYM_CODE_START(ret_from_kernel_thread)
> +SYM_CODE_START(ret_from_kernel_thread_asm)
>         UNWIND_HINT_REGS
> -       bl              schedule_tail           # a0 = struct task_struct *prev
> -       move            a0, s1
> -       jirl            ra, s0, 0
> -       move            a0, sp
> -       bl              syscall_exit_to_user_mode
> +       move            a1, sp
> +       move            a2, s0
> +       move            a3, s1
> +       bl              ret_from_kernel_thread
>         RESTORE_STATIC
>         RESTORE_SOME
>         RESTORE_SP_AND_RET
> -SYM_CODE_END(ret_from_kernel_thread)
> +SYM_CODE_END(ret_from_kernel_thread_asm)
> diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
> index 6e58f65455c7ca3eae2e88ed852c8655a6701e5c..16cc949fe43443d70f1d865ce04595c2d8c1615b 100644
> --- a/arch/loongarch/kernel/process.c
> +++ b/arch/loongarch/kernel/process.c
> @@ -14,6 +14,7 @@
>  #include <linux/init.h>
>  #include <linux/kernel.h>
>  #include <linux/errno.h>
> +#include <linux/entry-common.h>
>  #include <linux/sched.h>
>  #include <linux/sched/debug.h>
>  #include <linux/sched/task.h>
> @@ -33,6 +34,7 @@
>  #include <linux/prctl.h>
>  #include <linux/nmi.h>
>
> +#include <asm/asm-prototypes.h>
>  #include <asm/asm.h>
>  #include <asm/bootinfo.h>
>  #include <asm/cpu.h>
> @@ -47,6 +49,7 @@
>  #include <asm/pgtable.h>
>  #include <asm/processor.h>
>  #include <asm/reg.h>
> +#include <asm/switch_to.h>
>  #include <asm/unwind.h>
>  #include <asm/vdso.h>
>
> @@ -63,8 +66,9 @@ EXPORT_SYMBOL(__stack_chk_guard);
>  unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
>  EXPORT_SYMBOL(boot_option_idle_override);
>
> -asmlinkage void ret_from_fork(void);
> -asmlinkage void ret_from_kernel_thread(void);
> +asmlinkage void restore_and_ret(void);
> +asmlinkage void ret_from_fork_asm(void);
> +asmlinkage void ret_from_kernel_thread_asm(void);
>
>  void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
>  {
> @@ -138,6 +142,24 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
>         return 0;
>  }
>
> +asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
> +                                                                   struct pt_regs *regs,
> +                                                                   int (*fn)(void *),
> +                                                                   void *fn_arg)
> +{
> +       schedule_tail(prev);
> +
> +       fn(fn_arg);
> +
The two blank lines can be removed, and again, I prefer alphabetical
order, which means putting ret_from_fork() before
ret_from_kernel_thread().

Huacai

> +       syscall_exit_to_user_mode(regs);
> +}
> +
> +void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev, struct pt_regs *regs)
> +{
> +       schedule_tail(prev);
> +       syscall_exit_to_user_mode(regs);
> +}
> +
>  /*
>   * Copy architecture-specific thread state
>   */
> @@ -165,8 +187,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
>                 p->thread.reg03 = childksp;
>                 p->thread.reg23 = (unsigned long)args->fn;
>                 p->thread.reg24 = (unsigned long)args->fn_arg;
> -               p->thread.reg01 = (unsigned long)ret_from_kernel_thread;
> -               p->thread.sched_ra = (unsigned long)ret_from_kernel_thread;
> +               p->thread.reg01 = (unsigned long)ret_from_kernel_thread_asm;
> +               p->thread.sched_ra = (unsigned long)ret_from_kernel_thread_asm;
>                 memset(childregs, 0, sizeof(struct pt_regs));
>                 childregs->csr_euen = p->thread.csr_euen;
>                 childregs->csr_crmd = p->thread.csr_crmd;
> @@ -182,8 +204,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
>                 childregs->regs[3] = usp;
>
>         p->thread.reg03 = (unsigned long) childregs;
> -       p->thread.reg01 = (unsigned long) ret_from_fork;
> -       p->thread.sched_ra = (unsigned long) ret_from_fork;
> +       p->thread.reg01 = (unsigned long) ret_from_fork_asm;
> +       p->thread.sched_ra = (unsigned long) ret_from_fork_asm;
>
>         /*
>          * New tasks lose permission to use the fpu. This accelerates context
>
> --
> 2.43.0
>
Charlie Jenkins Jan. 24, 2025, 6:28 p.m. UTC | #2
On Fri, Jan 24, 2025 at 05:05:21PM +0800, Huacai Chen wrote:
> Hi, Charlie,
> 
> On Fri, Jan 24, 2025 at 3:15 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> >
> > Loongarch is the only architecture that calls
> We usually use "LoongArch" instead of "loongarch" or "Loongarch".
> 
> > syscall_exit_to_user_mode() from asm. Move the call into C so that this
> > function can be inlined across all architectures.
> >
> > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > ---
> >  arch/loongarch/include/asm/asm-prototypes.h |  5 +++++
> >  arch/loongarch/include/asm/switch_to.h      |  8 +++++++
> >  arch/loongarch/kernel/entry.S               | 22 +++++++++----------
> >  arch/loongarch/kernel/process.c             | 34 ++++++++++++++++++++++++-----
> >  4 files changed, 51 insertions(+), 18 deletions(-)
> >
> > diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
> > index 51f224bcfc654228ae423e9a066b25b35102a5b9..0195d4309fd29f94664d5f34247198c769033b1b 100644
> > --- a/arch/loongarch/include/asm/asm-prototypes.h
> > +++ b/arch/loongarch/include/asm/asm-prototypes.h
> > @@ -12,3 +12,8 @@ __int128_t __ashlti3(__int128_t a, int b);
> >  __int128_t __ashrti3(__int128_t a, int b);
> >  __int128_t __lshrti3(__int128_t a, int b);
> >  #endif
> > +
> > +asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
> > +                                                                   struct pt_regs *regs,
> > +                                                                   int (*fn)(void *),
> > +                                                                   void *fn_arg);
> It is a little strange that we only need to declare
> ret_from_kernel_thread() but not ret_from_fork().

Just an oversight by me, thank you for pointing that out.

> 
> > diff --git a/arch/loongarch/include/asm/switch_to.h b/arch/loongarch/include/asm/switch_to.h
> > index 5b225aff3ba21aa06d0713bc8e73e1b941389630..a1c5576f1fd145670e13038bec6dd390486099ab 100644
> > --- a/arch/loongarch/include/asm/switch_to.h
> > +++ b/arch/loongarch/include/asm/switch_to.h
> > @@ -26,6 +26,14 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev,
> >                         struct task_struct *next, struct thread_info *next_ti,
> >                         void *sched_ra, void *sched_cfa);
> >
> > +void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
> > +                                                        struct pt_regs *regs,
> > +                                                        int (*fn)(void *),
> > +                                                        void *fn_arg);
> > +
> > +void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev,
> > +                                               struct pt_regs *regs);
> > +
> I prefer alpha-betical order, which means put ret_from_fork() before
> ret_from_kernel_thread().

Makes sense!

> 
> >  /*
> >   * For newly created kernel threads switch_to() will return to
> >   * ret_from_kernel_thread, newly created user threads to ret_from_fork.
> > diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
> > index 48e7e34e355e83eae8165957ba2eac05a8bf17df..2abc29e573810e000f2fef4646ddca0dbb80eabe 100644
> > --- a/arch/loongarch/kernel/entry.S
> > +++ b/arch/loongarch/kernel/entry.S
> > @@ -77,24 +77,22 @@ SYM_CODE_START(handle_syscall)
> >  SYM_CODE_END(handle_syscall)
> >  _ASM_NOKPROBE(handle_syscall)
> >
> > -SYM_CODE_START(ret_from_fork)
> > +SYM_CODE_START(ret_from_fork_asm)
> >         UNWIND_HINT_REGS
> > -       bl              schedule_tail           # a0 = struct task_struct *prev
> > -       move            a0, sp
> > -       bl              syscall_exit_to_user_mode
> > +       move            a1, sp
> > +       bl              ret_from_fork
> >         RESTORE_STATIC
> >         RESTORE_SOME
> >         RESTORE_SP_AND_RET
> > -SYM_CODE_END(ret_from_fork)
> > +SYM_CODE_END(ret_from_fork_asm)
> >
> > -SYM_CODE_START(ret_from_kernel_thread)
> > +SYM_CODE_START(ret_from_kernel_thread_asm)
> >         UNWIND_HINT_REGS
> > -       bl              schedule_tail           # a0 = struct task_struct *prev
> > -       move            a0, s1
> > -       jirl            ra, s0, 0
> > -       move            a0, sp
> > -       bl              syscall_exit_to_user_mode
> > +       move            a1, sp
> > +       move            a2, s0
> > +       move            a3, s1
> > +       bl              ret_from_kernel_thread
> >         RESTORE_STATIC
> >         RESTORE_SOME
> >         RESTORE_SP_AND_RET
> > -SYM_CODE_END(ret_from_kernel_thread)
> > +SYM_CODE_END(ret_from_kernel_thread_asm)
> > diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
> > index 6e58f65455c7ca3eae2e88ed852c8655a6701e5c..16cc949fe43443d70f1d865ce04595c2d8c1615b 100644
> > --- a/arch/loongarch/kernel/process.c
> > +++ b/arch/loongarch/kernel/process.c
> > @@ -14,6 +14,7 @@
> >  #include <linux/init.h>
> >  #include <linux/kernel.h>
> >  #include <linux/errno.h>
> > +#include <linux/entry-common.h>
> >  #include <linux/sched.h>
> >  #include <linux/sched/debug.h>
> >  #include <linux/sched/task.h>
> > @@ -33,6 +34,7 @@
> >  #include <linux/prctl.h>
> >  #include <linux/nmi.h>
> >
> > +#include <asm/asm-prototypes.h>
> >  #include <asm/asm.h>
> >  #include <asm/bootinfo.h>
> >  #include <asm/cpu.h>
> > @@ -47,6 +49,7 @@
> >  #include <asm/pgtable.h>
> >  #include <asm/processor.h>
> >  #include <asm/reg.h>
> > +#include <asm/switch_to.h>
> >  #include <asm/unwind.h>
> >  #include <asm/vdso.h>
> >
> > @@ -63,8 +66,9 @@ EXPORT_SYMBOL(__stack_chk_guard);
> >  unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
> >  EXPORT_SYMBOL(boot_option_idle_override);
> >
> > -asmlinkage void ret_from_fork(void);
> > -asmlinkage void ret_from_kernel_thread(void);
> > +asmlinkage void restore_and_ret(void);
> > +asmlinkage void ret_from_fork_asm(void);
> > +asmlinkage void ret_from_kernel_thread_asm(void);
> >
> >  void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
> >  {
> > @@ -138,6 +142,24 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
> >         return 0;
> >  }
> >
> > +asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
> > +                                                                   struct pt_regs *regs,
> > +                                                                   int (*fn)(void *),
> > +                                                                   void *fn_arg)
> > +{
> > +       schedule_tail(prev);
> > +
> > +       fn(fn_arg);
> > +
> The two blank lines can be removed, and again, I prefer alpha-betical
> order, which means put ret_from_fork() before
> ret_from_kernel_thread().

Will update in the next version.

- Charlie

> 
> Huacai
> 
> > +       syscall_exit_to_user_mode(regs);
> > +}
> > +
> > +void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev, struct pt_regs *regs)
> > +{
> > +       schedule_tail(prev);
> > +       syscall_exit_to_user_mode(regs);
> > +}
> > +
> >  /*
> >   * Copy architecture-specific thread state
> >   */
> > @@ -165,8 +187,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> >                 p->thread.reg03 = childksp;
> >                 p->thread.reg23 = (unsigned long)args->fn;
> >                 p->thread.reg24 = (unsigned long)args->fn_arg;
> > -               p->thread.reg01 = (unsigned long)ret_from_kernel_thread;
> > -               p->thread.sched_ra = (unsigned long)ret_from_kernel_thread;
> > +               p->thread.reg01 = (unsigned long)ret_from_kernel_thread_asm;
> > +               p->thread.sched_ra = (unsigned long)ret_from_kernel_thread_asm;
> >                 memset(childregs, 0, sizeof(struct pt_regs));
> >                 childregs->csr_euen = p->thread.csr_euen;
> >                 childregs->csr_crmd = p->thread.csr_crmd;
> > @@ -182,8 +204,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> >                 childregs->regs[3] = usp;
> >
> >         p->thread.reg03 = (unsigned long) childregs;
> > -       p->thread.reg01 = (unsigned long) ret_from_fork;
> > -       p->thread.sched_ra = (unsigned long) ret_from_fork;
> > +       p->thread.reg01 = (unsigned long) ret_from_fork_asm;
> > +       p->thread.sched_ra = (unsigned long) ret_from_fork_asm;
> >
> >         /*
> >          * New tasks lose permission to use the fpu. This accelerates context
> >
> > --
> > 2.43.0
> >
Charlie Jenkins Jan. 24, 2025, 10:23 p.m. UTC | #3
On Fri, Jan 24, 2025 at 10:28:50AM -0800, Charlie Jenkins wrote:
> On Fri, Jan 24, 2025 at 05:05:21PM +0800, Huacai Chen wrote:
> > Hi, Charlie,
> > 
> > On Fri, Jan 24, 2025 at 3:15 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> > >
> > > Loongarch is the only architecture that calls
> > We usually use "LoongArch" instead of "loongarch" or "Loongarch".
> > 
> > > syscall_exit_to_user_mode() from asm. Move the call into C so that this
> > > function can be inlined across all architectures.
> > >
> > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> > > ---
> > >  arch/loongarch/include/asm/asm-prototypes.h |  5 +++++
> > >  arch/loongarch/include/asm/switch_to.h      |  8 +++++++
> > >  arch/loongarch/kernel/entry.S               | 22 +++++++++----------
> > >  arch/loongarch/kernel/process.c             | 34 ++++++++++++++++++++++++-----
> > >  4 files changed, 51 insertions(+), 18 deletions(-)
> > >
> > > diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
> > > index 51f224bcfc654228ae423e9a066b25b35102a5b9..0195d4309fd29f94664d5f34247198c769033b1b 100644
> > > --- a/arch/loongarch/include/asm/asm-prototypes.h
> > > +++ b/arch/loongarch/include/asm/asm-prototypes.h
> > > @@ -12,3 +12,8 @@ __int128_t __ashlti3(__int128_t a, int b);
> > >  __int128_t __ashrti3(__int128_t a, int b);
> > >  __int128_t __lshrti3(__int128_t a, int b);
> > >  #endif
> > > +
> > > +asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
> > > +                                                                   struct pt_regs *regs,
> > > +                                                                   int (*fn)(void *),
> > > +                                                                   void *fn_arg);
> > It is a little strange that we only need to declare
> > ret_from_kernel_thread() but not ret_from_fork().
> 
> Just an oversight by me, thank you for pointing that out.

Oh, I see what I did: I meant to put these functions in asm-prototypes.h
and not in switch_to.h, but I ended up putting them in both.

- Charlie
diff mbox series

Patch

diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
index 51f224bcfc654228ae423e9a066b25b35102a5b9..0195d4309fd29f94664d5f34247198c769033b1b 100644
--- a/arch/loongarch/include/asm/asm-prototypes.h
+++ b/arch/loongarch/include/asm/asm-prototypes.h
@@ -12,3 +12,8 @@  __int128_t __ashlti3(__int128_t a, int b);
 __int128_t __ashrti3(__int128_t a, int b);
 __int128_t __lshrti3(__int128_t a, int b);
 #endif
+
+asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
+								    struct pt_regs *regs,
+								    int (*fn)(void *),
+								    void *fn_arg);
diff --git a/arch/loongarch/include/asm/switch_to.h b/arch/loongarch/include/asm/switch_to.h
index 5b225aff3ba21aa06d0713bc8e73e1b941389630..a1c5576f1fd145670e13038bec6dd390486099ab 100644
--- a/arch/loongarch/include/asm/switch_to.h
+++ b/arch/loongarch/include/asm/switch_to.h
@@ -26,6 +26,14 @@  extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev,
 			struct task_struct *next, struct thread_info *next_ti,
 			void *sched_ra, void *sched_cfa);
 
+void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
+							 struct pt_regs *regs,
+							 int (*fn)(void *),
+							 void *fn_arg);
+
+void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev,
+						struct pt_regs *regs);
+
 /*
  * For newly created kernel threads switch_to() will return to
  * ret_from_kernel_thread, newly created user threads to ret_from_fork.
diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
index 48e7e34e355e83eae8165957ba2eac05a8bf17df..2abc29e573810e000f2fef4646ddca0dbb80eabe 100644
--- a/arch/loongarch/kernel/entry.S
+++ b/arch/loongarch/kernel/entry.S
@@ -77,24 +77,22 @@  SYM_CODE_START(handle_syscall)
 SYM_CODE_END(handle_syscall)
 _ASM_NOKPROBE(handle_syscall)
 
-SYM_CODE_START(ret_from_fork)
+SYM_CODE_START(ret_from_fork_asm)
 	UNWIND_HINT_REGS
-	bl		schedule_tail		# a0 = struct task_struct *prev
-	move		a0, sp
-	bl 		syscall_exit_to_user_mode
+	move		a1, sp
+	bl 		ret_from_fork
 	RESTORE_STATIC
 	RESTORE_SOME
 	RESTORE_SP_AND_RET
-SYM_CODE_END(ret_from_fork)
+SYM_CODE_END(ret_from_fork_asm)
 
-SYM_CODE_START(ret_from_kernel_thread)
+SYM_CODE_START(ret_from_kernel_thread_asm)
 	UNWIND_HINT_REGS
-	bl		schedule_tail		# a0 = struct task_struct *prev
-	move		a0, s1
-	jirl		ra, s0, 0
-	move		a0, sp
-	bl		syscall_exit_to_user_mode
+	move		a1, sp
+	move		a2, s0
+	move		a3, s1
+	bl		ret_from_kernel_thread
 	RESTORE_STATIC
 	RESTORE_SOME
 	RESTORE_SP_AND_RET
-SYM_CODE_END(ret_from_kernel_thread)
+SYM_CODE_END(ret_from_kernel_thread_asm)
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 6e58f65455c7ca3eae2e88ed852c8655a6701e5c..16cc949fe43443d70f1d865ce04595c2d8c1615b 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -14,6 +14,7 @@ 
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
+#include <linux/entry-common.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/task.h>
@@ -33,6 +34,7 @@ 
 #include <linux/prctl.h>
 #include <linux/nmi.h>
 
+#include <asm/asm-prototypes.h>
 #include <asm/asm.h>
 #include <asm/bootinfo.h>
 #include <asm/cpu.h>
@@ -47,6 +49,7 @@ 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/reg.h>
+#include <asm/switch_to.h>
 #include <asm/unwind.h>
 #include <asm/vdso.h>
 
@@ -63,8 +66,9 @@  EXPORT_SYMBOL(__stack_chk_guard);
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-asmlinkage void ret_from_fork(void);
-asmlinkage void ret_from_kernel_thread(void);
+asmlinkage void restore_and_ret(void);
+asmlinkage void ret_from_fork_asm(void);
+asmlinkage void ret_from_kernel_thread_asm(void);
 
 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 {
@@ -138,6 +142,24 @@  int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	return 0;
 }
 
+asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev,
+								    struct pt_regs *regs,
+								    int (*fn)(void *),
+								    void *fn_arg)
+{
+	schedule_tail(prev);
+
+	fn(fn_arg);
+
+	syscall_exit_to_user_mode(regs);
+}
+
+void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev, struct pt_regs *regs)
+{
+	schedule_tail(prev);
+	syscall_exit_to_user_mode(regs);
+}
+
 /*
  * Copy architecture-specific thread state
  */
@@ -165,8 +187,8 @@  int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		p->thread.reg03 = childksp;
 		p->thread.reg23 = (unsigned long)args->fn;
 		p->thread.reg24 = (unsigned long)args->fn_arg;
-		p->thread.reg01 = (unsigned long)ret_from_kernel_thread;
-		p->thread.sched_ra = (unsigned long)ret_from_kernel_thread;
+		p->thread.reg01 = (unsigned long)ret_from_kernel_thread_asm;
+		p->thread.sched_ra = (unsigned long)ret_from_kernel_thread_asm;
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->csr_euen = p->thread.csr_euen;
 		childregs->csr_crmd = p->thread.csr_crmd;
@@ -182,8 +204,8 @@  int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		childregs->regs[3] = usp;
 
 	p->thread.reg03 = (unsigned long) childregs;
-	p->thread.reg01 = (unsigned long) ret_from_fork;
-	p->thread.sched_ra = (unsigned long) ret_from_fork;
+	p->thread.reg01 = (unsigned long) ret_from_fork_asm;
+	p->thread.sched_ra = (unsigned long) ret_from_fork_asm;
 
 	/*
 	 * New tasks lose permission to use the fpu. This accelerates context