diff mbox series

[2/4] riscv: consolidate ret_from_kernel_thread into ret_from_fork

Message ID 20220925175356.681-3-jszhang@kernel.org (mailing list archive)
State Superseded
Headers show
Series riscv: entry: further clean up and VMAP_STACK fix | expand

Commit Message

Jisheng Zhang Sept. 25, 2022, 5:53 p.m. UTC
ret_from_kernel_thread() behaves similarly to ret_from_fork(); the
only difference is whether fn(arg) is called or not. This can be
achieved by testing whether fn is NULL, i.e. whether s0 is 0.

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
---
 arch/riscv/kernel/entry.S   | 11 +++--------
 arch/riscv/kernel/process.c |  5 ++---
 2 files changed, 5 insertions(+), 11 deletions(-)

Comments

Guo Ren Sept. 25, 2022, 11:25 p.m. UTC | #1
On Mon, Sep 26, 2022 at 2:03 AM Jisheng Zhang <jszhang@kernel.org> wrote:
>
> The ret_from_kernel_thread() behaves similarly with ret_from_fork(),
> the only difference is whether call the fn(arg) or not, this can be
> acchieved by testing fn is NULL or not, I.E s0 is 0 or not.
>
> Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> ---
>  arch/riscv/kernel/entry.S   | 11 +++--------
>  arch/riscv/kernel/process.c |  5 ++---
>  2 files changed, 5 insertions(+), 11 deletions(-)
>
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index 2207cf44a3bc..a3e1ed2fa2ac 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -323,20 +323,15 @@ END(handle_kernel_stack_overflow)
>
>  ENTRY(ret_from_fork)
>         call schedule_tail
> -       move a0, sp /* pt_regs */
> -       la ra, ret_from_exception
> -       tail syscall_exit_to_user_mode
> -ENDPROC(ret_from_fork)
> -
> -ENTRY(ret_from_kernel_thread)
> -       call schedule_tail
> +       beqz s0, 1f     /* not from kernel thread */
We can't use s0 as the condition for ret_from_fork/ret_from_kernel_thread.
s0 = 0 is also a valid value for ret_from_fork.

        /* p->thread holds context to be restored by __switch_to() */
        if (unlikely(args->fn)) {
                /* Kernel thread */
                memset(childregs, 0, sizeof(struct pt_regs));
                childregs->gp = gp_in_global;
                /* Supervisor/Machine, irqs on: */
                childregs->status = SR_PP | SR_PIE;

                p->thread.ra = (unsigned long)ret_from_kernel_thread;
                p->thread.s[0] = (unsigned long)args->fn;
                p->thread.s[1] = (unsigned long)args->fn_arg;
        } else {
                *childregs = *(current_pt_regs());
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                if (usp) /* User fork */
                        childregs->sp = usp;
                if (clone_flags & CLONE_SETTLS)
                        childregs->tp = tls;
                childregs->a0 = 0; /* Return value of fork() */
                p->thread.ra = (unsigned long)ret_from_fork;
        }
        p->thread.sp = (unsigned long)childregs; /* kernel sp */

>         /* Call fn(arg) */
>         move a0, s1
>         jalr s0
> +1:
>         move a0, sp /* pt_regs */
>         la ra, ret_from_exception
>         tail syscall_exit_to_user_mode
> -ENDPROC(ret_from_kernel_thread)
> +ENDPROC(ret_from_fork)
>
>  #ifdef CONFIG_IRQ_STACKS
>  ENTRY(call_on_stack)
> diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
> index ceb9ebab6558..67e7cd123ceb 100644
> --- a/arch/riscv/kernel/process.c
> +++ b/arch/riscv/kernel/process.c
> @@ -34,7 +34,6 @@ EXPORT_SYMBOL(__stack_chk_guard);
>  #endif
>
>  extern asmlinkage void ret_from_fork(void);
> -extern asmlinkage void ret_from_kernel_thread(void);
>
>  void arch_cpu_idle(void)
>  {
> @@ -172,7 +171,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
>                 /* Supervisor/Machine, irqs on: */
>                 childregs->status = SR_PP | SR_PIE;
>
> -               p->thread.ra = (unsigned long)ret_from_kernel_thread;
>                 p->thread.s[0] = (unsigned long)args->fn;
>                 p->thread.s[1] = (unsigned long)args->fn_arg;
>         } else {
> @@ -182,8 +180,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
>                 if (clone_flags & CLONE_SETTLS)
>                         childregs->tp = tls;
>                 childregs->a0 = 0; /* Return value of fork() */
> -               p->thread.ra = (unsigned long)ret_from_fork;
> +               p->thread.s[0] = 0;
>         }
> +       p->thread.ra = (unsigned long)ret_from_fork;
>         p->thread.sp = (unsigned long)childregs; /* kernel sp */
>         return 0;
>  }
> --
> 2.34.1
>
Jisheng Zhang Sept. 26, 2022, 4:05 p.m. UTC | #2
On Mon, Sep 26, 2022 at 07:25:30AM +0800, Guo Ren wrote:
> On Mon, Sep 26, 2022 at 2:03 AM Jisheng Zhang <jszhang@kernel.org> wrote:
> >
> > The ret_from_kernel_thread() behaves similarly with ret_from_fork(),
> > the only difference is whether call the fn(arg) or not, this can be
> > acchieved by testing fn is NULL or not, I.E s0 is 0 or not.
> >
> > Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> > ---
> >  arch/riscv/kernel/entry.S   | 11 +++--------
> >  arch/riscv/kernel/process.c |  5 ++---
> >  2 files changed, 5 insertions(+), 11 deletions(-)
> >
> > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > index 2207cf44a3bc..a3e1ed2fa2ac 100644
> > --- a/arch/riscv/kernel/entry.S
> > +++ b/arch/riscv/kernel/entry.S
> > @@ -323,20 +323,15 @@ END(handle_kernel_stack_overflow)
> >
> >  ENTRY(ret_from_fork)
> >         call schedule_tail
> > -       move a0, sp /* pt_regs */
> > -       la ra, ret_from_exception
> > -       tail syscall_exit_to_user_mode
> > -ENDPROC(ret_from_fork)
> > -
> > -ENTRY(ret_from_kernel_thread)
> > -       call schedule_tail
> > +       beqz s0, 1f     /* not from kernel thread */

Hi Guo,

> We can't use s0 as condition for ret_from_fork/ret_from_kernel_thread.
> The s0=0 is also okay for ret_from_fork.

IIUC, in ret_from_fork, s0 comes from p->thread.s[0] rather than from
s0 in pt_regs.

> 
>         /* p->thread holds context to be restored by __switch_to() */
>         if (unlikely(args->fn)) {
>                 /* Kernel thread */
>                 memset(childregs, 0, sizeof(struct pt_regs));
>                 childregs->gp = gp_in_global;
>                 /* Supervisor/Machine, irqs on: */
>                 childregs->status = SR_PP | SR_PIE;
> 
>                 p->thread.ra = (unsigned long)ret_from_kernel_thread;
>                 p->thread.s[0] = (unsigned long)args->fn;
>                 p->thread.s[1] = (unsigned long)args->fn_arg;
>         } else {
>                 *childregs = *(current_pt_regs());
>                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>                 if (usp) /* User fork */
>                         childregs->sp = usp;
>                 if (clone_flags & CLONE_SETTLS)
>                         childregs->tp = tls;
>                 childregs->a0 = 0; /* Return value of fork() */
>                 p->thread.ra = (unsigned long)ret_from_fork;
>         }
>         p->thread.sp = (unsigned long)childregs; /* kernel sp */
> 

<snip>

> > @@ -182,8 +180,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> >                 if (clone_flags & CLONE_SETTLS)
> >                         childregs->tp = tls;
> >                 childregs->a0 = 0; /* Return value of fork() */
> > -               p->thread.ra = (unsigned long)ret_from_fork;
> > +               p->thread.s[0] = 0;

Here we assign 0 to p->thread.s[0]

Thanks
Guo Ren Sept. 26, 2022, 11:55 p.m. UTC | #3
On Tue, Sep 27, 2022 at 12:14 AM Jisheng Zhang <jszhang@kernel.org> wrote:
>
> On Mon, Sep 26, 2022 at 07:25:30AM +0800, Guo Ren wrote:
> > On Mon, Sep 26, 2022 at 2:03 AM Jisheng Zhang <jszhang@kernel.org> wrote:
> > >
> > > The ret_from_kernel_thread() behaves similarly with ret_from_fork(),
> > > the only difference is whether call the fn(arg) or not, this can be
> > > acchieved by testing fn is NULL or not, I.E s0 is 0 or not.
> > >
> > > Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> > > ---
> > >  arch/riscv/kernel/entry.S   | 11 +++--------
> > >  arch/riscv/kernel/process.c |  5 ++---
> > >  2 files changed, 5 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > > index 2207cf44a3bc..a3e1ed2fa2ac 100644
> > > --- a/arch/riscv/kernel/entry.S
> > > +++ b/arch/riscv/kernel/entry.S
> > > @@ -323,20 +323,15 @@ END(handle_kernel_stack_overflow)
> > >
> > >  ENTRY(ret_from_fork)
> > >         call schedule_tail
> > > -       move a0, sp /* pt_regs */
> > > -       la ra, ret_from_exception
> > > -       tail syscall_exit_to_user_mode
> > > -ENDPROC(ret_from_fork)
> > > -
> > > -ENTRY(ret_from_kernel_thread)
> > > -       call schedule_tail
> > > +       beqz s0, 1f     /* not from kernel thread */
>
> Hi Guo,
>
> > We can't use s0 as condition for ret_from_fork/ret_from_kernel_thread.
> > The s0=0 is also okay for ret_from_fork.
>
> IIUC, in ret_from_fork, the s0 comes p->thread.s[0] rather than s0 in
> pt_regs.
Yes, you are correct.

>
> >
> >         /* p->thread holds context to be restored by __switch_to() */
> >         if (unlikely(args->fn)) {
> >                 /* Kernel thread */
> >                 memset(childregs, 0, sizeof(struct pt_regs));
> >                 childregs->gp = gp_in_global;
> >                 /* Supervisor/Machine, irqs on: */
> >                 childregs->status = SR_PP | SR_PIE;
> >
> >                 p->thread.ra = (unsigned long)ret_from_kernel_thread;
> >                 p->thread.s[0] = (unsigned long)args->fn;
> >                 p->thread.s[1] = (unsigned long)args->fn_arg;
> >         } else {
> >                 *childregs = *(current_pt_regs());
> >                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Oh, I'm wrong, It's switch_to_restore -> exception_restore.

> >                 if (usp) /* User fork */
> >                         childregs->sp = usp;
> >                 if (clone_flags & CLONE_SETTLS)
> >                         childregs->tp = tls;
> >                 childregs->a0 = 0; /* Return value of fork() */
> >                 p->thread.ra = (unsigned long)ret_from_fork;
> >         }
> >         p->thread.sp = (unsigned long)childregs; /* kernel sp */
> >
>
> <snip>
>
> > > @@ -182,8 +180,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> > >                 if (clone_flags & CLONE_SETTLS)
> > >                         childregs->tp = tls;
> > >                 childregs->a0 = 0; /* Return value of fork() */
> > > -               p->thread.ra = (unsigned long)ret_from_fork;
> > > +               p->thread.s[0] = 0;
>
> Here we assign 0 to p->thread.s[0]
I missed that.

Merging thread & fork is not a good idea, and using fp as the flag is too implicit.

➜  linux git:(rv64sv32) grep ret_from_fork arch -r | grep entry.S
arch/arc/kernel/entry.S:ENTRY(ret_from_fork)
arch/arc/kernel/entry.S:END(ret_from_fork)
arch/csky/kernel/entry.S:ENTRY(ret_from_fork)
arch/x86/kernel/process_32.c: * the task-switch, and shows up in
ret_from_fork in entry.S,
arch/alpha/kernel/entry.S:      .globl  ret_from_fork
arch/alpha/kernel/entry.S:      .ent    ret_from_fork
arch/alpha/kernel/entry.S:ret_from_fork:
arch/alpha/kernel/entry.S:.end ret_from_fork
arch/loongarch/kernel/entry.S:SYM_CODE_START(ret_from_fork)
arch/loongarch/kernel/entry.S:SYM_CODE_END(ret_from_fork)
arch/hexagon/kernel/vm_entry.S: .globl ret_from_fork
arch/hexagon/kernel/vm_entry.S:ret_from_fork:
arch/microblaze/kernel/entry.S:   (copy_thread makes ret_from_fork the
return address in each new thread's
arch/microblaze/kernel/entry.S:C_ENTRY(ret_from_fork):
arch/m68k/kernel/entry.S:ENTRY(ret_from_fork)
arch/arm64/kernel/entry.S:SYM_CODE_START(ret_from_fork)
arch/arm64/kernel/entry.S:SYM_CODE_END(ret_from_fork)
arch/arm64/kernel/entry.S:NOKPROBE(ret_from_fork)
arch/riscv/kernel/entry.S:ENTRY(ret_from_fork)
arch/riscv/kernel/entry.S:ENDPROC(ret_from_fork)
arch/s390/kernel/entry.S:# a new process exits the kernel with ret_from_fork
arch/s390/kernel/entry.S:ENTRY(ret_from_fork)
arch/s390/kernel/entry.S:       brasl   %r14,__ret_from_fork
arch/s390/kernel/entry.S:ENDPROC(ret_from_fork)
arch/mips/kernel/entry.S:FEXPORT(ret_from_fork)
arch/openrisc/kernel/entry.S:   /* All syscalls return here... just
pay attention to ret_from_fork
arch/openrisc/kernel/entry.S:ENTRY(ret_from_fork)
arch/openrisc/kernel/entry.S:    * that may be either schedule(),
ret_from_fork(), or
arch/nios2/kernel/entry.S:ENTRY(ret_from_fork)
arch/xtensa/kernel/entry.S:ENTRY(ret_from_fork)
arch/xtensa/kernel/entry.S:ENDPROC(ret_from_fork)
arch/sparc/kernel/entry.S:      .globl  ret_from_fork
arch/sparc/kernel/entry.S:ret_from_fork:
➜  linux git:(rv64sv32) grep ret_from_kernel_thread arch -r | grep entry.S
arch/csky/kernel/entry.S:ENTRY(ret_from_kernel_thread)
arch/alpha/kernel/entry.S:      .globl  ret_from_kernel_thread
arch/alpha/kernel/entry.S:      .ent    ret_from_kernel_thread
arch/alpha/kernel/entry.S:ret_from_kernel_thread:
arch/alpha/kernel/entry.S:.end ret_from_kernel_thread
arch/parisc/kernel/entry.S:ENTRY(ret_from_kernel_thread)
arch/parisc/kernel/entry.S:END(ret_from_kernel_thread)
arch/loongarch/kernel/entry.S:SYM_CODE_START(ret_from_kernel_thread)
arch/loongarch/kernel/entry.S:SYM_CODE_END(ret_from_kernel_thread)
arch/microblaze/kernel/entry.S:C_ENTRY(ret_from_kernel_thread):
arch/m68k/kernel/entry.S:ENTRY(ret_from_kernel_thread)
arch/riscv/kernel/entry.S:ENTRY(ret_from_kernel_thread)
arch/riscv/kernel/entry.S:ENDPROC(ret_from_kernel_thread)
arch/mips/kernel/entry.S:FEXPORT(ret_from_kernel_thread)
arch/openrisc/kernel/entry.S:    * ret_from_kernel_thread().  If we
are returning to a new thread,
arch/nios2/kernel/entry.S:ENTRY(ret_from_kernel_thread)
arch/xtensa/kernel/entry.S:ENTRY(ret_from_kernel_thread)
arch/xtensa/kernel/entry.S:ENDPROC(ret_from_kernel_thread)
arch/sparc/kernel/entry.S:      .globl  ret_from_kernel_thread
arch/sparc/kernel/entry.S:ret_from_kernel_thread:

Many architectures use a similar style. If you want to continue the
patch, I think you should first rename ret_from_fork properly, and
give an explicit flag definition, not just setting fp = 0.

>
> Thanks
Jisheng Zhang Sept. 28, 2022, 4:40 p.m. UTC | #4
On Tue, Sep 27, 2022 at 07:55:27AM +0800, Guo Ren wrote:
> On Tue, Sep 27, 2022 at 12:14 AM Jisheng Zhang <jszhang@kernel.org> wrote:
> >
> > On Mon, Sep 26, 2022 at 07:25:30AM +0800, Guo Ren wrote:
> > > On Mon, Sep 26, 2022 at 2:03 AM Jisheng Zhang <jszhang@kernel.org> wrote:
> > > >
> > > > The ret_from_kernel_thread() behaves similarly with ret_from_fork(),
> > > > the only difference is whether call the fn(arg) or not, this can be
> > > > acchieved by testing fn is NULL or not, I.E s0 is 0 or not.
> > > >
> > > > Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> > > > ---
> > > >  arch/riscv/kernel/entry.S   | 11 +++--------
> > > >  arch/riscv/kernel/process.c |  5 ++---
> > > >  2 files changed, 5 insertions(+), 11 deletions(-)
> > > >
> > > > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > > > index 2207cf44a3bc..a3e1ed2fa2ac 100644
> > > > --- a/arch/riscv/kernel/entry.S
> > > > +++ b/arch/riscv/kernel/entry.S
> > > > @@ -323,20 +323,15 @@ END(handle_kernel_stack_overflow)
> > > >
> > > >  ENTRY(ret_from_fork)
> > > >         call schedule_tail
> > > > -       move a0, sp /* pt_regs */
> > > > -       la ra, ret_from_exception
> > > > -       tail syscall_exit_to_user_mode
> > > > -ENDPROC(ret_from_fork)
> > > > -
> > > > -ENTRY(ret_from_kernel_thread)
> > > > -       call schedule_tail
> > > > +       beqz s0, 1f     /* not from kernel thread */
> >
> > Hi Guo,
> >
> > > We can't use s0 as condition for ret_from_fork/ret_from_kernel_thread.
> > > The s0=0 is also okay for ret_from_fork.
> >
> > IIUC, in ret_from_fork, the s0 comes p->thread.s[0] rather than s0 in
> > pt_regs.
> Yes, you are correct.
> 
> >
> > >
> > >         /* p->thread holds context to be restored by __switch_to() */
> > >         if (unlikely(args->fn)) {
> > >                 /* Kernel thread */
> > >                 memset(childregs, 0, sizeof(struct pt_regs));
> > >                 childregs->gp = gp_in_global;
> > >                 /* Supervisor/Machine, irqs on: */
> > >                 childregs->status = SR_PP | SR_PIE;
> > >
> > >                 p->thread.ra = (unsigned long)ret_from_kernel_thread;
> > >                 p->thread.s[0] = (unsigned long)args->fn;
> > >                 p->thread.s[1] = (unsigned long)args->fn_arg;
> > >         } else {
> > >                 *childregs = *(current_pt_regs());
> > >                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> Oh, I'm wrong, It's switch_to_restore -> exception_restore.
> 
> > >                 if (usp) /* User fork */
> > >                         childregs->sp = usp;
> > >                 if (clone_flags & CLONE_SETTLS)
> > >                         childregs->tp = tls;
> > >                 childregs->a0 = 0; /* Return value of fork() */
> > >                 p->thread.ra = (unsigned long)ret_from_fork;
> > >         }
> > >         p->thread.sp = (unsigned long)childregs; /* kernel sp */
> > >
> >
> > <snip>
> >
> > > > @@ -182,8 +180,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> > > >                 if (clone_flags & CLONE_SETTLS)
> > > >                         childregs->tp = tls;
> > > >                 childregs->a0 = 0; /* Return value of fork() */
> > > > -               p->thread.ra = (unsigned long)ret_from_fork;
> > > > +               p->thread.s[0] = 0;
> >
> > Here we assign 0 to p->thread.s[0]
> I missed that.
> 
> Merge thread & fork is not a good idea, and using fp as the flag is so implicit.
> 
> ➜  linux git:(rv64sv32) grep ret_from_fork arch -r | grep entry.S
> arch/arc/kernel/entry.S:ENTRY(ret_from_fork)
> arch/arc/kernel/entry.S:END(ret_from_fork)
> arch/csky/kernel/entry.S:ENTRY(ret_from_fork)
> arch/x86/kernel/process_32.c: * the task-switch, and shows up in
> ret_from_fork in entry.S,
> arch/alpha/kernel/entry.S:      .globl  ret_from_fork
> arch/alpha/kernel/entry.S:      .ent    ret_from_fork
> arch/alpha/kernel/entry.S:ret_from_fork:
> arch/alpha/kernel/entry.S:.end ret_from_fork
> arch/loongarch/kernel/entry.S:SYM_CODE_START(ret_from_fork)
> arch/loongarch/kernel/entry.S:SYM_CODE_END(ret_from_fork)
> arch/hexagon/kernel/vm_entry.S: .globl ret_from_fork
> arch/hexagon/kernel/vm_entry.S:ret_from_fork:
> arch/microblaze/kernel/entry.S:   (copy_thread makes ret_from_fork the
> return address in each new thread's
> arch/microblaze/kernel/entry.S:C_ENTRY(ret_from_fork):
> arch/m68k/kernel/entry.S:ENTRY(ret_from_fork)
> arch/arm64/kernel/entry.S:SYM_CODE_START(ret_from_fork)
> arch/arm64/kernel/entry.S:SYM_CODE_END(ret_from_fork)
> arch/arm64/kernel/entry.S:NOKPROBE(ret_from_fork)
> arch/riscv/kernel/entry.S:ENTRY(ret_from_fork)
> arch/riscv/kernel/entry.S:ENDPROC(ret_from_fork)
> arch/s390/kernel/entry.S:# a new process exits the kernel with ret_from_fork
> arch/s390/kernel/entry.S:ENTRY(ret_from_fork)
> arch/s390/kernel/entry.S:       brasl   %r14,__ret_from_fork
> arch/s390/kernel/entry.S:ENDPROC(ret_from_fork)
> arch/mips/kernel/entry.S:FEXPORT(ret_from_fork)
> arch/openrisc/kernel/entry.S:   /* All syscalls return here... just
> pay attention to ret_from_fork
> arch/openrisc/kernel/entry.S:ENTRY(ret_from_fork)
> arch/openrisc/kernel/entry.S:    * that may be either schedule(),
> ret_from_fork(), or
> arch/nios2/kernel/entry.S:ENTRY(ret_from_fork)
> arch/xtensa/kernel/entry.S:ENTRY(ret_from_fork)
> arch/xtensa/kernel/entry.S:ENDPROC(ret_from_fork)
> arch/sparc/kernel/entry.S:      .globl  ret_from_fork
> arch/sparc/kernel/entry.S:ret_from_fork:
> ➜  linux git:(rv64sv32) grep ret_from_kernel_thread arch -r | grep entry.S
> arch/csky/kernel/entry.S:ENTRY(ret_from_kernel_thread)
> arch/alpha/kernel/entry.S:      .globl  ret_from_kernel_thread
> arch/alpha/kernel/entry.S:      .ent    ret_from_kernel_thread
> arch/alpha/kernel/entry.S:ret_from_kernel_thread:
> arch/alpha/kernel/entry.S:.end ret_from_kernel_thread
> arch/parisc/kernel/entry.S:ENTRY(ret_from_kernel_thread)
> arch/parisc/kernel/entry.S:END(ret_from_kernel_thread)
> arch/loongarch/kernel/entry.S:SYM_CODE_START(ret_from_kernel_thread)
> arch/loongarch/kernel/entry.S:SYM_CODE_END(ret_from_kernel_thread)
> arch/microblaze/kernel/entry.S:C_ENTRY(ret_from_kernel_thread):
> arch/m68k/kernel/entry.S:ENTRY(ret_from_kernel_thread)
> arch/riscv/kernel/entry.S:ENTRY(ret_from_kernel_thread)
> arch/riscv/kernel/entry.S:ENDPROC(ret_from_kernel_thread)
> arch/mips/kernel/entry.S:FEXPORT(ret_from_kernel_thread)
> arch/openrisc/kernel/entry.S:    * ret_from_kernel_thread().  If we
> are returning to a new thread,
> arch/nios2/kernel/entry.S:ENTRY(ret_from_kernel_thread)
> arch/xtensa/kernel/entry.S:ENTRY(ret_from_kernel_thread)
> arch/xtensa/kernel/entry.S:ENDPROC(ret_from_kernel_thread)
> arch/sparc/kernel/entry.S:      .globl  ret_from_kernel_thread
> arch/sparc/kernel/entry.S:ret_from_kernel_thread:
> 
> Many architectures use a similar style. If you want to continue the
> patch, I think you should first rename ret_from_fork properly, and
> give an explicit flag definition, not just setting fp = 0.
> 

The list above also shows that many architectures don't have a
ret_from_kernel_thread; I think the reason is simply that it behaves
similarly to ret_from_fork.
As for the flag, IMHO, we may have missed something: the s[12]
array in thread_struct is not cleared on a user fork, so s[12] may
contain random kernel memory content, which may eventually be leaked
to userspace. This is a security hole.

A trivial memset(0) patch can fix it; after this fix, checking
s[0] is straightforward.

diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 67e7cd123ceb..50a0f7e4327c 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -174,6 +174,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
                p->thread.s[0] = (unsigned long)args->fn;
                p->thread.s[1] = (unsigned long)args->fn_arg;
        } else {
+               memset(&p->thread.s, 0, sizeof(p->thread.s));
                *childregs = *(current_pt_regs());
                if (usp) /* User fork */
                        childregs->sp = usp;
Guo Ren Sept. 30, 2022, 11:42 a.m. UTC | #5
On Thu, Sep 29, 2022 at 12:49 AM Jisheng Zhang <jszhang@kernel.org> wrote:
>
> On Tue, Sep 27, 2022 at 07:55:27AM +0800, Guo Ren wrote:
> > On Tue, Sep 27, 2022 at 12:14 AM Jisheng Zhang <jszhang@kernel.org> wrote:
> > >
> > > On Mon, Sep 26, 2022 at 07:25:30AM +0800, Guo Ren wrote:
> > > > On Mon, Sep 26, 2022 at 2:03 AM Jisheng Zhang <jszhang@kernel.org> wrote:
> > > > >
> > > > > The ret_from_kernel_thread() behaves similarly with ret_from_fork(),
> > > > > the only difference is whether call the fn(arg) or not, this can be
> > > > > acchieved by testing fn is NULL or not, I.E s0 is 0 or not.
> > > > >
> > > > > Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> > > > > ---
> > > > >  arch/riscv/kernel/entry.S   | 11 +++--------
> > > > >  arch/riscv/kernel/process.c |  5 ++---
> > > > >  2 files changed, 5 insertions(+), 11 deletions(-)
> > > > >
> > > > > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> > > > > index 2207cf44a3bc..a3e1ed2fa2ac 100644
> > > > > --- a/arch/riscv/kernel/entry.S
> > > > > +++ b/arch/riscv/kernel/entry.S
> > > > > @@ -323,20 +323,15 @@ END(handle_kernel_stack_overflow)
> > > > >
> > > > >  ENTRY(ret_from_fork)
> > > > >         call schedule_tail
> > > > > -       move a0, sp /* pt_regs */
> > > > > -       la ra, ret_from_exception
> > > > > -       tail syscall_exit_to_user_mode
> > > > > -ENDPROC(ret_from_fork)
> > > > > -
> > > > > -ENTRY(ret_from_kernel_thread)
> > > > > -       call schedule_tail
> > > > > +       beqz s0, 1f     /* not from kernel thread */
> > >
> > > Hi Guo,
> > >
> > > > We can't use s0 as condition for ret_from_fork/ret_from_kernel_thread.
> > > > The s0=0 is also okay for ret_from_fork.
> > >
> > > IIUC, in ret_from_fork, the s0 comes p->thread.s[0] rather than s0 in
> > > pt_regs.
> > Yes, you are correct.
> >
> > >
> > > >
> > > >         /* p->thread holds context to be restored by __switch_to() */
> > > >         if (unlikely(args->fn)) {
> > > >                 /* Kernel thread */
> > > >                 memset(childregs, 0, sizeof(struct pt_regs));
> > > >                 childregs->gp = gp_in_global;
> > > >                 /* Supervisor/Machine, irqs on: */
> > > >                 childregs->status = SR_PP | SR_PIE;
> > > >
> > > >                 p->thread.ra = (unsigned long)ret_from_kernel_thread;
> > > >                 p->thread.s[0] = (unsigned long)args->fn;
> > > >                 p->thread.s[1] = (unsigned long)args->fn_arg;
> > > >         } else {
> > > >                 *childregs = *(current_pt_regs());
> > > >                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> > Oh, I'm wrong, It's switch_to_restore -> exception_restore.
> >
> > > >                 if (usp) /* User fork */
> > > >                         childregs->sp = usp;
> > > >                 if (clone_flags & CLONE_SETTLS)
> > > >                         childregs->tp = tls;
> > > >                 childregs->a0 = 0; /* Return value of fork() */
> > > >                 p->thread.ra = (unsigned long)ret_from_fork;
> > > >         }
> > > >         p->thread.sp = (unsigned long)childregs; /* kernel sp */
> > > >
> > >
> > > <snip>
> > >
> > > > > @@ -182,8 +180,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
> > > > >                 if (clone_flags & CLONE_SETTLS)
> > > > >                         childregs->tp = tls;
> > > > >                 childregs->a0 = 0; /* Return value of fork() */
> > > > > -               p->thread.ra = (unsigned long)ret_from_fork;
> > > > > +               p->thread.s[0] = 0;
> > >
> > > Here we assign 0 to p->thread.s[0]
> > I missed that.
> >
> > Merge thread & fork is not a good idea, and using fp as the flag is so implicit.
> >
> > ➜  linux git:(rv64sv32) grep ret_from_fork arch -r | grep entry.S
> > arch/arc/kernel/entry.S:ENTRY(ret_from_fork)
> > arch/arc/kernel/entry.S:END(ret_from_fork)
> > arch/csky/kernel/entry.S:ENTRY(ret_from_fork)
> > arch/x86/kernel/process_32.c: * the task-switch, and shows up in
> > ret_from_fork in entry.S,
> > arch/alpha/kernel/entry.S:      .globl  ret_from_fork
> > arch/alpha/kernel/entry.S:      .ent    ret_from_fork
> > arch/alpha/kernel/entry.S:ret_from_fork:
> > arch/alpha/kernel/entry.S:.end ret_from_fork
> > arch/loongarch/kernel/entry.S:SYM_CODE_START(ret_from_fork)
> > arch/loongarch/kernel/entry.S:SYM_CODE_END(ret_from_fork)
> > arch/hexagon/kernel/vm_entry.S: .globl ret_from_fork
> > arch/hexagon/kernel/vm_entry.S:ret_from_fork:
> > arch/microblaze/kernel/entry.S:   (copy_thread makes ret_from_fork the
> > return address in each new thread's
> > arch/microblaze/kernel/entry.S:C_ENTRY(ret_from_fork):
> > arch/m68k/kernel/entry.S:ENTRY(ret_from_fork)
> > arch/arm64/kernel/entry.S:SYM_CODE_START(ret_from_fork)
> > arch/arm64/kernel/entry.S:SYM_CODE_END(ret_from_fork)
> > arch/arm64/kernel/entry.S:NOKPROBE(ret_from_fork)
> > arch/riscv/kernel/entry.S:ENTRY(ret_from_fork)
> > arch/riscv/kernel/entry.S:ENDPROC(ret_from_fork)
> > arch/s390/kernel/entry.S:# a new process exits the kernel with ret_from_fork
> > arch/s390/kernel/entry.S:ENTRY(ret_from_fork)
> > arch/s390/kernel/entry.S:       brasl   %r14,__ret_from_fork
> > arch/s390/kernel/entry.S:ENDPROC(ret_from_fork)
> > arch/mips/kernel/entry.S:FEXPORT(ret_from_fork)
> > arch/openrisc/kernel/entry.S:   /* All syscalls return here... just
> > pay attention to ret_from_fork
> > arch/openrisc/kernel/entry.S:ENTRY(ret_from_fork)
> > arch/openrisc/kernel/entry.S:    * that may be either schedule(),
> > ret_from_fork(), or
> > arch/nios2/kernel/entry.S:ENTRY(ret_from_fork)
> > arch/xtensa/kernel/entry.S:ENTRY(ret_from_fork)
> > arch/xtensa/kernel/entry.S:ENDPROC(ret_from_fork)
> > arch/sparc/kernel/entry.S:      .globl  ret_from_fork
> > arch/sparc/kernel/entry.S:ret_from_fork:
> > ➜  linux git:(rv64sv32) grep ret_from_kernel_thread arch -r | grep entry.S
> > arch/csky/kernel/entry.S:ENTRY(ret_from_kernel_thread)
> > arch/alpha/kernel/entry.S:      .globl  ret_from_kernel_thread
> > arch/alpha/kernel/entry.S:      .ent    ret_from_kernel_thread
> > arch/alpha/kernel/entry.S:ret_from_kernel_thread:
> > arch/alpha/kernel/entry.S:.end ret_from_kernel_thread
> > arch/parisc/kernel/entry.S:ENTRY(ret_from_kernel_thread)
> > arch/parisc/kernel/entry.S:END(ret_from_kernel_thread)
> > arch/loongarch/kernel/entry.S:SYM_CODE_START(ret_from_kernel_thread)
> > arch/loongarch/kernel/entry.S:SYM_CODE_END(ret_from_kernel_thread)
> > arch/microblaze/kernel/entry.S:C_ENTRY(ret_from_kernel_thread):
> > arch/m68k/kernel/entry.S:ENTRY(ret_from_kernel_thread)
> > arch/riscv/kernel/entry.S:ENTRY(ret_from_kernel_thread)
> > arch/riscv/kernel/entry.S:ENDPROC(ret_from_kernel_thread)
> > arch/mips/kernel/entry.S:FEXPORT(ret_from_kernel_thread)
> > arch/openrisc/kernel/entry.S:    * ret_from_kernel_thread().  If we
> > are returning to a new thread,
> > arch/nios2/kernel/entry.S:ENTRY(ret_from_kernel_thread)
> > arch/xtensa/kernel/entry.S:ENTRY(ret_from_kernel_thread)
> > arch/xtensa/kernel/entry.S:ENDPROC(ret_from_kernel_thread)
> > arch/sparc/kernel/entry.S:      .globl  ret_from_kernel_thread
> > arch/sparc/kernel/entry.S:ret_from_kernel_thread:
> >
> > Many architectures use a similar style. If you want to continue the
> > patch, I think you should first rename ret_from_fork properly, and
> > give an explicit flag definition, not just setting fp = 0.
> >
>
> Above list also shows many architectures don't have a
> ret_from_kernel_thread, I think the reason is simple it behaves
> similarly as ret_from_fork.
After looking at x86 & arm64, you've convinced me.

Acked-by: Guo Ren <guoren@kernel.org>

> As for flag, IMHO, we may missed something as clearing the s[12]
> array in thread_struct when user fork, because s[12] may contain
> random kernel memory content, which may be finally leaked to
> userspace. This is a security hole.
>
> A trivial patch of memset(0) can fix it, after this fix, checking the
> s[0] is straightforward.
>
> diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
> index 67e7cd123ceb..50a0f7e4327c 100644
> --- a/arch/riscv/kernel/process.c
> +++ b/arch/riscv/kernel/process.c
> @@ -174,6 +174,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
>                 p->thread.s[0] = (unsigned long)args->fn;
>                 p->thread.s[1] = (unsigned long)args->fn_arg;
>         } else {
> +               memset(&p->thread.s, 0, sizeof(p->thread.s));
Good catch. s[12] may still hold some information about the kernel.

It could be a separate patch with a Fixes: tag.

>                 *childregs = *(current_pt_regs());
>                 if (usp) /* User fork */
>                         childregs->sp = usp;
>
diff mbox series

Patch

diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 2207cf44a3bc..a3e1ed2fa2ac 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -323,20 +323,15 @@  END(handle_kernel_stack_overflow)
 
 ENTRY(ret_from_fork)
 	call schedule_tail
-	move a0, sp /* pt_regs */
-	la ra, ret_from_exception
-	tail syscall_exit_to_user_mode
-ENDPROC(ret_from_fork)
-
-ENTRY(ret_from_kernel_thread)
-	call schedule_tail
+	beqz s0, 1f	/* not from kernel thread */
 	/* Call fn(arg) */
 	move a0, s1
 	jalr s0
+1:
 	move a0, sp /* pt_regs */
 	la ra, ret_from_exception
 	tail syscall_exit_to_user_mode
-ENDPROC(ret_from_kernel_thread)
+ENDPROC(ret_from_fork)
 
 #ifdef CONFIG_IRQ_STACKS
 ENTRY(call_on_stack)
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index ceb9ebab6558..67e7cd123ceb 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -34,7 +34,6 @@  EXPORT_SYMBOL(__stack_chk_guard);
 #endif
 
 extern asmlinkage void ret_from_fork(void);
-extern asmlinkage void ret_from_kernel_thread(void);
 
 void arch_cpu_idle(void)
 {
@@ -172,7 +171,6 @@  int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		/* Supervisor/Machine, irqs on: */
 		childregs->status = SR_PP | SR_PIE;
 
-		p->thread.ra = (unsigned long)ret_from_kernel_thread;
 		p->thread.s[0] = (unsigned long)args->fn;
 		p->thread.s[1] = (unsigned long)args->fn_arg;
 	} else {
@@ -182,8 +180,9 @@  int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		if (clone_flags & CLONE_SETTLS)
 			childregs->tp = tls;
 		childregs->a0 = 0; /* Return value of fork() */
-		p->thread.ra = (unsigned long)ret_from_fork;
+		p->thread.s[0] = 0;
 	}
+	p->thread.ra = (unsigned long)ret_from_fork;
 	p->thread.sp = (unsigned long)childregs; /* kernel sp */
 	return 0;
 }