diff mbox series

[-next,V6,5/7] riscv: ftrace: Make ftrace_caller call ftrace_graph_func

Message ID 20230107133549.4192639-6-guoren@kernel.org (mailing list archive)
State Superseded
Delegated to: Palmer Dabbelt
Headers show
Series riscv: Optimize function trace | expand

Checks

Context Check Description
conchuod/patch_count success Link
conchuod/cover_letter success Series has a cover letter
conchuod/tree_selection success Guessed tree name to be fixes
conchuod/fixes_present success Fixes tag present in non-next series
conchuod/maintainers_pattern success MAINTAINERS pattern errors before the patch: 13 and now 13
conchuod/verify_signedoff success Signed-off-by tag matches author and committer
conchuod/kdoc success Errors and warnings before: 0 this patch: 0
conchuod/module_param success Was 0 now: 0
conchuod/alphanumeric_selects success Out of order selects before the patch: 57 and now 57
conchuod/build_rv32_defconfig success Build OK
conchuod/build_warn_rv64 success Errors and warnings before: 2054 this patch: 2054
conchuod/dtb_warn_rv64 success Errors and warnings before: 4 this patch: 4
conchuod/header_inline success No static functions without inline keyword in header files
conchuod/checkpatch success total: 0 errors, 0 warnings, 0 checks, 197 lines checked
conchuod/source_inline success Was 0 now: 0
conchuod/build_rv64_nommu_k210_defconfig success Build OK
conchuod/verify_fixes success No Fixes tag
conchuod/build_rv64_nommu_virt_defconfig success Build OK

Commit Message

Guo Ren Jan. 7, 2023, 1:35 p.m. UTC
From: Song Shuai <suagrfillet@gmail.com>

In order to make the function graph use ftrace directly, ftrace_caller
should be adjusted to save the necessary regs against the pt_regs layout
so it can call ftrace_graph_func reasonably.

SAVE_ALL currently saves all the regs according to the pt_regs struct.
This patch replaces SAVE_ALL with SAVE_ABI_REGS, which takes an extra
option to save only the necessary ABI-related regs for ftrace_caller.

ftrace_caller and ftrace_regs_caller save their regs with the respective
option of SAVE_ABI_REGS, then call the tracing function, especially
graph_ops's ftrace_graph_func. So the ftrace_graph_[regs]_call labels
aren't needed anymore if FTRACE_WITH_REGS is defined.

As described in the previous patch, ftrace_caller keeps its
ftrace_graph_call label if FTRACE_WITH_REGS isn't defined.

For convenience, the argument setup for the tracing function in
ftrace_[regs]_caller is factored out into the PREPARE_ARGS macro.

Signed-off-by: Song Shuai <suagrfillet@gmail.com>
Tested-by: Guo Ren <guoren@kernel.org>
Signed-off-by: Guo Ren <guoren@kernel.org>
---
 arch/riscv/kernel/mcount-dyn.S | 142 ++++++++++++++++++++++++---------
 1 file changed, 104 insertions(+), 38 deletions(-)

Comments

Evgenii Shatokhin Jan. 10, 2023, 5:16 p.m. UTC | #1
On 07.01.2023 16:35, guoren@kernel.org wrote:
> From: Song Shuai <suagrfillet@gmail.com>
> 
> In order to make the function graph use ftrace directly, ftrace_caller
> should be adjusted to save the necessary regs against the pt_regs layout
> so it can call ftrace_graph_func reasonably.
> 
> SAVE_ALL now saves all the regs according to the pt_regs struct. Here
> supersedes SAVE_ALL by SAVE_ABI_REGS which has an extra option to allow
> saving only the necessary ABI-related regs for ftrace_caller.
> 
> ftrace_caller and ftrace_regs_caller save their regs with the respective
> option of SAVE_ABI_REGS, then call the tracing function, especially
> graph_ops's ftrace_graph_func. So the ftrace_graph_[regs]_call labels
> aren't needed anymore if FTRACE_WITH_REGS is defined.
> 
> As the previous patch described, the ftrace_caller remains with its
> ftrace_graph_call if FTRACE_WITH_REGS isn't defined,
> 
> For convenience, the original argument setup for the tracing function in
> ftrace_[regs]_caller is separated as PREPARE_ARGS.
> 
> Signed-off-by: Song Shuai <suagrfillet@gmail.com>
> Tested-by: Guo Ren <guoren@kernel.org>
> Signed-off-by: Guo Ren <guoren@kernel.org>
> ---
>   arch/riscv/kernel/mcount-dyn.S | 142 ++++++++++++++++++++++++---------
>   1 file changed, 104 insertions(+), 38 deletions(-)
> 
> diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
> index b75332ced757..d7d4d51b4bd7 100644
> --- a/arch/riscv/kernel/mcount-dyn.S
> +++ b/arch/riscv/kernel/mcount-dyn.S
> @@ -57,19 +57,52 @@
>          .endm
> 
>   #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> -       .macro SAVE_ALL
> +
> +/**
> +* SAVE_ABI_REGS - save regs against the pt_regs struct
> +*
> +* @all: tell if saving all the regs
> +*
> +* If all is set, all the regs will be saved, otherwise only ABI
> +* related regs (a0-a7,epc,ra and optional s0) will be saved.
> +*
> +* After the stack is established,
> +*
> +* 0(sp) stores the PC of the traced function which can be accessed
> +* by &(fregs)->regs->epc in tracing function. Note that the real
> +* function entry address should be computed with -FENTRY_RA_OFFSET.
> +*
> +* 8(sp) stores the function return address (i.e. parent IP) that
> +* can be accessed by &(fregs)->regs->ra in tracing function.
> +*
> +* The other regs are saved at the respective location and accessed
> +* by the respective pt_regs member.
> +*
> +* Here is the layout of stack for your reference.
> +*
> +* PT_SIZE_ON_STACK  ->  +++++++++
> +*                       + ..... +
> +*                       + t3-t6 +
> +*                       + s2-s11+
> +*                       + a0-a7 + --++++-> ftrace_caller saved
> +*                       + s1    +   +
> +*                       + s0    + --+
> +*                       + t0-t2 +   +
> +*                       + tp    +   +
> +*                       + gp    +   +
> +*                       + sp    +   +
> +*                       + ra    + --+ // parent IP
> +*               sp  ->  + epc   + --+ // PC
> +*                       +++++++++
> +**/
> +       .macro SAVE_ABI_REGS, all=0
>          addi    sp, sp, -PT_SIZE_ON_STACK
> 
>          REG_S t0,  PT_EPC(sp)
>          REG_S x1,  PT_RA(sp)
> -       REG_S x2,  PT_SP(sp)
> -       REG_S x3,  PT_GP(sp)
> -       REG_S x4,  PT_TP(sp)
> -       REG_S x5,  PT_T0(sp)
> -       REG_S x6,  PT_T1(sp)
> -       REG_S x7,  PT_T2(sp)
> -       REG_S x8,  PT_S0(sp)
> -       REG_S x9,  PT_S1(sp)
> +
> +       // always save the ABI regs
> +
>          REG_S x10, PT_A0(sp)
>          REG_S x11, PT_A1(sp)
>          REG_S x12, PT_A2(sp)
> @@ -78,6 +111,18 @@
>          REG_S x15, PT_A5(sp)
>          REG_S x16, PT_A6(sp)
>          REG_S x17, PT_A7(sp)
> +
> +       // save the leftover regs
> +
> +       .if \all == 1
> +       REG_S x2,  PT_SP(sp)
> +       REG_S x3,  PT_GP(sp)
> +       REG_S x4,  PT_TP(sp)
> +       REG_S x5,  PT_T0(sp)
> +       REG_S x6,  PT_T1(sp)
> +       REG_S x7,  PT_T2(sp)
> +       REG_S x8,  PT_S0(sp)
> +       REG_S x9,  PT_S1(sp)
>          REG_S x18, PT_S2(sp)
>          REG_S x19, PT_S3(sp)
>          REG_S x20, PT_S4(sp)
> @@ -92,19 +137,19 @@
>          REG_S x29, PT_T4(sp)
>          REG_S x30, PT_T5(sp)
>          REG_S x31, PT_T6(sp)
> +
> +       // save s0 if FP_TEST defined
> +
> +       .else
> +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
> +       REG_S x8,  PT_S0(sp)
> +#endif
> +       .endif
>          .endm
> 
> -       .macro RESTORE_ALL
> +       .macro RESTORE_ABI_REGS, all=0
>          REG_L t0,  PT_EPC(sp)
>          REG_L x1,  PT_RA(sp)
> -       REG_L x2,  PT_SP(sp)
> -       REG_L x3,  PT_GP(sp)
> -       REG_L x4,  PT_TP(sp)
> -       REG_L x5,  PT_T0(sp)
> -       REG_L x6,  PT_T1(sp)
> -       REG_L x7,  PT_T2(sp)
> -       REG_L x8,  PT_S0(sp)
> -       REG_L x9,  PT_S1(sp)
>          REG_L x10, PT_A0(sp)
>          REG_L x11, PT_A1(sp)
>          REG_L x12, PT_A2(sp)
> @@ -113,6 +158,16 @@
>          REG_L x15, PT_A5(sp)
>          REG_L x16, PT_A6(sp)
>          REG_L x17, PT_A7(sp)
> +
> +       .if \all == 1
> +       REG_L x2,  PT_SP(sp)
> +       REG_L x3,  PT_GP(sp)
> +       REG_L x4,  PT_TP(sp)
> +       REG_L x5,  PT_T0(sp)

Same as for the patch #3, please skip "REG_L x5,  PT_T0(sp)" here. The 
correct value of t0/x5 has already been read from PT_EPC(sp) at this point.

> +       REG_L x6,  PT_T1(sp)
> +       REG_L x7,  PT_T2(sp)
> +       REG_L x8,  PT_S0(sp)
> +       REG_L x9,  PT_S1(sp)
>          REG_L x18, PT_S2(sp)
>          REG_L x19, PT_S3(sp)
>          REG_L x20, PT_S4(sp)
> @@ -128,10 +183,25 @@
>          REG_L x30, PT_T5(sp)
>          REG_L x31, PT_T6(sp)
> 
> +       .else
> +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
> +       REG_L x8,  PT_S0(sp)
> +#endif
> +       .endif
>          addi    sp, sp, PT_SIZE_ON_STACK
>          .endm
> +
> +       .macro PREPARE_ARGS
> +       addi    a0, t0, -FENTRY_RA_OFFSET       // ip
> +       la      a1, function_trace_op
> +       REG_L   a2, 0(a1)                       // op
> +       mv      a1, ra                          // parent_ip
> +       mv      a3, sp                          // fregs
> +       .endm
> +
>   #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
> 
> +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
>   ENTRY(ftrace_caller)
>          SAVE_ABI
> 
> @@ -160,33 +230,29 @@ ftrace_graph_call:
>          jr t0
>   ENDPROC(ftrace_caller)
> 
> -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
>   ENTRY(ftrace_regs_caller)
> -       SAVE_ALL
> -
> -       addi    a0, t0, -FENTRY_RA_OFFSET
> -       la      a1, function_trace_op
> -       REG_L   a2, 0(a1)
> -       mv      a1, ra
> -       mv      a3, sp
> +       SAVE_ABI_REGS 1
> +       PREPARE_ARGS
> 
>   ftrace_regs_call:
>          .global ftrace_regs_call
>          call    ftrace_stub
> 
> -#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> -       addi    a0, sp, PT_RA
> -       REG_L   a1, PT_T0(sp)
> -       addi    a1, a1, -FENTRY_RA_OFFSET
> -#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
> -       mv      a2, s0
> -#endif
> -ftrace_graph_regs_call:
> -       .global ftrace_graph_regs_call
> -       call    ftrace_stub
> -#endif
> 
> -       RESTORE_ALL
> +       RESTORE_ABI_REGS 1
>          jr t0
>   ENDPROC(ftrace_regs_caller)
> +
> +ENTRY(ftrace_caller)
> +       SAVE_ABI_REGS 0
> +       PREPARE_ARGS
> +
> +ftrace_call:
> +       .global ftrace_call
> +       call    ftrace_stub
> +
> +       RESTORE_ABI_REGS 0
> +       jr t0
> +ENDPROC(ftrace_caller)
>   #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
> --
> 2.36.1
> 
> 
Regards,
Evgenii
Guo Ren Jan. 11, 2023, 8:23 a.m. UTC | #2
On Wed, Jan 11, 2023 at 1:16 AM Evgenii Shatokhin <e.shatokhin@yadro.com> wrote:
>
> On 07.01.2023 16:35, guoren@kernel.org wrote:
> > From: Song Shuai <suagrfillet@gmail.com>
> >
> > In order to make the function graph use ftrace directly, ftrace_caller
> > should be adjusted to save the necessary regs against the pt_regs layout
> > so it can call ftrace_graph_func reasonably.
> >
> > SAVE_ALL now saves all the regs according to the pt_regs struct. Here
> > supersedes SAVE_ALL by SAVE_ABI_REGS which has an extra option to allow
> > saving only the necessary ABI-related regs for ftrace_caller.
> >
> > ftrace_caller and ftrace_regs_caller save their regs with the respective
> > option of SAVE_ABI_REGS, then call the tracing function, especially
> > graph_ops's ftrace_graph_func. So the ftrace_graph_[regs]_call labels
> > aren't needed anymore if FTRACE_WITH_REGS is defined.
> >
> > As the previous patch described, the ftrace_caller remains with its
> > ftrace_graph_call if FTRACE_WITH_REGS isn't defined,
> >
> > For convenience, the original argument setup for the tracing function in
> > ftrace_[regs]_caller is separated as PREPARE_ARGS.
> >
> > Signed-off-by: Song Shuai <suagrfillet@gmail.com>
> > Tested-by: Guo Ren <guoren@kernel.org>
> > Signed-off-by: Guo Ren <guoren@kernel.org>
> > ---
> >   arch/riscv/kernel/mcount-dyn.S | 142 ++++++++++++++++++++++++---------
> >   1 file changed, 104 insertions(+), 38 deletions(-)
> >
> > diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
> > index b75332ced757..d7d4d51b4bd7 100644
> > --- a/arch/riscv/kernel/mcount-dyn.S
> > +++ b/arch/riscv/kernel/mcount-dyn.S
> > @@ -57,19 +57,52 @@
> >          .endm
> >
> >   #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> > -       .macro SAVE_ALL
> > +
> > +/**
> > +* SAVE_ABI_REGS - save regs against the pt_regs struct
> > +*
> > +* @all: tell if saving all the regs
> > +*
> > +* If all is set, all the regs will be saved, otherwise only ABI
> > +* related regs (a0-a7,epc,ra and optional s0) will be saved.
> > +*
> > +* After the stack is established,
> > +*
> > +* 0(sp) stores the PC of the traced function which can be accessed
> > +* by &(fregs)->regs->epc in tracing function. Note that the real
> > +* function entry address should be computed with -FENTRY_RA_OFFSET.
> > +*
> > +* 8(sp) stores the function return address (i.e. parent IP) that
> > +* can be accessed by &(fregs)->regs->ra in tracing function.
> > +*
> > +* The other regs are saved at the respective location and accessed
> > +* by the respective pt_regs member.
> > +*
> > +* Here is the layout of stack for your reference.
> > +*
> > +* PT_SIZE_ON_STACK  ->  +++++++++
> > +*                       + ..... +
> > +*                       + t3-t6 +
> > +*                       + s2-s11+
> > +*                       + a0-a7 + --++++-> ftrace_caller saved
> > +*                       + s1    +   +
> > +*                       + s0    + --+
> > +*                       + t0-t2 +   +
> > +*                       + tp    +   +
> > +*                       + gp    +   +
> > +*                       + sp    +   +
> > +*                       + ra    + --+ // parent IP
> > +*               sp  ->  + epc   + --+ // PC
> > +*                       +++++++++
> > +**/
> > +       .macro SAVE_ABI_REGS, all=0
> >          addi    sp, sp, -PT_SIZE_ON_STACK
> >
> >          REG_S t0,  PT_EPC(sp)
> >          REG_S x1,  PT_RA(sp)
> > -       REG_S x2,  PT_SP(sp)
> > -       REG_S x3,  PT_GP(sp)
> > -       REG_S x4,  PT_TP(sp)
> > -       REG_S x5,  PT_T0(sp)
> > -       REG_S x6,  PT_T1(sp)
> > -       REG_S x7,  PT_T2(sp)
> > -       REG_S x8,  PT_S0(sp)
> > -       REG_S x9,  PT_S1(sp)
> > +
> > +       // always save the ABI regs
> > +
> >          REG_S x10, PT_A0(sp)
> >          REG_S x11, PT_A1(sp)
> >          REG_S x12, PT_A2(sp)
> > @@ -78,6 +111,18 @@
> >          REG_S x15, PT_A5(sp)
> >          REG_S x16, PT_A6(sp)
> >          REG_S x17, PT_A7(sp)
> > +
> > +       // save the leftover regs
> > +
> > +       .if \all == 1
> > +       REG_S x2,  PT_SP(sp)
> > +       REG_S x3,  PT_GP(sp)
> > +       REG_S x4,  PT_TP(sp)
> > +       REG_S x5,  PT_T0(sp)
> > +       REG_S x6,  PT_T1(sp)
> > +       REG_S x7,  PT_T2(sp)
> > +       REG_S x8,  PT_S0(sp)
> > +       REG_S x9,  PT_S1(sp)
> >          REG_S x18, PT_S2(sp)
> >          REG_S x19, PT_S3(sp)
> >          REG_S x20, PT_S4(sp)
> > @@ -92,19 +137,19 @@
> >          REG_S x29, PT_T4(sp)
> >          REG_S x30, PT_T5(sp)
> >          REG_S x31, PT_T6(sp)
> > +
> > +       // save s0 if FP_TEST defined
> > +
> > +       .else
> > +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
> > +       REG_S x8,  PT_S0(sp)
> > +#endif
> > +       .endif
> >          .endm
> >
> > -       .macro RESTORE_ALL
> > +       .macro RESTORE_ABI_REGS, all=0
> >          REG_L t0,  PT_EPC(sp)
> >          REG_L x1,  PT_RA(sp)
> > -       REG_L x2,  PT_SP(sp)
> > -       REG_L x3,  PT_GP(sp)
> > -       REG_L x4,  PT_TP(sp)
> > -       REG_L x5,  PT_T0(sp)
> > -       REG_L x6,  PT_T1(sp)
> > -       REG_L x7,  PT_T2(sp)
> > -       REG_L x8,  PT_S0(sp)
> > -       REG_L x9,  PT_S1(sp)
> >          REG_L x10, PT_A0(sp)
> >          REG_L x11, PT_A1(sp)
> >          REG_L x12, PT_A2(sp)
> > @@ -113,6 +158,16 @@
> >          REG_L x15, PT_A5(sp)
> >          REG_L x16, PT_A6(sp)
> >          REG_L x17, PT_A7(sp)
> > +
> > +       .if \all == 1
> > +       REG_L x2,  PT_SP(sp)
> > +       REG_L x3,  PT_GP(sp)
> > +       REG_L x4,  PT_TP(sp)
> > +       REG_L x5,  PT_T0(sp)
>
> Same as for the patch #3, please skip "REG_L x5,  PT_T0(sp)" here. The
> correct value of t0/x5 has already been read from PT_EPC(sp) at this point.
Oh, I don't want to do that here. It's a common macro. Because it's a
continuous load within the cacheline, I don't think it would cause a
performance gap.


>
> > +       REG_L x6,  PT_T1(sp)
> > +       REG_L x7,  PT_T2(sp)
> > +       REG_L x8,  PT_S0(sp)
> > +       REG_L x9,  PT_S1(sp)
> >          REG_L x18, PT_S2(sp)
> >          REG_L x19, PT_S3(sp)
> >          REG_L x20, PT_S4(sp)
> > @@ -128,10 +183,25 @@
> >          REG_L x30, PT_T5(sp)
> >          REG_L x31, PT_T6(sp)
> >
> > +       .else
> > +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
> > +       REG_L x8,  PT_S0(sp)
> > +#endif
> > +       .endif
> >          addi    sp, sp, PT_SIZE_ON_STACK
> >          .endm
> > +
> > +       .macro PREPARE_ARGS
> > +       addi    a0, t0, -FENTRY_RA_OFFSET       // ip
> > +       la      a1, function_trace_op
> > +       REG_L   a2, 0(a1)                       // op
> > +       mv      a1, ra                          // parent_ip
> > +       mv      a3, sp                          // fregs
> > +       .endm
> > +
> >   #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
> >
> > +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> >   ENTRY(ftrace_caller)
> >          SAVE_ABI
> >
> > @@ -160,33 +230,29 @@ ftrace_graph_call:
> >          jr t0
> >   ENDPROC(ftrace_caller)
> >
> > -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> > +#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
> >   ENTRY(ftrace_regs_caller)
> > -       SAVE_ALL
> > -
> > -       addi    a0, t0, -FENTRY_RA_OFFSET
> > -       la      a1, function_trace_op
> > -       REG_L   a2, 0(a1)
> > -       mv      a1, ra
> > -       mv      a3, sp
> > +       SAVE_ABI_REGS 1
> > +       PREPARE_ARGS
> >
> >   ftrace_regs_call:
> >          .global ftrace_regs_call
> >          call    ftrace_stub
> >
> > -#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> > -       addi    a0, sp, PT_RA
> > -       REG_L   a1, PT_T0(sp)
> > -       addi    a1, a1, -FENTRY_RA_OFFSET
> > -#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
> > -       mv      a2, s0
> > -#endif
> > -ftrace_graph_regs_call:
> > -       .global ftrace_graph_regs_call
> > -       call    ftrace_stub
> > -#endif
> >
> > -       RESTORE_ALL
> > +       RESTORE_ABI_REGS 1
> >          jr t0
> >   ENDPROC(ftrace_regs_caller)
> > +
> > +ENTRY(ftrace_caller)
> > +       SAVE_ABI_REGS 0
> > +       PREPARE_ARGS
> > +
> > +ftrace_call:
> > +       .global ftrace_call
> > +       call    ftrace_stub
> > +
> > +       RESTORE_ABI_REGS 0
> > +       jr t0
> > +ENDPROC(ftrace_caller)
> >   #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
> > --
> > 2.36.1
> >
> >
> Regards,
> Evgenii
>
>


--
Best Regards
 Guo Ren
Guo Ren Jan. 11, 2023, 8:41 a.m. UTC | #3
On Wed, Jan 11, 2023 at 4:23 PM Guo Ren <guoren@kernel.org> wrote:
>
> On Wed, Jan 11, 2023 at 1:16 AM Evgenii Shatokhin <e.shatokhin@yadro.com> wrote:
> >
> > On 07.01.2023 16:35, guoren@kernel.org wrote:
> > > From: Song Shuai <suagrfillet@gmail.com>
> > >
> > > In order to make the function graph use ftrace directly, ftrace_caller
> > > should be adjusted to save the necessary regs against the pt_regs layout
> > > so it can call ftrace_graph_func reasonably.
> > >
> > > SAVE_ALL now saves all the regs according to the pt_regs struct. Here
> > > supersedes SAVE_ALL by SAVE_ABI_REGS which has an extra option to allow
> > > saving only the necessary ABI-related regs for ftrace_caller.
> > >
> > > ftrace_caller and ftrace_regs_caller save their regs with the respective
> > > option of SAVE_ABI_REGS, then call the tracing function, especially
> > > graph_ops's ftrace_graph_func. So the ftrace_graph_[regs]_call labels
> > > aren't needed anymore if FTRACE_WITH_REGS is defined.
> > >
> > > As the previous patch described, the ftrace_caller remains with its
> > > ftrace_graph_call if FTRACE_WITH_REGS isn't defined,
> > >
> > > For convenience, the original argument setup for the tracing function in
> > > ftrace_[regs]_caller is separated as PREPARE_ARGS.
> > >
> > > Signed-off-by: Song Shuai <suagrfillet@gmail.com>
> > > Tested-by: Guo Ren <guoren@kernel.org>
> > > Signed-off-by: Guo Ren <guoren@kernel.org>
> > > ---
> > >   arch/riscv/kernel/mcount-dyn.S | 142 ++++++++++++++++++++++++---------
> > >   1 file changed, 104 insertions(+), 38 deletions(-)
> > >
> > > diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
> > > index b75332ced757..d7d4d51b4bd7 100644
> > > --- a/arch/riscv/kernel/mcount-dyn.S
> > > +++ b/arch/riscv/kernel/mcount-dyn.S
> > > @@ -57,19 +57,52 @@
> > >          .endm
> > >
> > >   #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> > > -       .macro SAVE_ALL
> > > +
> > > +/**
> > > +* SAVE_ABI_REGS - save regs against the pt_regs struct
> > > +*
> > > +* @all: tell if saving all the regs
> > > +*
> > > +* If all is set, all the regs will be saved, otherwise only ABI
> > > +* related regs (a0-a7,epc,ra and optional s0) will be saved.
> > > +*
> > > +* After the stack is established,
> > > +*
> > > +* 0(sp) stores the PC of the traced function which can be accessed
> > > +* by &(fregs)->regs->epc in tracing function. Note that the real
> > > +* function entry address should be computed with -FENTRY_RA_OFFSET.
> > > +*
> > > +* 8(sp) stores the function return address (i.e. parent IP) that
> > > +* can be accessed by &(fregs)->regs->ra in tracing function.
> > > +*
> > > +* The other regs are saved at the respective location and accessed
> > > +* by the respective pt_regs member.
> > > +*
> > > +* Here is the layout of stack for your reference.
> > > +*
> > > +* PT_SIZE_ON_STACK  ->  +++++++++
> > > +*                       + ..... +
> > > +*                       + t3-t6 +
> > > +*                       + s2-s11+
> > > +*                       + a0-a7 + --++++-> ftrace_caller saved
> > > +*                       + s1    +   +
> > > +*                       + s0    + --+
> > > +*                       + t0-t2 +   +
> > > +*                       + tp    +   +
> > > +*                       + gp    +   +
> > > +*                       + sp    +   +
> > > +*                       + ra    + --+ // parent IP
> > > +*               sp  ->  + epc   + --+ // PC
> > > +*                       +++++++++
> > > +**/
> > > +       .macro SAVE_ABI_REGS, all=0
> > >          addi    sp, sp, -PT_SIZE_ON_STACK
> > >
> > >          REG_S t0,  PT_EPC(sp)
> > >          REG_S x1,  PT_RA(sp)
> > > -       REG_S x2,  PT_SP(sp)
> > > -       REG_S x3,  PT_GP(sp)
> > > -       REG_S x4,  PT_TP(sp)
> > > -       REG_S x5,  PT_T0(sp)
> > > -       REG_S x6,  PT_T1(sp)
> > > -       REG_S x7,  PT_T2(sp)
> > > -       REG_S x8,  PT_S0(sp)
> > > -       REG_S x9,  PT_S1(sp)
> > > +
> > > +       // always save the ABI regs
> > > +
> > >          REG_S x10, PT_A0(sp)
> > >          REG_S x11, PT_A1(sp)
> > >          REG_S x12, PT_A2(sp)
> > > @@ -78,6 +111,18 @@
> > >          REG_S x15, PT_A5(sp)
> > >          REG_S x16, PT_A6(sp)
> > >          REG_S x17, PT_A7(sp)
> > > +
> > > +       // save the leftover regs
> > > +
> > > +       .if \all == 1
> > > +       REG_S x2,  PT_SP(sp)
> > > +       REG_S x3,  PT_GP(sp)
> > > +       REG_S x4,  PT_TP(sp)
> > > +       REG_S x5,  PT_T0(sp)
> > > +       REG_S x6,  PT_T1(sp)
> > > +       REG_S x7,  PT_T2(sp)
> > > +       REG_S x8,  PT_S0(sp)
> > > +       REG_S x9,  PT_S1(sp)
> > >          REG_S x18, PT_S2(sp)
> > >          REG_S x19, PT_S3(sp)
> > >          REG_S x20, PT_S4(sp)
> > > @@ -92,19 +137,19 @@
> > >          REG_S x29, PT_T4(sp)
> > >          REG_S x30, PT_T5(sp)
> > >          REG_S x31, PT_T6(sp)
> > > +
> > > +       // save s0 if FP_TEST defined
> > > +
> > > +       .else
> > > +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
> > > +       REG_S x8,  PT_S0(sp)
> > > +#endif
> > > +       .endif
> > >          .endm
> > >
> > > -       .macro RESTORE_ALL
> > > +       .macro RESTORE_ABI_REGS, all=0
> > >          REG_L t0,  PT_EPC(sp)
> > >          REG_L x1,  PT_RA(sp)
> > > -       REG_L x2,  PT_SP(sp)
> > > -       REG_L x3,  PT_GP(sp)
> > > -       REG_L x4,  PT_TP(sp)
> > > -       REG_L x5,  PT_T0(sp)
> > > -       REG_L x6,  PT_T1(sp)
> > > -       REG_L x7,  PT_T2(sp)
> > > -       REG_L x8,  PT_S0(sp)
> > > -       REG_L x9,  PT_S1(sp)
> > >          REG_L x10, PT_A0(sp)
> > >          REG_L x11, PT_A1(sp)
> > >          REG_L x12, PT_A2(sp)
> > > @@ -113,6 +158,16 @@
> > >          REG_L x15, PT_A5(sp)
> > >          REG_L x16, PT_A6(sp)
> > >          REG_L x17, PT_A7(sp)
> > > +
> > > +       .if \all == 1
> > > +       REG_L x2,  PT_SP(sp)
> > > +       REG_L x3,  PT_GP(sp)
> > > +       REG_L x4,  PT_TP(sp)
> > > +       REG_L x5,  PT_T0(sp)
> >
> > Same as for the patch #3, please skip "REG_L x5,  PT_T0(sp)" here. The
> > correct value of t0/x5 has already been read from PT_EPC(sp) at this point.
> Oh, I don't want to do that here. It's a common macro. Because it's a
> continuous load within the cacheline, I don't think it would cause a
> performance gap.
I misunderstood here; you're correct. The "REG_L x5,  PT_T0(sp)"
should be skipped.

>
>
> >
> > > +       REG_L x6,  PT_T1(sp)
> > > +       REG_L x7,  PT_T2(sp)
> > > +       REG_L x8,  PT_S0(sp)
> > > +       REG_L x9,  PT_S1(sp)
> > >          REG_L x18, PT_S2(sp)
> > >          REG_L x19, PT_S3(sp)
> > >          REG_L x20, PT_S4(sp)
> > > @@ -128,10 +183,25 @@
> > >          REG_L x30, PT_T5(sp)
> > >          REG_L x31, PT_T6(sp)
> > >
> > > +       .else
> > > +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
> > > +       REG_L x8,  PT_S0(sp)
> > > +#endif
> > > +       .endif
> > >          addi    sp, sp, PT_SIZE_ON_STACK
> > >          .endm
> > > +
> > > +       .macro PREPARE_ARGS
> > > +       addi    a0, t0, -FENTRY_RA_OFFSET       // ip
> > > +       la      a1, function_trace_op
> > > +       REG_L   a2, 0(a1)                       // op
> > > +       mv      a1, ra                          // parent_ip
> > > +       mv      a3, sp                          // fregs
> > > +       .endm
> > > +
> > >   #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
> > >
> > > +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> > >   ENTRY(ftrace_caller)
> > >          SAVE_ABI
> > >
> > > @@ -160,33 +230,29 @@ ftrace_graph_call:
> > >          jr t0
> > >   ENDPROC(ftrace_caller)
> > >
> > > -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> > > +#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
> > >   ENTRY(ftrace_regs_caller)
> > > -       SAVE_ALL
> > > -
> > > -       addi    a0, t0, -FENTRY_RA_OFFSET
> > > -       la      a1, function_trace_op
> > > -       REG_L   a2, 0(a1)
> > > -       mv      a1, ra
> > > -       mv      a3, sp
> > > +       SAVE_ABI_REGS 1
> > > +       PREPARE_ARGS
> > >
> > >   ftrace_regs_call:
> > >          .global ftrace_regs_call
> > >          call    ftrace_stub
> > >
> > > -#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> > > -       addi    a0, sp, PT_RA
> > > -       REG_L   a1, PT_T0(sp)
> > > -       addi    a1, a1, -FENTRY_RA_OFFSET
> > > -#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
> > > -       mv      a2, s0
> > > -#endif
> > > -ftrace_graph_regs_call:
> > > -       .global ftrace_graph_regs_call
> > > -       call    ftrace_stub
> > > -#endif
> > >
> > > -       RESTORE_ALL
> > > +       RESTORE_ABI_REGS 1
> > >          jr t0
> > >   ENDPROC(ftrace_regs_caller)
> > > +
> > > +ENTRY(ftrace_caller)
> > > +       SAVE_ABI_REGS 0
> > > +       PREPARE_ARGS
> > > +
> > > +ftrace_call:
> > > +       .global ftrace_call
> > > +       call    ftrace_stub
> > > +
> > > +       RESTORE_ABI_REGS 0
> > > +       jr t0
> > > +ENDPROC(ftrace_caller)
> > >   #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
> > > --
> > > 2.36.1
> > >
> > >
> > Regards,
> > Evgenii
> >
> >
>
>
> --
> Best Regards
>  Guo Ren
diff mbox series

Patch

diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
index b75332ced757..d7d4d51b4bd7 100644
--- a/arch/riscv/kernel/mcount-dyn.S
+++ b/arch/riscv/kernel/mcount-dyn.S
@@ -57,19 +57,52 @@ 
 	.endm
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-	.macro SAVE_ALL
+
+/**
+* SAVE_ABI_REGS - save regs against the pt_regs struct
+*
+* @all: tell if saving all the regs
+*
+* If all is set, all the regs will be saved, otherwise only ABI
+* related regs (a0-a7,epc,ra and optional s0) will be saved.
+*
+* After the stack is established,
+*
+* 0(sp) stores the PC of the traced function which can be accessed
+* by &(fregs)->regs->epc in tracing function. Note that the real
+* function entry address should be computed with -FENTRY_RA_OFFSET.
+*
+* 8(sp) stores the function return address (i.e. parent IP) that
+* can be accessed by &(fregs)->regs->ra in tracing function.
+*
+* The other regs are saved at the respective location and accessed
+* by the respective pt_regs member.
+*
+* Here is the layout of stack for your reference.
+*
+* PT_SIZE_ON_STACK  ->  +++++++++
+*                       + ..... +
+*                       + t3-t6 +
+*                       + s2-s11+
+*                       + a0-a7 + --++++-> ftrace_caller saved
+*                       + s1    +   +
+*                       + s0    + --+
+*                       + t0-t2 +   +
+*                       + tp    +   +
+*                       + gp    +   +
+*                       + sp    +   +
+*                       + ra    + --+ // parent IP
+*               sp  ->  + epc   + --+ // PC
+*                       +++++++++
+**/
+	.macro SAVE_ABI_REGS, all=0
 	addi	sp, sp, -PT_SIZE_ON_STACK
 
 	REG_S t0,  PT_EPC(sp)
 	REG_S x1,  PT_RA(sp)
-	REG_S x2,  PT_SP(sp)
-	REG_S x3,  PT_GP(sp)
-	REG_S x4,  PT_TP(sp)
-	REG_S x5,  PT_T0(sp)
-	REG_S x6,  PT_T1(sp)
-	REG_S x7,  PT_T2(sp)
-	REG_S x8,  PT_S0(sp)
-	REG_S x9,  PT_S1(sp)
+
+	// always save the ABI regs
+
 	REG_S x10, PT_A0(sp)
 	REG_S x11, PT_A1(sp)
 	REG_S x12, PT_A2(sp)
@@ -78,6 +111,18 @@ 
 	REG_S x15, PT_A5(sp)
 	REG_S x16, PT_A6(sp)
 	REG_S x17, PT_A7(sp)
+
+	// save the leftover regs
+
+	.if \all == 1
+	REG_S x2,  PT_SP(sp)
+	REG_S x3,  PT_GP(sp)
+	REG_S x4,  PT_TP(sp)
+	REG_S x5,  PT_T0(sp)
+	REG_S x6,  PT_T1(sp)
+	REG_S x7,  PT_T2(sp)
+	REG_S x8,  PT_S0(sp)
+	REG_S x9,  PT_S1(sp)
 	REG_S x18, PT_S2(sp)
 	REG_S x19, PT_S3(sp)
 	REG_S x20, PT_S4(sp)
@@ -92,19 +137,19 @@ 
 	REG_S x29, PT_T4(sp)
 	REG_S x30, PT_T5(sp)
 	REG_S x31, PT_T6(sp)
+
+	// save s0 if FP_TEST defined
+
+	.else
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	REG_S x8,  PT_S0(sp)
+#endif
+	.endif
 	.endm
 
-	.macro RESTORE_ALL
+	.macro RESTORE_ABI_REGS, all=0
 	REG_L t0,  PT_EPC(sp)
 	REG_L x1,  PT_RA(sp)
-	REG_L x2,  PT_SP(sp)
-	REG_L x3,  PT_GP(sp)
-	REG_L x4,  PT_TP(sp)
-	REG_L x5,  PT_T0(sp)
-	REG_L x6,  PT_T1(sp)
-	REG_L x7,  PT_T2(sp)
-	REG_L x8,  PT_S0(sp)
-	REG_L x9,  PT_S1(sp)
 	REG_L x10, PT_A0(sp)
 	REG_L x11, PT_A1(sp)
 	REG_L x12, PT_A2(sp)
@@ -113,6 +158,16 @@ 
 	REG_L x15, PT_A5(sp)
 	REG_L x16, PT_A6(sp)
 	REG_L x17, PT_A7(sp)
+
+	.if \all == 1
+	REG_L x2,  PT_SP(sp)
+	REG_L x3,  PT_GP(sp)
+	REG_L x4,  PT_TP(sp)
+	REG_L x5,  PT_T0(sp)
+	REG_L x6,  PT_T1(sp)
+	REG_L x7,  PT_T2(sp)
+	REG_L x8,  PT_S0(sp)
+	REG_L x9,  PT_S1(sp)
 	REG_L x18, PT_S2(sp)
 	REG_L x19, PT_S3(sp)
 	REG_L x20, PT_S4(sp)
@@ -128,10 +183,25 @@ 
 	REG_L x30, PT_T5(sp)
 	REG_L x31, PT_T6(sp)
 
+	.else
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	REG_L x8,  PT_S0(sp)
+#endif
+	.endif
 	addi	sp, sp, PT_SIZE_ON_STACK
 	.endm
+
+	.macro PREPARE_ARGS			// set up a0-a3 for the tracing function call
+	addi	a0, t0, -FENTRY_RA_OFFSET	// a0 = ip: t0 minus FENTRY_RA_OFFSET
+	la	a1, function_trace_op		// a1 = address of function_trace_op (scratch; overwritten below)
+	REG_L	a2, 0(a1)			// a2 = op: value loaded from function_trace_op
+	mv	a1, ra				// a1 = parent_ip: copy of ra
+	mv	a3, sp				// a3 = fregs: current sp
+	.endm
+
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 
+#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 ENTRY(ftrace_caller)
 	SAVE_ABI
 
@@ -160,33 +230,29 @@  ftrace_graph_call:
 	jr t0
 ENDPROC(ftrace_caller)
 
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 ENTRY(ftrace_regs_caller)
-	SAVE_ALL
-
-	addi	a0, t0, -FENTRY_RA_OFFSET
-	la	a1, function_trace_op
-	REG_L	a2, 0(a1)
-	mv	a1, ra
-	mv	a3, sp
+	SAVE_ABI_REGS 1		// all=1: save every reg into the pt_regs-layout area on the stack
+	PREPARE_ARGS		// a0 = ip, a1 = parent_ip, a2 = op, a3 = fregs
 
 ftrace_regs_call:
 	.global ftrace_regs_call
 	call	ftrace_stub	// NOTE(review): presumably a call site rewritten at runtime - confirm
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	addi	a0, sp, PT_RA
-	REG_L	a1, PT_T0(sp)
-	addi	a1, a1, -FENTRY_RA_OFFSET
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
-	mv	a2, s0
-#endif
-ftrace_graph_regs_call:
-	.global ftrace_graph_regs_call
-	call	ftrace_stub
-#endif
 
-	RESTORE_ALL
+	RESTORE_ABI_REGS 1	// all=1: reload every saved reg and release the stack area
 	jr t0			// NOTE(review): with all=1 the macro loads t0 from PT_EPC and then x5 (= t0) again from PT_T0, so this jump uses the PT_T0 value; the review thread asks to drop that second load
 ENDPROC(ftrace_regs_caller)
+
+ENTRY(ftrace_caller)		// variant built when CONFIG_DYNAMIC_FTRACE_WITH_REGS is defined
+	SAVE_ABI_REGS 0		// all=0: save epc, ra, a0-a7 (plus s0 under HAVE_FUNCTION_GRAPH_FP_TEST)
+	PREPARE_ARGS		// a0 = ip, a1 = parent_ip, a2 = op, a3 = fregs
+
+ftrace_call:
+	.global ftrace_call
+	call	ftrace_stub	// NOTE(review): presumably a call site rewritten at runtime - confirm
+
+	RESTORE_ABI_REGS 0	// all=0: reload the regs saved above and release the stack area
+	jr t0			// t0 was reloaded from PT_EPC by RESTORE_ABI_REGS
+ENDPROC(ftrace_caller)
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */