diff mbox

[PATCHv3] arm: ftrace: Adds support for CONFIG_DYNAMIC_FTRACE_WITH_REGS

Message ID 1486508275-18449-1-git-send-email-abelvesa@linux.com (mailing list archive)
State New, archived
Headers show

Commit Message

Abel Vesa Feb. 7, 2017, 10:57 p.m. UTC
The DYNAMIC_FTRACE_WITH_REGS configuration makes it possible for a ftrace
operation to specify if registers need to saved/restored by the ftrace handler.
This is needed by kgraft and possibly other ftrace-based tools, and the ARM
architecture is currently lacking this feature. It would also be the first step
to support the "Kprobes-on-ftrace" optimization on ARM.

This patch introduces a new ftrace handler that stores the registers on the
stack before calling the next stage. The registers are restored from the stack
before going back to the instrumented function.

A side-effect of this patch is to activate the support for ftrace_modify_call()
as it defines ARCH_SUPPORTS_FTRACE_OPS for the ARM architecture.

Signed-off-by: Abel Vesa <abelvesa@linux.com>
---
 arch/arm/Kconfig               |  1 +
 arch/arm/include/asm/ftrace.h  |  4 +++
 arch/arm/kernel/entry-ftrace.S | 79 ++++++++++++++++++++++++++++++++++++++++++
 arch/arm/kernel/ftrace.c       | 37 ++++++++++++++++++++
 4 files changed, 121 insertions(+)

Comments

Jean-Jacques Hiblot Feb. 9, 2017, 3:38 p.m. UTC | #1
2017-02-07 23:57 GMT+01:00 Abel Vesa <abelvesa@linux.com>:
> The DYNAMIC_FTRACE_WITH_REGS configuration makes it possible for a ftrace
> operation to specify if registers need to saved/restored by the ftrace handler.
> This is needed by kgraft and possibly other ftrace-based tools, and the ARM
> architecture is currently lacking this feature. It would also be the first step
> to support the "Kprobes-on-ftrace" optimization on ARM.
>
> This patch introduces a new ftrace handler that stores the registers on the
> stack before calling the next stage. The registers are restored from the stack
> before going back to the instrumented function.
>
> A side-effect of this patch is to activate the support for ftrace_modify_call()
> as it defines ARCH_SUPPORTS_FTRACE_OPS for the ARM architecture.
>
> Signed-off-by: Abel Vesa <abelvesa@linux.com>
> ---
>  arch/arm/Kconfig               |  1 +
>  arch/arm/include/asm/ftrace.h  |  4 +++
>  arch/arm/kernel/entry-ftrace.S | 79 ++++++++++++++++++++++++++++++++++++++++++
>  arch/arm/kernel/ftrace.c       | 37 ++++++++++++++++++++
>  4 files changed, 121 insertions(+)
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index 186c4c2..df401f4 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -50,6 +50,7 @@ config ARM
>         select HAVE_DMA_API_DEBUG
>         select HAVE_DMA_CONTIGUOUS if MMU
>         select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32 && MMU
> +       select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE && OLD_MCOUNT
>         select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
>         select HAVE_EXIT_THREAD
>         select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
> diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
> index 22b7311..f379881 100644
> --- a/arch/arm/include/asm/ftrace.h
> +++ b/arch/arm/include/asm/ftrace.h
> @@ -1,6 +1,10 @@
>  #ifndef _ASM_ARM_FTRACE
>  #define _ASM_ARM_FTRACE
>
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +#define ARCH_SUPPORTS_FTRACE_OPS 1
> +#endif
> +
>  #ifdef CONFIG_FUNCTION_TRACER
>  #define MCOUNT_ADDR            ((unsigned long)(__gnu_mcount_nc))
>  #define MCOUNT_INSN_SIZE       4 /* sizeof mcount call */
> diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S
> index c73c403..b1fee6c 100644
> --- a/arch/arm/kernel/entry-ftrace.S
> +++ b/arch/arm/kernel/entry-ftrace.S
> @@ -92,12 +92,74 @@
>  2:     mcount_exit
>  .endm
>
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +
> +.macro __ftrace_regs_caller
> +
> +       add     ip, sp, #4      @ move in IP the value of SP as it was
> +                               @ before the push {lr} of the mcount mechanism
> +       stmdb   sp!, {ip,lr,pc}
> +       stmdb   sp!, {r0-r11,lr}
> +
> +       @ stack content at this point:
> +       @ 0  4          44    48   52       56   60   64
> +       @ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR |
> +
> +       mov r3, sp                              @ struct pt_regs*
> +       ldr r2, =function_trace_op
> +       ldr r2, [r2]                            @ pointer to the current
> +                                               @ function tracing op
> +       ldr     r1, [sp, #64]                   @ lr of instrumented func
> +       mcount_adjust_addr      r0, lr          @ instrumented function
> +
> +       .globl ftrace_regs_call
> +ftrace_regs_call:
> +       bl      ftrace_stub
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +       .globl ftrace_graph_regs_call
> +ftrace_graph_regs_call:
> +       mov     r0, r0
> +#endif
> +       @ pop saved regs
> +       ldr     lr, [sp, #64]                   @ get the previous LR value from stack
> +       ldmia   sp, {r0-r11, ip, sp, pc}        @ pop the saved registers INCLUDING
> +                                               @ the stack pointer
> +.endm
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +.macro __ftrace_graph_regs_caller
> +
> +       sub     r0, fp, #4              @ lr of instrumented routine (parent)
> +
> +       @ called from __ftrace_regs_caller
> +       ldr     r1, [sp, #56]           @ instrumented routine (func)
> +       mcount_adjust_addr      r1, r1
> +
> +       sub     r2, r0, #4              @ frame pointer
why not mov r2, fp  ?


> +       bl      prepare_ftrace_return
> +
> +       @ pop regs saved in ftrace_regs_caller
> +       ldr     lr, [sp, #64]                   @ restore lr from the stack
> +       ldmia   sp, {r0-r11, ip, sp, pc}        @ restore r0 through pc
> +
> +.endm
> +#endif
> +#endif
> +
>  .macro __ftrace_caller suffix
>         mcount_enter
>
>         mcount_get_lr   r1                      @ lr of instrumented func
>         mcount_adjust_addr      r0, lr          @ instrumented function
>
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +       ldr r2, =function_trace_op
> +       ldr r2, [r2]                            @ pointer to the current
> +                                               @ function tracing op
> +       mov r3, #0                              @ regs is NULL
> +#endif
> +
>         .globl ftrace_call\suffix
>  ftrace_call\suffix:
>         bl      ftrace_stub
> @@ -212,6 +274,15 @@ UNWIND(.fnstart)
>         __ftrace_caller
>  UNWIND(.fnend)
>  ENDPROC(ftrace_caller)
> +
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +ENTRY(ftrace_regs_caller)
> +UNWIND(.fnstart)
> +       __ftrace_regs_caller
> +UNWIND(.fnend)
> +ENDPROC(ftrace_regs_caller)
> +#endif
> +
>  #endif
>
>  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
> @@ -220,6 +291,14 @@ UNWIND(.fnstart)
>         __ftrace_graph_caller
>  UNWIND(.fnend)
>  ENDPROC(ftrace_graph_caller)
> +
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +ENTRY(ftrace_graph_regs_caller)
> +UNWIND(.fnstart)
> +       __ftrace_graph_regs_caller
> +UNWIND(.fnend)
> +ENDPROC(ftrace_graph_regs_caller)
> +#endif
>  #endif
>
>  .purgem mcount_enter
> diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
> index 3f17594..cb0358c 100644
> --- a/arch/arm/kernel/ftrace.c
> +++ b/arch/arm/kernel/ftrace.c
> @@ -139,6 +139,15 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
>
>         ret = ftrace_modify_code(pc, 0, new, false);
>
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +       if (!ret) {
> +               pc = (unsigned long)&ftrace_regs_call;
> +               new = ftrace_call_replace(pc, (unsigned long)func);
> +
> +               ret = ftrace_modify_code(pc, 0, new, false);
> +       }
> +#endif
> +
>  #ifdef CONFIG_OLD_MCOUNT
>         if (!ret) {
>                 pc = (unsigned long)&ftrace_call_old;
> @@ -157,11 +166,29 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>         unsigned long ip = rec->ip;
>
>         old = ftrace_nop_replace(rec);
> +
> +       new = ftrace_call_replace(ip, adjust_address(rec, addr));
> +
> +       return ftrace_modify_code(rec->ip, old, new, true);
> +}
> +
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +
> +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
> +                               unsigned long addr)
> +{
> +       unsigned long new, old;
> +       unsigned long ip = rec->ip;
> +
> +       old = ftrace_call_replace(ip, adjust_address(rec, old_addr));
> +
>         new = ftrace_call_replace(ip, adjust_address(rec, addr));
>
>         return ftrace_modify_code(rec->ip, old, new, true);
>  }
>
> +#endif
> +
>  int ftrace_make_nop(struct module *mod,
>                     struct dyn_ftrace *rec, unsigned long addr)
>  {
> @@ -229,6 +256,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
>  extern unsigned long ftrace_graph_call;
>  extern unsigned long ftrace_graph_call_old;
>  extern void ftrace_graph_caller_old(void);
> +extern unsigned long ftrace_graph_regs_call;
> +extern void ftrace_graph_regs_caller(void);
>
>  static int __ftrace_modify_caller(unsigned long *callsite,
>                                   void (*func) (void), bool enable)
> @@ -251,6 +280,14 @@ static int ftrace_modify_graph_caller(bool enable)
>                                      ftrace_graph_caller,
>                                      enable);
>
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +       if (!ret)
> +               ret = __ftrace_modify_caller(&ftrace_graph_regs_call,
> +                                    ftrace_graph_regs_caller,
> +                                    enable);
> +#endif
> +
> +
>  #ifdef CONFIG_OLD_MCOUNT
>         if (!ret)
>                 ret = __ftrace_modify_caller(&ftrace_graph_call_old,
> --
> 2.7.4
>
Russell King (Oracle) Feb. 9, 2017, 3:49 p.m. UTC | #2
On Thu, Feb 09, 2017 at 04:38:32PM +0100, Jean-Jacques Hiblot wrote:
> 2017-02-07 23:57 GMT+01:00 Abel Vesa <abelvesa@linux.com>:
> > +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> > +.macro __ftrace_graph_regs_caller
> > +
> > +       sub     r0, fp, #4              @ lr of instrumented routine (parent)
> > +
> > +       @ called from __ftrace_regs_caller
> > +       ldr     r1, [sp, #56]           @ instrumented routine (func)
> > +       mcount_adjust_addr      r1, r1
> > +
> > +       sub     r2, r0, #4              @ frame pointer
> why not mov r2, fp  ?

	r0 = fp - 4
	r2 = r0 - 4

therefore

	r2 = (fp - 4) - 4
	r2 = fp - 8, not r2 = fp
Russell King (Oracle) Feb. 9, 2017, 4:29 p.m. UTC | #3
On Tue, Feb 07, 2017 at 10:57:55PM +0000, Abel Vesa wrote:
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> +
> +.macro __ftrace_regs_caller
> +
> +	add 	ip, sp, #4	@ move in IP the value of SP as it was
> +				@ before the push {lr} of the mcount mechanism
> +	stmdb	sp!, {ip,lr,pc}
> +	stmdb	sp!, {r0-r11,lr}
> +
> +	@ stack content at this point:
> +	@ 0  4          44    48   52       56   60   64
> +	@ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR |

How important is this to be close to "struct pt_regs" ?  Do we care about
r12 being "wrong" ?  The other issue is that pt_regs is actually 72
bytes in size, not 68 bytes.  So, does that mean we end up inappropriately
leaking some of the kernel stack to userspace through ftrace?

It's possible to save all the registers like this if we need to provide
a complete picture of the register set at function entry:

	str	ip, [sp, #-16]!
	add	ip, sp, #20
	stmia	sp, {ip, lr, pc}
	stmdb	sp!, {r0 - r11}

However, is that even correct - don't we want pt_regs' LR and PC to be
related to the function call itself?  The "previous LR" as you describe
it is where the called function (the one that is being traced) will
return to.  The current LR at this point is the address within the
traced function.  So actually I think this is more strictly correct, if
I'm understanding the intention here correctly:

	str	ip, [sp, #S_IP - PT_REGS_SIZE]!	@ save current IP
	ldr	ip, [sp, #PT_REGS_SIZE - S_IP]	@ get LR at traced function entry
	str	lr, [sp, #S_PC - S_IP]		@ save current LR as PC
	str	ip, [sp, #S_LR - S_IP]		@ save traced function return
	add	ip, sp, #PT_REGS_SIZE - S_IP + 4
	str	ip, [sp, #S_SP - SP_IP]		@ save stack pointer at function entry
	stmdb	sp!, {r0 - r11}
	@ clear CPSR and old_r0 words
	mov	r3, #0
	str	r3, [sp, #S_PSR]
	str	r3, [sp, #S_OLD_R0]

However, that has the side effect of misaligning the stack (the stack
needs to be aligned to 8 bytes).  So, if we decide we don't care about
the saved LR value (except as a mechanism to preserve it across the
call into the ftrace code):

	str	ip, [sp, #S_IP - PT_REGS_SIZE + 4]!
	str	lr, [sp, #S_PC - S_IP]
	ldr	lr, [sp, #PT_REGS_SIZE - 4 - S_IP]
	add	ip, sp, #PT_REGS_SIZE - S_IP
	stmib	sp, {ip, lr}
	stmdb	sp!, {r0 - r11}
	@ clear CPSR and old_r0 words
	mov	r3, #0
	str	r3, [sp, #S_PSR]
	str	r3, [sp, #S_OLD_R0]

and the return would be:

	ldmia	sp, {r0 - pc}

That all said - maybe someone from the ftrace community can comment on
how much of pt_regs is actually necessary here?
Steven Rostedt Feb. 9, 2017, 5:13 p.m. UTC | #4
On Thu, 9 Feb 2017 16:29:56 +0000
Russell King - ARM Linux <linux@armlinux.org.uk> wrote:

> On Tue, Feb 07, 2017 at 10:57:55PM +0000, Abel Vesa wrote:
> > +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> > +
> > +.macro __ftrace_regs_caller
> > +
> > +	add 	ip, sp, #4	@ move in IP the value of SP as it was
> > +				@ before the push {lr} of the mcount mechanism
> > +	stmdb	sp!, {ip,lr,pc}
> > +	stmdb	sp!, {r0-r11,lr}
> > +
> > +	@ stack content at this point:
> > +	@ 0  4          44    48   52       56   60   64
> > +	@ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR |  
> 
> How important is this to be close to "struct pt_regs" ?  Do we care about
> r12 being "wrong" ?  The other issue is that pt_regs is actually 72
> bytes in size, not 68 bytes.  So, does that mean we end up inappropriately
> leaking some of the kernel stack to userspace through ftrace?

The regs passed to the ftrace code isn't passed to userspace. It's used
by kprobes as a "fake breakpoint" (like fake news?), and by kernel live
patching to modify what function actually gets called after ftrace
returns.


> 
> It's possible to save all the registers like this if we need to provide
> a complete picture of the register set at function entry:
> 
> 	str	ip, [sp, #-16]!
> 	add	ip, sp, #20
> 	stmia	sp, {ip, lr, pc}
> 	stmdb	sp!, {r0 - r11}
> 
> However, is that even correct - don't we want pt_regs' LR and PC to be
> related to the function call itself?  The "previous LR" as you describe
> it is where the called function (the one that is being traced) will
> return to.  The current LR at this point is the address within the
> traced function.  So actually I think this is more strictly correct, if
> I'm understanding the intention here correctly:
> 
> 	str	ip, [sp, #S_IP - PT_REGS_SIZE]!	@ save current IP
> 	ldr	ip, [sp, #PT_REGS_SIZE - S_IP]	@ get LR at traced function entry
> 	str	lr, [sp, #S_PC - S_IP]		@ save current LR as PC
> 	str	ip, [sp, #S_LR - S_IP]		@ save traced function return
> 	add	ip, sp, #PT_REGS_SIZE - S_IP + 4
> 	str	ip, [sp, #S_SP - SP_IP]		@ save stack pointer at function entry
> 	stmdb	sp!, {r0 - r11}
> 	@ clear CPSR and old_r0 words
> 	mov	r3, #0
> 	str	r3, [sp, #S_PSR]
> 	str	r3, [sp, #S_OLD_R0]
> 
> However, that has the side effect of misaligning the stack (the stack
> needs to be aligned to 8 bytes).  So, if we decide we don't care about
> the saved LR value (except as a mechanism to preserve it across the
> call into the ftrace code):
> 
> 	str	ip, [sp, #S_IP - PT_REGS_SIZE + 4]!
> 	str	lr, [sp, #S_PC - S_IP]
> 	ldr	lr, [sp, #PT_REGS_SIZE - 4 - S_IP]
> 	add	ip, sp, #PT_REGS_SIZE - S_IP
> 	stmib	sp, {ip, lr}
> 	stmdb	sp!, {r0 - r11}
> 	@ clear CPSR and old_r0 words
> 	mov	r3, #0
> 	str	r3, [sp, #S_PSR]
> 	str	r3, [sp, #S_OLD_R0]
> 
> and the return would be:
> 
> 	ldmia	sp, {r0 - pc}
> 
> That all said - maybe someone from the ftrace community can comment on
> how much of pt_regs is actually necessary here?
> 

Matters about the users. The REGS was originally created for kprobes,
to simulate a kprobe breakpoint. As calling kprobes directly is much
faster than going through the breakpoint mechanism. As adding a kprobe
to the start of a function is a very common practice, it made sense to
have ftrace give it a boost.

Then came along live kernel patching, which I believe this series is
trying to support. What is needed by pt_regs is a way to "hijack" the
function being called to instead call the patched function. That is,
ftrace is not being used for tracing, but in reality, being used to
modify the running kernel. It is being used to change what function
gets called. ftrace is just a hook for that mechanism.

-- Steve
Russell King (Oracle) Feb. 9, 2017, 6:06 p.m. UTC | #5
On Thu, Feb 09, 2017 at 12:13:22PM -0500, Steven Rostedt wrote:
> Then came along live kernel patching, which I believe this series is
> trying to support. What is needed by pt_regs is a way to "hijack" the
> function being called to instead call the patched function. That is,
> ftrace is not being used for tracing, but in reality, being used to
> modify the running kernel. It is being used to change what function
> gets called. ftrace is just a hook for that mechanism.

So, would I be correct to assume that the only parts of pt_regs that
would be touched are those which contain arguments to the function,
and the register which would contain the return value?
Steven Rostedt Feb. 9, 2017, 6:14 p.m. UTC | #6
On Thu, 9 Feb 2017 18:06:44 +0000
Russell King - ARM Linux <linux@armlinux.org.uk> wrote:

> On Thu, Feb 09, 2017 at 12:13:22PM -0500, Steven Rostedt wrote:
> > Then came along live kernel patching, which I believe this series is
> > trying to support. What is needed by pt_regs is a way to "hijack" the
> > function being called to instead call the patched function. That is,
> > ftrace is not being used for tracing, but in reality, being used to
> > modify the running kernel. It is being used to change what function
> > gets called. ftrace is just a hook for that mechanism.  
> 
> So, would I be correct to assume that the only parts of pt_regs that
> would be touched are those which contain arguments to the function,
> and the register which would contain the return value?
> 

For live kernel patching, perhaps.

But for kprobes, I think they can touch anything. Matters what the
creater of the kprobe wanted to do.

-- Steve
Steven Rostedt Feb. 9, 2017, 6:14 p.m. UTC | #7
[ sending again with Masami Cc'd ]

On Thu, 9 Feb 2017 13:14:14 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Thu, 9 Feb 2017 18:06:44 +0000
> Russell King - ARM Linux <linux@armlinux.org.uk> wrote:
> 
> > On Thu, Feb 09, 2017 at 12:13:22PM -0500, Steven Rostedt wrote:  
> > > Then came along live kernel patching, which I believe this series is
> > > trying to support. What is needed by pt_regs is a way to "hijack" the
> > > function being called to instead call the patched function. That is,
> > > ftrace is not being used for tracing, but in reality, being used to
> > > modify the running kernel. It is being used to change what function
> > > gets called. ftrace is just a hook for that mechanism.    
> > 
> > So, would I be correct to assume that the only parts of pt_regs that
> > would be touched are those which contain arguments to the function,
> > and the register which would contain the return value?
> >   
> 
> For live kernel patching, perhaps.
> 
> But for kprobes, I think they can touch anything. Matters what the
> creater of the kprobe wanted to do.
> 
> -- Steve
Abel Vesa Feb. 9, 2017, 6:18 p.m. UTC | #8
On Thu, Feb 09, 2017 at 06:06:44PM +0000, Russell King - ARM Linux wrote:
> On Thu, Feb 09, 2017 at 12:13:22PM -0500, Steven Rostedt wrote:
> > Then came along live kernel patching, which I believe this series is
> > trying to support. What is needed by pt_regs is a way to "hijack" the
> > function being called to instead call the patched function. That is,
> > ftrace is not being used for tracing, but in reality, being used to
> > modify the running kernel. It is being used to change what function
> > gets called. ftrace is just a hook for that mechanism.
> 
> So, would I be correct to assume that the only parts of pt_regs that
> would be touched are those which contain arguments to the function,
> and the register which would contain the return value?
At this point, yeah, but I intend to come up with another patch series
for livepatching which needs the exact same context as the function 
livepatched. This means all the regs need to be saved and restored 
before calling the replacing function.
> 
> -- 
> RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
> according to speedtest.net.
Abel Vesa Feb. 9, 2017, 6:30 p.m. UTC | #9
On Thu, Feb 09, 2017 at 04:29:56PM +0000, Russell King - ARM Linux wrote:
> On Tue, Feb 07, 2017 at 10:57:55PM +0000, Abel Vesa wrote:
> > +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> > +
> > +.macro __ftrace_regs_caller
> > +
> > +	add 	ip, sp, #4	@ move in IP the value of SP as it was
> > +				@ before the push {lr} of the mcount mechanism
> > +	stmdb	sp!, {ip,lr,pc}
> > +	stmdb	sp!, {r0-r11,lr}
> > +
> > +	@ stack content at this point:
> > +	@ 0  4          44    48   52       56   60   64
> > +	@ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR |
> 
> How important is this to be close to "struct pt_regs" ?  Do we care about
> r12 being "wrong" ?  The other issue is that pt_regs is actually 72
> bytes in size, not 68 bytes.  So, does that mean we end up inappropriately
> leaking some of the kernel stack to userspace through ftrace?
You are right. pt_regs is 72 (due to old_r0, AFAIU). The risk is actually to
corrupt the stack if any ftrace_call implementation is writing to pt_regs->uregs[i],
where i >= 16 (at this point). A solution would be to decrement the SP with 4
at the beginning of ftrace_regs_caller, this way ensuring that every ftrace_call
implementation gets to play with the whole size of pt_regs. Will take this into
consideration in the next iteration.
> 
> It's possible to save all the registers like this if we need to provide
> a complete picture of the register set at function entry:
> 
> 	str	ip, [sp, #-16]!
> 	add	ip, sp, #20
> 	stmia	sp, {ip, lr, pc}
> 	stmdb	sp!, {r0 - r11}
> 
> However, is that even correct - don't we want pt_regs' LR and PC to be
> related to the function call itself?  The "previous LR" as you describe
> it is where the called function (the one that is being traced) will
> return to.  The current LR at this point is the address within the
> traced function.  So actually I think this is more strictly correct, if
> I'm understanding the intention here correctly:
> 
> 	str	ip, [sp, #S_IP - PT_REGS_SIZE]!	@ save current IP
> 	ldr	ip, [sp, #PT_REGS_SIZE - S_IP]	@ get LR at traced function entry
> 	str	lr, [sp, #S_PC - S_IP]		@ save current LR as PC
> 	str	ip, [sp, #S_LR - S_IP]		@ save traced function return
> 	add	ip, sp, #PT_REGS_SIZE - S_IP + 4
> 	str	ip, [sp, #S_SP - SP_IP]		@ save stack pointer at function entry
> 	stmdb	sp!, {r0 - r11}
> 	@ clear CPSR and old_r0 words
> 	mov	r3, #0
> 	str	r3, [sp, #S_PSR]
> 	str	r3, [sp, #S_OLD_R0]
> 
> However, that has the side effect of misaligning the stack (the stack
> needs to be aligned to 8 bytes).  So, if we decide we don't care about
> the saved LR value (except as a mechanism to preserve it across the
> call into the ftrace code):
> 
The solution proposed upwards will take care of the stack alignment as well.
Again, LR needed by ftrace_graph_caller/ftrace_regs_graph_caller.
> 	str	ip, [sp, #S_IP - PT_REGS_SIZE + 4]!
> 	str	lr, [sp, #S_PC - S_IP]
> 	ldr	lr, [sp, #PT_REGS_SIZE - 4 - S_IP]
> 	add	ip, sp, #PT_REGS_SIZE - S_IP
> 	stmib	sp, {ip, lr}
> 	stmdb	sp!, {r0 - r11}
> 	@ clear CPSR and old_r0 words
> 	mov	r3, #0
> 	str	r3, [sp, #S_PSR]
> 	str	r3, [sp, #S_OLD_R0]
> 
> and the return would be:
> 
> 	ldmia	sp, {r0 - pc}
> 
> That all said - maybe someone from the ftrace community can comment on
> how much of pt_regs is actually necessary here?
> 
> -- 
> RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
> according to speedtest.net.
Abel Vesa Feb. 9, 2017, 7:01 p.m. UTC | #10
On Thu, Feb 09, 2017 at 01:14:52PM -0500, Steven Rostedt wrote:
> 
> [ sending again with Masami Cc'd ]
> 
> On Thu, 9 Feb 2017 13:14:14 -0500
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > On Thu, 9 Feb 2017 18:06:44 +0000
> > Russell King - ARM Linux <linux@armlinux.org.uk> wrote:
> > 
> > > On Thu, Feb 09, 2017 at 12:13:22PM -0500, Steven Rostedt wrote:  
> > > > Then came along live kernel patching, which I believe this series is
> > > > trying to support. What is needed by pt_regs is a way to "hijack" the
> > > > function being called to instead call the patched function. That is,
> > > > ftrace is not being used for tracing, but in reality, being used to
> > > > modify the running kernel. It is being used to change what function
> > > > gets called. ftrace is just a hook for that mechanism.    
> > > 
> > > So, would I be correct to assume that the only parts of pt_regs that
> > > would be touched are those which contain arguments to the function,
> > > and the register which would contain the return value?
> > >   
> > 
> > For live kernel patching, perhaps.
> > 
> > But for kprobes, I think they can touch anything. Matters what the
> > creater of the kprobe wanted to do.
> > 
Thing is, by saving all of them is the easiest way to ensure that the
whole context is the same when the replacing function gets called, as
I said before.

We can't be sure that while __ftrace_ops_list_func is executing, any of
the regs will have the value they had when the function-to-be-replaced
was called. That's the reason I say we need to save them all.
> > -- Steve
>
Abel Vesa Feb. 9, 2017, 7:09 p.m. UTC | #11
On Thu, Feb 09, 2017 at 07:01:10PM +0000, Abel Vesa wrote:
> On Thu, Feb 09, 2017 at 01:14:52PM -0500, Steven Rostedt wrote:
> > 
> > [ sending again with Masami Cc'd ]
> > 
> > On Thu, 9 Feb 2017 13:14:14 -0500
> > Steven Rostedt <rostedt@goodmis.org> wrote:
> > 
> > > On Thu, 9 Feb 2017 18:06:44 +0000
> > > Russell King - ARM Linux <linux@armlinux.org.uk> wrote:
> > > 
> > > > On Thu, Feb 09, 2017 at 12:13:22PM -0500, Steven Rostedt wrote:  
> > > > > Then came along live kernel patching, which I believe this series is
> > > > > trying to support. What is needed by pt_regs is a way to "hijack" the
> > > > > function being called to instead call the patched function. That is,
> > > > > ftrace is not being used for tracing, but in reality, being used to
> > > > > modify the running kernel. It is being used to change what function
> > > > > gets called. ftrace is just a hook for that mechanism.    
> > > > 
> > > > So, would I be correct to assume that the only parts of pt_regs that
> > > > would be touched are those which contain arguments to the function,
> > > > and the register which would contain the return value?
> > > >   
> > > 
> > > For live kernel patching, perhaps.
> > > 
> > > But for kprobes, I think they can touch anything. Matters what the
> > > creater of the kprobe wanted to do.
> > > 
> Thing is, by saving all of them is the easiest way to ensure that the
> whole context is the same when the replacing function gets called, as
> I said before.
> 
> We can't be sure that while __ftrace_ops_list_func is executing, any of
> the regs will have the value they had when the function-to-be-replaced
> was called. That's the reason I say we need to save them all.
Scratch that, I'm wrong, the reason is stupid. The context gets restored anyway after
__ftrace_ops_list_func is done.
> > > -- Steve
> >
Jean-Jacques Hiblot Feb. 10, 2017, 10:36 a.m. UTC | #12
2017-02-09 17:29 GMT+01:00 Russell King - ARM Linux <linux@armlinux.org.uk>:
> On Tue, Feb 07, 2017 at 10:57:55PM +0000, Abel Vesa wrote:
>> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
>> +
>> +.macro __ftrace_regs_caller
>> +
>> +     add     ip, sp, #4      @ move in IP the value of SP as it was
>> +                             @ before the push {lr} of the mcount mechanism
>> +     stmdb   sp!, {ip,lr,pc}
>> +     stmdb   sp!, {r0-r11,lr}
>> +
>> +     @ stack content at this point:
>> +     @ 0  4          44    48   52       56   60   64
>> +     @ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR |
>
> How important is this to be close to "struct pt_regs" ?  Do we care about
> r12 being "wrong" ?  The other issue is that pt_regs is actually 72
> bytes in size, not 68 bytes.  So, does that mean we end up inappropriately
> leaking some of the kernel stack to userspace through ftrace?
>
> It's possible to save all the registers like this if we need to provide
> a complete picture of the register set at function entry:
>
>         str     ip, [sp, #-16]!
>         add     ip, sp, #20
>         stmia   sp, {ip, lr, pc}
>         stmdb   sp!, {r0 - r11}
>
> However, is that even correct - don't we want pt_regs' LR and PC to be
> related to the function call itself?  The "previous LR" as you describe
> it is where the called function (the one that is being traced) will
> return to.  The current LR at this point is the address within the
> traced function.  So actually I think this is more strictly correct, if
> I'm understanding the intention here correctly:
>
>         str     ip, [sp, #S_IP - PT_REGS_SIZE]! @ save current IP
>         ldr     ip, [sp, #PT_REGS_SIZE - S_IP]  @ get LR at traced function entry
>         str     lr, [sp, #S_PC - S_IP]          @ save current LR as PC
>         str     ip, [sp, #S_LR - S_IP]          @ save traced function return
>         add     ip, sp, #PT_REGS_SIZE - S_IP + 4
>         str     ip, [sp, #S_SP - SP_IP]         @ save stack pointer at function entry
>         stmdb   sp!, {r0 - r11}
>         @ clear CPSR and old_r0 words
>         mov     r3, #0
>         str     r3, [sp, #S_PSR]
>         str     r3, [sp, #S_OLD_R0]
>
> However, that has the side effect of misaligning the stack (the stack
> needs to be aligned to 8 bytes).  So, if we decide we don't care about
> the saved LR value (except as a mechanism to preserve it across the
> call into the ftrace code):
>
>         str     ip, [sp, #S_IP - PT_REGS_SIZE + 4]!
>         str     lr, [sp, #S_PC - S_IP]
>         ldr     lr, [sp, #PT_REGS_SIZE - 4 - S_IP]
>         add     ip, sp, #PT_REGS_SIZE - S_IP
>         stmib   sp, {ip, lr}
>         stmdb   sp!, {r0 - r11}
>         @ clear CPSR and old_r0 words
>         mov     r3, #0
>         str     r3, [sp, #S_PSR]
>         str     r3, [sp, #S_OLD_R0]
>
> and the return would be:
>
>         ldmia   sp, {r0 - pc}
>
> That all said - maybe someone from the ftrace community can comment on
> how much of pt_regs is actually necessary here?

I would suggest the following:
r0-r11: filled with current values.
r12 :  the value of r12 doesn't matter (Intra-procedure call scratch
reg), we can either save it or not.
r13 - sp: the value as it was when the instrumented function was
entered. in the mcount case, it's the current sp value - 4, otherwise
it'f sp -4
r14 - lr: the value as it was when the instrumented function was
entered. first element in stack or available in frame depending on
GCC's version (mcount vs __gnu_mcount_nc)
r15 - pc : the address after the modified instruction (value of lr
when the ftrace caller is entered)

I don't think we need CSPR and ORIG_r0.

>
> --
> RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
> according to speedtest.net.
Abel Vesa Feb. 10, 2017, 12:03 p.m. UTC | #13
On Fri, Feb 10, 2017 at 11:36:12AM +0100, Jean-Jacques Hiblot wrote:
> 2017-02-09 17:29 GMT+01:00 Russell King - ARM Linux <linux@armlinux.org.uk>:
> > On Tue, Feb 07, 2017 at 10:57:55PM +0000, Abel Vesa wrote:
> >> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> >> +
> >> +.macro __ftrace_regs_caller
> >> +
> >> +     add     ip, sp, #4      @ move in IP the value of SP as it was
> >> +                             @ before the push {lr} of the mcount mechanism
> >> +     stmdb   sp!, {ip,lr,pc}
> >> +     stmdb   sp!, {r0-r11,lr}
> >> +
> >> +     @ stack content at this point:
> >> +     @ 0  4          44    48   52       56   60   64
> >> +     @ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR |
> >
> > How important is this to be close to "struct pt_regs" ?  Do we care about
> > r12 being "wrong" ?  The other issue is that pt_regs is actually 72
> > bytes in size, not 68 bytes.  So, does that mean we end up inappropriately
> > leaking some of the kernel stack to userspace through ftrace?
> >
> > It's possible to save all the registers like this if we need to provide
> > a complete picture of the register set at function entry:
> >
> >         str     ip, [sp, #-16]!
> >         add     ip, sp, #20
> >         stmia   sp, {ip, lr, pc}
> >         stmdb   sp!, {r0 - r11}
> >
> > However, is that even correct - don't we want pt_regs' LR and PC to be
> > related to the function call itself?  The "previous LR" as you describe
> > it is where the called function (the one that is being traced) will
> > return to.  The current LR at this point is the address within the
> > traced function.  So actually I think this is more strictly correct, if
> > I'm understanding the intention here correctly:
> >
> >         str     ip, [sp, #S_IP - PT_REGS_SIZE]! @ save current IP
> >         ldr     ip, [sp, #PT_REGS_SIZE - S_IP]  @ get LR at traced function entry
> >         str     lr, [sp, #S_PC - S_IP]          @ save current LR as PC
> >         str     ip, [sp, #S_LR - S_IP]          @ save traced function return
> >         add     ip, sp, #PT_REGS_SIZE - S_IP + 4
> >         str     ip, [sp, #S_SP - SP_IP]         @ save stack pointer at function entry
> >         stmdb   sp!, {r0 - r11}
> >         @ clear CPSR and old_r0 words
> >         mov     r3, #0
> >         str     r3, [sp, #S_PSR]
> >         str     r3, [sp, #S_OLD_R0]
> >
> > However, that has the side effect of misaligning the stack (the stack
> > needs to be aligned to 8 bytes).  So, if we decide we don't care about
> > the saved LR value (except as a mechanism to preserve it across the
> > call into the ftrace code):
> >
> >         str     ip, [sp, #S_IP - PT_REGS_SIZE + 4]!
> >         str     lr, [sp, #S_PC - S_IP]
> >         ldr     lr, [sp, #PT_REGS_SIZE - 4 - S_IP]
> >         add     ip, sp, #PT_REGS_SIZE - S_IP
> >         stmib   sp, {ip, lr}
> >         stmdb   sp!, {r0 - r11}
> >         @ clear CPSR and old_r0 words
> >         mov     r3, #0
> >         str     r3, [sp, #S_PSR]
> >         str     r3, [sp, #S_OLD_R0]
> >
> > and the return would be:
> >
> >         ldmia   sp, {r0 - pc}
> >
> > That all said - maybe someone from the ftrace community can comment on
> > how much of pt_regs is actually necessary here?
> 
> I would suggest the following:
> r0-r11: filled with current values.
> r12 :  the value of r12 doesn't matter (Intra-procedure call scratch
> reg), we can either save it or not.
> r13 - sp: the value as it was when the instrumented function was
> entered. in the mcount case, it's the current sp value - 4, otherwise
> it'f sp -4
> r14 - lr: the value as it was when the instrumented function was
> entered. first element in stack or available in frame depending on
> GCC's version (mcount vs __gnu_mcount_nc)
> r15 - pc : the address after the modified instruction (value of lr
> when the ftrace caller is entered)
> 
So basically you're saying: save all regs, r0 through r15, except r12.
Based on that, I think it's easier to save all of them and then restore
all of them except r12. Plus, you have to take into consideration all 
the possible users of ftrace with regs. At some point some implementation
of ftrace_regs_call will probably need the value from r12. 
> I don't think we need CSPR and ORIG_r0.
I think we do. As I said before, because PT_REGS is 72 and some function
might (in the future) make use of CSPR or ORIG_r0, to ensure there is no
stack corruption taking place, we have to provide whole pt_regs, that is
72 (including CSPR and ORIG_r0). Plus, the stack alignment thing Russell
mentioned would be fixed.

The only problem I don't have a solution for at this point is OLD_LR (or previous LR
as it is called in this patch). If we take the approach described earlier,
we need to add to pt_regs the OLD_LR which I really don't like because it is 
breaking the whole purpose of pt_regs (it should only contain one copy of each reg, 
though it already has the ORIG_r0 in it) and will also break the stack alignment.
> 
> >
> > --
> > RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
> > FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
> > according to speedtest.net.
Jean-Jacques Hiblot Feb. 10, 2017, 1:57 p.m. UTC | #14
2017-02-10 13:03 GMT+01:00 Abel Vesa <abelvesa@gmail.com>:
> On Fri, Feb 10, 2017 at 11:36:12AM +0100, Jean-Jacques Hiblot wrote:
>> 2017-02-09 17:29 GMT+01:00 Russell King - ARM Linux <linux@armlinux.org.uk>:
>> > On Tue, Feb 07, 2017 at 10:57:55PM +0000, Abel Vesa wrote:
>> >> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
>> >> +
>> >> +.macro __ftrace_regs_caller
>> >> +
>> >> +     add     ip, sp, #4      @ move in IP the value of SP as it was
>> >> +                             @ before the push {lr} of the mcount mechanism
>> >> +     stmdb   sp!, {ip,lr,pc}
>> >> +     stmdb   sp!, {r0-r11,lr}
>> >> +
>> >> +     @ stack content at this point:
>> >> +     @ 0  4          44    48   52       56   60   64
>> >> +     @ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR |
>> >
>> > How important is this to be close to "struct pt_regs" ?  Do we care about
>> > r12 being "wrong" ?  The other issue is that pt_regs is actually 72
>> > bytes in size, not 68 bytes.  So, does that mean we end up inappropriately
>> > leaking some of the kernel stack to userspace through ftrace?
>> >
>> > It's possible to save all the registers like this if we need to provide
>> > a complete picture of the register set at function entry:
>> >
>> >         str     ip, [sp, #-16]!
>> >         add     ip, sp, #20
>> >         stmia   sp, {ip, lr, pc}
>> >         stmdb   sp!, {r0 - r11}
>> >
>> > However, is that even correct - don't we want pt_regs' LR and PC to be
>> > related to the function call itself?  The "previous LR" as you describe
>> > it is where the called function (the one that is being traced) will
>> > return to.  The current LR at this point is the address within the
>> > traced function.  So actually I think this is more strictly correct, if
>> > I'm understanding the intention here correctly:
>> >
>> >         str     ip, [sp, #S_IP - PT_REGS_SIZE]! @ save current IP
>> >         ldr     ip, [sp, #PT_REGS_SIZE - S_IP]  @ get LR at traced function entry
>> >         str     lr, [sp, #S_PC - S_IP]          @ save current LR as PC
>> >         str     ip, [sp, #S_LR - S_IP]          @ save traced function return
>> >         add     ip, sp, #PT_REGS_SIZE - S_IP + 4
>> >         str     ip, [sp, #S_SP - SP_IP]         @ save stack pointer at function entry
>> >         stmdb   sp!, {r0 - r11}
>> >         @ clear CPSR and old_r0 words
>> >         mov     r3, #0
>> >         str     r3, [sp, #S_PSR]
>> >         str     r3, [sp, #S_OLD_R0]
>> >
>> > However, that has the side effect of misaligning the stack (the stack
>> > needs to be aligned to 8 bytes).  So, if we decide we don't care about
>> > the saved LR value (except as a mechanism to preserve it across the
>> > call into the ftrace code):
>> >
>> >         str     ip, [sp, #S_IP - PT_REGS_SIZE + 4]!
>> >         str     lr, [sp, #S_PC - S_IP]
>> >         ldr     lr, [sp, #PT_REGS_SIZE - 4 - S_IP]
>> >         add     ip, sp, #PT_REGS_SIZE - S_IP
>> >         stmib   sp, {ip, lr}
>> >         stmdb   sp!, {r0 - r11}
>> >         @ clear CPSR and old_r0 words
>> >         mov     r3, #0
>> >         str     r3, [sp, #S_PSR]
>> >         str     r3, [sp, #S_OLD_R0]
>> >
>> > and the return would be:
>> >
>> >         ldmia   sp, {r0 - pc}
>> >
>> > That all said - maybe someone from the ftrace community can comment on
>> > how much of pt_regs is actually necessary here?
>>
>> I would suggest the following:
>> r0-r11: filled with current values.
>> r12 :  the value of r12 doesn't matter (Intra-procedure call scratch
>> reg), we can either save it or not.
>> r13 - sp: the value as it was when the instrumented function was
>> entered. in the mcount case, it's the current sp value - 4, otherwise
>> it'f sp -4
>> r14 - lr: the value as it was when the instrumented function was
>> entered. first element in stack or available in frame depending on
>> GCC's version (mcount vs __gnu_mcount_nc)
>> r15 - pc : the address after the modified instruction (value of lr
>> when the ftrace caller is entered)
>>
> So basically you're saying: save all regs, r0 through r15, except r12.
> Based on that, I think it's easier to save all of them and then restore
> all of them except r12. Plus, you have to take into consideration all
> the possible users of ftrace with regs. At some point some implementation
> of ftrace_regs_call will probably need the value from r12.
>> I don't think we need CSPR and ORIG_r0.
> I think we do. As I said before, because PT_REGS is 72 and some function
> might (in the future) make use of CSPR or ORIG_r0, to ensure there is no
> stack corruption taking place, we have to provide whole pt_regs, that is
> 72 (including CSPR and ORIG_r0). Plus, the stack alignment thing Russell
> mentioned would be fixed.
You're right for the size of the structure. For the content, I don't
think we need all of them but it won't hurt to save more than
necessary.
>
> The only problem I don't have a solution for at this point is OLD_LR (or previous LR
> as it is called in this patch). If we take the approach described earlier,

previous LR is the lr when the instrumented function is entered, it
should be stored in pt_regs as r14

> we need to add to pt_regs the OLD_LR which I really don't like because it is
> breaking the whole purpose of pt_regs (it should only contain one copy of each reg,
> though it already has the ORIG_r0 in it) and will also break the stack alignment.
>>
>> >
>> > --
>> > RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
>> > FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
>> > according to speedtest.net.
Russell King (Oracle) Feb. 10, 2017, 2:28 p.m. UTC | #15
On Fri, Feb 10, 2017 at 12:03:06PM +0000, Abel Vesa wrote:
> The only problem I don't have a solution for at this point is OLD_LR (or
> previous LR as it is called in this patch).

If you want the context at function entry, then you need to save the
registers as they were at that point.

The stacking of LR in the gnu_mcount thing is there to avoid this problem:

a:
	push	{lr}
	bl	__gnu_mcount_mc

That "bl" instruction can be thought of as being effectively this:

	adr	lr, 1f
	b	__gnu_mcount_mc
1:

and from that, you can plainly see that "lr" gets corrupted by the call.
So, to save the register state as it was at point "a", you need to
save (in order):

	r0 through to sp
	the saved lr on the stack (which was the value of lr at point a)
	the current lr (which is the value of the PC _after_ __gnu_mcount_mc
		returns)
	cpsr
	write zero to old_r0

Stacking actual value of the PC at the point that you're stacking these
registers is really senseless - it doesn't convey any useful information
about the context being saved.

Does it make sense to leave the compiler's saving of lr on the stack?
Probably not - which I think my last iteration overwrote with the old_r0
value.  The only thing my last iteration did not do was save a real value
for CPSR.

I didn't test it either...
Abel Vesa Feb. 10, 2017, 5:17 p.m. UTC | #16
On Fri, Feb 10, 2017 at 02:28:47PM +0000, Russell King - ARM Linux wrote:
> On Fri, Feb 10, 2017 at 12:03:06PM +0000, Abel Vesa wrote:
> > The only problem I don't have a solution for at this point is OLD_LR (or
> > previous LR as it is called in this patch).
> 
> If you want the context at function entry, then you need to save the
> registers as they were at that point.
> 
> The stacking of LR in the gnu_mcount thing is there to avoid this problem:
> 
> a:
> 	push	{lr}
> 	bl	__gnu_mcount_mc
> 
> That "bl" instruction can be thought of as being effectively this:
> 
> 	adr	lr, 1f
> 	b	__gnu_mcount_mc
> 1:
> 
> and from that, you can plainly see that "lr" gets corrupted by the call.
> So, to save the register state as it was at point "a", you need to
> save (in order):
> 
> 	r0 through to sp
> 	the saved lr on the stack (which was the value of lr at point a)
> 	the current lr (which is the value of the PC _after_ __gnu_mcount_mc
> 		returns)
> 	cpsr
> 	write zero to old_r0
> 
> Stacking actual value of the PC at the point that you're stacking these
> registers is really senseless - it doesn't convey any useful information
> about the context being saved.
> 
> Does it make sense to leave the compiler's saving of lr on the stack?
> Probably not - which I think my last iteration overwrote with the old_r0
Actually, the "compiler's saving of lr" is needed by prepare_ftrace_return
(which is called from __ftrace_graph_regs_caller/__ftrace_graph_caller) to
be replaced by return_to_handler.

> value.  The only thing my last iteration did not do was save a real value
> for CPSR.
> 
The stack needs to look like this:
Right before __gnu_mcount_mc is called:

  0			    4
  | compiler's saving of lr | ... (we were wrong, stack was actually aligned to 8)

After regs saving in ftrace_regs_caller (the replacer of __gnu_mcount_mc):

  0    4    8     52       56       60   64     68       72                        76
  | R0 | R1 | ... | SP + 4 | new LR | PC | CPSR | OLD_R0 | compiler's saving of lr | ...

  this means the saving needs to be something like this:

   sub     sp, sp, #8        @ space for CPSR and OLD_R0 (not used at this point)
   add     ip, sp, #12       @ move in IP the value of SP as it was ( compute "SP + 4" )
   stmdb   sp!, {ip,lr,pc}   @ push PC, new LR, "SP + 4" (in this order)
   stmdb   sp!, {r0-r11,lr}  @ push new LR, R11 through to R0 (in this order)

And then the restoring needs to be like this:

   ldr     lr, [sp, #PT_REGS_SIZE]  @ load "compiler's saved of lr"
   ldmia   sp, {r0-r11, ip, sp, pc} @ pop r0-r11, "new LR" in ip, "SP + 4" in SP 
                                    @ and "new LR" in PC

After this, SP would be at '76', PC will contain the address of the next instruction
after "b __gnu_mcount_mc", and LR will be "compiler's saved of lr". The only register
that would have a different value than before would be IP.

I know we can skip saving and restoring IP, but it doesn't seem to be worth it.

I hope this time I'm not mistaken.

> I didn't test it either...
> 
> -- 
> RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
> according to speedtest.net.
Abel Vesa Feb. 10, 2017, 5:27 p.m. UTC | #17
On Fri, Feb 10, 2017 at 02:57:38PM +0100, Jean-Jacques Hiblot wrote:
> 2017-02-10 13:03 GMT+01:00 Abel Vesa <abelvesa@gmail.com>:
> > On Fri, Feb 10, 2017 at 11:36:12AM +0100, Jean-Jacques Hiblot wrote:
> >> 2017-02-09 17:29 GMT+01:00 Russell King - ARM Linux <linux@armlinux.org.uk>:
> >> > On Tue, Feb 07, 2017 at 10:57:55PM +0000, Abel Vesa wrote:
> >> >> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> >> >> +
> >> >> +.macro __ftrace_regs_caller
> >> >> +
> >> >> +     add     ip, sp, #4      @ move in IP the value of SP as it was
> >> >> +                             @ before the push {lr} of the mcount mechanism
> >> >> +     stmdb   sp!, {ip,lr,pc}
> >> >> +     stmdb   sp!, {r0-r11,lr}
> >> >> +
> >> >> +     @ stack content at this point:
> >> >> +     @ 0  4          44    48   52       56   60   64
> >> >> +     @ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR |
> >> >
> >> > How important is this to be close to "struct pt_regs" ?  Do we care about
> >> > r12 being "wrong" ?  The other issue is that pt_regs is actually 72
> >> > bytes in size, not 68 bytes.  So, does that mean we end up inappropriately
> >> > leaking some of the kernel stack to userspace through ftrace?
> >> >
> >> > It's possible to save all the registers like this if we need to provide
> >> > a complete picture of the register set at function entry:
> >> >
> >> >         str     ip, [sp, #-16]!
> >> >         add     ip, sp, #20
> >> >         stmia   sp, {ip, lr, pc}
> >> >         stmdb   sp!, {r0 - r11}
> >> >
> >> > However, is that even correct - don't we want pt_regs' LR and PC to be
> >> > related to the function call itself?  The "previous LR" as you describe
> >> > it is where the called function (the one that is being traced) will
> >> > return to.  The current LR at this point is the address within the
> >> > traced function.  So actually I think this is more strictly correct, if
> >> > I'm understanding the intention here correctly:
> >> >
> >> >         str     ip, [sp, #S_IP - PT_REGS_SIZE]! @ save current IP
> >> >         ldr     ip, [sp, #PT_REGS_SIZE - S_IP]  @ get LR at traced function entry
> >> >         str     lr, [sp, #S_PC - S_IP]          @ save current LR as PC
> >> >         str     ip, [sp, #S_LR - S_IP]          @ save traced function return
> >> >         add     ip, sp, #PT_REGS_SIZE - S_IP + 4
> >> >         str     ip, [sp, #S_SP - SP_IP]         @ save stack pointer at function entry
> >> >         stmdb   sp!, {r0 - r11}
> >> >         @ clear CPSR and old_r0 words
> >> >         mov     r3, #0
> >> >         str     r3, [sp, #S_PSR]
> >> >         str     r3, [sp, #S_OLD_R0]
> >> >
> >> > However, that has the side effect of misaligning the stack (the stack
> >> > needs to be aligned to 8 bytes).  So, if we decide we don't care about
> >> > the saved LR value (except as a mechanism to preserve it across the
> >> > call into the ftrace code):
> >> >
> >> >         str     ip, [sp, #S_IP - PT_REGS_SIZE + 4]!
> >> >         str     lr, [sp, #S_PC - S_IP]
> >> >         ldr     lr, [sp, #PT_REGS_SIZE - 4 - S_IP]
> >> >         add     ip, sp, #PT_REGS_SIZE - S_IP
> >> >         stmib   sp, {ip, lr}
> >> >         stmdb   sp!, {r0 - r11}
> >> >         @ clear CPSR and old_r0 words
> >> >         mov     r3, #0
> >> >         str     r3, [sp, #S_PSR]
> >> >         str     r3, [sp, #S_OLD_R0]
> >> >
> >> > and the return would be:
> >> >
> >> >         ldmia   sp, {r0 - pc}
> >> >
> >> > That all said - maybe someone from the ftrace community can comment on
> >> > how much of pt_regs is actually necessary here?
> >>
> >> I would suggest the following:
> >> r0-r11: filled with current values.
> >> r12 :  the value of r12 doesn't matter (Intra-procedure call scratch
> >> reg), we can either save it or not.
> >> r13 - sp: the value as it was when the instrumented function was
> >> entered. in the mcount case, it's the current sp value - 4, otherwise
> >> it'f sp -4
> >> r14 - lr: the value as it was when the instrumented function was
> >> entered. first element in stack or available in frame depending on
> >> GCC's version (mcount vs __gnu_mcount_nc)
> >> r15 - pc : the address after the modified instruction (value of lr
> >> when the ftrace caller is entered)
> >>
> > So basically you're saying: save all regs, r0 through r15, except r12.
> > Based on that, I think it's easier to save all of them and then restore
> > all of them except r12. Plus, you have to take into consideration all
> > the possible users of ftrace with regs. At some point some implementation
> > of ftrace_regs_call will probably need the value from r12.
> >> I don't think we need CSPR and ORIG_r0.
> > I think we do. As I said before, because PT_REGS is 72 and some function
> > might (in the future) make use of CSPR or ORIG_r0, to ensure there is no
> > stack corruption taking place, we have to provide whole pt_regs, that is
> > 72 (including CSPR and ORIG_r0). Plus, the stack alignment thing Russell
> > mentioned would be fixed.
> You're right for the size of the structure. For the content, I don't
> think we need all of them but it won't hurt to save more than
> necessary.
Exactly.
> >
> > The only problem I don't have a solution for at this point is OLD_LR (or previous LR
> > as it is called in this patch). If we take the approach described earlier,
> 
> previous LR is the lr when the instrumented function is entered, it
> should be stored in pt_regs as r14
> 
No, I disagree. I just realized I was also wrong. It doesn't have to be a part of 
pt_regs at all. prepare_ftrace_return will receive it as first parameter due to this:

 sub     r0, fp, #4              @ lr of instrumented routine (parent)

first instruction in __ftrace_graph_regs_caller.
> > we need to add to pt_regs the OLD_LR which I really don't like because it is
> > breaking the whole purpose of pt_regs (it should only contain one copy of each reg,
> > though it already has the ORIG_r0 in it) and will also break the stack alignment.
> >>
> >> >
> >> > --
> >> > RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
> >> > FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
> >> > according to speedtest.net.
diff mbox

Patch

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 186c4c2..df401f4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -50,6 +50,7 @@  config ARM
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_CONTIGUOUS if MMU
 	select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32 && MMU
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE && OLD_MCOUNT
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
 	select HAVE_EXIT_THREAD
 	select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index 22b7311..f379881 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -1,6 +1,10 @@ 
 #ifndef _ASM_ARM_FTRACE
 #define _ASM_ARM_FTRACE
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#endif
+
 #ifdef CONFIG_FUNCTION_TRACER
 #define MCOUNT_ADDR		((unsigned long)(__gnu_mcount_nc))
 #define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
diff --git a/arch/arm/kernel/entry-ftrace.S b/arch/arm/kernel/entry-ftrace.S
index c73c403..b1fee6c 100644
--- a/arch/arm/kernel/entry-ftrace.S
+++ b/arch/arm/kernel/entry-ftrace.S
@@ -92,12 +92,74 @@ 
 2:	mcount_exit
 .endm
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+
+.macro __ftrace_regs_caller
+
+	add 	ip, sp, #4	@ move in IP the value of SP as it was
+				@ before the push {lr} of the mcount mechanism
+	stmdb	sp!, {ip,lr,pc}
+	stmdb	sp!, {r0-r11,lr}
+
+	@ stack content at this point:
+	@ 0  4          44    48   52       56   60   64
+	@ R0 | R1 | ... | R11 | LR | SP + 4 | LR | PC | previous LR |
+
+	mov r3, sp				@ struct pt_regs*
+	ldr r2, =function_trace_op
+	ldr r2, [r2]				@ pointer to the current
+						@ function tracing op
+	ldr	r1, [sp, #64]			@ lr of instrumented func
+	mcount_adjust_addr	r0, lr		@ instrumented function
+
+	.globl ftrace_regs_call
+ftrace_regs_call:
+	bl	ftrace_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.globl ftrace_graph_regs_call
+ftrace_graph_regs_call:
+	mov	r0, r0
+#endif
+	@ pop saved regs
+	ldr	lr, [sp, #64]			@ get the previous LR value from stack
+	ldmia   sp, {r0-r11, ip, sp, pc}	@ pop the saved registers INCLUDING
+						@ the stack pointer
+.endm
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.macro __ftrace_graph_regs_caller
+
+	sub	r0, fp, #4		@ lr of instrumented routine (parent)
+
+	@ called from __ftrace_regs_caller
+	ldr     r1, [sp, #56]		@ instrumented routine (func)
+	mcount_adjust_addr	r1, r1
+
+	sub	r2, r0, #4		@ frame pointer
+	bl	prepare_ftrace_return
+
+	@ pop regs saved in ftrace_regs_caller
+	ldr	lr, [sp, #64]			@ restore lr from the stack
+	ldmia   sp, {r0-r11, ip, sp, pc}	@ restore r0 through pc
+
+.endm
+#endif
+#endif
+
 .macro __ftrace_caller suffix
 	mcount_enter
 
 	mcount_get_lr	r1			@ lr of instrumented func
 	mcount_adjust_addr	r0, lr		@ instrumented function
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	ldr r2, =function_trace_op
+	ldr r2, [r2]				@ pointer to the current
+						@ function tracing op
+	mov r3, #0				@ regs is NULL
+#endif
+
 	.globl ftrace_call\suffix
 ftrace_call\suffix:
 	bl	ftrace_stub
@@ -212,6 +274,15 @@  UNWIND(.fnstart)
 	__ftrace_caller
 UNWIND(.fnend)
 ENDPROC(ftrace_caller)
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ENTRY(ftrace_regs_caller)
+UNWIND(.fnstart)
+	__ftrace_regs_caller
+UNWIND(.fnend)
+ENDPROC(ftrace_regs_caller)
+#endif
+
 #endif
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -220,6 +291,14 @@  UNWIND(.fnstart)
 	__ftrace_graph_caller
 UNWIND(.fnend)
 ENDPROC(ftrace_graph_caller)
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ENTRY(ftrace_graph_regs_caller)
+UNWIND(.fnstart)
+	__ftrace_graph_regs_caller
+UNWIND(.fnend)
+ENDPROC(ftrace_graph_regs_caller)
+#endif
 #endif
 
 .purgem mcount_enter
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 3f17594..cb0358c 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -139,6 +139,15 @@  int ftrace_update_ftrace_func(ftrace_func_t func)
 
 	ret = ftrace_modify_code(pc, 0, new, false);
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	if (!ret) {
+		pc = (unsigned long)&ftrace_regs_call;
+		new = ftrace_call_replace(pc, (unsigned long)func);
+
+		ret = ftrace_modify_code(pc, 0, new, false);
+	}
+#endif
+
 #ifdef CONFIG_OLD_MCOUNT
 	if (!ret) {
 		pc = (unsigned long)&ftrace_call_old;
@@ -157,11 +166,29 @@  int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	unsigned long ip = rec->ip;
 
 	old = ftrace_nop_replace(rec);
+
+	new = ftrace_call_replace(ip, adjust_address(rec, addr));
+
+	return ftrace_modify_code(rec->ip, old, new, true);
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+				unsigned long addr)
+{
+	unsigned long new, old;
+	unsigned long ip = rec->ip;
+
+	old = ftrace_call_replace(ip, adjust_address(rec, old_addr));
+
 	new = ftrace_call_replace(ip, adjust_address(rec, addr));
 
 	return ftrace_modify_code(rec->ip, old, new, true);
 }
 
+#endif
+
 int ftrace_make_nop(struct module *mod,
 		    struct dyn_ftrace *rec, unsigned long addr)
 {
@@ -229,6 +256,8 @@  void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 extern unsigned long ftrace_graph_call;
 extern unsigned long ftrace_graph_call_old;
 extern void ftrace_graph_caller_old(void);
+extern unsigned long ftrace_graph_regs_call;
+extern void ftrace_graph_regs_caller(void);
 
 static int __ftrace_modify_caller(unsigned long *callsite,
 				  void (*func) (void), bool enable)
@@ -251,6 +280,14 @@  static int ftrace_modify_graph_caller(bool enable)
 				     ftrace_graph_caller,
 				     enable);
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	if (!ret)
+		ret = __ftrace_modify_caller(&ftrace_graph_regs_call,
+				     ftrace_graph_regs_caller,
+				     enable);
+#endif
+
+
 #ifdef CONFIG_OLD_MCOUNT
 	if (!ret)
 		ret = __ftrace_modify_caller(&ftrace_graph_call_old,