diff mbox

[v6,4/7] arm64: Add ftrace support

Message ID 1394705630-12384-5-git-send-email-takahiro.akashi@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

AKASHI Takahiro March 13, 2014, 10:13 a.m. UTC
This patch implements arm64 specific part to support function tracers,
such as function (CONFIG_FUNCTION_TRACER), function_graph
(CONFIG_FUNCTION_GRAPH_TRACER) and function profiler
(CONFIG_FUNCTION_PROFILER).

With 'function' tracer, all the functions in the kernel are traced with
timestamps in ${sysfs}/tracing/trace. If function_graph tracer is
specified, call graph is generated.

The kernel must be compiled with -pg option so that _mcount() is inserted
at the beginning of functions. This function is called on every function's
entry as long as tracing is enabled.
In addition, function_graph tracer also needs to be able to probe function's
exit. ftrace_graph_caller() & return_to_handler do this by faking link
register's value to intercept function's return path.

More details on architecture specific requirements are described in
Documentation/trace/ftrace-design.txt.

Reviewed-by: Ganapatrao Kulkarni <ganapatrao.kulkarni@cavium.com>
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm64/Kconfig               |    2 +
 arch/arm64/include/asm/ftrace.h  |   23 +++++
 arch/arm64/kernel/Makefile       |    4 +
 arch/arm64/kernel/arm64ksyms.c   |    4 +
 arch/arm64/kernel/entry-ftrace.S |  175 ++++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/ftrace.c       |   64 ++++++++++++++
 6 files changed, 272 insertions(+)
 create mode 100644 arch/arm64/include/asm/ftrace.h
 create mode 100644 arch/arm64/kernel/entry-ftrace.S
 create mode 100644 arch/arm64/kernel/ftrace.c

Comments

Will Deacon March 13, 2014, 5:08 p.m. UTC | #1
On Thu, Mar 13, 2014 at 10:13:47AM +0000, AKASHI Takahiro wrote:
> This patch implements arm64 specific part to support function tracers,
> such as function (CONFIG_FUNCTION_TRACER), function_graph
> (CONFIG_FUNCTION_GRAPH_TRACER) and function profiler
> (CONFIG_FUNCTION_PROFILER).
> 
> With 'function' tracer, all the functions in the kernel are traced with
> timestamps in ${sysfs}/tracing/trace. If function_graph tracer is
> specified, call graph is generated.
> 
> The kernel must be compiled with -pg option so that _mcount() is inserted
> at the beginning of functions. This function is called on every function's
> entry as long as tracing is enabled.
> In addition, function_graph tracer also needs to be able to probe function's
> exit. ftrace_graph_caller() & return_to_handler do this by faking link
> register's value to intercept function's return path.
> 
> More details on architecture specific requirements are described in
> Documentation/trace/ftrace-design.txt.

[...]

> diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
> new file mode 100644
> index 0000000..0ac31c8
> --- /dev/null
> +++ b/arch/arm64/kernel/entry-ftrace.S
> @@ -0,0 +1,175 @@
> +/*
> + * arch/arm64/kernel/entry-ftrace.S
> + *
> + * Copyright (C) 2013 Linaro Limited
> + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/linkage.h>
> +#include <asm/ftrace.h>
> +#include <asm/insn.h>
> +
> +/*
> + * Gcc with -pg will put the following code in the beginning of each function:
> + *      mov x0, x30
> + *      bl _mcount
> + *     [function's body ...]
> + * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic
> + * ftrace is enabled.
> + *
> + * Please note that x0 as an argument will not be used here because we can
> + * get lr(x30) of instrumented function at any time by winding up call stack
> + * as long as the kernel is compiled without -fomit-frame-pointer.
> + * (or CONFIG_FRAME_POINTER, this is forced on arm64)
> + *
> + * stack layout after mcount_enter in _mcount():
> + *
> + * current sp/fp =>  0:+-----+
> + * in _mcount()               | x29 | -> instrumented function's fp
> + *                    +-----+
> + *                    | x30 | -> _mcount()'s lr (= instrumented function's pc)
> + * old sp      => +16:+-----+
> + * when instrumented   |     |
> + * function calls      | ... |
> + * _mcount()          |     |
> + *                    |     |
> + * instrumented => +xx:+-----+
> + * function's fp       | x29 | -> parent's fp
> + *                    +-----+
> + *                    | x30 | -> instrumented function's lr (= parent's pc)
> + *                    +-----+
> + *                    | ... |

I guess it's just the diff that's misaligning your ASCII art here?

> +/*
> + * void return_to_handler(void)
> + *
> + * Run ftrace_return_to_handler() before going back to parent.
> + * @fp is checked against the value passed by ftrace_graph_caller()
> + * only when CONFIG_FUNCTION_GRAPH_FP_TEST is enabled.
> + */
> +       .global return_to_handler
> +return_to_handler:

ENTRY(return_to_handler)

> +       str     x0, [sp, #-16]!
> +       mov     x0, x29                 //     parent's fp
> +       bl      ftrace_return_to_handler// addr = ftrace_return_to_handler(fp);
> +       mov     x30, x0                 // restore the original return address
> +       ldr     x0, [sp], #16
> +       ret

and an ENDPROC here.

> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
> new file mode 100644
> index 0000000..a559ab8
> --- /dev/null
> +++ b/arch/arm64/kernel/ftrace.c
> @@ -0,0 +1,64 @@
> +/*
> + * arch/arm64/kernel/ftrace.c
> + *
> + * Copyright (C) 2013 Linaro Limited
> + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/ftrace.h>
> +#include <linux/swab.h>
> +#include <linux/uaccess.h>
> +
> +#include <asm/cacheflush.h>
> +#include <asm/ftrace.h>
> +#include <asm/insn.h>
> +
> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
> +/*
> + * function_graph tracer expects ftrace_return_to_handler() to be called
> + * on the way back to parent. For this purpose, this function is called
> + * in _mcount() or ftrace_caller() to replace return address (*parent) on
> + * the call stack to return_to_handler.
> + *
> + * Note that @frame_pointer is used only for sanity check later.
> + */
> +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
> +                          unsigned long frame_pointer)
> +{
> +       unsigned long return_hooker = (unsigned long)&return_to_handler;
> +       unsigned long old;
> +       struct ftrace_graph_ent trace;
> +       int err;
> +
> +       if (unlikely(atomic_read(&current->tracing_graph_pause)))
> +               return;
> +
> +       /*
> +        * Note:
> +        * No protection against faulting at *parent, which may be seen
> +        * on other archs. It's unlikely on AArch64.
> +        */
> +       old = *parent;
> +       *parent = return_hooker;

return_hook? People might take it personally otherwise ;)

> +       trace.func = self_addr;

> in kernel/trace/ftrace.c there's an smp_wmb() between setting up
tracing_graph_pause and setting the ret_stack with a comment saying:

    /* Make sure the tasks see the -1 first: */

Why don't we have a corresponding read-barrier here?

Will
Steven Rostedt March 13, 2014, 6:27 p.m. UTC | #2
On Thu, 2014-03-13 at 17:08 +0000, Will Deacon wrote:

> > +       /*
> > +        * Note:
> > +        * No protection against faulting at *parent, which may be seen
> > +        * on other archs. It's unlikely on AArch64.
> > +        */
> > +       old = *parent;
> > +       *parent = return_hooker;
> 
> return_hook? People might take it personally otherwise ;)

No, return_hooker is consistent with all the other archs. Hey, it's a
rugby position! Note, which I was when I played. ;-)

> 
> > +       trace.func = self_addr;
> 
> > in kernel/trace/ftrace.c there's an smp_wmb() between setting up
> tracing_graph_pause and setting the ret_stack with a comment saying:
> 
>     /* Make sure the tasks see the -1 first: */
> 
> Why don't we have a corresponding read-barrier here?

The corresponding rmb is in kernel/trace/trace_functions_graph.c
ftrace_push_return_trace().

-- Steve
Will Deacon March 13, 2014, 6:37 p.m. UTC | #3
On Thu, Mar 13, 2014 at 06:27:39PM +0000, Steven Rostedt wrote:
> On Thu, 2014-03-13 at 17:08 +0000, Will Deacon wrote:
> 
> > > +       /*
> > > +        * Note:
> > > +        * No protection against faulting at *parent, which may be seen
> > > +        * on other archs. It's unlikely on AArch64.
> > > +        */
> > > +       old = *parent;
> > > +       *parent = return_hooker;
> > 
> > return_hook? People might take it personally otherwise ;)
> 
> No, return_hooker is consistent with all the other archs. Hey, it's a
> rugby position! Note, which I was when I played. ;-)

Hehe, in which case your children will be able to execute that line of
ftrace!

> > 
> > > +       trace.func = self_addr;
> > 
> > > in kernel/trace/ftrace.c there's an smp_wmb() between setting up
> > tracing_graph_pause and setting the ret_stack with a comment saying:
> > 
> >     /* Make sure the tasks see the -1 first: */
> > 
> > Why don't we have a corresponding read-barrier here?
> 
> The corresponding rmb is in kernel/trace/trace_functions_graph.c
> ftrace_push_return_trace().

Okey doke then, I guess we don't really care about tracing_graph_pause
getting out-of-sync with curr_ret_stack.

Will
Steven Rostedt March 13, 2014, 6:49 p.m. UTC | #4
On Thu, 2014-03-13 at 18:37 +0000, Will Deacon wrote:
>  
> > No, return_hooker is consistent with all the other archs. Hey, it's a
> > rugby position! Note, which I was when I played. ;-)
> 
> Hehe, in which case your children will be able to execute that line of
> ftrace!
> 

My kids already know I was :-)

> > > 
> > > > +       trace.func = self_addr;
> > > 
> > > > in kernel/trace/ftrace.c there's an smp_wmb() between setting up
> > > tracing_graph_pause and setting the ret_stack with a comment saying:
> > > 
> > >     /* Make sure the tasks see the -1 first: */
> > > 
> > > Why don't we have a corresponding read-barrier here?
> > 
> > The corresponding rmb is in kernel/trace/trace_functions_graph.c
> > ftrace_push_return_trace().
> 
> Okey doke then, I guess we don't really care about tracing_graph_pause
> getting out-of-sync with curr_ret_stack.

Yep, the tracing_graph_pause is more to prevent crazy stuff happening
when an interrupt comes in. So it is just local cpu context, and doesn't
have issues with other CPUS. If it takes a bit for the task to see it go
to zero, the worse thing is that you might miss a few tracing functions
until that gets synced. Which will probably happen by the time tracing
is fully activated anyway.

-- Steve
AKASHI Takahiro March 14, 2014, 4:45 a.m. UTC | #5
On 03/14/2014 02:08 AM, Will Deacon wrote:
> On Thu, Mar 13, 2014 at 10:13:47AM +0000, AKASHI Takahiro wrote:
>> This patch implements arm64 specific part to support function tracers,
>> such as function (CONFIG_FUNCTION_TRACER), function_graph
>> (CONFIG_FUNCTION_GRAPH_TRACER) and function profiler
>> (CONFIG_FUNCTION_PROFILER).
>>
>> With 'function' tracer, all the functions in the kernel are traced with
>> timestamps in ${sysfs}/tracing/trace. If function_graph tracer is
>> specified, call graph is generated.
>>
>> The kernel must be compiled with -pg option so that _mcount() is inserted
>> at the beginning of functions. This function is called on every function's
>> entry as long as tracing is enabled.
>> In addition, function_graph tracer also needs to be able to probe function's
>> exit. ftrace_graph_caller() & return_to_handler do this by faking link
>> register's value to intercept function's return path.
>>
>> More details on architecture specific requirements are described in
>> Documentation/trace/ftrace-design.txt.
>
> [...]

You seem not to like this statement :-)

>> diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
>> new file mode 100644
>> index 0000000..0ac31c8
>> --- /dev/null
>> +++ b/arch/arm64/kernel/entry-ftrace.S
>> @@ -0,0 +1,175 @@
>> +/*
>> + * arch/arm64/kernel/entry-ftrace.S
>> + *
>> + * Copyright (C) 2013 Linaro Limited
>> + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#include <linux/linkage.h>
>> +#include <asm/ftrace.h>
>> +#include <asm/insn.h>
>> +
>> +/*
>> + * Gcc with -pg will put the following code in the beginning of each function:
>> + *      mov x0, x30
>> + *      bl _mcount
>> + *     [function's body ...]
>> + * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic
>> + * ftrace is enabled.
>> + *
>> + * Please note that x0 as an argument will not be used here because we can
>> + * get lr(x30) of instrumented function at any time by winding up call stack
>> + * as long as the kernel is compiled without -fomit-frame-pointer.
>> + * (or CONFIG_FRAME_POINTER, this is forced on arm64)
>> + *
>> + * stack layout after mcount_enter in _mcount():
>> + *
>> + * current sp/fp =>  0:+-----+
>> + * in _mcount()               | x29 | -> instrumented function's fp
>> + *                    +-----+
>> + *                    | x30 | -> _mcount()'s lr (= instrumented function's pc)
>> + * old sp      => +16:+-----+
>> + * when instrumented   |     |
>> + * function calls      | ... |
>> + * _mcount()          |     |
>> + *                    |     |
>> + * instrumented => +xx:+-----+
>> + * function's fp       | x29 | -> parent's fp
>> + *                    +-----+
>> + *                    | x30 | -> instrumented function's lr (= parent's pc)
>> + *                    +-----+
>> + *                    | ... |
>
> I guess it's just the diff that's misaligning your ASCII art here?

Yes, I think so. Misaligned due to "tab"

>> +/*
>> + * void return_to_handler(void)
>> + *
>> + * Run ftrace_return_to_handler() before going back to parent.
>> + * @fp is checked against the value passed by ftrace_graph_caller()
>> + * only when CONFIG_FUNCTION_GRAPH_FP_TEST is enabled.
>> + */
>> +       .global return_to_handler
>> +return_to_handler:
>
> ENTRY(return_to_handler)

Fix it.

>> +       str     x0, [sp, #-16]!
>> +       mov     x0, x29                 //     parent's fp
>> +       bl      ftrace_return_to_handler// addr = ftrace_return_to_handler(fp);
>> +       mov     x30, x0                 // restore the original return address
>> +       ldr     x0, [sp], #16
>> +       ret
>
> and an ENDPROC here.

Fix it.
But please note that this (return_to_handler) is not a real function.
Declaring it as ENDPROC is not very useful.

>> +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
>> diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
>> new file mode 100644
>> index 0000000..a559ab8
>> --- /dev/null
>> +++ b/arch/arm64/kernel/ftrace.c
>> @@ -0,0 +1,64 @@
>> +/*
>> + * arch/arm64/kernel/ftrace.c
>> + *
>> + * Copyright (C) 2013 Linaro Limited
>> + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#include <linux/ftrace.h>
>> +#include <linux/swab.h>
>> +#include <linux/uaccess.h>
>> +
>> +#include <asm/cacheflush.h>
>> +#include <asm/ftrace.h>
>> +#include <asm/insn.h>
>> +
>> +#ifdef CONFIG_FUNCTION_GRAPH_TRACER
>> +/*
>> + * function_graph tracer expects ftrace_return_to_handler() to be called
>> + * on the way back to parent. For this purpose, this function is called
>> + * in _mcount() or ftrace_caller() to replace return address (*parent) on
>> + * the call stack to return_to_handler.
>> + *
>> + * Note that @frame_pointer is used only for sanity check later.
>> + */
>> +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
>> +                          unsigned long frame_pointer)
>> +{
>> +       unsigned long return_hooker = (unsigned long)&return_to_handler;
>> +       unsigned long old;
>> +       struct ftrace_graph_ent trace;
>> +       int err;
>> +
>> +       if (unlikely(atomic_read(&current->tracing_graph_pause)))
>> +               return;
>> +
>> +       /*
>> +        * Note:
>> +        * No protection against faulting at *parent, which may be seen
>> +        * on other archs. It's unlikely on AArch64.
>> +        */
>> +       old = *parent;
>> +       *parent = return_hooker;
>
> return_hook? People might take it personally otherwise ;)

I didn't know what "(return) hooker" means.
Anyway, leave it as it is after you and Steven's discussion.

>> +       trace.func = self_addr;
>
> in kernel/trace/ftrace.c there's an smp_wmb() between setting up
> tracing_graph_pause and setting the ret_stack with a comment saying:
>
>      /* Make sure the tasks see the -1 first: */
>
> Why don't we have a corresponding read-barrier here?

I think Steven answered it.

-Takahiro AKASHI

> Will
>
Will Deacon March 14, 2014, 10:07 a.m. UTC | #6
On Fri, Mar 14, 2014 at 04:45:13AM +0000, AKASHI Takahiro wrote:
> On 03/14/2014 02:08 AM, Will Deacon wrote:
> > On Thu, Mar 13, 2014 at 10:13:47AM +0000, AKASHI Takahiro wrote:
> >> This patch implements arm64 specific part to support function tracers,
> >> such as function (CONFIG_FUNCTION_TRACER), function_graph
> >> (CONFIG_FUNCTION_GRAPH_TRACER) and function profiler
> >> (CONFIG_FUNCTION_PROFILER).
> >>
> >> With 'function' tracer, all the functions in the kernel are traced with
> >> timestamps in ${sysfs}/tracing/trace. If function_graph tracer is
> >> specified, call graph is generated.
> >>
> >> The kernel must be compiled with -pg option so that _mcount() is inserted
> >> at the beginning of functions. This function is called on every function's
> >> entry as long as tracing is enabled.
> >> In addition, function_graph tracer also needs to be able to probe function's
> >> exit. ftrace_graph_caller() & return_to_handler do this by faking link
> >> register's value to intercept function's return path.
> >>
> >> More details on architecture specific requirements are described in
> >> Documentation/trace/ftrace-design.txt.
> >
> > [...]
> 
> You seem not to like this statement :-)

Huh? I was just using '[...]' to signify that I'd removed a lot of context
(since the code all looked fine there). Otherwise, review emails can get
really long.

> >> +       str     x0, [sp, #-16]!
> >> +       mov     x0, x29                 //     parent's fp
> >> +       bl      ftrace_return_to_handler// addr = ftrace_return_to_handler(fp);
> >> +       mov     x30, x0                 // restore the original return address
> >> +       ldr     x0, [sp], #16
> >> +       ret
> >
> > and an ENDPROC here.
> 
> Fix it.
> But please note that this (return_to_handler) is not a real function.
> Declaring it as ENDPROC is not very useful.

Then use END(...). The opening macro was the main point, since that avoids
the need for an explicit .globl.

Will
AKASHI Takahiro March 14, 2014, 4:13 p.m. UTC | #7
On 03/14/2014 07:07 PM, Will Deacon wrote:
> On Fri, Mar 14, 2014 at 04:45:13AM +0000, AKASHI Takahiro wrote:
>> On 03/14/2014 02:08 AM, Will Deacon wrote:
>>> On Thu, Mar 13, 2014 at 10:13:47AM +0000, AKASHI Takahiro wrote:
>>>> This patch implements arm64 specific part to support function tracers,
>>>> such as function (CONFIG_FUNCTION_TRACER), function_graph
>>>> (CONFIG_FUNCTION_GRAPH_TRACER) and function profiler
>>>> (CONFIG_FUNCTION_PROFILER).
>>>>
>>>> With 'function' tracer, all the functions in the kernel are traced with
>>>> timestamps in ${sysfs}/tracing/trace. If function_graph tracer is
>>>> specified, call graph is generated.
>>>>
>>>> The kernel must be compiled with -pg option so that _mcount() is inserted
>>>> at the beginning of functions. This function is called on every function's
>>>> entry as long as tracing is enabled.
>>>> In addition, function_graph tracer also needs to be able to probe function's
>>>> exit. ftrace_graph_caller() & return_to_handler do this by faking link
>>>> register's value to intercept function's return path.
>>>>
>>>> More details on architecture specific requirements are described in
>>>> Documentation/trace/ftrace-design.txt.
>>>
>>> [...]
>>
>> You seem not to like this statement :-)
>
> Huh? I was just using '[...]' to signify that I'd removed a lot of context
> (since the code all looked fine there). Otherwise, review emails can get
> really long.

Please ignore my comment.

>>>> +       str     x0, [sp, #-16]!
>>>> +       mov     x0, x29                 //     parent's fp
>>>> +       bl      ftrace_return_to_handler// addr = ftrace_return_to_handler(fp);
>>>> +       mov     x30, x0                 // restore the original return address
>>>> +       ldr     x0, [sp], #16
>>>> +       ret
>>>
>>> and an ENDPROC here.
>>
>> Fix it.
>> But please note that this (return_to_handler) is not a real function.
>> Declaring it as ENDPROC is not very useful.
>
> Then use END(...). The opening macro was the main point, since that avoid
> the need for an explicit .globl.

Fix it.

-Takahiro AKASHI

> Will
>
diff mbox

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 340e344..6b3fef6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -35,6 +35,8 @@  config ARM64
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_FUNCTION_TRACER
+	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_MEMBLOCK
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
new file mode 100644
index 0000000..58ea595
--- /dev/null
+++ b/arch/arm64/include/asm/ftrace.h
@@ -0,0 +1,23 @@ 
+/*
+ * arch/arm64/include/asm/ftrace.h
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_FTRACE_H
+#define __ASM_FTRACE_H
+
+#include <asm/insn.h>
+
+#define MCOUNT_ADDR		((unsigned long)_mcount)
+#define MCOUNT_INSN_SIZE	AARCH64_INSN_SIZE
+
+#ifndef __ASSEMBLY__
+extern void _mcount(unsigned long);
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_FTRACE_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 2d4554b..ac67fd0 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -5,6 +5,9 @@ 
 CPPFLAGS_vmlinux.lds	:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 AFLAGS_head.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 
+CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_insn.o = -pg
+
 # Object file lists.
 arm64-obj-y		:= cputable.o debug-monitors.o entry.o irq.o fpsimd.o	\
 			   entry-fpsimd.o process.o ptrace.o setup.o signal.o	\
@@ -13,6 +16,7 @@  arm64-obj-y		:= cputable.o debug-monitors.o entry.o irq.o fpsimd.o	\
 
 arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\
 					   sys_compat.o
+arm64-obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o entry-ftrace.o
 arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o
 arm64-obj-$(CONFIG_SMP)			+= smp.o smp_spin_table.o
 arm64-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index 338b568..7f0512f 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -56,3 +56,7 @@  EXPORT_SYMBOL(clear_bit);
 EXPORT_SYMBOL(test_and_clear_bit);
 EXPORT_SYMBOL(change_bit);
 EXPORT_SYMBOL(test_and_change_bit);
+
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(_mcount);
+#endif
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
new file mode 100644
index 0000000..0ac31c8
--- /dev/null
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -0,0 +1,175 @@ 
+/*
+ * arch/arm64/kernel/entry-ftrace.S
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/ftrace.h>
+#include <asm/insn.h>
+
+/*
+ * Gcc with -pg will put the following code in the beginning of each function:
+ *      mov x0, x30
+ *      bl _mcount
+ *	[function's body ...]
+ * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic
+ * ftrace is enabled.
+ *
+ * Please note that x0 as an argument will not be used here because we can
+ * get lr(x30) of instrumented function at any time by winding up call stack
+ * as long as the kernel is compiled without -fomit-frame-pointer.
+ * (or CONFIG_FRAME_POINTER, this is forced on arm64)
+ *
+ * stack layout after mcount_enter in _mcount():
+ *
+ * current sp/fp =>  0:+-----+
+ * in _mcount()	       | x29 | -> instrumented function's fp
+ *		       +-----+
+ *		       | x30 | -> _mcount()'s lr (= instrumented function's pc)
+ * old sp	=> +16:+-----+
+ * when instrumented   |     |
+ * function calls      | ... |
+ * _mcount()	       |     |
+ *		       |     |
+ * instrumented => +xx:+-----+
+ * function's fp       | x29 | -> parent's fp
+ *		       +-----+
+ *		       | x30 | -> instrumented function's lr (= parent's pc)
+ *		       +-----+
+ *		       | ... |
+ */
+
+	.macro mcount_enter
+	stp	x29, x30, [sp, #-16]!
+	mov	x29, sp
+	.endm
+
+	.macro mcount_exit
+	ldp	x29, x30, [sp], #16
+	ret
+	.endm
+
+	.macro mcount_adjust_addr rd, rn
+	sub	\rd, \rn, #AARCH64_INSN_SIZE
+	.endm
+
+	/* for instrumented function's parent */
+	.macro mcount_get_parent_fp reg
+	ldr	\reg, [x29]
+	ldr	\reg, [\reg]
+	.endm
+
+	/* for instrumented function */
+	.macro mcount_get_pc0 reg
+	mcount_adjust_addr	\reg, x30
+	.endm
+
+	.macro mcount_get_pc reg
+	ldr	\reg, [x29, #8]
+	mcount_adjust_addr	\reg, \reg
+	.endm
+
+	.macro mcount_get_lr reg
+	ldr	\reg, [x29]
+	ldr	\reg, [\reg, #8]
+	mcount_adjust_addr	\reg, \reg
+	.endm
+
+	.macro mcount_get_lr_addr reg
+	ldr	\reg, [x29]
+	add	\reg, \reg, #8
+	.endm
+
+/*
+ * void _mcount(unsigned long return_address)
+ * @return_address: return address to instrumented function
+ *
+ * This function makes calls, if enabled, to:
+ *     - tracer function to probe instrumented function's entry,
+ *     - ftrace_graph_caller to set up an exit hook
+ */
+ENTRY(_mcount)
+#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+	ldr	x0, =ftrace_trace_stop
+	ldr	x0, [x0]		// if ftrace_trace_stop
+	ret				//   return;
+#endif
+	mcount_enter
+
+	ldr	x0, =ftrace_trace_function
+	ldr	x2, [x0]
+	adr	x0, ftrace_stub
+	cmp	x0, x2			// if (ftrace_trace_function
+	b.eq	skip_ftrace_call	//     != ftrace_stub) {
+
+	mcount_get_pc	x0		//       function's pc
+	mcount_get_lr	x1		//       function's lr (= parent's pc)
+	blr	x2			//   (*ftrace_trace_function)(pc, lr);
+
+#ifndef CONFIG_FUNCTION_GRAPH_TRACER
+skip_ftrace_call:			//   return;
+	mcount_exit			// }
+#else
+	mcount_exit			//   return;
+					// }
+skip_ftrace_call:
+	ldr	x1, =ftrace_graph_return
+	ldr	x2, [x1]		//   if ((ftrace_graph_return
+	cmp	x0, x2			//        != ftrace_stub)
+	b.ne	ftrace_graph_caller
+
+	ldr	x1, =ftrace_graph_entry	//     || (ftrace_graph_entry
+	ldr	x2, [x1]		//        != ftrace_graph_entry_stub))
+	ldr	x0, =ftrace_graph_entry_stub
+	cmp	x0, x2
+	b.ne	ftrace_graph_caller	//     ftrace_graph_caller();
+
+	mcount_exit
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+ENDPROC(_mcount)
+
+ENTRY(ftrace_stub)
+	ret
+ENDPROC(ftrace_stub)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * void ftrace_graph_caller(void)
+ *
+ * Called from _mcount() or ftrace_caller() when function_graph tracer is
+ * selected.
+ * This function w/ prepare_ftrace_return() fakes link register's value on
+ * the call stack in order to intercept instrumented function's return path
+ * and run return_to_handler() later on its exit.
+ */
+ENTRY(ftrace_graph_caller)
+	mcount_get_lr_addr	  x0	//     pointer to function's saved lr
+	mcount_get_pc		  x1	//     function's pc
+	mcount_get_parent_fp	  x2	//     parent's fp
+	bl	prepare_ftrace_return	// prepare_ftrace_return(&lr, pc, fp)
+
+	mcount_exit
+ENDPROC(ftrace_graph_caller)
+
+/*
+ * void return_to_handler(void)
+ *
+ * Run ftrace_return_to_handler() before going back to parent.
+ * @fp is checked against the value passed by ftrace_graph_caller()
+ * only when CONFIG_FUNCTION_GRAPH_FP_TEST is enabled.
+ */
+	.global return_to_handler
+return_to_handler:
+	str	x0, [sp, #-16]!
+	mov	x0, x29			//     parent's fp
+	bl	ftrace_return_to_handler// addr = ftrace_return_to_handler(fp);
+	mov	x30, x0			// restore the original return address
+	ldr	x0, [sp], #16
+	ret
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
new file mode 100644
index 0000000..a559ab8
--- /dev/null
+++ b/arch/arm64/kernel/ftrace.c
@@ -0,0 +1,64 @@ 
+/*
+ * arch/arm64/kernel/ftrace.c
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/ftrace.h>
+#include <linux/swab.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ftrace.h>
+#include <asm/insn.h>
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * function_graph tracer expects ftrace_return_to_handler() to be called
+ * on the way back to parent. For this purpose, this function is called
+ * in _mcount() or ftrace_caller() to replace return address (*parent) on
+ * the call stack to return_to_handler.
+ *
+ * Note that @frame_pointer is used only for sanity check later.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+			   unsigned long frame_pointer)
+{
+	unsigned long return_hooker = (unsigned long)&return_to_handler;
+	unsigned long old;
+	struct ftrace_graph_ent trace;
+	int err;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	/*
+	 * Note:
+	 * No protection against faulting at *parent, which may be seen
+	 * on other archs. It's unlikely on AArch64.
+	 */
+	old = *parent;
+	*parent = return_hooker;
+
+	trace.func = self_addr;
+	trace.depth = current->curr_ret_stack + 1;
+
+	/* Only trace if the calling function expects to */
+	if (!ftrace_graph_entry(&trace)) {
+		*parent = old;
+		return;
+	}
+
+	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+				       frame_pointer);
+	if (err == -EBUSY) {
+		*parent = old;
+		return;
+	}
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */