diff mbox series

[v2] ARM: stacktrace: Add USER_STACKTRACE support

Message ID 20240730021532.1752582-1-ruanjinjie@huawei.com (mailing list archive)
State New, archived
Headers show
Series [v2] ARM: stacktrace: Add USER_STACKTRACE support | expand

Commit Message

Jinjie Ruan July 30, 2024, 2:15 a.m. UTC
Currently, userstacktrace is unsupported on ARM. Use the
perf_callchain_user() code as a blueprint to implement
arch_stack_walk_user(), which adds userstacktrace support on ARM.
Meanwhile, we can use arch_stack_walk_user() to simplify the implementation
of perf_callchain_user().

An ftrace test case is shown below:
	# cd /sys/kernel/debug/tracing
	# echo 1 > options/userstacktrace
	# echo 1 > options/sym-userobj
	# echo 1 > events/sched/sched_process_fork/enable
	# cat trace

	......
	              sh-100     [000] .....    51.779261: sched_process_fork: comm=sh pid=100 child_comm=sh child_pid=108
	              sh-100     [000] .....    51.779285: <user stack trace>
	 => /lib/libc.so.6[+0xb3c8c]
	 => /bin/busybox[+0xffb901f1]

A simple perf test also works, as shown below:
	# perf record -e cpu-clock --call-graph fp top
	# perf report --call-graph

	.....
	    65.00%     0.00%  top      [kernel.kallsyms]  [k] __ret_fast_syscall

	            |
	            ---__ret_fast_syscall
	               |
	               |--30.00%--__se_sys_getdents64
	               |          iterate_dir
	               |          |
	               |          |--25.00%--proc_pid_readdir

Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
v2:
- Update the wrong patch title.
- Take off the merged bugfix patch.
- Remove the Tested-by.
---
 arch/arm/Kconfig                 |  1 +
 arch/arm/kernel/perf_callchain.c | 70 +++-----------------------------
 arch/arm/kernel/stacktrace.c     | 65 +++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 64 deletions(-)

Comments

Russell King (Oracle) Aug. 2, 2024, 11:48 a.m. UTC | #1
On Tue, Jul 30, 2024 at 10:15:32AM +0800, Jinjie Ruan wrote:
> Currently, userstacktrace is unsupported for ARM. So use the
> perf_callchain_user() code as blueprint to implement the
> arch_stack_walk_user() which add userstacktrace support on ARM.
> Meanwhile, we can use arch_stack_walk_user() to simplify the implementation
> of perf_callchain_user().
> 
> A ftrace test case is shown as below:
> 	# cd /sys/kernel/debug/tracing
> 	# echo 1 > options/userstacktrace
> 	# echo 1 > options/sym-userobj
> 	# echo 1 > events/sched/sched_process_fork/enable
> 	# cat trace
> 
> 	......
> 	              sh-100     [000] .....    51.779261: sched_process_fork: comm=sh pid=100 child_comm=sh child_pid=108
> 	              sh-100     [000] .....    51.779285: <user stack trace>
> 	 => /lib/libc.so.6[+0xb3c8c]
> 	 => /bin/busybox[+0xffb901f1]
> 
> Also a simple perf test is ok as below:
> 	# perf record -e cpu-clock --call-graph fp top
> 	# perf report --call-graph
> 
> 	.....
> 	  [[31m  65.00%[[m     0.00%  top      [kernel.kallsyms]  [k] __ret_fast_syscall
> 
> 	            |
> 	            ---__ret_fast_syscall
> 	               |
> 	               |--[[31m30.00%[[m--__se_sys_getdents64
> 	               |          iterate_dir
> 	               |          |
> 	               |          |--[[31m25.00%[[m--proc_pid_readdir
> 
> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>

Do you have a use case for this feature?

Given that userspace is free to do whatever it likes with stack frames,
I think this is going to be hit and miss whether it works.
Jinjie Ruan Aug. 12, 2024, 6:45 a.m. UTC | #2
On 2024/8/2 19:48, Russell King (Oracle) wrote:
> On Tue, Jul 30, 2024 at 10:15:32AM +0800, Jinjie Ruan wrote:
>> Currently, userstacktrace is unsupported for ARM. So use the
>> perf_callchain_user() code as blueprint to implement the
>> arch_stack_walk_user() which add userstacktrace support on ARM.
>> Meanwhile, we can use arch_stack_walk_user() to simplify the implementation
>> of perf_callchain_user().
>>
>> A ftrace test case is shown as below:
>> 	# cd /sys/kernel/debug/tracing
>> 	# echo 1 > options/userstacktrace
>> 	# echo 1 > options/sym-userobj
>> 	# echo 1 > events/sched/sched_process_fork/enable
>> 	# cat trace
>>
>> 	......
>> 	              sh-100     [000] .....    51.779261: sched_process_fork: comm=sh pid=100 child_comm=sh child_pid=108
>> 	              sh-100     [000] .....    51.779285: <user stack trace>
>> 	 => /lib/libc.so.6[+0xb3c8c]
>> 	 => /bin/busybox[+0xffb901f1]
>>
>> Also a simple perf test is ok as below:
>> 	# perf record -e cpu-clock --call-graph fp top
>> 	# perf report --call-graph
>>
>> 	.....
>> 	  [[31m  65.00%[[m     0.00%  top      [kernel.kallsyms]  [k] __ret_fast_syscall
>>
>> 	            |
>> 	            ---__ret_fast_syscall
>> 	               |
>> 	               |--[[31m30.00%[[m--__se_sys_getdents64
>> 	               |          iterate_dir
>> 	               |          |
>> 	               |          |--[[31m25.00%[[m--proc_pid_readdir
>>
>> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
> 
> Do you have a use case for this feature?

To my knowledge, user stack traces are used by both uprobes and ftrace.

> 
> Given that userspace is free to do whatever it likes with stack frames,
> I think this is going to be hit and miss whether it works.

To be honest, I used the ARM64 implementation as a reference. Does anyone
have suggestions for improvements or modifications?

>
Russell King (Oracle) Aug. 12, 2024, 3:02 p.m. UTC | #3
On Mon, Aug 12, 2024 at 02:45:40PM +0800, Jinjie Ruan wrote:
> 
> 
> On 2024/8/2 19:48, Russell King (Oracle) wrote:
> > On Tue, Jul 30, 2024 at 10:15:32AM +0800, Jinjie Ruan wrote:
> >> Currently, userstacktrace is unsupported for ARM. So use the
> >> perf_callchain_user() code as blueprint to implement the
> >> arch_stack_walk_user() which add userstacktrace support on ARM.
> >> Meanwhile, we can use arch_stack_walk_user() to simplify the implementation
> >> of perf_callchain_user().
> >>
> >> A ftrace test case is shown as below:
> >> 	# cd /sys/kernel/debug/tracing
> >> 	# echo 1 > options/userstacktrace
> >> 	# echo 1 > options/sym-userobj
> >> 	# echo 1 > events/sched/sched_process_fork/enable
> >> 	# cat trace
> >>
> >> 	......
> >> 	              sh-100     [000] .....    51.779261: sched_process_fork: comm=sh pid=100 child_comm=sh child_pid=108
> >> 	              sh-100     [000] .....    51.779285: <user stack trace>
> >> 	 => /lib/libc.so.6[+0xb3c8c]
> >> 	 => /bin/busybox[+0xffb901f1]
> >>
> >> Also a simple perf test is ok as below:
> >> 	# perf record -e cpu-clock --call-graph fp top
> >> 	# perf report --call-graph
> >>
> >> 	.....
> >> 	  [[31m  65.00%[[m     0.00%  top      [kernel.kallsyms]  [k] __ret_fast_syscall
> >>
> >> 	            |
> >> 	            ---__ret_fast_syscall
> >> 	               |
> >> 	               |--[[31m30.00%[[m--__se_sys_getdents64
> >> 	               |          iterate_dir
> >> 	               |          |
> >> 	               |          |--[[31m25.00%[[m--proc_pid_readdir
> >>
> >> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
> > 
> > Do you have a use case for this feature?
> 
> To my knowledge, user stack trace is used in both uprobes and ftrace.
> 
> > 
> > Given that userspace is free to do whatever it likes with stack frames,
> > I think this is going to be hit and miss whether it works.
> 
> To be honest, I referred to the implementation of ARM64. Does anyone
> have suggestions for improvements or modifications?

So you're lifting code from Arm64 and dropping it into Arm32 in the hope
that it's suitable.

Here's a couple of examples - I've just used objdump on Debian Stable's
/bin/cat which contains functions where the prologue and epilogue are:

    1a2c:       b508            push    {r3, lr}
    ...
    1a56:       bd08            pop     {r3, pc}

    1de4:       b570            push    {r4, r5, r6, lr}
    ...
    1dea:       b084            sub     sp, #16
    ...
    1e18:       b004            add     sp, #16
    1e1a:       bd70            pop     {r4, r5, r6, pc}

These kinds of stack frames cannot be unwound by the kernel - there
is no frame pointer there, and the only way it can be unwound is with
unwind information specific to the code objects concerned.

If I look at Arm64, then:

    26b0:       a9be7bfd        stp     x29, x30, [sp, #-32]!
    26b4:       910003fd        mov     x29, sp
...
    26f0:       a8c27bfd        ldp     x29, x30, [sp], #32
    26f4:       d65f03c0        ret

So, x29 appears to act as a frame pointer, creating a linked list of
stack frames. If this is part of the Arm64 ABI, then yes, the kernel
can use the guarantee that user programs will have this stack structure
and thus can walk the stack.

However, as has been shown, this is not true of 32-bit Arm - there is
no guarantee that userspace has any regular structure to its stack
frames, and thus there is no guarantee that the stack frames can be
walked by the kernel.
diff mbox series

Patch

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 54b2bb817a7f..eb9a587935ef 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -152,6 +152,7 @@  config ARM
 	select HAVE_ARCH_VMAP_STACK if MMU && ARM_HAS_GROUP_RELOCS
 	select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
 	select USE_OF if !(ARCH_FOOTBRIDGE || ARCH_RPC || ARCH_SA1100)
+	select USER_STACKTRACE_SUPPORT
 	# Above selects are sorted alphabetically; please add new ones
 	# according to that.  Thanks.
 	help
diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c
index 1d230ac9d0eb..cdb7aa31c6ec 100644
--- a/arch/arm/kernel/perf_callchain.c
+++ b/arch/arm/kernel/perf_callchain.c
@@ -12,70 +12,6 @@ 
 
 #include <asm/stacktrace.h>
 
-/*
- * The registers we're interested in are at the end of the variable
- * length saved register structure. The fp points at the end of this
- * structure so the address of this struct is:
- * (struct frame_tail *)(xxx->fp)-1
- *
- * This code has been adapted from the ARM OProfile support.
- */
-struct frame_tail {
-	struct frame_tail __user *fp;
-	unsigned long sp;
-	unsigned long lr;
-} __attribute__((packed));
-
-/*
- * Get the return address for a single stackframe and return a pointer to the
- * next frame tail.
- */
-static struct frame_tail __user *
-user_backtrace(struct frame_tail __user *tail,
-	       struct perf_callchain_entry_ctx *entry)
-{
-	struct frame_tail buftail;
-	unsigned long err;
-
-	if (!access_ok(tail, sizeof(buftail)))
-		return NULL;
-
-	pagefault_disable();
-	err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
-	pagefault_enable();
-
-	if (err)
-		return NULL;
-
-	perf_callchain_store(entry, buftail.lr);
-
-	/*
-	 * Frame pointers should strictly progress back up the stack
-	 * (towards higher addresses).
-	 */
-	if (tail + 1 >= buftail.fp)
-		return NULL;
-
-	return buftail.fp - 1;
-}
-
-void
-perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
-{
-	struct frame_tail __user *tail;
-
-	perf_callchain_store(entry, regs->ARM_pc);
-
-	if (!current->mm)
-		return;
-
-	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
-
-	while ((entry->nr < entry->max_stack) &&
-	       tail && !((unsigned long)tail & 0x3))
-		tail = user_backtrace(tail, entry);
-}
-
 /*
  * Gets called by walk_stackframe() for every stackframe. This will be called
  * whist unwinding the stackframe and is like a subroutine return so we use
@@ -88,6 +24,12 @@  callchain_trace(void *data, unsigned long pc)
 	return perf_callchain_store(entry, pc) == 0;
 }
 
+void
+perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
+{
+	arch_stack_walk_user(callchain_trace, entry, regs);
+}
+
 void
 perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
 {
diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c
index 620aa82e3bdd..b744792755b5 100644
--- a/arch/arm/kernel/stacktrace.c
+++ b/arch/arm/kernel/stacktrace.c
@@ -194,4 +194,69 @@  void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 
 	walk_stackframe(&frame, consume_entry, cookie);
 }
+
+/*
+ * The registers we're interested in are at the end of the variable
+ * length saved register structure. The fp points at the end of this
+ * structure so the address of this struct is:
+ * (struct frame_tail *)(xxx->fp)-1
+ *
+ * This code has been adapted from the ARM OProfile support.
+ */
+struct frame_tail {
+	struct frame_tail __user *fp;
+	unsigned long sp;
+	unsigned long lr;
+} __packed;
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static struct frame_tail __user *
+unwind_user_frame(struct frame_tail __user *tail, void *cookie,
+		  stack_trace_consume_fn consume_entry)
+{
+	struct frame_tail buftail;
+	unsigned long err;
+
+	if (!access_ok(tail, sizeof(buftail)))
+		return NULL;
+
+	pagefault_disable();
+	err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+	pagefault_enable();
+
+	if (err)
+		return NULL;
+
+	if (!consume_entry(cookie, buftail.lr))
+		return NULL;
+
+	/*
+	 * Frame pointers should strictly progress back up the stack
+	 * (towards higher addresses).
+	 */
+	if (tail + 1 >= buftail.fp)
+		return NULL;
+
+	return buftail.fp - 1;
+}
+
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+			  const struct pt_regs *regs)
+{
+	struct frame_tail __user *tail;
+
+	if (!consume_entry(cookie, regs->ARM_pc))
+		return;
+
+	if (!current->mm)
+		return;
+
+	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
+
+	while (tail && !((unsigned long)tail & 0x3))
+		tail = unwind_user_frame(tail, cookie, consume_entry);
+}
 #endif