diff mbox series

[v2,2/4] perf,uprobes: fix user stack traces in the presence of pending uretprobes

Message ID 20240522013845.1631305-3-andrii@kernel.org (mailing list archive)
State Handled Elsewhere, archived
Delegated to: BPF
Headers show
Series Fix user stack traces captured from uprobes | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Andrii Nakryiko May 22, 2024, 1:38 a.m. UTC
When kernel has pending uretprobes installed, it hijacks original user
function return address on the stack with a uretprobe trampoline
address. There could be multiple such pending uretprobes (either on
different user functions or on the same recursive one) at any given
time within the same task.

This approach interferes with the user stack trace capture logic, which
would report suprising addresses (like 0x7fffffffe000) that correspond
to a special "[uprobes]" section that kernel installs in the target
process address space for uretprobe trampoline code, while logically it
should be an address somewhere within the calling function of another
traced user function.

This is easy to correct for, though. Uprobes subsystem keeps track of
pending uretprobes and records original return addresses. This patch is
using this to do a post-processing step and restore each trampoline
address entries with correct original return address. This is done only
if there are pending uretprobes for current task.

This is a similar approach to what fprobe/kretprobe infrastructure is
doing when capturing kernel stack traces in the presence of pending
return probes.

Reported-by: Riham Selim <rihams@meta.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 kernel/events/callchain.c | 43 ++++++++++++++++++++++++++++++++++++++-
 kernel/events/uprobes.c   |  9 ++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)

Comments

Masami Hiramatsu (Google) June 4, 2024, 2:13 p.m. UTC | #1
On Tue, 21 May 2024 18:38:43 -0700
Andrii Nakryiko <andrii@kernel.org> wrote:

> When kernel has pending uretprobes installed, it hijacks original user
> function return address on the stack with a uretprobe trampoline
> address. There could be multiple such pending uretprobes (either on
> different user functions or on the same recursive one) at any given
> time within the same task.
> 
> This approach interferes with the user stack trace capture logic, which
> would report suprising addresses (like 0x7fffffffe000) that correspond
> to a special "[uprobes]" section that kernel installs in the target
> process address space for uretprobe trampoline code, while logically it
> should be an address somewhere within the calling function of another
> traced user function.
> 
> This is easy to correct for, though. Uprobes subsystem keeps track of
> pending uretprobes and records original return addresses. This patch is
> using this to do a post-processing step and restore each trampoline
> address entries with correct original return address. This is done only
> if there are pending uretprobes for current task.
> 
> This is a similar approach to what fprobe/kretprobe infrastructure is
> doing when capturing kernel stack traces in the presence of pending
> return probes.
> 

This looks good to me because this trampoline information is only
managed in uprobes. And it should be provided when unwinding user
stack.

Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Thank you!

> Reported-by: Riham Selim <rihams@meta.com>
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>  kernel/events/callchain.c | 43 ++++++++++++++++++++++++++++++++++++++-
>  kernel/events/uprobes.c   |  9 ++++++++
>  2 files changed, 51 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
> index 1273be84392c..b17e3323f7f6 100644
> --- a/kernel/events/callchain.c
> +++ b/kernel/events/callchain.c
> @@ -11,6 +11,7 @@
>  #include <linux/perf_event.h>
>  #include <linux/slab.h>
>  #include <linux/sched/task_stack.h>
> +#include <linux/uprobes.h>
>  
>  #include "internal.h"
>  
> @@ -176,13 +177,51 @@ put_callchain_entry(int rctx)
>  	put_recursion_context(this_cpu_ptr(callchain_recursion), rctx);
>  }
>  
> +static void fixup_uretprobe_trampoline_entries(struct perf_callchain_entry *entry,
> +					       int start_entry_idx)
> +{
> +#ifdef CONFIG_UPROBES
> +	struct uprobe_task *utask = current->utask;
> +	struct return_instance *ri;
> +	__u64 *cur_ip, *last_ip, tramp_addr;
> +
> +	if (likely(!utask || !utask->return_instances))
> +		return;
> +
> +	cur_ip = &entry->ip[start_entry_idx];
> +	last_ip = &entry->ip[entry->nr - 1];
> +	ri = utask->return_instances;
> +	tramp_addr = uprobe_get_trampoline_vaddr();
> +
> +	/*
> +	 * If there are pending uretprobes for the current thread, they are
> +	 * recorded in a list inside utask->return_instances; each such
> +	 * pending uretprobe replaces traced user function's return address on
> +	 * the stack, so when stack trace is captured, instead of seeing
> +	 * actual function's return address, we'll have one or many uretprobe
> +	 * trampoline addresses in the stack trace, which are not helpful and
> +	 * misleading to users.
> +	 * So here we go over the pending list of uretprobes, and each
> +	 * encountered trampoline address is replaced with actual return
> +	 * address.
> +	 */
> +	while (ri && cur_ip <= last_ip) {
> +		if (*cur_ip == tramp_addr) {
> +			*cur_ip = ri->orig_ret_vaddr;
> +			ri = ri->next;
> +		}
> +		cur_ip++;
> +	}
> +#endif
> +}
> +
>  struct perf_callchain_entry *
>  get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
>  		   u32 max_stack, bool crosstask, bool add_mark)
>  {
>  	struct perf_callchain_entry *entry;
>  	struct perf_callchain_entry_ctx ctx;
> -	int rctx;
> +	int rctx, start_entry_idx;
>  
>  	entry = get_callchain_entry(&rctx);
>  	if (!entry)
> @@ -215,7 +254,9 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
>  			if (add_mark)
>  				perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
>  
> +			start_entry_idx = entry->nr;
>  			perf_callchain_user(&ctx, regs);
> +			fixup_uretprobe_trampoline_entries(entry, start_entry_idx);
>  		}
>  	}
>  
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index d60d24f0f2f4..1c99380dc89d 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -2149,6 +2149,15 @@ static void handle_trampoline(struct pt_regs *regs)
>  
>  		instruction_pointer_set(regs, ri->orig_ret_vaddr);
>  		do {
> +			/* pop current instance from the stack of pending return instances,
> +			 * as it's not pending anymore: we just fixed up original
> +			 * instruction pointer in regs and are about to call handlers;
> +			 * this allows fixup_uretprobe_trampoline_entries() to properly fix up
> +			 * captured stack traces from uretprobe handlers, in which pending
> +			 * trampoline addresses on the stack are replaced with correct
> +			 * original return addresses
> +			 */
> +			utask->return_instances = ri->next;
>  			if (valid)
>  				handle_uretprobe_chain(ri, regs);
>  			ri = free_ret_instance(ri);
> -- 
> 2.43.0
>
Andrii Nakryiko June 4, 2024, 5:16 p.m. UTC | #2
On Tue, Jun 4, 2024 at 7:13 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Tue, 21 May 2024 18:38:43 -0700
> Andrii Nakryiko <andrii@kernel.org> wrote:
>
> > When kernel has pending uretprobes installed, it hijacks original user
> > function return address on the stack with a uretprobe trampoline
> > address. There could be multiple such pending uretprobes (either on
> > different user functions or on the same recursive one) at any given
> > time within the same task.
> >
> > This approach interferes with the user stack trace capture logic, which
> > would report suprising addresses (like 0x7fffffffe000) that correspond
> > to a special "[uprobes]" section that kernel installs in the target
> > process address space for uretprobe trampoline code, while logically it
> > should be an address somewhere within the calling function of another
> > traced user function.
> >
> > This is easy to correct for, though. Uprobes subsystem keeps track of
> > pending uretprobes and records original return addresses. This patch is
> > using this to do a post-processing step and restore each trampoline
> > address entries with correct original return address. This is done only
> > if there are pending uretprobes for current task.
> >
> > This is a similar approach to what fprobe/kretprobe infrastructure is
> > doing when capturing kernel stack traces in the presence of pending
> > return probes.
> >
>
> This looks good to me because this trampoline information is only
> managed in uprobes. And it should be provided when unwinding user
> stack.
>
> Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
>
> Thank you!

Great, thanks for reviewing, Masami!

Would you take this fix through your tree, or where should it be routed to?

>
> > Reported-by: Riham Selim <rihams@meta.com>
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
> >  kernel/events/callchain.c | 43 ++++++++++++++++++++++++++++++++++++++-
> >  kernel/events/uprobes.c   |  9 ++++++++
> >  2 files changed, 51 insertions(+), 1 deletion(-)
> >

[...]
Andrii Nakryiko June 17, 2024, 10:37 p.m. UTC | #3
On Tue, Jun 4, 2024 at 10:16 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Tue, Jun 4, 2024 at 7:13 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> >
> > On Tue, 21 May 2024 18:38:43 -0700
> > Andrii Nakryiko <andrii@kernel.org> wrote:
> >
> > > When kernel has pending uretprobes installed, it hijacks original user
> > > function return address on the stack with a uretprobe trampoline
> > > address. There could be multiple such pending uretprobes (either on
> > > different user functions or on the same recursive one) at any given
> > > time within the same task.
> > >
> > > This approach interferes with the user stack trace capture logic, which
> > > would report suprising addresses (like 0x7fffffffe000) that correspond
> > > to a special "[uprobes]" section that kernel installs in the target
> > > process address space for uretprobe trampoline code, while logically it
> > > should be an address somewhere within the calling function of another
> > > traced user function.
> > >
> > > This is easy to correct for, though. Uprobes subsystem keeps track of
> > > pending uretprobes and records original return addresses. This patch is
> > > using this to do a post-processing step and restore each trampoline
> > > address entries with correct original return address. This is done only
> > > if there are pending uretprobes for current task.
> > >
> > > This is a similar approach to what fprobe/kretprobe infrastructure is
> > > doing when capturing kernel stack traces in the presence of pending
> > > return probes.
> > >
> >
> > This looks good to me because this trampoline information is only
> > managed in uprobes. And it should be provided when unwinding user
> > stack.
> >
> > Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> >
> > Thank you!
>
> Great, thanks for reviewing, Masami!
>
> Would you take this fix through your tree, or where should it be routed to?
>

Ping! What would you like me to do with this patch set? Should I
resend it without patch 3 (the one that tries to guess whether we are
at the entry to the function?), or did I manage to convince you that
this heuristic is OK, given perf's stack trace capturing logic already
makes heavy assumption of rbp register being used for frame pointer?

Please let me know your preference, I could drop patch 3 and send it
separately, if that helps move the main fix forward. Thanks!

> >
> > > Reported-by: Riham Selim <rihams@meta.com>
> > > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > > ---
> > >  kernel/events/callchain.c | 43 ++++++++++++++++++++++++++++++++++++++-
> > >  kernel/events/uprobes.c   |  9 ++++++++
> > >  2 files changed, 51 insertions(+), 1 deletion(-)
> > >
>
> [...]
Andrii Nakryiko June 24, 2024, 8:32 p.m. UTC | #4
On Mon, Jun 17, 2024 at 3:37 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Tue, Jun 4, 2024 at 10:16 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Tue, Jun 4, 2024 at 7:13 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > >
> > > On Tue, 21 May 2024 18:38:43 -0700
> > > Andrii Nakryiko <andrii@kernel.org> wrote:
> > >
> > > > When kernel has pending uretprobes installed, it hijacks original user
> > > > function return address on the stack with a uretprobe trampoline
> > > > address. There could be multiple such pending uretprobes (either on
> > > > different user functions or on the same recursive one) at any given
> > > > time within the same task.
> > > >
> > > > This approach interferes with the user stack trace capture logic, which
> > > > would report suprising addresses (like 0x7fffffffe000) that correspond
> > > > to a special "[uprobes]" section that kernel installs in the target
> > > > process address space for uretprobe trampoline code, while logically it
> > > > should be an address somewhere within the calling function of another
> > > > traced user function.
> > > >
> > > > This is easy to correct for, though. Uprobes subsystem keeps track of
> > > > pending uretprobes and records original return addresses. This patch is
> > > > using this to do a post-processing step and restore each trampoline
> > > > address entries with correct original return address. This is done only
> > > > if there are pending uretprobes for current task.
> > > >
> > > > This is a similar approach to what fprobe/kretprobe infrastructure is
> > > > doing when capturing kernel stack traces in the presence of pending
> > > > return probes.
> > > >
> > >
> > > This looks good to me because this trampoline information is only
> > > managed in uprobes. And it should be provided when unwinding user
> > > stack.
> > >
> > > Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > >
> > > Thank you!
> >
> > Great, thanks for reviewing, Masami!
> >
> > Would you take this fix through your tree, or where should it be routed to?
> >
>
> Ping! What would you like me to do with this patch set? Should I
> resend it without patch 3 (the one that tries to guess whether we are
> at the entry to the function?), or did I manage to convince you that
> this heuristic is OK, given perf's stack trace capturing logic already
> makes heavy assumption of rbp register being used for frame pointer?
>
> Please let me know your preference, I could drop patch 3 and send it
> separately, if that helps move the main fix forward. Thanks!

Masami,

Another week went by with absolutely no action or reaction from you.
Is there any way I can help improve the collaboration here?

I'm preparing more patches for uprobes and about to submit them. If
each reviewed and ready to be applied patch set has to sit idle for
multiple weeks for no good reason, we all will soon be lost just plain
forgetting the context in which the patch was prepared.

Please, prioritize handling patches that are meant to be routed
through your tree in a more timely fashion. Or propose some
alternative acceptable arrangement.

Thank you.

>
> > >
> > > > Reported-by: Riham Selim <rihams@meta.com>
> > > > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > > > ---
> > > >  kernel/events/callchain.c | 43 ++++++++++++++++++++++++++++++++++++++-
> > > >  kernel/events/uprobes.c   |  9 ++++++++
> > > >  2 files changed, 51 insertions(+), 1 deletion(-)
> > > >
> >
> > [...]
Masami Hiramatsu (Google) June 25, 2024, 12:39 a.m. UTC | #5
On Mon, 24 Jun 2024 13:32:35 -0700
Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:

> On Mon, Jun 17, 2024 at 3:37 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Tue, Jun 4, 2024 at 10:16 AM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Tue, Jun 4, 2024 at 7:13 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > > >
> > > > On Tue, 21 May 2024 18:38:43 -0700
> > > > Andrii Nakryiko <andrii@kernel.org> wrote:
> > > >
> > > > > When kernel has pending uretprobes installed, it hijacks original user
> > > > > function return address on the stack with a uretprobe trampoline
> > > > > address. There could be multiple such pending uretprobes (either on
> > > > > different user functions or on the same recursive one) at any given
> > > > > time within the same task.
> > > > >
> > > > > This approach interferes with the user stack trace capture logic, which
> > > > > would report suprising addresses (like 0x7fffffffe000) that correspond
> > > > > to a special "[uprobes]" section that kernel installs in the target
> > > > > process address space for uretprobe trampoline code, while logically it
> > > > > should be an address somewhere within the calling function of another
> > > > > traced user function.
> > > > >
> > > > > This is easy to correct for, though. Uprobes subsystem keeps track of
> > > > > pending uretprobes and records original return addresses. This patch is
> > > > > using this to do a post-processing step and restore each trampoline
> > > > > address entries with correct original return address. This is done only
> > > > > if there are pending uretprobes for current task.
> > > > >
> > > > > This is a similar approach to what fprobe/kretprobe infrastructure is
> > > > > doing when capturing kernel stack traces in the presence of pending
> > > > > return probes.
> > > > >
> > > >
> > > > This looks good to me because this trampoline information is only
> > > > managed in uprobes. And it should be provided when unwinding user
> > > > stack.
> > > >
> > > > Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > > >
> > > > Thank you!
> > >
> > > Great, thanks for reviewing, Masami!
> > >
> > > Would you take this fix through your tree, or where should it be routed to?
> > >
> >
> > Ping! What would you like me to do with this patch set? Should I
> > resend it without patch 3 (the one that tries to guess whether we are
> > at the entry to the function?), or did I manage to convince you that
> > this heuristic is OK, given perf's stack trace capturing logic already
> > makes heavy assumption of rbp register being used for frame pointer?
> >
> > Please let me know your preference, I could drop patch 3 and send it
> > separately, if that helps move the main fix forward. Thanks!
> 
> Masami,
> 
> Another week went by with absolutely no action or reaction from you.
> Is there any way I can help improve the collaboration here?

OK, if there is no change without [3/4], let me pick the others on
probes/for-next directly. [3/4] I need other x86 maintainer's
comments. And it should be handled by PMU maintainers.

Thanks,


> 
> I'm preparing more patches for uprobes and about to submit them. If
> each reviewed and ready to be applied patch set has to sit idle for
> multiple weeks for no good reason, we all will soon be lost just plain
> forgetting the context in which the patch was prepared.
> 
> Please, prioritize handling patches that are meant to be routed
> through your tree in a more timely fashion. Or propose some
> alternative acceptable arrangement.
> 
> Thank you.
> 
> >
> > > >
> > > > > Reported-by: Riham Selim <rihams@meta.com>
> > > > > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > > > > ---
> > > > >  kernel/events/callchain.c | 43 ++++++++++++++++++++++++++++++++++++++-
> > > > >  kernel/events/uprobes.c   |  9 ++++++++
> > > > >  2 files changed, 51 insertions(+), 1 deletion(-)
> > > > >
> > >
> > > [...]
Andrii Nakryiko June 25, 2024, 2:51 a.m. UTC | #6
On Mon, Jun 24, 2024 at 5:39 PM Masami Hiramatsu <mhiramat@kernel.org> wrote:
>
> On Mon, 24 Jun 2024 13:32:35 -0700
> Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>
> > On Mon, Jun 17, 2024 at 3:37 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Tue, Jun 4, 2024 at 10:16 AM Andrii Nakryiko
> > > <andrii.nakryiko@gmail.com> wrote:
> > > >
> > > > On Tue, Jun 4, 2024 at 7:13 AM Masami Hiramatsu <mhiramat@kernel.org> wrote:
> > > > >
> > > > > On Tue, 21 May 2024 18:38:43 -0700
> > > > > Andrii Nakryiko <andrii@kernel.org> wrote:
> > > > >
> > > > > > When kernel has pending uretprobes installed, it hijacks original user
> > > > > > function return address on the stack with a uretprobe trampoline
> > > > > > address. There could be multiple such pending uretprobes (either on
> > > > > > different user functions or on the same recursive one) at any given
> > > > > > time within the same task.
> > > > > >
> > > > > > This approach interferes with the user stack trace capture logic, which
> > > > > > would report suprising addresses (like 0x7fffffffe000) that correspond
> > > > > > to a special "[uprobes]" section that kernel installs in the target
> > > > > > process address space for uretprobe trampoline code, while logically it
> > > > > > should be an address somewhere within the calling function of another
> > > > > > traced user function.
> > > > > >
> > > > > > This is easy to correct for, though. Uprobes subsystem keeps track of
> > > > > > pending uretprobes and records original return addresses. This patch is
> > > > > > using this to do a post-processing step and restore each trampoline
> > > > > > address entries with correct original return address. This is done only
> > > > > > if there are pending uretprobes for current task.
> > > > > >
> > > > > > This is a similar approach to what fprobe/kretprobe infrastructure is
> > > > > > doing when capturing kernel stack traces in the presence of pending
> > > > > > return probes.
> > > > > >
> > > > >
> > > > > This looks good to me because this trampoline information is only
> > > > > managed in uprobes. And it should be provided when unwinding user
> > > > > stack.
> > > > >
> > > > > Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > > > >
> > > > > Thank you!
> > > >
> > > > Great, thanks for reviewing, Masami!
> > > >
> > > > Would you take this fix through your tree, or where should it be routed to?
> > > >
> > >
> > > Ping! What would you like me to do with this patch set? Should I
> > > resend it without patch 3 (the one that tries to guess whether we are
> > > at the entry to the function?), or did I manage to convince you that
> > > this heuristic is OK, given perf's stack trace capturing logic already
> > > makes heavy assumption of rbp register being used for frame pointer?
> > >
> > > Please let me know your preference, I could drop patch 3 and send it
> > > separately, if that helps move the main fix forward. Thanks!
> >
> > Masami,
> >
> > Another week went by with absolutely no action or reaction from you.
> > Is there any way I can help improve the collaboration here?
>
> OK, if there is no change without [3/4], let me pick the others on

Thanks, Masami!

Selftest is probably failing (as it expects correct stack trace), but
that's ok, we can fix it up once linux-trace-kernel and bpf-next trees
converge.

> probes/for-next directly. [3/4] I need other x86 maintainer's
> comments. And it should be handled by PMU maintainers.

Sounds good, I'll repost it separately. Do I need to CC anyone else
besides people on this thread already?

>
> Thanks,
>
>
> >
> > I'm preparing more patches for uprobes and about to submit them. If
> > each reviewed and ready to be applied patch set has to sit idle for
> > multiple weeks for no good reason, we all will soon be lost just plain
> > forgetting the context in which the patch was prepared.
> >
> > Please, prioritize handling patches that are meant to be routed
> > through your tree in a more timely fashion. Or propose some
> > alternative acceptable arrangement.
> >
> > Thank you.
> >
> > >
> > > > >
> > > > > > Reported-by: Riham Selim <rihams@meta.com>
> > > > > > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > > > > > ---
> > > > > >  kernel/events/callchain.c | 43 ++++++++++++++++++++++++++++++++++++++-
> > > > > >  kernel/events/uprobes.c   |  9 ++++++++
> > > > > >  2 files changed, 51 insertions(+), 1 deletion(-)
> > > > > >
> > > >
> > > > [...]
>
>
> --
> Masami Hiramatsu (Google) <mhiramat@kernel.org>
diff mbox series

Patch

diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 1273be84392c..b17e3323f7f6 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -11,6 +11,7 @@ 
 #include <linux/perf_event.h>
 #include <linux/slab.h>
 #include <linux/sched/task_stack.h>
+#include <linux/uprobes.h>
 
 #include "internal.h"
 
@@ -176,13 +177,51 @@  put_callchain_entry(int rctx)
 	put_recursion_context(this_cpu_ptr(callchain_recursion), rctx);
 }
 
+static void fixup_uretprobe_trampoline_entries(struct perf_callchain_entry *entry,
+					       int start_entry_idx)
+{
+#ifdef CONFIG_UPROBES
+	struct uprobe_task *utask = current->utask;
+	struct return_instance *ri;
+	__u64 *cur_ip, *last_ip, tramp_addr;
+
+	if (likely(!utask || !utask->return_instances))
+		return;
+
+	cur_ip = &entry->ip[start_entry_idx];
+	last_ip = &entry->ip[entry->nr - 1];
+	ri = utask->return_instances;
+	tramp_addr = uprobe_get_trampoline_vaddr();
+
+	/*
+	 * If there are pending uretprobes for the current thread, they are
+	 * recorded in a list inside utask->return_instances; each such
+	 * pending uretprobe replaces traced user function's return address on
+	 * the stack, so when stack trace is captured, instead of seeing
+	 * actual function's return address, we'll have one or many uretprobe
+	 * trampoline addresses in the stack trace, which are not helpful and
+	 * misleading to users.
+	 * So here we go over the pending list of uretprobes, and each
+	 * encountered trampoline address is replaced with actual return
+	 * address.
+	 */
+	while (ri && cur_ip <= last_ip) {
+		if (*cur_ip == tramp_addr) {
+			*cur_ip = ri->orig_ret_vaddr;
+			ri = ri->next;
+		}
+		cur_ip++;
+	}
+#endif
+}
+
 struct perf_callchain_entry *
 get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 		   u32 max_stack, bool crosstask, bool add_mark)
 {
 	struct perf_callchain_entry *entry;
 	struct perf_callchain_entry_ctx ctx;
-	int rctx;
+	int rctx, start_entry_idx;
 
 	entry = get_callchain_entry(&rctx);
 	if (!entry)
@@ -215,7 +254,9 @@  get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 			if (add_mark)
 				perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
 
+			start_entry_idx = entry->nr;
 			perf_callchain_user(&ctx, regs);
+			fixup_uretprobe_trampoline_entries(entry, start_entry_idx);
 		}
 	}
 
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index d60d24f0f2f4..1c99380dc89d 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -2149,6 +2149,15 @@  static void handle_trampoline(struct pt_regs *regs)
 
 		instruction_pointer_set(regs, ri->orig_ret_vaddr);
 		do {
+			/* pop current instance from the stack of pending return instances,
+			 * as it's not pending anymore: we just fixed up original
+			 * instruction pointer in regs and are about to call handlers;
+			 * this allows fixup_uretprobe_trampoline_entries() to properly fix up
+			 * captured stack traces from uretprobe handlers, in which pending
+			 * trampoline addresses on the stack are replaced with correct
+			 * original return addresses
+			 */
+			utask->return_instances = ri->next;
 			if (valid)
 				handle_uretprobe_chain(ri, regs);
 			ri = free_ret_instance(ri);