diff mbox series

[PATCHv5,bpf-next,02/13] uprobe: Add support for session consumer

Message ID 20240929205717.3813648-3-jolsa@kernel.org (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series uprobe, bpf: Add session support | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 563 this patch: 563
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 12 maintainers not CCed: irogers@google.com adrian.hunter@intel.com kan.liang@linux.intel.com namhyung@kernel.org mingo@redhat.com linux-hardening@vger.kernel.org kees@kernel.org linux-perf-users@vger.kernel.org mark.rutland@arm.com gustavoars@kernel.org alexander.shishkin@linux.intel.com acme@kernel.org
netdev/build_clang success Errors and warnings before: 1046 this patch: 1046
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 14731 this patch: 14731
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: No space is necessary after a cast WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0

Commit Message

Jiri Olsa Sept. 29, 2024, 8:57 p.m. UTC
This change allows the uprobe consumer to behave as session which
means that 'handler' and 'ret_handler' callbacks are connected in
a way that allows to:

  - control execution of 'ret_handler' from 'handler' callback
  - share data between 'handler' and 'ret_handler' callbacks

The session concept fits to our common use case where we do filtering
on entry uprobe and based on the result we decide to run the return
uprobe (or not).

It's also convenient to share the data between session callbacks.

To achive this we are adding new return value the uprobe consumer
can return from 'handler' callback:

  UPROBE_HANDLER_IGNORE
  - Ignore 'ret_handler' callback for this consumer.

And store cookie and pass it to 'ret_handler' when consumer has both
'handler' and 'ret_handler' callbacks defined.

We store shared data in the return_consumer object array as part of
the return_instance object. This way the handle_uretprobe_chain can
find related return_consumer and its shared data.

We also store entry handler return value, for cases when there are
multiple consumers on single uprobe and some of them are ignored and
some of them not, in which case the return probe gets installed and
we need to have a way to find out which consumer needs to be ignored.

The tricky part is when consumer is registered 'after' the uprobe
entry handler is hit. In such case this consumer's 'ret_handler' gets
executed as well, but it won't have the proper data pointer set,
so we can filter it out.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/uprobes.h |  21 +++++-
 kernel/events/uprobes.c | 148 +++++++++++++++++++++++++++++++---------
 2 files changed, 137 insertions(+), 32 deletions(-)

Comments

Oleg Nesterov Sept. 30, 2024, 9:40 a.m. UTC | #1
Jiri,

LGTM. But I'm afraid you need to send v6, sorry ;)

This change has some (trivial) conflicts in prepare_uretprobe() with the
cleanups I sent yesterday, and Peter is going to queue them.

See https://lore.kernel.org/all/20240929144201.GA9429@redhat.com/

Oleg.

On 09/29, Jiri Olsa wrote:
>
> This change allows the uprobe consumer to behave as session which
> means that 'handler' and 'ret_handler' callbacks are connected in
> a way that allows to:
> 
>   - control execution of 'ret_handler' from 'handler' callback
>   - share data between 'handler' and 'ret_handler' callbacks
> 
> The session concept fits to our common use case where we do filtering
> on entry uprobe and based on the result we decide to run the return
> uprobe (or not).
> 
> It's also convenient to share the data between session callbacks.
> 
> To achive this we are adding new return value the uprobe consumer
> can return from 'handler' callback:
> 
>   UPROBE_HANDLER_IGNORE
>   - Ignore 'ret_handler' callback for this consumer.
> 
> And store cookie and pass it to 'ret_handler' when consumer has both
> 'handler' and 'ret_handler' callbacks defined.
> 
> We store shared data in the return_consumer object array as part of
> the return_instance object. This way the handle_uretprobe_chain can
> find related return_consumer and its shared data.
> 
> We also store entry handler return value, for cases when there are
> multiple consumers on single uprobe and some of them are ignored and
> some of them not, in which case the return probe gets installed and
> we need to have a way to find out which consumer needs to be ignored.
> 
> The tricky part is when consumer is registered 'after' the uprobe
> entry handler is hit. In such case this consumer's 'ret_handler' gets
> executed as well, but it won't have the proper data pointer set,
> so we can filter it out.
> 
> Suggested-by: Oleg Nesterov <oleg@redhat.com>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  include/linux/uprobes.h |  21 +++++-
>  kernel/events/uprobes.c | 148 +++++++++++++++++++++++++++++++---------
>  2 files changed, 137 insertions(+), 32 deletions(-)
> 
> diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
> index bb265a632b91..dbaf04189548 100644
> --- a/include/linux/uprobes.h
> +++ b/include/linux/uprobes.h
> @@ -23,8 +23,17 @@ struct inode;
>  struct notifier_block;
>  struct page;
>  
> +/*
> + * Allowed return values from uprobe consumer's handler callback
> + * with following meaning:
> + *
> + * UPROBE_HANDLER_REMOVE
> + * - Remove the uprobe breakpoint from current->mm.
> + * UPROBE_HANDLER_IGNORE
> + * - Ignore ret_handler callback for this consumer.
> + */
>  #define UPROBE_HANDLER_REMOVE		1
> -#define UPROBE_HANDLER_MASK		1
> +#define UPROBE_HANDLER_IGNORE		2
>  
>  #define MAX_URETPROBE_DEPTH		64
>  
> @@ -44,6 +53,8 @@ struct uprobe_consumer {
>  	bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
>  
>  	struct list_head cons_node;
> +
> +	__u64 id;	/* set when uprobe_consumer is registered */
>  };
>  
>  #ifdef CONFIG_UPROBES
> @@ -83,14 +94,22 @@ struct uprobe_task {
>  	unsigned int			depth;
>  };
>  
> +struct return_consumer {
> +	__u64	cookie;
> +	__u64	id;
> +};
> +
>  struct return_instance {
>  	struct uprobe		*uprobe;
>  	unsigned long		func;
>  	unsigned long		stack;		/* stack pointer */
>  	unsigned long		orig_ret_vaddr; /* original return address */
>  	bool			chained;	/* true, if instance is nested */
> +	int			consumers_cnt;
>  
>  	struct return_instance	*next;		/* keep as stack */
> +
> +	struct return_consumer	consumers[] __counted_by(consumers_cnt);
>  };
>  
>  enum rp_check {
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 2ba93f8a31aa..76fe535c9b3c 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -65,7 +65,7 @@ struct uprobe {
>  	struct rcu_head		rcu;
>  	loff_t			offset;
>  	loff_t			ref_ctr_offset;
> -	unsigned long		flags;
> +	unsigned long		flags;		/* "unsigned long" so bitops work */
>  
>  	/*
>  	 * The generic code assumes that it has two members of unknown type
> @@ -825,8 +825,11 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
>  
>  static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
>  {
> +	static atomic64_t id;
> +
>  	down_write(&uprobe->consumer_rwsem);
>  	list_add_rcu(&uc->cons_node, &uprobe->consumers);
> +	uc->id = (__u64) atomic64_inc_return(&id);
>  	up_write(&uprobe->consumer_rwsem);
>  }
>  
> @@ -1797,6 +1800,34 @@ static struct uprobe_task *get_utask(void)
>  	return current->utask;
>  }
>  
> +static size_t ri_size(int consumers_cnt)
> +{
> +	struct return_instance *ri;
> +
> +	return sizeof(*ri) + sizeof(ri->consumers[0]) * consumers_cnt;
> +}
> +
> +#define DEF_CNT 4
> +
> +static struct return_instance *alloc_return_instance(void)
> +{
> +	struct return_instance *ri;
> +
> +	ri = kzalloc(ri_size(DEF_CNT), GFP_KERNEL);
> +	if (!ri)
> +		return ZERO_SIZE_PTR;
> +
> +	ri->consumers_cnt = DEF_CNT;
> +	return ri;
> +}
> +
> +static struct return_instance *dup_return_instance(struct return_instance *old)
> +{
> +	size_t size = ri_size(old->consumers_cnt);
> +
> +	return kmemdup(old, size, GFP_KERNEL);
> +}
> +
>  static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
>  {
>  	struct uprobe_task *n_utask;
> @@ -1809,11 +1840,10 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
>  
>  	p = &n_utask->return_instances;
>  	for (o = o_utask->return_instances; o; o = o->next) {
> -		n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
> +		n = dup_return_instance(o);
>  		if (!n)
>  			return -ENOMEM;
>  
> -		*n = *o;
>  		/*
>  		 * uprobe's refcnt has to be positive at this point, kept by
>  		 * utask->return_instances items; return_instances can't be
> @@ -1906,39 +1936,35 @@ static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
>  	utask->return_instances = ri;
>  }
>  
> -static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
> +static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
> +			      struct return_instance *ri)
>  {
> -	struct return_instance *ri;
>  	struct uprobe_task *utask;
>  	unsigned long orig_ret_vaddr, trampoline_vaddr;
>  	bool chained;
>  
>  	if (!get_xol_area())
> -		return;
> +		goto free;
>  
>  	utask = get_utask();
>  	if (!utask)
> -		return;
> +		goto free;
>  
>  	if (utask->depth >= MAX_URETPROBE_DEPTH) {
>  		printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
>  				" nestedness limit pid/tgid=%d/%d\n",
>  				current->pid, current->tgid);
> -		return;
> +		goto free;
>  	}
>  
>  	/* we need to bump refcount to store uprobe in utask */
>  	if (!try_get_uprobe(uprobe))
> -		return;
> -
> -	ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
> -	if (!ri)
> -		goto fail;
> +		goto free;
>  
>  	trampoline_vaddr = uprobe_get_trampoline_vaddr();
>  	orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
>  	if (orig_ret_vaddr == -1)
> -		goto fail;
> +		goto put;
>  
>  	/* drop the entries invalidated by longjmp() */
>  	chained = (orig_ret_vaddr == trampoline_vaddr);
> @@ -1956,7 +1982,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
>  			 * attack from user-space.
>  			 */
>  			uprobe_warn(current, "handle tail call");
> -			goto fail;
> +			goto put;
>  		}
>  		orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
>  	}
> @@ -1971,9 +1997,10 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
>  	utask->return_instances = ri;
>  
>  	return;
> -fail:
> -	kfree(ri);
> +put:
>  	put_uprobe(uprobe);
> +free:
> +	kfree(ri);
>  }
>  
>  /* Prepare to single-step probed instruction out of line. */
> @@ -2125,35 +2152,91 @@ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb
>  	return uprobe;
>  }
>  
> +static struct return_instance*
> +push_consumer(struct return_instance *ri, int idx, __u64 id, __u64 cookie)
> +{
> +	if (unlikely(ri == ZERO_SIZE_PTR))
> +		return ri;
> +
> +	if (unlikely(idx >= ri->consumers_cnt)) {
> +		struct return_instance *old_ri = ri;
> +
> +		ri->consumers_cnt += DEF_CNT;
> +		ri = krealloc(old_ri, ri_size(old_ri->consumers_cnt), GFP_KERNEL);
> +		if (!ri) {
> +			kfree(old_ri);
> +			return ZERO_SIZE_PTR;
> +		}
> +	}
> +
> +	ri->consumers[idx].id = id;
> +	ri->consumers[idx].cookie = cookie;
> +	return ri;
> +}
> +
> +static struct return_consumer *
> +return_consumer_find(struct return_instance *ri, int *iter, int id)
> +{
> +	struct return_consumer *ric;
> +	int idx = *iter;
> +
> +	for (ric = &ri->consumers[idx]; idx < ri->consumers_cnt; idx++, ric++) {
> +		if (ric->id == id) {
> +			*iter = idx + 1;
> +			return ric;
> +		}
> +	}
> +	return NULL;
> +}
> +
> +static bool ignore_ret_handler(int rc)
> +{
> +	return rc == UPROBE_HANDLER_REMOVE || rc == UPROBE_HANDLER_IGNORE;
> +}
> +
>  static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
>  {
>  	struct uprobe_consumer *uc;
> -	int remove = UPROBE_HANDLER_REMOVE;
> -	bool need_prep = false; /* prepare return uprobe, when needed */
> -	bool has_consumers = false;
> +	bool has_consumers = false, remove = true;
> +	struct return_instance *ri = NULL;
> +	int push_idx = 0;
>  
>  	current->utask->auprobe = &uprobe->arch;
>  
>  	list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
>  				 srcu_read_lock_held(&uprobes_srcu)) {
> +		bool session = uc->handler && uc->ret_handler;
> +		__u64 cookie = 0;
>  		int rc = 0;
>  
>  		if (uc->handler) {
> -			rc = uc->handler(uc, regs, NULL);
> -			WARN(rc & ~UPROBE_HANDLER_MASK,
> +			rc = uc->handler(uc, regs, &cookie);
> +			WARN(rc < 0 || rc > 2,
>  				"bad rc=0x%x from %ps()\n", rc, uc->handler);
>  		}
>  
> -		if (uc->ret_handler)
> -			need_prep = true;
> -
> -		remove &= rc;
> +		remove &= rc == UPROBE_HANDLER_REMOVE;
>  		has_consumers = true;
> +
> +		if (!uc->ret_handler || ignore_ret_handler(rc))
> +			continue;
> +
> +		if (!ri)
> +			ri = alloc_return_instance();
> +
> +		if (session)
> +			ri = push_consumer(ri, push_idx++, uc->id, cookie);
>  	}
>  	current->utask->auprobe = NULL;
>  
> -	if (need_prep && !remove)
> -		prepare_uretprobe(uprobe, regs); /* put bp at return */
> +	if (!ZERO_OR_NULL_PTR(ri)) {
> +		/*
> +		 * The push_idx value has the final number of return consumers,
> +		 * and ri->consumers_cnt has number of allocated consumers.
> +		 */
> +		ri->consumers_cnt = push_idx;
> +		prepare_uretprobe(uprobe, regs, ri);
> +	}
>  
>  	if (remove && has_consumers) {
>  		down_read(&uprobe->register_rwsem);
> @@ -2172,14 +2255,17 @@ static void
>  handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
>  {
>  	struct uprobe *uprobe = ri->uprobe;
> +	struct return_consumer *ric;
>  	struct uprobe_consumer *uc;
> -	int srcu_idx;
> +	int srcu_idx, ric_idx = 0;
>  
>  	srcu_idx = srcu_read_lock(&uprobes_srcu);
>  	list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
>  				 srcu_read_lock_held(&uprobes_srcu)) {
> -		if (uc->ret_handler)
> -			uc->ret_handler(uc, ri->func, regs, NULL);
> +		if (uc->ret_handler) {
> +			ric = return_consumer_find(ri, &ric_idx, uc->id);
> +			uc->ret_handler(uc, ri->func, regs, ric ? &ric->cookie : NULL);
> +		}
>  	}
>  	srcu_read_unlock(&uprobes_srcu, srcu_idx);
>  }
> -- 
> 2.46.1
>
Jiri Olsa Sept. 30, 2024, 11:42 a.m. UTC | #2
On Mon, Sep 30, 2024 at 11:40:15AM +0200, Oleg Nesterov wrote:
> Jiri,
> 
> LGTM. But I'm afraid you need to send v6, sorry ;)
> 
> This change has some (trivial) conflicts in prepare_uretprobe() with the
> cleanups I sent yesterday, and Peter is going to queue them.

sure, np.. will wait for any review comments and rebase/resend

thanks,
jirka

> 
> See https://lore.kernel.org/all/20240929144201.GA9429@redhat.com/
> 
> Oleg.
> 
> On 09/29, Jiri Olsa wrote:
> >
> > This change allows the uprobe consumer to behave as session which
> > means that 'handler' and 'ret_handler' callbacks are connected in
> > a way that allows to:
> > 
> >   - control execution of 'ret_handler' from 'handler' callback
> >   - share data between 'handler' and 'ret_handler' callbacks
> > 
> > The session concept fits to our common use case where we do filtering
> > on entry uprobe and based on the result we decide to run the return
> > uprobe (or not).
> > 
> > It's also convenient to share the data between session callbacks.
> > 
> > To achive this we are adding new return value the uprobe consumer
> > can return from 'handler' callback:
> > 
> >   UPROBE_HANDLER_IGNORE
> >   - Ignore 'ret_handler' callback for this consumer.
> > 
> > And store cookie and pass it to 'ret_handler' when consumer has both
> > 'handler' and 'ret_handler' callbacks defined.
> > 
> > We store shared data in the return_consumer object array as part of
> > the return_instance object. This way the handle_uretprobe_chain can
> > find related return_consumer and its shared data.
> > 
> > We also store entry handler return value, for cases when there are
> > multiple consumers on single uprobe and some of them are ignored and
> > some of them not, in which case the return probe gets installed and
> > we need to have a way to find out which consumer needs to be ignored.
> > 
> > The tricky part is when consumer is registered 'after' the uprobe
> > entry handler is hit. In such case this consumer's 'ret_handler' gets
> > executed as well, but it won't have the proper data pointer set,
> > so we can filter it out.
> > 
> > Suggested-by: Oleg Nesterov <oleg@redhat.com>
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> >  include/linux/uprobes.h |  21 +++++-
> >  kernel/events/uprobes.c | 148 +++++++++++++++++++++++++++++++---------
> >  2 files changed, 137 insertions(+), 32 deletions(-)
> > 
> > diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
> > index bb265a632b91..dbaf04189548 100644
> > --- a/include/linux/uprobes.h
> > +++ b/include/linux/uprobes.h
> > @@ -23,8 +23,17 @@ struct inode;
> >  struct notifier_block;
> >  struct page;
> >  
> > +/*
> > + * Allowed return values from uprobe consumer's handler callback
> > + * with following meaning:
> > + *
> > + * UPROBE_HANDLER_REMOVE
> > + * - Remove the uprobe breakpoint from current->mm.
> > + * UPROBE_HANDLER_IGNORE
> > + * - Ignore ret_handler callback for this consumer.
> > + */
> >  #define UPROBE_HANDLER_REMOVE		1
> > -#define UPROBE_HANDLER_MASK		1
> > +#define UPROBE_HANDLER_IGNORE		2
> >  
> >  #define MAX_URETPROBE_DEPTH		64
> >  
> > @@ -44,6 +53,8 @@ struct uprobe_consumer {
> >  	bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
> >  
> >  	struct list_head cons_node;
> > +
> > +	__u64 id;	/* set when uprobe_consumer is registered */
> >  };
> >  
> >  #ifdef CONFIG_UPROBES
> > @@ -83,14 +94,22 @@ struct uprobe_task {
> >  	unsigned int			depth;
> >  };
> >  
> > +struct return_consumer {
> > +	__u64	cookie;
> > +	__u64	id;
> > +};
> > +
> >  struct return_instance {
> >  	struct uprobe		*uprobe;
> >  	unsigned long		func;
> >  	unsigned long		stack;		/* stack pointer */
> >  	unsigned long		orig_ret_vaddr; /* original return address */
> >  	bool			chained;	/* true, if instance is nested */
> > +	int			consumers_cnt;
> >  
> >  	struct return_instance	*next;		/* keep as stack */
> > +
> > +	struct return_consumer	consumers[] __counted_by(consumers_cnt);
> >  };
> >  
> >  enum rp_check {
> > diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> > index 2ba93f8a31aa..76fe535c9b3c 100644
> > --- a/kernel/events/uprobes.c
> > +++ b/kernel/events/uprobes.c
> > @@ -65,7 +65,7 @@ struct uprobe {
> >  	struct rcu_head		rcu;
> >  	loff_t			offset;
> >  	loff_t			ref_ctr_offset;
> > -	unsigned long		flags;
> > +	unsigned long		flags;		/* "unsigned long" so bitops work */
> >  
> >  	/*
> >  	 * The generic code assumes that it has two members of unknown type
> > @@ -825,8 +825,11 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
> >  
> >  static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
> >  {
> > +	static atomic64_t id;
> > +
> >  	down_write(&uprobe->consumer_rwsem);
> >  	list_add_rcu(&uc->cons_node, &uprobe->consumers);
> > +	uc->id = (__u64) atomic64_inc_return(&id);
> >  	up_write(&uprobe->consumer_rwsem);
> >  }
> >  
> > @@ -1797,6 +1800,34 @@ static struct uprobe_task *get_utask(void)
> >  	return current->utask;
> >  }
> >  
> > +static size_t ri_size(int consumers_cnt)
> > +{
> > +	struct return_instance *ri;
> > +
> > +	return sizeof(*ri) + sizeof(ri->consumers[0]) * consumers_cnt;
> > +}
> > +
> > +#define DEF_CNT 4
> > +
> > +static struct return_instance *alloc_return_instance(void)
> > +{
> > +	struct return_instance *ri;
> > +
> > +	ri = kzalloc(ri_size(DEF_CNT), GFP_KERNEL);
> > +	if (!ri)
> > +		return ZERO_SIZE_PTR;
> > +
> > +	ri->consumers_cnt = DEF_CNT;
> > +	return ri;
> > +}
> > +
> > +static struct return_instance *dup_return_instance(struct return_instance *old)
> > +{
> > +	size_t size = ri_size(old->consumers_cnt);
> > +
> > +	return kmemdup(old, size, GFP_KERNEL);
> > +}
> > +
> >  static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
> >  {
> >  	struct uprobe_task *n_utask;
> > @@ -1809,11 +1840,10 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
> >  
> >  	p = &n_utask->return_instances;
> >  	for (o = o_utask->return_instances; o; o = o->next) {
> > -		n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
> > +		n = dup_return_instance(o);
> >  		if (!n)
> >  			return -ENOMEM;
> >  
> > -		*n = *o;
> >  		/*
> >  		 * uprobe's refcnt has to be positive at this point, kept by
> >  		 * utask->return_instances items; return_instances can't be
> > @@ -1906,39 +1936,35 @@ static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
> >  	utask->return_instances = ri;
> >  }
> >  
> > -static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
> > +static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
> > +			      struct return_instance *ri)
> >  {
> > -	struct return_instance *ri;
> >  	struct uprobe_task *utask;
> >  	unsigned long orig_ret_vaddr, trampoline_vaddr;
> >  	bool chained;
> >  
> >  	if (!get_xol_area())
> > -		return;
> > +		goto free;
> >  
> >  	utask = get_utask();
> >  	if (!utask)
> > -		return;
> > +		goto free;
> >  
> >  	if (utask->depth >= MAX_URETPROBE_DEPTH) {
> >  		printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
> >  				" nestedness limit pid/tgid=%d/%d\n",
> >  				current->pid, current->tgid);
> > -		return;
> > +		goto free;
> >  	}
> >  
> >  	/* we need to bump refcount to store uprobe in utask */
> >  	if (!try_get_uprobe(uprobe))
> > -		return;
> > -
> > -	ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
> > -	if (!ri)
> > -		goto fail;
> > +		goto free;
> >  
> >  	trampoline_vaddr = uprobe_get_trampoline_vaddr();
> >  	orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
> >  	if (orig_ret_vaddr == -1)
> > -		goto fail;
> > +		goto put;
> >  
> >  	/* drop the entries invalidated by longjmp() */
> >  	chained = (orig_ret_vaddr == trampoline_vaddr);
> > @@ -1956,7 +1982,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
> >  			 * attack from user-space.
> >  			 */
> >  			uprobe_warn(current, "handle tail call");
> > -			goto fail;
> > +			goto put;
> >  		}
> >  		orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
> >  	}
> > @@ -1971,9 +1997,10 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
> >  	utask->return_instances = ri;
> >  
> >  	return;
> > -fail:
> > -	kfree(ri);
> > +put:
> >  	put_uprobe(uprobe);
> > +free:
> > +	kfree(ri);
> >  }
> >  
> >  /* Prepare to single-step probed instruction out of line. */
> > @@ -2125,35 +2152,91 @@ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb
> >  	return uprobe;
> >  }
> >  
> > +static struct return_instance*
> > +push_consumer(struct return_instance *ri, int idx, __u64 id, __u64 cookie)
> > +{
> > +	if (unlikely(ri == ZERO_SIZE_PTR))
> > +		return ri;
> > +
> > +	if (unlikely(idx >= ri->consumers_cnt)) {
> > +		struct return_instance *old_ri = ri;
> > +
> > +		ri->consumers_cnt += DEF_CNT;
> > +		ri = krealloc(old_ri, ri_size(old_ri->consumers_cnt), GFP_KERNEL);
> > +		if (!ri) {
> > +			kfree(old_ri);
> > +			return ZERO_SIZE_PTR;
> > +		}
> > +	}
> > +
> > +	ri->consumers[idx].id = id;
> > +	ri->consumers[idx].cookie = cookie;
> > +	return ri;
> > +}
> > +
> > +static struct return_consumer *
> > +return_consumer_find(struct return_instance *ri, int *iter, int id)
> > +{
> > +	struct return_consumer *ric;
> > +	int idx = *iter;
> > +
> > +	for (ric = &ri->consumers[idx]; idx < ri->consumers_cnt; idx++, ric++) {
> > +		if (ric->id == id) {
> > +			*iter = idx + 1;
> > +			return ric;
> > +		}
> > +	}
> > +	return NULL;
> > +}
> > +
> > +static bool ignore_ret_handler(int rc)
> > +{
> > +	return rc == UPROBE_HANDLER_REMOVE || rc == UPROBE_HANDLER_IGNORE;
> > +}
> > +
> >  static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
> >  {
> >  	struct uprobe_consumer *uc;
> > -	int remove = UPROBE_HANDLER_REMOVE;
> > -	bool need_prep = false; /* prepare return uprobe, when needed */
> > -	bool has_consumers = false;
> > +	bool has_consumers = false, remove = true;
> > +	struct return_instance *ri = NULL;
> > +	int push_idx = 0;
> >  
> >  	current->utask->auprobe = &uprobe->arch;
> >  
> >  	list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
> >  				 srcu_read_lock_held(&uprobes_srcu)) {
> > +		bool session = uc->handler && uc->ret_handler;
> > +		__u64 cookie = 0;
> >  		int rc = 0;
> >  
> >  		if (uc->handler) {
> > -			rc = uc->handler(uc, regs, NULL);
> > -			WARN(rc & ~UPROBE_HANDLER_MASK,
> > +			rc = uc->handler(uc, regs, &cookie);
> > +			WARN(rc < 0 || rc > 2,
> >  				"bad rc=0x%x from %ps()\n", rc, uc->handler);
> >  		}
> >  
> > -		if (uc->ret_handler)
> > -			need_prep = true;
> > -
> > -		remove &= rc;
> > +		remove &= rc == UPROBE_HANDLER_REMOVE;
> >  		has_consumers = true;
> > +
> > +		if (!uc->ret_handler || ignore_ret_handler(rc))
> > +			continue;
> > +
> > +		if (!ri)
> > +			ri = alloc_return_instance();
> > +
> > +		if (session)
> > +			ri = push_consumer(ri, push_idx++, uc->id, cookie);
> >  	}
> >  	current->utask->auprobe = NULL;
> >  
> > -	if (need_prep && !remove)
> > -		prepare_uretprobe(uprobe, regs); /* put bp at return */
> > +	if (!ZERO_OR_NULL_PTR(ri)) {
> > +		/*
> > +		 * The push_idx value has the final number of return consumers,
> > +		 * and ri->consumers_cnt has number of allocated consumers.
> > +		 */
> > +		ri->consumers_cnt = push_idx;
> > +		prepare_uretprobe(uprobe, regs, ri);
> > +	}
> >  
> >  	if (remove && has_consumers) {
> >  		down_read(&uprobe->register_rwsem);
> > @@ -2172,14 +2255,17 @@ static void
> >  handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
> >  {
> >  	struct uprobe *uprobe = ri->uprobe;
> > +	struct return_consumer *ric;
> >  	struct uprobe_consumer *uc;
> > -	int srcu_idx;
> > +	int srcu_idx, ric_idx = 0;
> >  
> >  	srcu_idx = srcu_read_lock(&uprobes_srcu);
> >  	list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
> >  				 srcu_read_lock_held(&uprobes_srcu)) {
> > -		if (uc->ret_handler)
> > -			uc->ret_handler(uc, ri->func, regs, NULL);
> > +		if (uc->ret_handler) {
> > +			ric = return_consumer_find(ri, &ric_idx, uc->id);
> > +			uc->ret_handler(uc, ri->func, regs, ric ? &ric->cookie : NULL);
> > +		}
> >  	}
> >  	srcu_read_unlock(&uprobes_srcu, srcu_idx);
> >  }
> > -- 
> > 2.46.1
> > 
>
Andrii Nakryiko Sept. 30, 2024, 9:36 p.m. UTC | #3
On Sun, Sep 29, 2024 at 1:57 PM Jiri Olsa <jolsa@kernel.org> wrote:
>
> This change allows the uprobe consumer to behave as session which
> means that 'handler' and 'ret_handler' callbacks are connected in
> a way that allows to:
>
>   - control execution of 'ret_handler' from 'handler' callback
>   - share data between 'handler' and 'ret_handler' callbacks
>
> The session concept fits to our common use case where we do filtering
> on entry uprobe and based on the result we decide to run the return
> uprobe (or not).
>
> It's also convenient to share the data between session callbacks.
>
> To achive this we are adding new return value the uprobe consumer
> can return from 'handler' callback:
>
>   UPROBE_HANDLER_IGNORE
>   - Ignore 'ret_handler' callback for this consumer.
>
> And store cookie and pass it to 'ret_handler' when consumer has both
> 'handler' and 'ret_handler' callbacks defined.
>
> We store shared data in the return_consumer object array as part of
> the return_instance object. This way the handle_uretprobe_chain can
> find related return_consumer and its shared data.
>
> We also store entry handler return value, for cases when there are
> multiple consumers on single uprobe and some of them are ignored and
> some of them not, in which case the return probe gets installed and
> we need to have a way to find out which consumer needs to be ignored.
>
> The tricky part is when consumer is registered 'after' the uprobe
> entry handler is hit. In such case this consumer's 'ret_handler' gets
> executed as well, but it won't have the proper data pointer set,
> so we can filter it out.
>
> Suggested-by: Oleg Nesterov <oleg@redhat.com>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  include/linux/uprobes.h |  21 +++++-
>  kernel/events/uprobes.c | 148 +++++++++++++++++++++++++++++++---------
>  2 files changed, 137 insertions(+), 32 deletions(-)
>

LGTM,

Acked-by: Andrii Nakryiko <andrii@kernel.org>


Note also that I just resent the last patch from my patch set ([0]),
hopefully it will get applied, in which case you'd need to do a tiny
rebase.

  [0] https://lore.kernel.org/linux-trace-kernel/20240930212246.1829395-1-andrii@kernel.org/


[...]
Jiri Olsa Oct. 1, 2024, 1:17 p.m. UTC | #4
On Mon, Sep 30, 2024 at 02:36:03PM -0700, Andrii Nakryiko wrote:
> On Sun, Sep 29, 2024 at 1:57 PM Jiri Olsa <jolsa@kernel.org> wrote:
> >
> > This change allows the uprobe consumer to behave as session which
> > means that 'handler' and 'ret_handler' callbacks are connected in
> > a way that allows to:
> >
> >   - control execution of 'ret_handler' from 'handler' callback
> >   - share data between 'handler' and 'ret_handler' callbacks
> >
> > The session concept fits to our common use case where we do filtering
> > on entry uprobe and based on the result we decide to run the return
> > uprobe (or not).
> >
> > It's also convenient to share the data between session callbacks.
> >
> > To achive this we are adding new return value the uprobe consumer
> > can return from 'handler' callback:
> >
> >   UPROBE_HANDLER_IGNORE
> >   - Ignore 'ret_handler' callback for this consumer.
> >
> > And store cookie and pass it to 'ret_handler' when consumer has both
> > 'handler' and 'ret_handler' callbacks defined.
> >
> > We store shared data in the return_consumer object array as part of
> > the return_instance object. This way the handle_uretprobe_chain can
> > find related return_consumer and its shared data.
> >
> > We also store entry handler return value, for cases when there are
> > multiple consumers on single uprobe and some of them are ignored and
> > some of them not, in which case the return probe gets installed and
> > we need to have a way to find out which consumer needs to be ignored.
> >
> > The tricky part is when consumer is registered 'after' the uprobe
> > entry handler is hit. In such case this consumer's 'ret_handler' gets
> > executed as well, but it won't have the proper data pointer set,
> > so we can filter it out.
> >
> > Suggested-by: Oleg Nesterov <oleg@redhat.com>
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> >  include/linux/uprobes.h |  21 +++++-
> >  kernel/events/uprobes.c | 148 +++++++++++++++++++++++++++++++---------
> >  2 files changed, 137 insertions(+), 32 deletions(-)
> >
> 
> LGTM,
> 
> Acked-by: Andrii Nakryiko <andrii@kernel.org>
> 
> 
> Note also that I just resent the last patch from my patch set ([0]),
> hopefully it will get applied, in which case you'd need to do a tiny
> rebase.
> 
>   [0] https://lore.kernel.org/linux-trace-kernel/20240930212246.1829395-1-andrii@kernel.org/

the rebase is fine, but what I'm not clear about is that after yours and
Oleg's changes get in, my kernel changes will depend on peter's perf/core,
but bpf selftests changes will need bpf-next/master

jirka
Andrii Nakryiko Oct. 1, 2024, 5:09 p.m. UTC | #5
On Tue, Oct 1, 2024 at 6:17 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Mon, Sep 30, 2024 at 02:36:03PM -0700, Andrii Nakryiko wrote:
> > On Sun, Sep 29, 2024 at 1:57 PM Jiri Olsa <jolsa@kernel.org> wrote:
> > >
> > > This change allows the uprobe consumer to behave as session which
> > > means that 'handler' and 'ret_handler' callbacks are connected in
> > > a way that allows to:
> > >
> > >   - control execution of 'ret_handler' from 'handler' callback
> > >   - share data between 'handler' and 'ret_handler' callbacks
> > >
> > > The session concept fits to our common use case where we do filtering
> > > on entry uprobe and based on the result we decide to run the return
> > > uprobe (or not).
> > >
> > > It's also convenient to share the data between session callbacks.
> > >
> > > To achive this we are adding new return value the uprobe consumer
> > > can return from 'handler' callback:
> > >
> > >   UPROBE_HANDLER_IGNORE
> > >   - Ignore 'ret_handler' callback for this consumer.
> > >
> > > And store cookie and pass it to 'ret_handler' when consumer has both
> > > 'handler' and 'ret_handler' callbacks defined.
> > >
> > > We store shared data in the return_consumer object array as part of
> > > the return_instance object. This way the handle_uretprobe_chain can
> > > find related return_consumer and its shared data.
> > >
> > > We also store entry handler return value, for cases when there are
> > > multiple consumers on single uprobe and some of them are ignored and
> > > some of them not, in which case the return probe gets installed and
> > > we need to have a way to find out which consumer needs to be ignored.
> > >
> > > The tricky part is when consumer is registered 'after' the uprobe
> > > entry handler is hit. In such case this consumer's 'ret_handler' gets
> > > executed as well, but it won't have the proper data pointer set,
> > > so we can filter it out.
> > >
> > > Suggested-by: Oleg Nesterov <oleg@redhat.com>
> > > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > > ---
> > >  include/linux/uprobes.h |  21 +++++-
> > >  kernel/events/uprobes.c | 148 +++++++++++++++++++++++++++++++---------
> > >  2 files changed, 137 insertions(+), 32 deletions(-)
> > >
> >
> > LGTM,
> >
> > Acked-by: Andrii Nakryiko <andrii@kernel.org>
> >
> >
> > Note also that I just resent the last patch from my patch set ([0]),
> > hopefully it will get applied, in which case you'd need to do a tiny
> > rebase.
> >
> >   [0] https://lore.kernel.org/linux-trace-kernel/20240930212246.1829395-1-andrii@kernel.org/
>
> the rebase is fine, but what I'm not clear about is that after yours and
> Oleg's changes get in, my kernel changes will depend on peter's perf/core,
> but bpf selftests changes will need bpf-next/master

Yep, and I was waiting for your next revision to discuss logistics,
but perhaps we could do it right here.

I think uprobe parts should stay in tip/perf/core (if that's where all
uprobe code goes in), as we have a bunch of ongoing work that all will
conflict a bit with each other, if it lands across multiple trees.

So that means that patches #1 and #2 ideally land in tip/perf/core.
But you have a lot of BPF-specific things that would be inconvenient
to route through tip, so I'd say those should go through bpf-next.

What we can do, if Ingo and Peter are OK with that, is to create a
stable (non-rebaseable) branch off of your first two patches (applied
in tip/perf/core), which we'll merge into bpf-next/master and land the
rest of your patch set there. We've done that with recent struct fd
changes, and there were few other similar cases in the past, and that
all worked well.

Peter, Ingo, are you guys OK with that approach?

>
> jirka
diff mbox series

Patch

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index bb265a632b91..dbaf04189548 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -23,8 +23,17 @@  struct inode;
 struct notifier_block;
 struct page;
 
+/*
+ * Allowed return values from uprobe consumer's handler callback
+ * with following meaning:
+ *
+ * UPROBE_HANDLER_REMOVE
+ * - Remove the uprobe breakpoint from current->mm.
+ * UPROBE_HANDLER_IGNORE
+ * - Ignore ret_handler callback for this consumer.
+ */
 #define UPROBE_HANDLER_REMOVE		1
-#define UPROBE_HANDLER_MASK		1
+#define UPROBE_HANDLER_IGNORE		2
 
 #define MAX_URETPROBE_DEPTH		64
 
@@ -44,6 +53,8 @@  struct uprobe_consumer {
 	bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
 
 	struct list_head cons_node;
+
+	__u64 id;	/* set when uprobe_consumer is registered */
 };
 
 #ifdef CONFIG_UPROBES
@@ -83,14 +94,22 @@  struct uprobe_task {
 	unsigned int			depth;
 };
 
+struct return_consumer {
+	__u64	cookie;
+	__u64	id;
+};
+
 struct return_instance {
 	struct uprobe		*uprobe;
 	unsigned long		func;
 	unsigned long		stack;		/* stack pointer */
 	unsigned long		orig_ret_vaddr; /* original return address */
 	bool			chained;	/* true, if instance is nested */
+	int			consumers_cnt;
 
 	struct return_instance	*next;		/* keep as stack */
+
+	struct return_consumer	consumers[] __counted_by(consumers_cnt);
 };
 
 enum rp_check {
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2ba93f8a31aa..76fe535c9b3c 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -65,7 +65,7 @@  struct uprobe {
 	struct rcu_head		rcu;
 	loff_t			offset;
 	loff_t			ref_ctr_offset;
-	unsigned long		flags;
+	unsigned long		flags;		/* "unsigned long" so bitops work */
 
 	/*
 	 * The generic code assumes that it has two members of unknown type
@@ -825,8 +825,11 @@  static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
 
 static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
 {
+	static atomic64_t id;
+
 	down_write(&uprobe->consumer_rwsem);
 	list_add_rcu(&uc->cons_node, &uprobe->consumers);
+	uc->id = (__u64) atomic64_inc_return(&id);
 	up_write(&uprobe->consumer_rwsem);
 }
 
@@ -1797,6 +1800,34 @@  static struct uprobe_task *get_utask(void)
 	return current->utask;
 }
 
+static size_t ri_size(int consumers_cnt)
+{
+	struct return_instance *ri;
+
+	return sizeof(*ri) + sizeof(ri->consumers[0]) * consumers_cnt;
+}
+
+#define DEF_CNT 4
+
+static struct return_instance *alloc_return_instance(void)
+{
+	struct return_instance *ri;
+
+	ri = kzalloc(ri_size(DEF_CNT), GFP_KERNEL);
+	if (!ri)
+		return ZERO_SIZE_PTR;
+
+	ri->consumers_cnt = DEF_CNT;
+	return ri;
+}
+
+static struct return_instance *dup_return_instance(struct return_instance *old)
+{
+	size_t size = ri_size(old->consumers_cnt);
+
+	return kmemdup(old, size, GFP_KERNEL);
+}
+
 static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
 {
 	struct uprobe_task *n_utask;
@@ -1809,11 +1840,10 @@  static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
 
 	p = &n_utask->return_instances;
 	for (o = o_utask->return_instances; o; o = o->next) {
-		n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
+		n = dup_return_instance(o);
 		if (!n)
 			return -ENOMEM;
 
-		*n = *o;
 		/*
 		 * uprobe's refcnt has to be positive at this point, kept by
 		 * utask->return_instances items; return_instances can't be
@@ -1906,39 +1936,35 @@  static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
 	utask->return_instances = ri;
 }
 
-static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
+static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
+			      struct return_instance *ri)
 {
-	struct return_instance *ri;
 	struct uprobe_task *utask;
 	unsigned long orig_ret_vaddr, trampoline_vaddr;
 	bool chained;
 
 	if (!get_xol_area())
-		return;
+		goto free;
 
 	utask = get_utask();
 	if (!utask)
-		return;
+		goto free;
 
 	if (utask->depth >= MAX_URETPROBE_DEPTH) {
 		printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
 				" nestedness limit pid/tgid=%d/%d\n",
 				current->pid, current->tgid);
-		return;
+		goto free;
 	}
 
 	/* we need to bump refcount to store uprobe in utask */
 	if (!try_get_uprobe(uprobe))
-		return;
-
-	ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
-	if (!ri)
-		goto fail;
+		goto free;
 
 	trampoline_vaddr = uprobe_get_trampoline_vaddr();
 	orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
 	if (orig_ret_vaddr == -1)
-		goto fail;
+		goto put;
 
 	/* drop the entries invalidated by longjmp() */
 	chained = (orig_ret_vaddr == trampoline_vaddr);
@@ -1956,7 +1982,7 @@  static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
 			 * attack from user-space.
 			 */
 			uprobe_warn(current, "handle tail call");
-			goto fail;
+			goto put;
 		}
 		orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
 	}
@@ -1971,9 +1997,10 @@  static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
 	utask->return_instances = ri;
 
 	return;
-fail:
-	kfree(ri);
+put:
 	put_uprobe(uprobe);
+free:
+	kfree(ri);
 }
 
 /* Prepare to single-step probed instruction out of line. */
@@ -2125,35 +2152,91 @@  static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb
 	return uprobe;
 }
 
+static struct return_instance*
+push_consumer(struct return_instance *ri, int idx, __u64 id, __u64 cookie)
+{
+	if (unlikely(ri == ZERO_SIZE_PTR))
+		return ri;
+
+	if (unlikely(idx >= ri->consumers_cnt)) {
+		struct return_instance *old_ri = ri;
+
+		ri->consumers_cnt += DEF_CNT;
+		ri = krealloc(old_ri, ri_size(old_ri->consumers_cnt), GFP_KERNEL);
+		if (!ri) {
+			kfree(old_ri);
+			return ZERO_SIZE_PTR;
+		}
+	}
+
+	ri->consumers[idx].id = id;
+	ri->consumers[idx].cookie = cookie;
+	return ri;
+}
+
+static struct return_consumer *
+return_consumer_find(struct return_instance *ri, int *iter, int id)
+{
+	struct return_consumer *ric;
+	int idx = *iter;
+
+	for (ric = &ri->consumers[idx]; idx < ri->consumers_cnt; idx++, ric++) {
+		if (ric->id == id) {
+			*iter = idx + 1;
+			return ric;
+		}
+	}
+	return NULL;
+}
+
+static bool ignore_ret_handler(int rc)
+{
+	return rc == UPROBE_HANDLER_REMOVE || rc == UPROBE_HANDLER_IGNORE;
+}
+
 static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
 {
 	struct uprobe_consumer *uc;
-	int remove = UPROBE_HANDLER_REMOVE;
-	bool need_prep = false; /* prepare return uprobe, when needed */
-	bool has_consumers = false;
+	bool has_consumers = false, remove = true;
+	struct return_instance *ri = NULL;
+	int push_idx = 0;
 
 	current->utask->auprobe = &uprobe->arch;
 
 	list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
 				 srcu_read_lock_held(&uprobes_srcu)) {
+		bool session = uc->handler && uc->ret_handler;
+		__u64 cookie = 0;
 		int rc = 0;
 
 		if (uc->handler) {
-			rc = uc->handler(uc, regs, NULL);
-			WARN(rc & ~UPROBE_HANDLER_MASK,
+			rc = uc->handler(uc, regs, &cookie);
+			WARN(rc < 0 || rc > 2,
 				"bad rc=0x%x from %ps()\n", rc, uc->handler);
 		}
 
-		if (uc->ret_handler)
-			need_prep = true;
-
-		remove &= rc;
+		remove &= rc == UPROBE_HANDLER_REMOVE;
 		has_consumers = true;
+
+		if (!uc->ret_handler || ignore_ret_handler(rc))
+			continue;
+
+		if (!ri)
+			ri = alloc_return_instance();
+
+		if (session)
+			ri = push_consumer(ri, push_idx++, uc->id, cookie);
 	}
 	current->utask->auprobe = NULL;
 
-	if (need_prep && !remove)
-		prepare_uretprobe(uprobe, regs); /* put bp at return */
+	if (!ZERO_OR_NULL_PTR(ri)) {
+		/*
+		 * The push_idx value has the final number of return consumers,
+		 * and ri->consumers_cnt has number of allocated consumers.
+		 */
+		ri->consumers_cnt = push_idx;
+		prepare_uretprobe(uprobe, regs, ri);
+	}
 
 	if (remove && has_consumers) {
 		down_read(&uprobe->register_rwsem);
@@ -2172,14 +2255,17 @@  static void
 handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
 {
 	struct uprobe *uprobe = ri->uprobe;
+	struct return_consumer *ric;
 	struct uprobe_consumer *uc;
-	int srcu_idx;
+	int srcu_idx, ric_idx = 0;
 
 	srcu_idx = srcu_read_lock(&uprobes_srcu);
 	list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
 				 srcu_read_lock_held(&uprobes_srcu)) {
-		if (uc->ret_handler)
-			uc->ret_handler(uc, ri->func, regs, NULL);
+		if (uc->ret_handler) {
+			ric = return_consumer_find(ri, &ric_idx, uc->id);
+			uc->ret_handler(uc, ri->func, regs, ric ? &ric->cookie : NULL);
+		}
 	}
 	srcu_read_unlock(&uprobes_srcu, srcu_idx);
 }