[3/8] uprobes: protected uprobe lifetime with SRCU

Message ID 20240731214256.3588718-4-andrii@kernel.org (mailing list archive)
State Superseded
Series uprobes: RCU-protected hot path optimizations

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Andrii Nakryiko July 31, 2024, 9:42 p.m. UTC
To avoid unnecessarily taking a (brief) refcount on a uprobe during
breakpoint handling in handle_swbp() for entry uprobes, make
find_uprobe() not take a refcount, but instead protect the lifetime of
a uprobe instance with RCU. This improves scalability, as the refcount
gets quite expensive due to cache line bouncing between multiple CPUs.

Specifically, we utilize our own uprobe-specific SRCU instance for this
RCU protection. put_uprobe() will delay the actual kfree() using
call_srcu().

For now, uretprobe and single-stepping handling will still acquire a
refcount as necessary. We'll address these issues in follow-up patches
by making them use SRCU with a timeout.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 kernel/events/uprobes.c | 93 ++++++++++++++++++++++++-----------------
 1 file changed, 55 insertions(+), 38 deletions(-)
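
For context, the core of the patch is the classic SRCU deferred-free
scheme: readers look the object up inside an SRCU read-side critical
section without taking a refcount, and the final put defers the actual
kfree() past all in-flight readers via call_srcu(). A minimal sketch of
the pattern (simplified, with hypothetical obj_lookup()/do_something()
helpers; not the kernel code itself):

	/* assumes <linux/srcu.h>, <linux/refcount.h>, <linux/slab.h> */
	DEFINE_STATIC_SRCU(obj_srcu);

	struct obj {
		refcount_t	ref;
		struct rcu_head	rcu;
		/* ... payload ... */
	};

	static void obj_free_rcu(struct rcu_head *rcu)
	{
		kfree(container_of(rcu, struct obj, rcu));
	}

	static void obj_put(struct obj *o)
	{
		if (!refcount_dec_and_test(&o->ref))
			return;
		/* unlink o from all lookup structures first, then defer
		 * the actual free until every SRCU reader that might
		 * still see the object has finished
		 */
		call_srcu(&obj_srcu, &o->rcu, obj_free_rcu);
	}

	static void obj_handle(void)
	{
		int srcu_idx = srcu_read_lock(&obj_srcu);
		struct obj *o = obj_lookup();	/* takes no refcount */

		if (o)
			do_something(o);	/* safe: kfree() is deferred past this point */
		srcu_read_unlock(&obj_srcu, srcu_idx);
	}

Anything that must outlive the read-side critical section (here:
uretprobe and single-stepping state) still needs a real refcount, which
is exactly what the patch keeps doing for now.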

Comments

Liao, Chang Aug. 1, 2024, 12:23 p.m. UTC | #1
On 2024/8/1 5:42, Andrii Nakryiko wrote:
> To avoid unnecessarily taking a (brief) refcount on a uprobe during
> breakpoint handling in handle_swbp() for entry uprobes, make
> find_uprobe() not take a refcount, but instead protect the lifetime of
> a uprobe instance with RCU. This improves scalability, as the refcount
> gets quite expensive due to cache line bouncing between multiple CPUs.
> 
> Specifically, we utilize our own uprobe-specific SRCU instance for this
> RCU protection. put_uprobe() will delay the actual kfree() using
> call_srcu().
> 
> For now, uretprobe and single-stepping handling will still acquire a
> refcount as necessary. We'll address these issues in follow-up patches
> by making them use SRCU with a timeout.
> 
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>  kernel/events/uprobes.c | 93 ++++++++++++++++++++++++-----------------
>  1 file changed, 55 insertions(+), 38 deletions(-)
> 
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 23dde3ec5b09..6d5c3f4b210f 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -41,6 +41,8 @@ static struct rb_root uprobes_tree = RB_ROOT;
>  
>  static DEFINE_RWLOCK(uprobes_treelock);	/* serialize rbtree access */
>  
> +DEFINE_STATIC_SRCU(uprobes_srcu);
> +
>  #define UPROBES_HASH_SZ	13
>  /* serialize uprobe->pending_list */
>  static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
> @@ -59,6 +61,7 @@ struct uprobe {
>  	struct list_head	pending_list;
>  	struct uprobe_consumer	*consumers;
>  	struct inode		*inode;		/* Also hold a ref to inode */
> +	struct rcu_head		rcu;
>  	loff_t			offset;
>  	loff_t			ref_ctr_offset;
>  	unsigned long		flags;
> @@ -612,6 +615,13 @@ static inline bool uprobe_is_active(struct uprobe *uprobe)
>  	return !RB_EMPTY_NODE(&uprobe->rb_node);
>  }
>  
> +static void uprobe_free_rcu(struct rcu_head *rcu)
> +{
> +	struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu);
> +
> +	kfree(uprobe);
> +}
> +
>  static void put_uprobe(struct uprobe *uprobe)
>  {
>  	if (!refcount_dec_and_test(&uprobe->ref))
> @@ -632,6 +642,8 @@ static void put_uprobe(struct uprobe *uprobe)
>  	mutex_lock(&delayed_uprobe_lock);
>  	delayed_uprobe_remove(uprobe, NULL);
>  	mutex_unlock(&delayed_uprobe_lock);
> +
> +	call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_rcu);
>  }
>  
>  static __always_inline
> @@ -673,33 +685,25 @@ static inline int __uprobe_cmp(struct rb_node *a, const struct rb_node *b)
>  	return uprobe_cmp(u->inode, u->offset, __node_2_uprobe(b));
>  }
>  
> -static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
> +/*
> + * Assumes being inside RCU protected region.
> + * No refcount is taken on returned uprobe.
> + */
> +static struct uprobe *find_uprobe_rcu(struct inode *inode, loff_t offset)
>  {
>  	struct __uprobe_key key = {
>  		.inode = inode,
>  		.offset = offset,
>  	};
> -	struct rb_node *node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key);
> -
> -	if (node)
> -		return try_get_uprobe(__node_2_uprobe(node));
> +	struct rb_node *node;
>  
> -	return NULL;
> -}
> -
> -/*
> - * Find a uprobe corresponding to a given inode:offset
> - * Acquires uprobes_treelock
> - */
> -static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
> -{
> -	struct uprobe *uprobe;
> +	lockdep_assert(srcu_read_lock_held(&uprobes_srcu));
>  
>  	read_lock(&uprobes_treelock);
> -	uprobe = __find_uprobe(inode, offset);
> +	node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key);
>  	read_unlock(&uprobes_treelock);
>  
> -	return uprobe;
> +	return node ? __node_2_uprobe(node) : NULL;
>  }
>  
>  /*
> @@ -1073,10 +1077,10 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
>  			goto free;
>  		/*
>  		 * We take mmap_lock for writing to avoid the race with
> -		 * find_active_uprobe() which takes mmap_lock for reading.
> +		 * find_active_uprobe_rcu() which takes mmap_lock for reading.
>  		 * Thus this install_breakpoint() can not make
> -		 * is_trap_at_addr() true right after find_uprobe()
> -		 * returns NULL in find_active_uprobe().
> +		 * is_trap_at_addr() true right after find_uprobe_rcu()
> +		 * returns NULL in find_active_uprobe_rcu().
>  		 */
>  		mmap_write_lock(mm);
>  		vma = find_vma(mm, info->vaddr);
> @@ -1885,9 +1889,13 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
>  		return;
>  	}
>  
> +	/* we need to bump refcount to store uprobe in utask */
> +	if (!try_get_uprobe(uprobe))
> +		return;
> +
>  	ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
>  	if (!ri)
> -		return;
> +		goto fail;
>  
>  	trampoline_vaddr = uprobe_get_trampoline_vaddr();
>  	orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
> @@ -1914,11 +1922,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
>  		}
>  		orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
>  	}
> -	 /*
> -	  * uprobe's refcnt is positive, held by caller, so it's safe to
> -	  * unconditionally bump it one more time here
> -	  */
> -	ri->uprobe = get_uprobe(uprobe);
> +	ri->uprobe = uprobe;
>  	ri->func = instruction_pointer(regs);
>  	ri->stack = user_stack_pointer(regs);
>  	ri->orig_ret_vaddr = orig_ret_vaddr;
> @@ -1929,8 +1933,9 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
>  	utask->return_instances = ri;
>  
>  	return;
> - fail:
> +fail:
>  	kfree(ri);
> +	put_uprobe(uprobe);
>  }
>  
>  /* Prepare to single-step probed instruction out of line. */
> @@ -1945,9 +1950,14 @@ pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
>  	if (!utask)
>  		return -ENOMEM;
>  
> +	if (!try_get_uprobe(uprobe))
> +		return -EINVAL;
> +
>  	xol_vaddr = xol_get_insn_slot(uprobe);
> -	if (!xol_vaddr)
> -		return -ENOMEM;
> +	if (!xol_vaddr) {
> +		err = -ENOMEM;
> +		goto err_out;
> +	}
>  
>  	utask->xol_vaddr = xol_vaddr;
>  	utask->vaddr = bp_vaddr;
> @@ -1955,12 +1965,15 @@ pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
>  	err = arch_uprobe_pre_xol(&uprobe->arch, regs);
>  	if (unlikely(err)) {
>  		xol_free_insn_slot(current);
> -		return err;
> +		goto err_out;
>  	}
>  
>  	utask->active_uprobe = uprobe;
>  	utask->state = UTASK_SSTEP;
>  	return 0;
> +err_out:
> +	put_uprobe(uprobe);
> +	return err;
>  }
>  
>  /*
> @@ -2044,7 +2057,8 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
>  	return is_trap_insn(&opcode);
>  }
>  
> -static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
> +/* assumes being inside RCU protected region */
> +static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swbp)
>  {
>  	struct mm_struct *mm = current->mm;
>  	struct uprobe *uprobe = NULL;
> @@ -2057,7 +2071,7 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
>  			struct inode *inode = file_inode(vma->vm_file);
>  			loff_t offset = vaddr_to_offset(vma, bp_vaddr);
>  
> -			uprobe = find_uprobe(inode, offset);
> +			uprobe = find_uprobe_rcu(inode, offset);
>  		}
>  
>  		if (!uprobe)
> @@ -2201,13 +2215,15 @@ static void handle_swbp(struct pt_regs *regs)
>  {
>  	struct uprobe *uprobe;
>  	unsigned long bp_vaddr;
> -	int is_swbp;
> +	int is_swbp, srcu_idx;
>  
>  	bp_vaddr = uprobe_get_swbp_addr(regs);
>  	if (bp_vaddr == uprobe_get_trampoline_vaddr())
>  		return uprobe_handle_trampoline(regs);
>  
> -	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
> +	srcu_idx = srcu_read_lock(&uprobes_srcu);
> +
> +	uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp);
>  	if (!uprobe) {
>  		if (is_swbp > 0) {
>  			/* No matching uprobe; signal SIGTRAP. */
> @@ -2223,6 +2239,7 @@ static void handle_swbp(struct pt_regs *regs)
>  			 */
>  			instruction_pointer_set(regs, bp_vaddr);
>  		}
> +		srcu_read_unlock(&uprobes_srcu, srcu_idx);
>  		return;
>  	}
>  
> @@ -2258,12 +2275,12 @@ static void handle_swbp(struct pt_regs *regs)
>  	if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
>  		goto out;
>  
> -	if (!pre_ssout(uprobe, regs, bp_vaddr))
> -		return;
> +	if (pre_ssout(uprobe, regs, bp_vaddr))
> +		goto out;
>  

Regardless of what pre_ssout() returns, it always reaches the label 'out', so
the if block is unnecessary.


> -	/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
>  out:
> -	put_uprobe(uprobe);
> +	/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
> +	srcu_read_unlock(&uprobes_srcu, srcu_idx);
>  }
>  
>  /*
Andrii Nakryiko Aug. 1, 2024, 4:49 p.m. UTC | #2
On Thu, Aug 1, 2024 at 5:23 AM Liao, Chang <liaochang1@huawei.com> wrote:
>
>
>
> On 2024/8/1 5:42, Andrii Nakryiko wrote:
> > To avoid unnecessarily taking a (brief) refcount on a uprobe during
> > breakpoint handling in handle_swbp() for entry uprobes, make
> > find_uprobe() not take a refcount, but instead protect the lifetime of
> > a uprobe instance with RCU. This improves scalability, as the refcount
> > gets quite expensive due to cache line bouncing between multiple CPUs.
> >
> > Specifically, we utilize our own uprobe-specific SRCU instance for this
> > RCU protection. put_uprobe() will delay the actual kfree() using
> > call_srcu().
> >
> > For now, uretprobe and single-stepping handling will still acquire a
> > refcount as necessary. We'll address these issues in follow-up patches
> > by making them use SRCU with a timeout.
> >
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
> >  kernel/events/uprobes.c | 93 ++++++++++++++++++++++++-----------------
> >  1 file changed, 55 insertions(+), 38 deletions(-)
> >

[...]

> >
> > @@ -2258,12 +2275,12 @@ static void handle_swbp(struct pt_regs *regs)
> >       if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
> >               goto out;
> >
> > -     if (!pre_ssout(uprobe, regs, bp_vaddr))
> > -             return;
> > +     if (pre_ssout(uprobe, regs, bp_vaddr))
> > +             goto out;
> >
>
> Regardless of what pre_ssout() returns, it always reaches the label 'out',
> so the if block is unnecessary.

yep, I know, but I felt like

if (something is wrong)
    goto out;

pattern was important to keep for each possible failing step for consistency.

so unless this is a big deal, I'd keep it as is, as in the future
there might be some other steps after pre_ssout() before returning, so
this is a bit more "composable"


>
>
> > -     /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
> >  out:
> > -     put_uprobe(uprobe);
> > +     /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
> > +     srcu_read_unlock(&uprobes_srcu, srcu_idx);
> >  }
> >
> >  /*
>
> --
> BR
> Liao, Chang
Liao, Chang Aug. 2, 2024, 1:30 a.m. UTC | #3
On 2024/8/2 0:49, Andrii Nakryiko wrote:
> On Thu, Aug 1, 2024 at 5:23 AM Liao, Chang <liaochang1@huawei.com> wrote:
>>
>>
>>
>> On 2024/8/1 5:42, Andrii Nakryiko wrote:
>>> To avoid unnecessarily taking a (brief) refcount on a uprobe during
>>> breakpoint handling in handle_swbp() for entry uprobes, make
>>> find_uprobe() not take a refcount, but instead protect the lifetime of
>>> a uprobe instance with RCU. This improves scalability, as the refcount
>>> gets quite expensive due to cache line bouncing between multiple CPUs.
>>>
>>> Specifically, we utilize our own uprobe-specific SRCU instance for this
>>> RCU protection. put_uprobe() will delay the actual kfree() using
>>> call_srcu().
>>>
>>> For now, uretprobe and single-stepping handling will still acquire a
>>> refcount as necessary. We'll address these issues in follow-up patches
>>> by making them use SRCU with a timeout.
>>>
>>> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
>>> ---
>>>  kernel/events/uprobes.c | 93 ++++++++++++++++++++++++-----------------
>>>  1 file changed, 55 insertions(+), 38 deletions(-)
>>>
> 
> [...]
> 
>>>
>>> @@ -2258,12 +2275,12 @@ static void handle_swbp(struct pt_regs *regs)
>>>       if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
>>>               goto out;
>>>
>>> -     if (!pre_ssout(uprobe, regs, bp_vaddr))
>>> -             return;
>>> +     if (pre_ssout(uprobe, regs, bp_vaddr))
>>> +             goto out;
>>>
>>
>> Regardless of what pre_ssout() returns, it always reaches the label 'out',
>> so the if block is unnecessary.
> 
> yep, I know, but I felt like
> 
> if (something is wrong)
>     goto out;
> 
> pattern was important to keep for each possible failing step for consistency.
> 
> so unless this is a big deal, I'd keep it as is, as in the future
> there might be some other steps after pre_ssout() before returning, so
> this is a bit more "composable"
> 
OK, I would say this conditional-check pattern is likely to be optimized away
by a modern compiler.
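
Concretely, the two toy variants below (hypothetical step()/cleanup())
should compile to identical code, since the branch only skips to the
label that immediately follows it:

	int step(void);
	void cleanup(void);

	void with_goto(void)
	{
		if (step())
			goto out;	/* branch target is the next statement anyway */
	out:
		cleanup();
	}

	void without_goto(void)
	{
		step();
		cleanup();
	}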

Thanks.

> 
>>
>>
>>> -     /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
>>>  out:
>>> -     put_uprobe(uprobe);
>>> +     /* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
>>> +     srcu_read_unlock(&uprobes_srcu, srcu_idx);
>>>  }
>>>
>>>  /*
>>
>> --
>> BR
>> Liao, Chang
Oleg Nesterov Aug. 5, 2024, 2:51 p.m. UTC | #4
LGTM, just a few notes...

On 07/31, Andrii Nakryiko wrote:
>
> @@ -59,6 +61,7 @@ struct uprobe {
>  	struct list_head	pending_list;
>  	struct uprobe_consumer	*consumers;
>  	struct inode		*inode;		/* Also hold a ref to inode */
> +	struct rcu_head		rcu;

you can probably put the new member into a union with, say, rb_node.
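
The union would be safe, presumably, because put_uprobe() has already
unlinked the uprobe from the tree by the time call_srcu() is queued, so
the two members are never live at the same time. A hypothetical sketch
of the suggested layout (not what this version of the patch does):

	struct uprobe {
		/* ... */
		union {
			struct rb_node	rb_node;	/* while linked in uprobes_tree */
			struct rcu_head	rcu;		/* reused after unlinking, for call_srcu() */
		};
		/* ... */
	};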

> @@ -1945,9 +1950,14 @@ pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
>  	if (!utask)
>  		return -ENOMEM;
>
> +	if (!try_get_uprobe(uprobe))
> +		return -EINVAL;
> +

a bit off-topic right now, but it seems that we can simply kill
utask->active_uprobe. We can turn it into "bool has_active_uprobe"
and copy uprobe->arch into uprobe_task. Let's discuss this later.
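
The single-step path only needs uprobe->arch once the XOL slot is set
up, so the task could keep its own copy instead of pinning the uprobe.
A hypothetical sketch of that idea, with made-up field names:

	struct uprobe_task {
		/* ... */
		bool			has_active_uprobe;	/* was: struct uprobe *active_uprobe */
		struct arch_uprobe	arch;			/* task-local copy of uprobe->arch */
		/* ... */
	};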

> @@ -2201,13 +2215,15 @@ static void handle_swbp(struct pt_regs *regs)
>  {
>  	struct uprobe *uprobe;
>  	unsigned long bp_vaddr;
> -	int is_swbp;
> +	int is_swbp, srcu_idx;
>
>  	bp_vaddr = uprobe_get_swbp_addr(regs);
>  	if (bp_vaddr == uprobe_get_trampoline_vaddr())
>  		return uprobe_handle_trampoline(regs);
>
> -	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
> +	srcu_idx = srcu_read_lock(&uprobes_srcu);
> +
> +	uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp);
>  	if (!uprobe) {
>  		if (is_swbp > 0) {
>  			/* No matching uprobe; signal SIGTRAP. */
> @@ -2223,6 +2239,7 @@ static void handle_swbp(struct pt_regs *regs)
>  			 */
>  			instruction_pointer_set(regs, bp_vaddr);
>  		}
> +		srcu_read_unlock(&uprobes_srcu, srcu_idx);
>  		return;

Why not
		goto out;

?

Oleg.
Andrii Nakryiko Aug. 5, 2024, 5:31 p.m. UTC | #5
On Mon, Aug 5, 2024 at 7:52 AM Oleg Nesterov <oleg@redhat.com> wrote:
>
> LGTM, just a few notes...
>
> On 07/31, Andrii Nakryiko wrote:
> >
> > @@ -59,6 +61,7 @@ struct uprobe {
> >       struct list_head        pending_list;
> >       struct uprobe_consumer  *consumers;
> >       struct inode            *inode;         /* Also hold a ref to inode */
> > +     struct rcu_head         rcu;
>
> you can probably put the new member into a union with, say, rb_node.

yep, good point, will do

>
> > @@ -1945,9 +1950,14 @@ pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
> >       if (!utask)
> >               return -ENOMEM;
> >
> > +     if (!try_get_uprobe(uprobe))
> > +             return -EINVAL;
> > +
>
> a bit off-topic right now, but it seems that we can simply kill
> utask->active_uprobe. We can turn it into "bool has_active_uprobe"
> and copy uprobe->arch into uprobe_task. Let's discuss this later.

I'm going to change this active_uprobe thing to be either refcounted
or SRCU-protected (but with a timeout), so I'll need a bit more
structure around this. Let's see how that lands; if we can still get
rid of it, we can discuss.

>
> > @@ -2201,13 +2215,15 @@ static void handle_swbp(struct pt_regs *regs)
> >  {
> >       struct uprobe *uprobe;
> >       unsigned long bp_vaddr;
> > -     int is_swbp;
> > +     int is_swbp, srcu_idx;
> >
> >       bp_vaddr = uprobe_get_swbp_addr(regs);
> >       if (bp_vaddr == uprobe_get_trampoline_vaddr())
> >               return uprobe_handle_trampoline(regs);
> >
> > -     uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
> > +     srcu_idx = srcu_read_lock(&uprobes_srcu);
> > +
> > +     uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp);
> >       if (!uprobe) {
> >               if (is_swbp > 0) {
> >                       /* No matching uprobe; signal SIGTRAP. */
> > @@ -2223,6 +2239,7 @@ static void handle_swbp(struct pt_regs *regs)
> >                        */
> >                       instruction_pointer_set(regs, bp_vaddr);
> >               }
> > +             srcu_read_unlock(&uprobes_srcu, srcu_idx);
> >               return;
>
> Why not
>                 goto out;
>
> ?
>

Good point, can be goto out, will change.

> Oleg.
>

Patch

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 23dde3ec5b09..6d5c3f4b210f 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -41,6 +41,8 @@  static struct rb_root uprobes_tree = RB_ROOT;
 
 static DEFINE_RWLOCK(uprobes_treelock);	/* serialize rbtree access */
 
+DEFINE_STATIC_SRCU(uprobes_srcu);
+
 #define UPROBES_HASH_SZ	13
 /* serialize uprobe->pending_list */
 static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
@@ -59,6 +61,7 @@  struct uprobe {
 	struct list_head	pending_list;
 	struct uprobe_consumer	*consumers;
 	struct inode		*inode;		/* Also hold a ref to inode */
+	struct rcu_head		rcu;
 	loff_t			offset;
 	loff_t			ref_ctr_offset;
 	unsigned long		flags;
@@ -612,6 +615,13 @@  static inline bool uprobe_is_active(struct uprobe *uprobe)
 	return !RB_EMPTY_NODE(&uprobe->rb_node);
 }
 
+static void uprobe_free_rcu(struct rcu_head *rcu)
+{
+	struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu);
+
+	kfree(uprobe);
+}
+
 static void put_uprobe(struct uprobe *uprobe)
 {
 	if (!refcount_dec_and_test(&uprobe->ref))
@@ -632,6 +642,8 @@  static void put_uprobe(struct uprobe *uprobe)
 	mutex_lock(&delayed_uprobe_lock);
 	delayed_uprobe_remove(uprobe, NULL);
 	mutex_unlock(&delayed_uprobe_lock);
+
+	call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_rcu);
 }
 
 static __always_inline
@@ -673,33 +685,25 @@  static inline int __uprobe_cmp(struct rb_node *a, const struct rb_node *b)
 	return uprobe_cmp(u->inode, u->offset, __node_2_uprobe(b));
 }
 
-static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
+/*
+ * Assumes being inside RCU protected region.
+ * No refcount is taken on returned uprobe.
+ */
+static struct uprobe *find_uprobe_rcu(struct inode *inode, loff_t offset)
 {
 	struct __uprobe_key key = {
 		.inode = inode,
 		.offset = offset,
 	};
-	struct rb_node *node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key);
-
-	if (node)
-		return try_get_uprobe(__node_2_uprobe(node));
+	struct rb_node *node;
 
-	return NULL;
-}
-
-/*
- * Find a uprobe corresponding to a given inode:offset
- * Acquires uprobes_treelock
- */
-static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
-{
-	struct uprobe *uprobe;
+	lockdep_assert(srcu_read_lock_held(&uprobes_srcu));
 
 	read_lock(&uprobes_treelock);
-	uprobe = __find_uprobe(inode, offset);
+	node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key);
 	read_unlock(&uprobes_treelock);
 
-	return uprobe;
+	return node ? __node_2_uprobe(node) : NULL;
 }
 
 /*
@@ -1073,10 +1077,10 @@  register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
 			goto free;
 		/*
 		 * We take mmap_lock for writing to avoid the race with
-		 * find_active_uprobe() which takes mmap_lock for reading.
+		 * find_active_uprobe_rcu() which takes mmap_lock for reading.
 		 * Thus this install_breakpoint() can not make
-		 * is_trap_at_addr() true right after find_uprobe()
-		 * returns NULL in find_active_uprobe().
+		 * is_trap_at_addr() true right after find_uprobe_rcu()
+		 * returns NULL in find_active_uprobe_rcu().
 		 */
 		mmap_write_lock(mm);
 		vma = find_vma(mm, info->vaddr);
@@ -1885,9 +1889,13 @@  static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
 		return;
 	}
 
+	/* we need to bump refcount to store uprobe in utask */
+	if (!try_get_uprobe(uprobe))
+		return;
+
 	ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
 	if (!ri)
-		return;
+		goto fail;
 
 	trampoline_vaddr = uprobe_get_trampoline_vaddr();
 	orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
@@ -1914,11 +1922,7 @@  static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
 		}
 		orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
 	}
-	 /*
-	  * uprobe's refcnt is positive, held by caller, so it's safe to
-	  * unconditionally bump it one more time here
-	  */
-	ri->uprobe = get_uprobe(uprobe);
+	ri->uprobe = uprobe;
 	ri->func = instruction_pointer(regs);
 	ri->stack = user_stack_pointer(regs);
 	ri->orig_ret_vaddr = orig_ret_vaddr;
@@ -1929,8 +1933,9 @@  static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
 	utask->return_instances = ri;
 
 	return;
- fail:
+fail:
 	kfree(ri);
+	put_uprobe(uprobe);
 }
 
 /* Prepare to single-step probed instruction out of line. */
@@ -1945,9 +1950,14 @@  pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
 	if (!utask)
 		return -ENOMEM;
 
+	if (!try_get_uprobe(uprobe))
+		return -EINVAL;
+
 	xol_vaddr = xol_get_insn_slot(uprobe);
-	if (!xol_vaddr)
-		return -ENOMEM;
+	if (!xol_vaddr) {
+		err = -ENOMEM;
+		goto err_out;
+	}
 
 	utask->xol_vaddr = xol_vaddr;
 	utask->vaddr = bp_vaddr;
@@ -1955,12 +1965,15 @@  pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
 	err = arch_uprobe_pre_xol(&uprobe->arch, regs);
 	if (unlikely(err)) {
 		xol_free_insn_slot(current);
-		return err;
+		goto err_out;
 	}
 
 	utask->active_uprobe = uprobe;
 	utask->state = UTASK_SSTEP;
 	return 0;
+err_out:
+	put_uprobe(uprobe);
+	return err;
 }
 
 /*
@@ -2044,7 +2057,8 @@  static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
 	return is_trap_insn(&opcode);
 }
 
-static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
+/* assumes being inside RCU protected region */
+static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swbp)
 {
 	struct mm_struct *mm = current->mm;
 	struct uprobe *uprobe = NULL;
@@ -2057,7 +2071,7 @@  static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 			struct inode *inode = file_inode(vma->vm_file);
 			loff_t offset = vaddr_to_offset(vma, bp_vaddr);
 
-			uprobe = find_uprobe(inode, offset);
+			uprobe = find_uprobe_rcu(inode, offset);
 		}
 
 		if (!uprobe)
@@ -2201,13 +2215,15 @@  static void handle_swbp(struct pt_regs *regs)
 {
 	struct uprobe *uprobe;
 	unsigned long bp_vaddr;
-	int is_swbp;
+	int is_swbp, srcu_idx;
 
 	bp_vaddr = uprobe_get_swbp_addr(regs);
 	if (bp_vaddr == uprobe_get_trampoline_vaddr())
 		return uprobe_handle_trampoline(regs);
 
-	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
+	srcu_idx = srcu_read_lock(&uprobes_srcu);
+
+	uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp);
 	if (!uprobe) {
 		if (is_swbp > 0) {
 			/* No matching uprobe; signal SIGTRAP. */
@@ -2223,6 +2239,7 @@  static void handle_swbp(struct pt_regs *regs)
 			 */
 			instruction_pointer_set(regs, bp_vaddr);
 		}
+		srcu_read_unlock(&uprobes_srcu, srcu_idx);
 		return;
 	}
 
@@ -2258,12 +2275,12 @@  static void handle_swbp(struct pt_regs *regs)
 	if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
 		goto out;
 
-	if (!pre_ssout(uprobe, regs, bp_vaddr))
-		return;
+	if (pre_ssout(uprobe, regs, bp_vaddr))
+		goto out;
 
-	/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
 out:
-	put_uprobe(uprobe);
+	/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
+	srcu_read_unlock(&uprobes_srcu, srcu_idx);
 }
 
 /*