
[bpf-next,05/13] uprobes: Add mapping for optimized uprobe trampolines

Message ID 20241211133403.208920-6-jolsa@kernel.org (mailing list archive)
State New
Series uprobes: Add support to optimize usdt probes on x86_64

Commit Message

Jiri Olsa Dec. 11, 2024, 1:33 p.m. UTC
Adding support for a special mapping for the user space trampoline, with
the following functions:

  uprobe_trampoline_get - find or add related uprobe_trampoline
  uprobe_trampoline_put - remove ref or destroy uprobe_trampoline

The user space trampoline is exported as an architecture-specific user
space special mapping, provided by the arch_uprobe_trampoline_mapping
function.

The uprobe trampoline needs to be callable/reachable from the probe address,
so while searching for an available address we use the arch_uprobe_is_callable
function to decide whether the trampoline is reachable from the probe address.

All uprobe_trampoline objects are stored in the uprobes_state object and
are cleaned up when the process mm_struct goes down.

Locking is provided by callers in the following changes.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/uprobes.h |  12 +++++
 kernel/events/uprobes.c | 114 ++++++++++++++++++++++++++++++++++++++++
 kernel/fork.c           |   1 +
 3 files changed, 127 insertions(+)
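
For orientation, a minimal sketch of the get/put calling pattern described
above; the caller context and error handling are our assumptions based on
this description (locking comes from the callers added in later patches),
not code from this patch:

  struct uprobe_trampoline *tramp;

  /* find an existing trampoline reachable from vaddr, or create one */
  tramp = uprobe_trampoline_get(vaddr);
  if (!tramp)
          return -ENOMEM;

  /* ... install a call from vaddr to tramp->vaddr ... */

  /* drop the reference; the last put destroys the trampoline */
  uprobe_trampoline_put(tramp);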

Comments

Andrii Nakryiko Dec. 13, 2024, 1:01 a.m. UTC | #1
On Wed, Dec 11, 2024 at 5:35 AM Jiri Olsa <jolsa@kernel.org> wrote:
>
> Adding support to add special mapping for for user space trampoline

typo: for for

> with following functions:
>
>   uprobe_trampoline_get - find or add related uprobe_trampoline
>   uprobe_trampoline_put - remove ref or destroy uprobe_trampoline
>
> The user space trampoline is exported as architecture specific user space
> special mapping, which is provided by arch_uprobe_trampoline_mapping
> function.
>
> The uprobe trampoline needs to be callable/reachable from the probe address,
> so while searching for available address we use arch_uprobe_is_callable
> function to decide if the uprobe trampoline is callable from the probe address.
>
> All uprobe_trampoline objects are stored in uprobes_state object and
> are cleaned up when the process mm_struct goes down.
>
> Locking is provided by callers in following changes.
>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  include/linux/uprobes.h |  12 +++++
>  kernel/events/uprobes.c | 114 ++++++++++++++++++++++++++++++++++++++++
>  kernel/fork.c           |   1 +
>  3 files changed, 127 insertions(+)
>

Ran out of time for today, will continue tomorrow for the rest of
patches. Some comments below.

The numbers are really encouraging, though!

> diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
> index 8843b7f99ed0..c4ee755ca2a1 100644
> --- a/include/linux/uprobes.h
> +++ b/include/linux/uprobes.h
> @@ -16,6 +16,7 @@
>  #include <linux/types.h>
>  #include <linux/wait.h>
>  #include <linux/timer.h>
> +#include <linux/mutex.h>
>
>  struct uprobe;
>  struct vm_area_struct;
> @@ -172,6 +173,13 @@ struct xol_area;
>
>  struct uprobes_state {
>         struct xol_area         *xol_area;
> +       struct hlist_head       tramp_head;
> +};
> +

should we make uprobe_state be linked by a pointer from mm_struct
instead of increasing mm for each added field? right now it's
embedded, I don't think it's problematic to allocate it on demand and
keep it until mm_struct is freed
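
As a sketch of what that could look like (the mm_struct field and helper
name below are invented for illustration, not from this series):

  /* sketch only: mm_struct would carry a pointer instead of an embedded
   * struct, allocated on first use and freed when mm_struct is freed */
  static struct uprobes_state *mm_uprobes_state(struct mm_struct *mm)
  {
          struct uprobes_state *state = READ_ONCE(mm->uprobes_state_ptr);

          if (state)
                  return state;

          state = kzalloc(sizeof(*state), GFP_KERNEL);
          if (!state)
                  return NULL;
          INIT_HLIST_HEAD(&state->tramp_head);

          /* publish; back off if another thread won the race */
          if (cmpxchg(&mm->uprobes_state_ptr, NULL, state) != NULL) {
                  kfree(state);
                  state = READ_ONCE(mm->uprobes_state_ptr);
          }
          return state;
  }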

> +struct uprobe_trampoline {
> +       struct hlist_node       node;
> +       unsigned long           vaddr;
> +       atomic64_t              ref;
>  };
>
>  extern void __init uprobes_init(void);
> @@ -220,6 +228,10 @@ extern int arch_uprobe_verify_opcode(struct arch_uprobe *auprobe, struct page *p
>                                      unsigned long vaddr, uprobe_opcode_t *new_opcode,
>                                      int nbytes);
>  extern bool arch_uprobe_is_register(uprobe_opcode_t *insn, int nbytes);
> +extern struct uprobe_trampoline *uprobe_trampoline_get(unsigned long vaddr);
> +extern void uprobe_trampoline_put(struct uprobe_trampoline *area);
> +extern bool arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr);
> +extern const struct vm_special_mapping *arch_uprobe_trampoline_mapping(void);
>  #else /* !CONFIG_UPROBES */
>  struct uprobes_state {
>  };
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 8068f91de9e3..f57918c624da 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -615,6 +615,118 @@ set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long v
>                         (uprobe_opcode_t *)&auprobe->insn, UPROBE_SWBP_INSN_SIZE);
>  }
>
> +bool __weak arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr)

bikeshedding some more, I still find "is_callable" confusing. How
about "is_reachable_by_call"? slightly verbose, but probably more
meaningful?
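
For concreteness, on x86_64 such a check presumably reduces to "does the
rel32 displacement of a 5-byte call fit in s32"; a hypothetical sketch
(the actual arch implementation comes later in this series):

  /* hypothetical x86_64 check, not this patch's code: a call rel32 insn
   * is 5 bytes and its displacement is a signed 32-bit offset measured
   * from the end of the instruction */
  static bool is_reachable_by_call(unsigned long vtramp, unsigned long vaddr)
  {
          long delta = (long)vtramp - (long)(vaddr + 5);

          return delta == (s32)delta;
  }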

> +{
> +       return false;
> +}
> +
> +const struct vm_special_mapping * __weak arch_uprobe_trampoline_mapping(void)
> +{
> +       return NULL;
> +}
> +
> +static unsigned long find_nearest_page(unsigned long vaddr)
> +{
> +       struct mm_struct *mm = current->mm;
> +       struct vm_area_struct *vma, *prev;
> +       VMA_ITERATOR(vmi, mm, 0);
> +
> +       prev = vma_next(&vmi);

minor: we are missing an opportunity to add something between
[PAGE_SIZE, <first_vma_start>). Probably fine, but why not?
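
Something along these lines at the top of find_nearest_page() would cover
that gap (a sketch; a real version should probably respect mmap_min_addr
rather than assuming PAGE_SIZE is the lower bound):

  /* sketch: also try the gap below the first VMA, keeping page 0 free */
  prev = vma_next(&vmi);
  if (prev && prev->vm_start >= 2 * PAGE_SIZE &&
      arch_uprobe_is_callable(prev->vm_start - PAGE_SIZE, vaddr))
          return prev->vm_start - PAGE_SIZE;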

> +       vma = vma_next(&vmi);
> +       while (vma) {
> +               if (vma->vm_start - prev->vm_end  >= PAGE_SIZE) {
> +                       if (arch_uprobe_is_callable(prev->vm_end, vaddr))
> +                               return prev->vm_end;
> +                       if (arch_uprobe_is_callable(vma->vm_start - PAGE_SIZE, vaddr))
> +                               return vma->vm_start - PAGE_SIZE;
> +               }
> +
> +               prev = vma;
> +               vma = vma_next(&vmi);
> +       }
> +
> +       return 0;
> +}
> +

[...]

> +struct uprobe_trampoline *uprobe_trampoline_get(unsigned long vaddr)
> +{
> +       struct uprobes_state *state = &current->mm->uprobes_state;
> +       struct uprobe_trampoline *tramp = NULL;
> +
> +       hlist_for_each_entry(tramp, &state->tramp_head, node) {
> +               if (arch_uprobe_is_callable(tramp->vaddr, vaddr)) {
> +                       atomic64_inc(&tramp->ref);
> +                       return tramp;
> +               }
> +       }
> +
> +       tramp = create_uprobe_trampoline(vaddr);
> +       if (!tramp)
> +               return NULL;
> +
> +       hlist_add_head(&tramp->node, &state->tramp_head);
> +       return tramp;
> +}
> +
> +static void destroy_uprobe_trampoline(struct uprobe_trampoline *tramp)
> +{
> +       hlist_del(&tramp->node);
> +       kfree(tramp);

hmm... shouldn't this be RCU-delayed (RCU Tasks Trace for uprobes),
otherwise we might have some CPU executing code in that trampoline,
no?
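
For reference, the RCU-delayed variant being suggested would look roughly
like this (a sketch only: struct uprobe_trampoline would need to grow a
struct rcu_head member; Jiri's reply below explains why the plain kfree()
is in fact safe here):

  /* sketch of an RCU Tasks Trace delayed free; assumes a new
   * 'struct rcu_head rcu' member in struct uprobe_trampoline */
  static void free_tramp_cb(struct rcu_head *head)
  {
          kfree(container_of(head, struct uprobe_trampoline, rcu));
  }

  static void destroy_uprobe_trampoline(struct uprobe_trampoline *tramp)
  {
          hlist_del(&tramp->node);
          call_rcu_tasks_trace(&tramp->rcu, free_tramp_cb);
  }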

> +}
> +

[...]
Jiri Olsa Dec. 13, 2024, 1:42 p.m. UTC | #2
On Thu, Dec 12, 2024 at 05:01:52PM -0800, Andrii Nakryiko wrote:

SNIP

> > ---
> >  include/linux/uprobes.h |  12 +++++
> >  kernel/events/uprobes.c | 114 ++++++++++++++++++++++++++++++++++++++++
> >  kernel/fork.c           |   1 +
> >  3 files changed, 127 insertions(+)
> >
> 
> Ran out of time for today, will continue tomorrow for the rest of
> patches. Some comments below.

thanks!

> 
> The numbers are really encouraging, though!
> 
> > diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
> > index 8843b7f99ed0..c4ee755ca2a1 100644
> > --- a/include/linux/uprobes.h
> > +++ b/include/linux/uprobes.h
> > @@ -16,6 +16,7 @@
> >  #include <linux/types.h>
> >  #include <linux/wait.h>
> >  #include <linux/timer.h>
> > +#include <linux/mutex.h>
> >
> >  struct uprobe;
> >  struct vm_area_struct;
> > @@ -172,6 +173,13 @@ struct xol_area;
> >
> >  struct uprobes_state {
> >         struct xol_area         *xol_area;
> > +       struct hlist_head       tramp_head;
> > +};
> > +
> 
> should we make uprobe_state be linked by a pointer from mm_struct
> instead of increasing mm for each added field? right now it's
> embedded, I don't think it's problematic to allocate it on demand and
> keep it until mm_struct is freed

seems like good idea, I'll check on that

> 
> > +struct uprobe_trampoline {
> > +       struct hlist_node       node;
> > +       unsigned long           vaddr;
> > +       atomic64_t              ref;
> >  };
> >
> >  extern void __init uprobes_init(void);
> > @@ -220,6 +228,10 @@ extern int arch_uprobe_verify_opcode(struct arch_uprobe *auprobe, struct page *p
> >                                      unsigned long vaddr, uprobe_opcode_t *new_opcode,
> >                                      int nbytes);
> >  extern bool arch_uprobe_is_register(uprobe_opcode_t *insn, int nbytes);
> > +extern struct uprobe_trampoline *uprobe_trampoline_get(unsigned long vaddr);
> > +extern void uprobe_trampoline_put(struct uprobe_trampoline *area);
> > +extern bool arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr);
> > +extern const struct vm_special_mapping *arch_uprobe_trampoline_mapping(void);
> >  #else /* !CONFIG_UPROBES */
> >  struct uprobes_state {
> >  };
> > diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> > index 8068f91de9e3..f57918c624da 100644
> > --- a/kernel/events/uprobes.c
> > +++ b/kernel/events/uprobes.c
> > @@ -615,6 +615,118 @@ set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long v
> >                         (uprobe_opcode_t *)&auprobe->insn, UPROBE_SWBP_INSN_SIZE);
> >  }
> >
> > +bool __weak arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr)
> 
> bikeshedding some more, I still find "is_callable" confusing. How
> about "is_reachable_by_call"? slightly verbose, but probably more
> meaningful?

yep, more precise, will change

> 
> > +{
> > +       return false;
> > +}
> > +
> > +const struct vm_special_mapping * __weak arch_uprobe_trampoline_mapping(void)
> > +{
> > +       return NULL;
> > +}
> > +
> > +static unsigned long find_nearest_page(unsigned long vaddr)
> > +{
> > +       struct mm_struct *mm = current->mm;
> > +       struct vm_area_struct *vma, *prev;
> > +       VMA_ITERATOR(vmi, mm, 0);
> > +
> > +       prev = vma_next(&vmi);
> 
> minor: we are missing an opportunity to add something between
> [PAGE_SIZE, <first_vma_start>). Probably fine, but why not?

true, will add that check

> 
> > +       vma = vma_next(&vmi);
> > +       while (vma) {
> > +               if (vma->vm_start - prev->vm_end  >= PAGE_SIZE) {
> > +                       if (arch_uprobe_is_callable(prev->vm_end, vaddr))
> > +                               return prev->vm_end;
> > +                       if (arch_uprobe_is_callable(vma->vm_start - PAGE_SIZE, vaddr))
> > +                               return vma->vm_start - PAGE_SIZE;
> > +               }
> > +
> > +               prev = vma;
> > +               vma = vma_next(&vmi);
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> 
> [...]
> 
> > +struct uprobe_trampoline *uprobe_trampoline_get(unsigned long vaddr)
> > +{
> > +       struct uprobes_state *state = &current->mm->uprobes_state;
> > +       struct uprobe_trampoline *tramp = NULL;
> > +
> > +       hlist_for_each_entry(tramp, &state->tramp_head, node) {
> > +               if (arch_uprobe_is_callable(tramp->vaddr, vaddr)) {
> > +                       atomic64_inc(&tramp->ref);
> > +                       return tramp;
> > +               }
> > +       }
> > +
> > +       tramp = create_uprobe_trampoline(vaddr);
> > +       if (!tramp)
> > +               return NULL;
> > +
> > +       hlist_add_head(&tramp->node, &state->tramp_head);
> > +       return tramp;
> > +}
> > +
> > +static void destroy_uprobe_trampoline(struct uprobe_trampoline *tramp)
> > +{
> > +       hlist_del(&tramp->node);
> > +       kfree(tramp);
> 
> hmm... shouldn't this be RCU-delayed (RCU Tasks Trace for uprobes),
> otherwise we might have some CPU executing code in that trampoline,
> no?

so we call destroy_uprobe_trampoline in 2 scenarios:

  - from uprobe_trampoline_put (in __arch_uprobe_optimize) when we failed
    to optimize the uprobe, so no task can execute it at that point

  - from clear_tramp_head as part of the uprobe trampolines cleanup
    (__mmput -> uprobe_clear_state) at which point the task should be dead

jirka

> 
> > +}
> > +
> 
> [...]
Andrii Nakryiko Dec. 13, 2024, 9:58 p.m. UTC | #3
On Fri, Dec 13, 2024 at 5:42 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Thu, Dec 12, 2024 at 05:01:52PM -0800, Andrii Nakryiko wrote:
>
> SNIP
>
> > > ---
> > >  include/linux/uprobes.h |  12 +++++
> > >  kernel/events/uprobes.c | 114 ++++++++++++++++++++++++++++++++++++++++
> > >  kernel/fork.c           |   1 +
> > >  3 files changed, 127 insertions(+)
> > >
> >
> > Ran out of time for today, will continue tomorrow for the rest of
> > patches. Some comments below.
>
> thanks!
>
> >
> > The numbers are really encouraging, though!
> >
> > > diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
> > > index 8843b7f99ed0..c4ee755ca2a1 100644
> > > --- a/include/linux/uprobes.h
> > > +++ b/include/linux/uprobes.h
> > > @@ -16,6 +16,7 @@
> > >  #include <linux/types.h>
> > >  #include <linux/wait.h>
> > >  #include <linux/timer.h>
> > > +#include <linux/mutex.h>
> > >
> > >  struct uprobe;
> > >  struct vm_area_struct;
> > > @@ -172,6 +173,13 @@ struct xol_area;
> > >
> > >  struct uprobes_state {
> > >         struct xol_area         *xol_area;
> > > +       struct hlist_head       tramp_head;
> > > +};
> > > +
> >
> > should we make uprobe_state be linked by a pointer from mm_struct
> > instead of increasing mm for each added field? right now it's
> > embedded, I don't think it's problematic to allocate it on demand and
> > keep it until mm_struct is freed
>
> seems like good idea, I'll check on that
>
> >
> > > +struct uprobe_trampoline {
> > > +       struct hlist_node       node;
> > > +       unsigned long           vaddr;
> > > +       atomic64_t              ref;
> > >  };
> > >
> > >  extern void __init uprobes_init(void);
> > > @@ -220,6 +228,10 @@ extern int arch_uprobe_verify_opcode(struct arch_uprobe *auprobe, struct page *p
> > >                                      unsigned long vaddr, uprobe_opcode_t *new_opcode,
> > >                                      int nbytes);
> > >  extern bool arch_uprobe_is_register(uprobe_opcode_t *insn, int nbytes);
> > > +extern struct uprobe_trampoline *uprobe_trampoline_get(unsigned long vaddr);
> > > +extern void uprobe_trampoline_put(struct uprobe_trampoline *area);
> > > +extern bool arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr);
> > > +extern const struct vm_special_mapping *arch_uprobe_trampoline_mapping(void);
> > >  #else /* !CONFIG_UPROBES */
> > >  struct uprobes_state {
> > >  };
> > > diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> > > index 8068f91de9e3..f57918c624da 100644
> > > --- a/kernel/events/uprobes.c
> > > +++ b/kernel/events/uprobes.c
> > > @@ -615,6 +615,118 @@ set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long v
> > >                         (uprobe_opcode_t *)&auprobe->insn, UPROBE_SWBP_INSN_SIZE);
> > >  }
> > >
> > > +bool __weak arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr)
> >
> > bikeshedding some more, I still find "is_callable" confusing. How
> > about "is_reachable_by_call"? slightly verbose, but probably more
> > meaningful?
>
> yep, more precise, will change
>
> >
> > > +{
> > > +       return false;
> > > +}
> > > +
> > > +const struct vm_special_mapping * __weak arch_uprobe_trampoline_mapping(void)
> > > +{
> > > +       return NULL;
> > > +}
> > > +
> > > +static unsigned long find_nearest_page(unsigned long vaddr)
> > > +{
> > > +       struct mm_struct *mm = current->mm;
> > > +       struct vm_area_struct *vma, *prev;
> > > +       VMA_ITERATOR(vmi, mm, 0);
> > > +
> > > +       prev = vma_next(&vmi);
> >
> > minor: we are missing an opportunity to add something between
> > [PAGE_SIZE, <first_vma_start>). Probably fine, but why not?
>
> true, will add that check
>
> >
> > > +       vma = vma_next(&vmi);
> > > +       while (vma) {
> > > +               if (vma->vm_start - prev->vm_end  >= PAGE_SIZE) {
> > > +                       if (arch_uprobe_is_callable(prev->vm_end, vaddr))
> > > +                               return prev->vm_end;
> > > +                       if (arch_uprobe_is_callable(vma->vm_start - PAGE_SIZE, vaddr))
> > > +                               return vma->vm_start - PAGE_SIZE;
> > > +               }
> > > +
> > > +               prev = vma;
> > > +               vma = vma_next(&vmi);
> > > +       }
> > > +
> > > +       return 0;
> > > +}
> > > +
> >
> > [...]
> >
> > > +struct uprobe_trampoline *uprobe_trampoline_get(unsigned long vaddr)
> > > +{
> > > +       struct uprobes_state *state = &current->mm->uprobes_state;
> > > +       struct uprobe_trampoline *tramp = NULL;
> > > +
> > > +       hlist_for_each_entry(tramp, &state->tramp_head, node) {
> > > +               if (arch_uprobe_is_callable(tramp->vaddr, vaddr)) {
> > > +                       atomic64_inc(&tramp->ref);
> > > +                       return tramp;
> > > +               }
> > > +       }
> > > +
> > > +       tramp = create_uprobe_trampoline(vaddr);
> > > +       if (!tramp)
> > > +               return NULL;
> > > +
> > > +       hlist_add_head(&tramp->node, &state->tramp_head);
> > > +       return tramp;
> > > +}
> > > +
> > > +static void destroy_uprobe_trampoline(struct uprobe_trampoline *tramp)
> > > +{
> > > +       hlist_del(&tramp->node);
> > > +       kfree(tramp);
> >
> > hmm... shouldn't this be RCU-delayed (RCU Tasks Trace for uprobes),
> > otherwise we might have some CPU executing code in that trampoline,
> > no?
>
> so we call destroy_uprobe_trampoline in 2 scenarios:
>
>   - from uprobe_trampoline_put (in __arch_uprobe_optimize) when we failed
>     to optimize the uprobe, so no task can execute it at that point
>
>   - from clear_tramp_head as part of the uprobe trampolines cleanup
>     (__mmput -> uprobe_clear_state) at which point the task should be dead

makes sense, I've been overcautious

>
> jirka
>
> >
> > > +}
> > > +
> >
> > [...]

Patch

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 8843b7f99ed0..c4ee755ca2a1 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -16,6 +16,7 @@ 
 #include <linux/types.h>
 #include <linux/wait.h>
 #include <linux/timer.h>
+#include <linux/mutex.h>
 
 struct uprobe;
 struct vm_area_struct;
@@ -172,6 +173,13 @@  struct xol_area;
 
 struct uprobes_state {
 	struct xol_area		*xol_area;
+	struct hlist_head	tramp_head;
+};
+
+struct uprobe_trampoline {
+	struct hlist_node	node;
+	unsigned long		vaddr;
+	atomic64_t		ref;
 };
 
 extern void __init uprobes_init(void);
@@ -220,6 +228,10 @@  extern int arch_uprobe_verify_opcode(struct arch_uprobe *auprobe, struct page *p
 				     unsigned long vaddr, uprobe_opcode_t *new_opcode,
 				     int nbytes);
 extern bool arch_uprobe_is_register(uprobe_opcode_t *insn, int nbytes);
+extern struct uprobe_trampoline *uprobe_trampoline_get(unsigned long vaddr);
+extern void uprobe_trampoline_put(struct uprobe_trampoline *area);
+extern bool arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr);
+extern const struct vm_special_mapping *arch_uprobe_trampoline_mapping(void);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 8068f91de9e3..f57918c624da 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -615,6 +615,118 @@  set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long v
 			(uprobe_opcode_t *)&auprobe->insn, UPROBE_SWBP_INSN_SIZE);
 }
 
+bool __weak arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr)
+{
+	return false;
+}
+
+const struct vm_special_mapping * __weak arch_uprobe_trampoline_mapping(void)
+{
+	return NULL;
+}
+
+static unsigned long find_nearest_page(unsigned long vaddr)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma, *prev;
+	VMA_ITERATOR(vmi, mm, 0);
+
+	prev = vma_next(&vmi);
+	vma = vma_next(&vmi);
+	while (vma) {
+		if (vma->vm_start - prev->vm_end  >= PAGE_SIZE) {
+			if (arch_uprobe_is_callable(prev->vm_end, vaddr))
+				return prev->vm_end;
+			if (arch_uprobe_is_callable(vma->vm_start - PAGE_SIZE, vaddr))
+				return vma->vm_start - PAGE_SIZE;
+		}
+
+		prev = vma;
+		vma = vma_next(&vmi);
+	}
+
+	return 0;
+}
+
+static struct uprobe_trampoline *create_uprobe_trampoline(unsigned long vaddr)
+{
+	const struct vm_special_mapping *mapping;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct uprobe_trampoline *tramp;
+
+	mapping = arch_uprobe_trampoline_mapping();
+	if (!mapping)
+		return NULL;
+
+	vaddr = find_nearest_page(vaddr);
+	if (!vaddr)
+		return NULL;
+
+	tramp = kzalloc(sizeof(*tramp), GFP_KERNEL);
+	if (unlikely(!tramp))
+		return NULL;
+
+	atomic64_set(&tramp->ref, 1);
+	tramp->vaddr = vaddr;
+
+	vma = _install_special_mapping(mm, tramp->vaddr, PAGE_SIZE,
+				VM_READ|VM_EXEC|VM_MAYEXEC|VM_MAYREAD|VM_DONTCOPY|VM_IO,
+				mapping);
+	if (IS_ERR(vma))
+		goto free_area;
+	return tramp;
+
+ free_area:
+	kfree(tramp);
+	return NULL;
+}
+
+struct uprobe_trampoline *uprobe_trampoline_get(unsigned long vaddr)
+{
+	struct uprobes_state *state = &current->mm->uprobes_state;
+	struct uprobe_trampoline *tramp = NULL;
+
+	hlist_for_each_entry(tramp, &state->tramp_head, node) {
+		if (arch_uprobe_is_callable(tramp->vaddr, vaddr)) {
+			atomic64_inc(&tramp->ref);
+			return tramp;
+		}
+	}
+
+	tramp = create_uprobe_trampoline(vaddr);
+	if (!tramp)
+		return NULL;
+
+	hlist_add_head(&tramp->node, &state->tramp_head);
+	return tramp;
+}
+
+static void destroy_uprobe_trampoline(struct uprobe_trampoline *tramp)
+{
+	hlist_del(&tramp->node);
+	kfree(tramp);
+}
+
+void uprobe_trampoline_put(struct uprobe_trampoline *tramp)
+{
+	if (tramp == NULL)
+		return;
+
+	if (atomic64_dec_and_test(&tramp->ref))
+		destroy_uprobe_trampoline(tramp);
+}
+
+static void clear_tramp_head(struct mm_struct *mm)
+{
+	struct uprobes_state *state = &mm->uprobes_state;
+	struct uprobe_trampoline *tramp;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_safe(tramp, n, &state->tramp_head, node)
+		destroy_uprobe_trampoline(tramp);
+}
+
 /* uprobe should have guaranteed positive refcount */
 static struct uprobe *get_uprobe(struct uprobe *uprobe)
 {
@@ -1787,6 +1899,8 @@  void uprobe_clear_state(struct mm_struct *mm)
 	delayed_uprobe_remove(NULL, mm);
 	mutex_unlock(&delayed_uprobe_lock);
 
+	clear_tramp_head(mm);
+
 	if (!area)
 		return;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 1450b461d196..b734a172fd6e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1254,6 +1254,7 @@  static void mm_init_uprobes_state(struct mm_struct *mm)
 {
 #ifdef CONFIG_UPROBES
 	mm->uprobes_state.xol_area = NULL;
+	INIT_HLIST_HEAD(&mm->uprobes_state.tramp_head);
 #endif
 }