
[bpf-next,05/13] uprobes: Add mapping for optimized uprobe trampolines

Message ID: 20241211133403.208920-6-jolsa@kernel.org
State: Changes Requested
Delegated to: BPF
Series: uprobes: Add support to optimize usdt probes on x86_64

Commit Message

Jiri Olsa Dec. 11, 2024, 1:33 p.m. UTC
Adding support to add special mapping for for user space trampoline
with following functions:

  uprobe_trampoline_get - find or add related uprobe_trampoline
  uprobe_trampoline_put - remove ref or destroy uprobe_trampoline

The user space trampoline is exported as an architecture-specific special
mapping, provided by the arch_uprobe_trampoline_mapping function.

The uprobe trampoline needs to be callable/reachable from the probe address,
so while searching for an available address we use the arch_uprobe_is_callable
function to decide whether the trampoline is reachable from the probe address.

All uprobe_trampoline objects are stored in the uprobes_state object and
are cleaned up when the process mm_struct goes down.

Locking is provided by callers in the following changes.
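
For illustration, on x86_64 (which a later patch in this series targets) the
reachability check is expected to boil down to whether a 5-byte call at the
probe address can reach the trampoline with its signed 32-bit displacement.
A minimal sketch, not the actual implementation:

	/*
	 * Sketch only: a call rel32 placed at @vaddr can reach @vtramp
	 * iff the displacement, measured from the end of the 5-byte
	 * call instruction, fits into a signed 32-bit immediate.
	 */
	bool arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr)
	{
		long delta = (long)vtramp - (long)(vaddr + 5);

		return delta == (s32)delta;
	}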

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/uprobes.h |  12 +++++
 kernel/events/uprobes.c | 114 ++++++++++++++++++++++++++++++++++++++++
 kernel/fork.c           |   1 +
 3 files changed, 127 insertions(+)

Comments

Andrii Nakryiko Dec. 13, 2024, 1:01 a.m. UTC | #1
On Wed, Dec 11, 2024 at 5:35 AM Jiri Olsa <jolsa@kernel.org> wrote:
>
> Adding support to add special mapping for for user space trampoline

typo: for for

> [...]

Ran out of time for today, will continue tomorrow for the rest of
patches. Some comments below.

The numbers are really encouraging, though!

> [...]
>
>  struct uprobes_state {
>         struct xol_area         *xol_area;
> +       struct hlist_head       tramp_head;
> +};
> +

should we make uprobe_state be linked by a pointer from mm_struct
instead of increasing mm for each added field? right now it's
embedded, I don't think it's problematic to allocate it on demand and
keep it until mm_struct is freed
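
something like this, as a rough sketch of the on-demand variant (assuming
mm->uprobes_state becomes a pointer; helper name and race handling are made
up here):

	/* hypothetical helper: allocate uprobes_state on first use,
	 * keep it until the mm_struct is freed */
	static struct uprobes_state *get_uprobes_state(struct mm_struct *mm)
	{
		struct uprobes_state *state = READ_ONCE(mm->uprobes_state);

		if (state)
			return state;

		state = kzalloc(sizeof(*state), GFP_KERNEL);
		if (!state)
			return NULL;

		INIT_HLIST_HEAD(&state->tramp_head);
		/* publish, or back off if someone else won the race */
		if (cmpxchg(&mm->uprobes_state, NULL, state)) {
			kfree(state);
			state = READ_ONCE(mm->uprobes_state);
		}
		return state;
	}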

> +struct uprobe_trampoline {
> +       struct hlist_node       node;
> +       unsigned long           vaddr;
> +       atomic64_t              ref;
>  };
>
> [...]
>
> +bool __weak arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr)

bikeshedding some more, I still find "is_callable" confusing. How
about "is_reachable_by_call"? slightly verbose, but probably more
meaningful?

> +{
> +       return false;
> +}
> +
> +const struct vm_special_mapping * __weak arch_uprobe_trampoline_mapping(void)
> +{
> +       return NULL;
> +}
> +
> +static unsigned long find_nearest_page(unsigned long vaddr)
> +{
> +       struct mm_struct *mm = current->mm;
> +       struct vm_area_struct *vma, *prev;
> +       VMA_ITERATOR(vmi, mm, 0);
> +
> +       prev = vma_next(&vmi);

minor: we are missing an opportunity to add something between
[PAGE_SIZE, <first_vma_start>). Probably fine, but why not?
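
something like this before the loop would cover it, as an untested sketch
(ignoring mmap_min_addr for brevity):

	/* untested sketch: also consider the gap below the first VMA,
	 * keeping the zero page clear */
	prev = vma_next(&vmi);
	if (prev && prev->vm_start >= 2 * PAGE_SIZE &&
	    arch_uprobe_is_callable(prev->vm_start - PAGE_SIZE, vaddr))
		return prev->vm_start - PAGE_SIZE;
	vma = vma_next(&vmi);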

> +       vma = vma_next(&vmi);
> +       while (vma) {
> +               if (vma->vm_start - prev->vm_end  >= PAGE_SIZE) {
> +                       if (arch_uprobe_is_callable(prev->vm_end, vaddr))
> +                               return prev->vm_end;
> +                       if (arch_uprobe_is_callable(vma->vm_start - PAGE_SIZE, vaddr))
> +                               return vma->vm_start - PAGE_SIZE;
> +               }
> +
> +               prev = vma;
> +               vma = vma_next(&vmi);
> +       }
> +
> +       return 0;
> +}
> +

[...]

> +struct uprobe_trampoline *uprobe_trampoline_get(unsigned long vaddr)
> +{
> +       struct uprobes_state *state = &current->mm->uprobes_state;
> +       struct uprobe_trampoline *tramp = NULL;
> +
> +       hlist_for_each_entry(tramp, &state->tramp_head, node) {
> +               if (arch_uprobe_is_callable(tramp->vaddr, vaddr)) {
> +                       atomic64_inc(&tramp->ref);
> +                       return tramp;
> +               }
> +       }
> +
> +       tramp = create_uprobe_trampoline(vaddr);
> +       if (!tramp)
> +               return NULL;
> +
> +       hlist_add_head(&tramp->node, &state->tramp_head);
> +       return tramp;
> +}
> +
> +static void destroy_uprobe_trampoline(struct uprobe_trampoline *tramp)
> +{
> +       hlist_del(&tramp->node);
> +       kfree(tramp);

hmm... shouldn't this be RCU-delayed (RCU Tasks Trace for uprobes),
otherwise we might have some CPU executing code in that trampoline,
no?

> +}
> +

[...]
Jiri Olsa Dec. 13, 2024, 1:42 p.m. UTC | #2
On Thu, Dec 12, 2024 at 05:01:52PM -0800, Andrii Nakryiko wrote:

SNIP

> > ---
> >  include/linux/uprobes.h |  12 +++++
> >  kernel/events/uprobes.c | 114 ++++++++++++++++++++++++++++++++++++++++
> >  kernel/fork.c           |   1 +
> >  3 files changed, 127 insertions(+)
> >
> 
> Ran out of time for today, will continue tomorrow for the rest of
> patches. Some comments below.

thanks!

> 
> The numbers are really encouraging, though!
> 
> > [...]
> >
> >  struct uprobes_state {
> >         struct xol_area         *xol_area;
> > +       struct hlist_head       tramp_head;
> > +};
> > +
> 
> should we make uprobe_state be linked by a pointer from mm_struct
> instead of increasing mm for each added field? right now it's
> embedded, I don't think it's problematic to allocate it on demand and
> keep it until mm_struct is freed

seems like good idea, I'll check on that

>
> > [...]
> >
> > +bool __weak arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr)
> 
> bikeshedding some more, I still find "is_callable" confusing. How
> about "is_reachable_by_call"? slightly verbose, but probably more
> meaningful?

yep, more precise, will change

>
> > [...]
> >
> > +static unsigned long find_nearest_page(unsigned long vaddr)
> > +{
> > +       struct mm_struct *mm = current->mm;
> > +       struct vm_area_struct *vma, *prev;
> > +       VMA_ITERATOR(vmi, mm, 0);
> > +
> > +       prev = vma_next(&vmi);
> 
> minor: we are missing an opportunity to add something between
> [PAGE_SIZE, <first_vma_start>). Probably fine, but why not?

true, will add that check

>
> > [...]
> >
> > +static void destroy_uprobe_trampoline(struct uprobe_trampoline *tramp)
> > +{
> > +       hlist_del(&tramp->node);
> > +       kfree(tramp);
> 
> hmm... shouldn't this be RCU-delayed (RCU Tasks Trace for uprobes),
> otherwise we might have some CPU executing code in that trampoline,
> no?

so we call destroy_uprobe_trampoline in 2 scenarios:

  - from uprobe_trampoline_put (in __arch_uprobe_optimize) when we failed
    to optimize the uprobe, so no task can execute it at that point

  - from clear_tramp_head as part of the uprobe trampolines cleanup
    (__mmput -> uprobe_clear_state) at which point the task should be dead

jirka

Andrii Nakryiko Dec. 13, 2024, 9:58 p.m. UTC | #3
On Fri, Dec 13, 2024 at 5:42 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Thu, Dec 12, 2024 at 05:01:52PM -0800, Andrii Nakryiko wrote:
>
> SNIP
>
> [...]
>
> > > +static void destroy_uprobe_trampoline(struct uprobe_trampoline *tramp)
> > > +{
> > > +       hlist_del(&tramp->node);
> > > +       kfree(tramp);
> >
> > hmm... shouldn't this be RCU-delayed (RCU Tasks Trace for uprobes),
> > otherwise we might have some CPU executing code in that trampoline,
> > no?
>
> so we call destroy_uprobe_trampoline in 2 scenarios:
>
>   - from uprobe_trampoline_put (in __arch_uprobe_optimize) when we failed
>     to optimize the uprobe, so no task can execute it at that point
>
>   - from clear_tramp_head as part of the uprobe trampolines cleanup
>     (__mmput -> uprobe_clear_state) at which point the task should be dead

makes sense, I've been overcautious


Patch

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 8843b7f99ed0..c4ee755ca2a1 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -16,6 +16,7 @@ 
 #include <linux/types.h>
 #include <linux/wait.h>
 #include <linux/timer.h>
+#include <linux/mutex.h>
 
 struct uprobe;
 struct vm_area_struct;
@@ -172,6 +173,13 @@  struct xol_area;
 
 struct uprobes_state {
 	struct xol_area		*xol_area;
+	struct hlist_head	tramp_head;
+};
+
+struct uprobe_trampoline {
+	struct hlist_node	node;
+	unsigned long		vaddr;
+	atomic64_t		ref;
 };
 
 extern void __init uprobes_init(void);
@@ -220,6 +228,10 @@  extern int arch_uprobe_verify_opcode(struct arch_uprobe *auprobe, struct page *p
 				     unsigned long vaddr, uprobe_opcode_t *new_opcode,
 				     int nbytes);
 extern bool arch_uprobe_is_register(uprobe_opcode_t *insn, int nbytes);
+extern struct uprobe_trampoline *uprobe_trampoline_get(unsigned long vaddr);
+extern void uprobe_trampoline_put(struct uprobe_trampoline *area);
+extern bool arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr);
+extern const struct vm_special_mapping *arch_uprobe_trampoline_mapping(void);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 8068f91de9e3..f57918c624da 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -615,6 +615,118 @@  set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long v
 			(uprobe_opcode_t *)&auprobe->insn, UPROBE_SWBP_INSN_SIZE);
 }
 
+bool __weak arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr)
+{
+	return false;
+}
+
+const struct vm_special_mapping * __weak arch_uprobe_trampoline_mapping(void)
+{
+	return NULL;
+}
+
+static unsigned long find_nearest_page(unsigned long vaddr)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma, *prev;
+	VMA_ITERATOR(vmi, mm, 0);
+
+	prev = vma_next(&vmi);
+	vma = vma_next(&vmi);
+	while (vma) {
+		if (vma->vm_start - prev->vm_end  >= PAGE_SIZE) {
+			if (arch_uprobe_is_callable(prev->vm_end, vaddr))
+				return prev->vm_end;
+			if (arch_uprobe_is_callable(vma->vm_start - PAGE_SIZE, vaddr))
+				return vma->vm_start - PAGE_SIZE;
+		}
+
+		prev = vma;
+		vma = vma_next(&vmi);
+	}
+
+	return 0;
+}
+
+static struct uprobe_trampoline *create_uprobe_trampoline(unsigned long vaddr)
+{
+	const struct vm_special_mapping *mapping;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct uprobe_trampoline *tramp;
+
+	mapping = arch_uprobe_trampoline_mapping();
+	if (!mapping)
+		return NULL;
+
+	vaddr = find_nearest_page(vaddr);
+	if (!vaddr)
+		return NULL;
+
+	tramp = kzalloc(sizeof(*tramp), GFP_KERNEL);
+	if (unlikely(!tramp))
+		return NULL;
+
+	atomic64_set(&tramp->ref, 1);
+	tramp->vaddr = vaddr;
+
+	vma = _install_special_mapping(mm, tramp->vaddr, PAGE_SIZE,
+				VM_READ|VM_EXEC|VM_MAYEXEC|VM_MAYREAD|VM_DONTCOPY|VM_IO,
+				mapping);
+	if (IS_ERR(vma))
+		goto free_area;
+	return tramp;
+
+ free_area:
+	kfree(tramp);
+	return NULL;
+}
+
+struct uprobe_trampoline *uprobe_trampoline_get(unsigned long vaddr)
+{
+	struct uprobes_state *state = &current->mm->uprobes_state;
+	struct uprobe_trampoline *tramp = NULL;
+
+	hlist_for_each_entry(tramp, &state->tramp_head, node) {
+		if (arch_uprobe_is_callable(tramp->vaddr, vaddr)) {
+			atomic64_inc(&tramp->ref);
+			return tramp;
+		}
+	}
+
+	tramp = create_uprobe_trampoline(vaddr);
+	if (!tramp)
+		return NULL;
+
+	hlist_add_head(&tramp->node, &state->tramp_head);
+	return tramp;
+}
+
+static void destroy_uprobe_trampoline(struct uprobe_trampoline *tramp)
+{
+	hlist_del(&tramp->node);
+	kfree(tramp);
+}
+
+void uprobe_trampoline_put(struct uprobe_trampoline *tramp)
+{
+	if (tramp == NULL)
+		return;
+
+	if (atomic64_dec_and_test(&tramp->ref))
+		destroy_uprobe_trampoline(tramp);
+}
+
+static void clear_tramp_head(struct mm_struct *mm)
+{
+	struct uprobes_state *state = &mm->uprobes_state;
+	struct uprobe_trampoline *tramp;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_safe(tramp, n, &state->tramp_head, node)
+		destroy_uprobe_trampoline(tramp);
+}
+
 /* uprobe should have guaranteed positive refcount */
 static struct uprobe *get_uprobe(struct uprobe *uprobe)
 {
@@ -1787,6 +1899,8 @@  void uprobe_clear_state(struct mm_struct *mm)
 	delayed_uprobe_remove(NULL, mm);
 	mutex_unlock(&delayed_uprobe_lock);
 
+	clear_tramp_head(mm);
+
 	if (!area)
 		return;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 1450b461d196..b734a172fd6e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1254,6 +1254,7 @@  static void mm_init_uprobes_state(struct mm_struct *mm)
 {
 #ifdef CONFIG_UPROBES
 	mm->uprobes_state.xol_area = NULL;
+	INIT_HLIST_HEAD(&mm->uprobes_state.tramp_head);
 #endif
 }
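
For context, a minimal sketch of the intended caller pattern (the real call
sites arrive in later patches of this series; some_arch_specific_patching is
a made-up placeholder, and locking is assumed to be provided by the caller):

	/*
	 * Hypothetical caller (not part of this patch): find or create a
	 * trampoline reachable from @vaddr, drop the reference again if
	 * the optimization cannot proceed.
	 */
	static int try_optimize(struct arch_uprobe *auprobe, unsigned long vaddr)
	{
		struct uprobe_trampoline *tramp;

		tramp = uprobe_trampoline_get(vaddr);
		if (!tramp)
			return -ENOMEM;

		if (some_arch_specific_patching(auprobe, vaddr, tramp->vaddr)) {
			/* failed to optimize: release our reference */
			uprobe_trampoline_put(tramp);
			return -EINVAL;
		}
		return 0;
	}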