Message ID | 20220407125224.310255-4-jolsa@kernel.org (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | bpf: Speed up symbol resolving in kprobe multi link | expand |
Context | Check | Description |
---|---|---|
bpf/vmtest-bpf-next-PR | fail | merge-conflict |
netdev/tree_selection | success | Clearly marked for bpf-next, async |
netdev/apply | fail | Patch does not apply to bpf-next |
On Thu, Apr 07, 2022 at 02:52:23PM +0200, Jiri Olsa wrote: > Using kallsyms_lookup_names function to speed up symbols lookup in > kprobe multi link attachment and replacing with it the current > kprobe_multi_resolve_syms function. > > This speeds up bpftrace kprobe attachment: > > # perf stat -r 5 -e cycles ./src/bpftrace -e 'kprobe:x* { } i:ms:1 { exit(); }' > ... > 6.5681 +- 0.0225 seconds time elapsed ( +- 0.34% ) > > After: > > # perf stat -r 5 -e cycles ./src/bpftrace -e 'kprobe:x* { } i:ms:1 { exit(); }' > ... > 0.5661 +- 0.0275 seconds time elapsed ( +- 4.85% ) > > Signed-off-by: Jiri Olsa <jolsa@kernel.org> > --- > kernel/trace/bpf_trace.c | 123 +++++++++++++++++++++++---------------- > 1 file changed, 73 insertions(+), 50 deletions(-) > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > index b26f3da943de..2602957225ba 100644 > --- a/kernel/trace/bpf_trace.c > +++ b/kernel/trace/bpf_trace.c > @@ -2226,6 +2226,72 @@ struct bpf_kprobe_multi_run_ctx { > unsigned long entry_ip; > }; > > +struct user_syms { > + const char **syms; > + char *buf; > +}; > + > +static int copy_user_syms(struct user_syms *us, void __user *usyms, u32 cnt) > +{ > + const char __user **usyms_copy = NULL; > + const char **syms = NULL; > + char *buf = NULL, *p; > + int err = -EFAULT; > + unsigned int i; > + size_t size; > + > + size = cnt * sizeof(*usyms_copy); > + > + usyms_copy = kvmalloc(size, GFP_KERNEL); > + if (!usyms_copy) > + return -ENOMEM; > + > + if (copy_from_user(usyms_copy, usyms, size)) > + goto error; > + > + err = -ENOMEM; > + syms = kvmalloc(size, GFP_KERNEL); > + if (!syms) > + goto error; > + > + /* TODO this potentially allocates lot of memory (~6MB in my tests > + * with attaching ~40k functions). I haven't seen this to fail yet, > + * but it could be changed to allocate memory gradually if needed. > + */ Why would 6MB kvmalloc fail? If we don't have such memory the kernel will be ooming soon anyway. 
I don't think we'll see this kvmalloc triggering oom in practice. The verifier allocates a lot more memory to check large programs. imo this approach is fine. It's simple. Trying to do gradual alloc with realloc would be just guessing. Another option would be to ask user space (libbpf) to do the sort. There are pros and cons. This vmalloc+sort is slightly better imo. > + size = cnt * KSYM_NAME_LEN; > + buf = kvmalloc(size, GFP_KERNEL); > + if (!buf) > + goto error; > + > + for (p = buf, i = 0; i < cnt; i++) { > + err = strncpy_from_user(p, usyms_copy[i], KSYM_NAME_LEN); > + if (err == KSYM_NAME_LEN) > + err = -E2BIG; > + if (err < 0) > + goto error; > + syms[i] = p; > + p += err + 1; > + } > + > + err = 0; > + us->syms = syms; > + us->buf = buf; > + > +error: > + kvfree(usyms_copy); > + if (err) { > + kvfree(syms); > + kvfree(buf); > + } > + return err; > +} > + > +static void free_user_syms(struct user_syms *us) > +{ > + kvfree(us->syms); > + kvfree(us->buf); > +} > + > static void bpf_kprobe_multi_link_release(struct bpf_link *link) > { > struct bpf_kprobe_multi_link *kmulti_link; > @@ -2346,55 +2412,6 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip, > kprobe_multi_link_prog_run(link, entry_ip, regs); > } > > -static int > -kprobe_multi_resolve_syms(const void __user *usyms, u32 cnt, > - unsigned long *addrs) > -{ > - unsigned long addr, size; > - const char __user **syms; > - int err = -ENOMEM; > - unsigned int i; > - char *func; > - > - size = cnt * sizeof(*syms); > - syms = kvzalloc(size, GFP_KERNEL); > - if (!syms) > - return -ENOMEM; > - > - func = kmalloc(KSYM_NAME_LEN, GFP_KERNEL); > - if (!func) > - goto error; > - > - if (copy_from_user(syms, usyms, size)) { > - err = -EFAULT; > - goto error; > - } > - > - for (i = 0; i < cnt; i++) { > - err = strncpy_from_user(func, syms[i], KSYM_NAME_LEN); > - if (err == KSYM_NAME_LEN) > - err = -E2BIG; > - if (err < 0) > - goto error; > - err = -EINVAL; > - addr = kallsyms_lookup_name(func); > - 
if (!addr) > - goto error; > - if (!kallsyms_lookup_size_offset(addr, &size, NULL)) > - goto error; > - addr = ftrace_location_range(addr, addr + size - 1); > - if (!addr) > - goto error; > - addrs[i] = addr; > - } > - > - err = 0; > -error: > - kvfree(syms); > - kfree(func); > - return err; > -} > - > int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) > { > struct bpf_kprobe_multi_link *link = NULL; > @@ -2438,7 +2455,13 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr > goto error; > } > } else { > - err = kprobe_multi_resolve_syms(usyms, cnt, addrs); > + struct user_syms us; > + > + err = copy_user_syms(&us, usyms, cnt); > + if (err) > + goto error; > + err = kallsyms_lookup_names(us.syms, cnt, addrs); > + free_user_syms(&us); > if (err) > goto error; > } > -- > 2.35.1 >
On Fri, Apr 08, 2022 at 04:26:10PM -0700, Alexei Starovoitov wrote: > On Thu, Apr 07, 2022 at 02:52:23PM +0200, Jiri Olsa wrote: > > Using kallsyms_lookup_names function to speed up symbols lookup in > > kprobe multi link attachment and replacing with it the current > > kprobe_multi_resolve_syms function. > > > > This speeds up bpftrace kprobe attachment: > > > > # perf stat -r 5 -e cycles ./src/bpftrace -e 'kprobe:x* { } i:ms:1 { exit(); }' > > ... > > 6.5681 +- 0.0225 seconds time elapsed ( +- 0.34% ) > > > > After: > > > > # perf stat -r 5 -e cycles ./src/bpftrace -e 'kprobe:x* { } i:ms:1 { exit(); }' > > ... > > 0.5661 +- 0.0275 seconds time elapsed ( +- 4.85% ) > > > > Signed-off-by: Jiri Olsa <jolsa@kernel.org> > > --- > > kernel/trace/bpf_trace.c | 123 +++++++++++++++++++++++---------------- > > 1 file changed, 73 insertions(+), 50 deletions(-) > > > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > > index b26f3da943de..2602957225ba 100644 > > --- a/kernel/trace/bpf_trace.c > > +++ b/kernel/trace/bpf_trace.c > > @@ -2226,6 +2226,72 @@ struct bpf_kprobe_multi_run_ctx { > > unsigned long entry_ip; > > }; > > > > +struct user_syms { > > + const char **syms; > > + char *buf; > > +}; > > + > > +static int copy_user_syms(struct user_syms *us, void __user *usyms, u32 cnt) > > +{ > > + const char __user **usyms_copy = NULL; > > + const char **syms = NULL; > > + char *buf = NULL, *p; > > + int err = -EFAULT; > > + unsigned int i; > > + size_t size; > > + > > + size = cnt * sizeof(*usyms_copy); > > + > > + usyms_copy = kvmalloc(size, GFP_KERNEL); > > + if (!usyms_copy) > > + return -ENOMEM; > > + > > + if (copy_from_user(usyms_copy, usyms, size)) > > + goto error; > > + > > + err = -ENOMEM; > > + syms = kvmalloc(size, GFP_KERNEL); > > + if (!syms) > > + goto error; > > + > > + /* TODO this potentially allocates lot of memory (~6MB in my tests > > + * with attaching ~40k functions). 
I haven't seen this to fail yet, > > + * but it could be changed to allocate memory gradually if needed. > > + */ > > Why would 6MB kvmalloc fail? > If we don't have such memory the kernel will be ooming soon anyway. > I don't think we'll see this kvmalloc triggering oom in practice. > The verifier allocates a lot more memory to check large programs. > > imo this approach is fine. It's simple. > Trying to do gradual alloc with realloc would be just guessing. > > Another option would be to ask user space (libbpf) to do the sort. > There are pros and cons. > This vmalloc+sort is slightly better imo. ok, makes sense, will keep it jirka
On Thu, Apr 7, 2022 at 5:53 AM Jiri Olsa <jolsa@kernel.org> wrote: > > Using kallsyms_lookup_names function to speed up symbols lookup in > kprobe multi link attachment and replacing with it the current > kprobe_multi_resolve_syms function. > > This speeds up bpftrace kprobe attachment: > > # perf stat -r 5 -e cycles ./src/bpftrace -e 'kprobe:x* { } i:ms:1 { exit(); }' > ... > 6.5681 +- 0.0225 seconds time elapsed ( +- 0.34% ) > > After: > > # perf stat -r 5 -e cycles ./src/bpftrace -e 'kprobe:x* { } i:ms:1 { exit(); }' > ... > 0.5661 +- 0.0275 seconds time elapsed ( +- 4.85% ) > > Signed-off-by: Jiri Olsa <jolsa@kernel.org> > --- > kernel/trace/bpf_trace.c | 123 +++++++++++++++++++++++---------------- > 1 file changed, 73 insertions(+), 50 deletions(-) > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > index b26f3da943de..2602957225ba 100644 > --- a/kernel/trace/bpf_trace.c > +++ b/kernel/trace/bpf_trace.c > @@ -2226,6 +2226,72 @@ struct bpf_kprobe_multi_run_ctx { > unsigned long entry_ip; > }; > > +struct user_syms { > + const char **syms; > + char *buf; > +}; > + > +static int copy_user_syms(struct user_syms *us, void __user *usyms, u32 cnt) > +{ > + const char __user **usyms_copy = NULL; > + const char **syms = NULL; > + char *buf = NULL, *p; > + int err = -EFAULT; > + unsigned int i; > + size_t size; > + > + size = cnt * sizeof(*usyms_copy); > + > + usyms_copy = kvmalloc(size, GFP_KERNEL); > + if (!usyms_copy) > + return -ENOMEM; do you really need usyms_copy? why not just read one pointer at a time? > + > + if (copy_from_user(usyms_copy, usyms, size)) > + goto error; > + > + err = -ENOMEM; > + syms = kvmalloc(size, GFP_KERNEL); > + if (!syms) > + goto error; > + > + /* TODO this potentially allocates lot of memory (~6MB in my tests > + * with attaching ~40k functions). I haven't seen this to fail yet, > + * but it could be changed to allocate memory gradually if needed. 
> + */ > + size = cnt * KSYM_NAME_LEN; this reassignment of size is making it hard to follow the code, you can just do cnt * KSYM_NAME_LEN inside kvmalloc, you don't ever use it anywhere else > + buf = kvmalloc(size, GFP_KERNEL); > + if (!buf) > + goto error; > + > + for (p = buf, i = 0; i < cnt; i++) { like here, before doing strncpy_from_user() you can read usyms[i] from user-space into temporary variable, no need for extra kvmalloc? > + err = strncpy_from_user(p, usyms_copy[i], KSYM_NAME_LEN); > + if (err == KSYM_NAME_LEN) > + err = -E2BIG; > + if (err < 0) > + goto error; > + syms[i] = p; > + p += err + 1; > + } > + > + err = 0; > + us->syms = syms; > + us->buf = buf; > + [...]
On Fri, Apr 8, 2022 at 4:26 PM Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote: > > On Thu, Apr 07, 2022 at 02:52:23PM +0200, Jiri Olsa wrote: > > Using kallsyms_lookup_names function to speed up symbols lookup in > > kprobe multi link attachment and replacing with it the current > > kprobe_multi_resolve_syms function. > > > > This speeds up bpftrace kprobe attachment: > > > > # perf stat -r 5 -e cycles ./src/bpftrace -e 'kprobe:x* { } i:ms:1 { exit(); }' > > ... > > 6.5681 +- 0.0225 seconds time elapsed ( +- 0.34% ) > > > > After: > > > > # perf stat -r 5 -e cycles ./src/bpftrace -e 'kprobe:x* { } i:ms:1 { exit(); }' > > ... > > 0.5661 +- 0.0275 seconds time elapsed ( +- 4.85% ) > > > > Signed-off-by: Jiri Olsa <jolsa@kernel.org> > > --- > > kernel/trace/bpf_trace.c | 123 +++++++++++++++++++++++---------------- > > 1 file changed, 73 insertions(+), 50 deletions(-) > > > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > > index b26f3da943de..2602957225ba 100644 > > --- a/kernel/trace/bpf_trace.c > > +++ b/kernel/trace/bpf_trace.c > > @@ -2226,6 +2226,72 @@ struct bpf_kprobe_multi_run_ctx { > > unsigned long entry_ip; > > }; > > > > +struct user_syms { > > + const char **syms; > > + char *buf; > > +}; > > + > > +static int copy_user_syms(struct user_syms *us, void __user *usyms, u32 cnt) > > +{ > > + const char __user **usyms_copy = NULL; > > + const char **syms = NULL; > > + char *buf = NULL, *p; > > + int err = -EFAULT; > > + unsigned int i; > > + size_t size; > > + > > + size = cnt * sizeof(*usyms_copy); > > + > > + usyms_copy = kvmalloc(size, GFP_KERNEL); > > + if (!usyms_copy) > > + return -ENOMEM; > > + > > + if (copy_from_user(usyms_copy, usyms, size)) > > + goto error; > > + > > + err = -ENOMEM; > > + syms = kvmalloc(size, GFP_KERNEL); > > + if (!syms) > > + goto error; > > + > > + /* TODO this potentially allocates lot of memory (~6MB in my tests > > + * with attaching ~40k functions). 
I haven't seen this to fail yet, > > + * but it could be changed to allocate memory gradually if needed. > > + */ > > Why would 6MB kvmalloc fail? > If we don't have such memory the kernel will be ooming soon anyway. > I don't think we'll see this kvmalloc triggering oom in practice. > The verifier allocates a lot more memory to check large programs. > > imo this approach is fine. It's simple. > Trying to do gradual alloc with realloc would be just guessing. > > Another option would be to ask user space (libbpf) to do the sort. > There are pros and cons. > This vmalloc+sort is slightly better imo. Totally agree, the simpler the better. Also when you are attaching to tens of thousands of probes, 6MB isn't a lot of memory for whatever you are trying to do, anyways :) Even if libbpf had to sort it, kernel would probably have to validate that. Also for binary search you'd still need to read in the string itself, but if you'd do this "on demand", we are adding TOCTOU and other headaches. Simple is good. > > > + size = cnt * KSYM_NAME_LEN; > > + buf = kvmalloc(size, GFP_KERNEL); > > + if (!buf) > > + goto error; > > + > > + for (p = buf, i = 0; i < cnt; i++) { > > + err = strncpy_from_user(p, usyms_copy[i], KSYM_NAME_LEN); > > + if (err == KSYM_NAME_LEN) > > + err = -E2BIG; > > + if (err < 0) > > + goto error; [...]
On Mon, Apr 11, 2022 at 03:15:32PM -0700, Andrii Nakryiko wrote: > On Thu, Apr 7, 2022 at 5:53 AM Jiri Olsa <jolsa@kernel.org> wrote: > > > > Using kallsyms_lookup_names function to speed up symbols lookup in > > kprobe multi link attachment and replacing with it the current > > kprobe_multi_resolve_syms function. > > > > This speeds up bpftrace kprobe attachment: > > > > # perf stat -r 5 -e cycles ./src/bpftrace -e 'kprobe:x* { } i:ms:1 { exit(); }' > > ... > > 6.5681 +- 0.0225 seconds time elapsed ( +- 0.34% ) > > > > After: > > > > # perf stat -r 5 -e cycles ./src/bpftrace -e 'kprobe:x* { } i:ms:1 { exit(); }' > > ... > > 0.5661 +- 0.0275 seconds time elapsed ( +- 4.85% ) > > > > Signed-off-by: Jiri Olsa <jolsa@kernel.org> > > --- > > kernel/trace/bpf_trace.c | 123 +++++++++++++++++++++++---------------- > > 1 file changed, 73 insertions(+), 50 deletions(-) > > > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > > index b26f3da943de..2602957225ba 100644 > > --- a/kernel/trace/bpf_trace.c > > +++ b/kernel/trace/bpf_trace.c > > @@ -2226,6 +2226,72 @@ struct bpf_kprobe_multi_run_ctx { > > unsigned long entry_ip; > > }; > > > > +struct user_syms { > > + const char **syms; > > + char *buf; > > +}; > > + > > +static int copy_user_syms(struct user_syms *us, void __user *usyms, u32 cnt) > > +{ > > + const char __user **usyms_copy = NULL; > > + const char **syms = NULL; > > + char *buf = NULL, *p; > > + int err = -EFAULT; > > + unsigned int i; > > + size_t size; > > + > > + size = cnt * sizeof(*usyms_copy); > > + > > + usyms_copy = kvmalloc(size, GFP_KERNEL); > > + if (!usyms_copy) > > + return -ENOMEM; > > do you really need usyms_copy? why not just read one pointer at a time? 
> > > + > > + if (copy_from_user(usyms_copy, usyms, size)) > > + goto error; > > + > > + err = -ENOMEM; > > + syms = kvmalloc(size, GFP_KERNEL); > > + if (!syms) > > + goto error; > > + > > + /* TODO this potentially allocates lot of memory (~6MB in my tests > > + * with attaching ~40k functions). I haven't seen this to fail yet, > > + * but it could be changed to allocate memory gradually if needed. > > + */ > > + size = cnt * KSYM_NAME_LEN; > > this reassignment of size is making it hard to follow the code, you > can just do cnt * KSYM_NAME_LEN inside kvmalloc, you don't ever use it > anywhere else ok > > > + buf = kvmalloc(size, GFP_KERNEL); > > + if (!buf) > > + goto error; > > + > > + for (p = buf, i = 0; i < cnt; i++) { > > like here, before doing strncpy_from_user() you can read usyms[i] from > user-space into temporary variable, no need for extra kvmalloc? yes, that could work.. one copy_from_user seemed faster than separate get_user calls, but then it's without memory allocation.. so perhaps that's better jirka > > > + err = strncpy_from_user(p, usyms_copy[i], KSYM_NAME_LEN); > > + if (err == KSYM_NAME_LEN) > > + err = -E2BIG; > > + if (err < 0) > > + goto error; > > + syms[i] = p; > > + p += err + 1; > > + } > > + > > + err = 0; > > + us->syms = syms; > > + us->buf = buf; > > + > > [...]
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b26f3da943de..2602957225ba 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2226,6 +2226,72 @@ struct bpf_kprobe_multi_run_ctx { unsigned long entry_ip; }; +struct user_syms { + const char **syms; + char *buf; +}; + +static int copy_user_syms(struct user_syms *us, void __user *usyms, u32 cnt) +{ + const char __user **usyms_copy = NULL; + const char **syms = NULL; + char *buf = NULL, *p; + int err = -EFAULT; + unsigned int i; + size_t size; + + size = cnt * sizeof(*usyms_copy); + + usyms_copy = kvmalloc(size, GFP_KERNEL); + if (!usyms_copy) + return -ENOMEM; + + if (copy_from_user(usyms_copy, usyms, size)) + goto error; + + err = -ENOMEM; + syms = kvmalloc(size, GFP_KERNEL); + if (!syms) + goto error; + + /* TODO this potentially allocates lot of memory (~6MB in my tests + * with attaching ~40k functions). I haven't seen this to fail yet, + * but it could be changed to allocate memory gradually if needed. 
+ */ + size = cnt * KSYM_NAME_LEN; + buf = kvmalloc(size, GFP_KERNEL); + if (!buf) + goto error; + + for (p = buf, i = 0; i < cnt; i++) { + err = strncpy_from_user(p, usyms_copy[i], KSYM_NAME_LEN); + if (err == KSYM_NAME_LEN) + err = -E2BIG; + if (err < 0) + goto error; + syms[i] = p; + p += err + 1; + } + + err = 0; + us->syms = syms; + us->buf = buf; + +error: + kvfree(usyms_copy); + if (err) { + kvfree(syms); + kvfree(buf); + } + return err; +} + +static void free_user_syms(struct user_syms *us) +{ + kvfree(us->syms); + kvfree(us->buf); +} + static void bpf_kprobe_multi_link_release(struct bpf_link *link) { struct bpf_kprobe_multi_link *kmulti_link; @@ -2346,55 +2412,6 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip, kprobe_multi_link_prog_run(link, entry_ip, regs); } -static int -kprobe_multi_resolve_syms(const void __user *usyms, u32 cnt, - unsigned long *addrs) -{ - unsigned long addr, size; - const char __user **syms; - int err = -ENOMEM; - unsigned int i; - char *func; - - size = cnt * sizeof(*syms); - syms = kvzalloc(size, GFP_KERNEL); - if (!syms) - return -ENOMEM; - - func = kmalloc(KSYM_NAME_LEN, GFP_KERNEL); - if (!func) - goto error; - - if (copy_from_user(syms, usyms, size)) { - err = -EFAULT; - goto error; - } - - for (i = 0; i < cnt; i++) { - err = strncpy_from_user(func, syms[i], KSYM_NAME_LEN); - if (err == KSYM_NAME_LEN) - err = -E2BIG; - if (err < 0) - goto error; - err = -EINVAL; - addr = kallsyms_lookup_name(func); - if (!addr) - goto error; - if (!kallsyms_lookup_size_offset(addr, &size, NULL)) - goto error; - addr = ftrace_location_range(addr, addr + size - 1); - if (!addr) - goto error; - addrs[i] = addr; - } - - err = 0; -error: - kvfree(syms); - kfree(func); - return err; -} - int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct bpf_kprobe_multi_link *link = NULL; @@ -2438,7 +2455,13 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr goto 
error; } } else { - err = kprobe_multi_resolve_syms(usyms, cnt, addrs); + struct user_syms us; + + err = copy_user_syms(&us, usyms, cnt); + if (err) + goto error; + err = kallsyms_lookup_names(us.syms, cnt, addrs); + free_user_syms(&us); if (err) goto error; }
Use the kallsyms_lookup_names function to speed up symbol lookup in kprobe multi link attachment, replacing the current kprobe_multi_resolve_syms function with it. This speeds up bpftrace kprobe attachment: Before: # perf stat -r 5 -e cycles ./src/bpftrace -e 'kprobe:x* { } i:ms:1 { exit(); }' ... 6.5681 +- 0.0225 seconds time elapsed ( +- 0.34% ) After: # perf stat -r 5 -e cycles ./src/bpftrace -e 'kprobe:x* { } i:ms:1 { exit(); }' ... 0.5661 +- 0.0275 seconds time elapsed ( +- 4.85% ) Signed-off-by: Jiri Olsa <jolsa@kernel.org> --- kernel/trace/bpf_trace.c | 123 +++++++++++++++++++++++---------------- 1 file changed, 73 insertions(+), 50 deletions(-)