diff mbox series

[PATCHv2,bpf-next,3/9] bpf: Add missed value to kprobe perf link info

Message ID 20230907071311.254313-4-jolsa@kernel.org (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Add missed stats for kprobes | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4263 this patch: 4263
netdev/cc_maintainers warning 7 maintainers not CCed: rostedt@goodmis.org martin.lau@linux.dev kpsingh@kernel.org yonghong.song@linux.dev mhiramat@kernel.org linux-trace-kernel@vger.kernel.org song@kernel.org
netdev/build_clang success Errors and warnings before: 1688 this patch: 1688
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4437 this patch: 4437
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 2 this patch: 2
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-1 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-28 success Logs for veristat
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for test_maps on s390x with gcc

Commit Message

Jiri Olsa Sept. 7, 2023, 7:13 a.m. UTC
Add missed value to kprobe attached through perf link info to
hold the stats of missed kprobe handler execution.

The kprobe's missed counter gets incremented when kprobe handler
is not executed due to another kprobe running on the same cpu.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/linux/trace_events.h   |  6 ++++--
 include/uapi/linux/bpf.h       |  1 +
 kernel/bpf/syscall.c           | 14 ++++++++------
 kernel/trace/bpf_trace.c       |  5 +++--
 kernel/trace/trace_kprobe.c    | 14 +++++++++++---
 tools/include/uapi/linux/bpf.h |  1 +
 6 files changed, 28 insertions(+), 13 deletions(-)

Comments

Song Liu Sept. 7, 2023, 6:40 p.m. UTC | #1
On Thu, Sep 7, 2023 at 12:13 AM Jiri Olsa <jolsa@kernel.org> wrote:
>
> Add missed value to kprobe attached through perf link info to
> hold the stats of missed kprobe handler execution.
>
> The kprobe's missed counter gets incremented when kprobe handler
> is not executed due to another kprobe running on the same cpu.
>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>

[...]

The code looks good to me. But I have two thoughts on this (and 2/9).

> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index e5216420ec73..e824b0c50425 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -6546,6 +6546,7 @@ struct bpf_link_info {
>                                         __u32 name_len;
>                                         __u32 offset; /* offset from func_name */
>                                         __u64 addr;
> +                                       __u64 missed;
>                                 } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
>                                 struct {
>                                         __aligned_u64 tp_name;   /* in/out */

1) Shall we add missed for all bpf_link_info? Something like:

diff --git i/include/uapi/linux/bpf.h w/include/uapi/linux/bpf.h
index 5a39c7a13499..cf0b8b2a8b39 100644
--- i/include/uapi/linux/bpf.h
+++ w/include/uapi/linux/bpf.h
@@ -6465,6 +6465,7 @@ struct bpf_link_info {
        __u32 type;
        __u32 id;
        __u32 prog_id;
+       __u64 missed;
        union {
                struct {
                        __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */

2) "missed" doesn't seem to fit well with other information in
struct bpf_link_info. Other information there are more like stable-ish
information; while missed is a stat that changes over time. Given we
have prog_id in bpf_link_info, do we really need "missed" here?

Thanks,
Song


[...]
Jiri Olsa Sept. 8, 2023, 11:43 a.m. UTC | #2
On Thu, Sep 07, 2023 at 11:40:46AM -0700, Song Liu wrote:
> On Thu, Sep 7, 2023 at 12:13 AM Jiri Olsa <jolsa@kernel.org> wrote:
> >
> > Add missed value to kprobe attached through perf link info to
> > hold the stats of missed kprobe handler execution.
> >
> > The kprobe's missed counter gets incremented when kprobe handler
> > is not executed due to another kprobe running on the same cpu.
> >
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> 
> [...]
> 
> The code looks good to me. But I have two thoughts on this (and 2/9).
> 
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index e5216420ec73..e824b0c50425 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -6546,6 +6546,7 @@ struct bpf_link_info {
> >                                         __u32 name_len;
> >                                         __u32 offset; /* offset from func_name */
> >                                         __u64 addr;
> > +                                       __u64 missed;
> >                                 } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
> >                                 struct {
> >                                         __aligned_u64 tp_name;   /* in/out */
> 
> 1) Shall we add missed for all bpf_link_info? Something like:
> 
> diff --git i/include/uapi/linux/bpf.h w/include/uapi/linux/bpf.h
> index 5a39c7a13499..cf0b8b2a8b39 100644
> --- i/include/uapi/linux/bpf.h
> +++ w/include/uapi/linux/bpf.h
> @@ -6465,6 +6465,7 @@ struct bpf_link_info {
>         __u32 type;
>         __u32 id;
>         __u32 prog_id;
> +       __u64 missed;
>         union {
>                 struct {
>                         __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */

hm, there's lot of links under bpf_link_info, can't really tell if
all could gather 'missed' data.. like I don't think we have any for
standard perf event or perf tracepoint

> 
> 2) "missed" doesn't seem to fit well with other information in
> struct bpf_link_info. Other information there are more like stable-ish
> information; while missed is a stat that changes over time. Given we
> have prog_id in bpf_link_info, do we really need "missed" here?

right, OTOH there's recursion_misses/run_time_ns/run_cnt in bpf_prog_info

the bpf link has access to its attach layer, like perf event for kprobe
in perf_link or fprobe for kprobe_multi link... so it's convenient to
reach out from link for these stats and make them available through
bpf_link_info

also there's no other way to get these data for some links

like we could perhaps add some perf event specific interface to retrieve
these stats for kprobes, because we have access to the perf event in user
space, but that's not the case for kprobe_multi link, because there's no
other way to reach the fprobe object

jirka
Song Liu Sept. 8, 2023, 4:49 p.m. UTC | #3
On Fri, Sep 8, 2023 at 4:44 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Thu, Sep 07, 2023 at 11:40:46AM -0700, Song Liu wrote:
> > On Thu, Sep 7, 2023 at 12:13 AM Jiri Olsa <jolsa@kernel.org> wrote:
> > >
> > > Add missed value to kprobe attached through perf link info to
> > > hold the stats of missed kprobe handler execution.
> > >
> > > The kprobe's missed counter gets incremented when kprobe handler
> > > is not executed due to another kprobe running on the same cpu.
> > >
> > > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> >
> > [...]
> >
> > The code looks good to me. But I have two thoughts on this (and 2/9).
> >
> > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > index e5216420ec73..e824b0c50425 100644
> > > --- a/include/uapi/linux/bpf.h
> > > +++ b/include/uapi/linux/bpf.h
> > > @@ -6546,6 +6546,7 @@ struct bpf_link_info {
> > >                                         __u32 name_len;
> > >                                         __u32 offset; /* offset from func_name */
> > >                                         __u64 addr;
> > > +                                       __u64 missed;
> > >                                 } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
> > >                                 struct {
> > >                                         __aligned_u64 tp_name;   /* in/out */
> >
> > 1) Shall we add missed for all bpf_link_info? Something like:
> >
> > diff --git i/include/uapi/linux/bpf.h w/include/uapi/linux/bpf.h
> > index 5a39c7a13499..cf0b8b2a8b39 100644
> > --- i/include/uapi/linux/bpf.h
> > +++ w/include/uapi/linux/bpf.h
> > @@ -6465,6 +6465,7 @@ struct bpf_link_info {
> >         __u32 type;
> >         __u32 id;
> >         __u32 prog_id;
> > +       __u64 missed;
> >         union {
> >                 struct {
> >                         __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */
>
> hm, there's lot of links under bpf_link_info, can't really tell if
> all could gather 'missed' data.. like I don't think we have any for
> standard perf event or perf tracepoint

I thought about the same thing, but didn't get to a conclusion. So
I brought it up for more discussions. Personally, I don't have a
strong preference either way.

>
> >
> > 2) "missed" doesn't seem to fit well with other information in
> > struct bpf_link_info. Other information there are more like stable-ish
> > information; while missed is a stat that changes over time. Given we
> > have prog_id in bpf_link_info, do we really need "missed" here?
>
> right, OTOH there's recursion_misses/run_time_ns/run_cnt in bpf_prog_info

Agreed. bpf_prog_info also contains stats of the program.

> the bpf link has access to its attach layer, like perf event for kprobe
> in perf_link or fprobe for kprobe_multi link... so it's convenient to
> reach out from link for these stats and make them available through
> bpf_link_info
>
> also there's no other way to get these data for some links
>
> like we could perhaps add some perf event specific interface to retrieve
> these stats for kprobes, because we have access to the perf event in user
> space, but that's not the case for kprobe_multi link, because there's no
> other way to reach the fprobe object

Fair enough. I guess this is a good stat to have for the bpf link.

More question about kprobe_multi: Shall we (or can we) collect "missed" for each
individual function we attach to?

Thanks,
Song
Andrii Nakryiko Sept. 8, 2023, 11:22 p.m. UTC | #4
On Fri, Sep 8, 2023 at 4:44 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Thu, Sep 07, 2023 at 11:40:46AM -0700, Song Liu wrote:
> > On Thu, Sep 7, 2023 at 12:13 AM Jiri Olsa <jolsa@kernel.org> wrote:
> > >
> > > Add missed value to kprobe attached through perf link info to
> > > hold the stats of missed kprobe handler execution.
> > >
> > > The kprobe's missed counter gets incremented when kprobe handler
> > > is not executed due to another kprobe running on the same cpu.
> > >
> > > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> >
> > [...]
> >
> > The code looks good to me. But I have two thoughts on this (and 2/9).
> >
> > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > index e5216420ec73..e824b0c50425 100644
> > > --- a/include/uapi/linux/bpf.h
> > > +++ b/include/uapi/linux/bpf.h
> > > @@ -6546,6 +6546,7 @@ struct bpf_link_info {
> > >                                         __u32 name_len;
> > >                                         __u32 offset; /* offset from func_name */
> > >                                         __u64 addr;
> > > +                                       __u64 missed;
> > >                                 } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
> > >                                 struct {
> > >                                         __aligned_u64 tp_name;   /* in/out */
> >
> > 1) Shall we add missed for all bpf_link_info? Something like:
> >
> > diff --git i/include/uapi/linux/bpf.h w/include/uapi/linux/bpf.h
> > index 5a39c7a13499..cf0b8b2a8b39 100644
> > --- i/include/uapi/linux/bpf.h
> > +++ w/include/uapi/linux/bpf.h
> > @@ -6465,6 +6465,7 @@ struct bpf_link_info {
> >         __u32 type;
> >         __u32 id;
> >         __u32 prog_id;
> > +       __u64 missed;
> >         union {
> >                 struct {
> >                         __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */
>
> hm, there's lot of links under bpf_link_info, can't really tell if
> all could gather 'missed' data.. like I don't think we have any for
> standard perf event or perf tracepoint

even if missed for all link types would make sense, we can't add any
field before union, this would be a breaking change

>
> >
> > 2) "missed" doesn't seem to fit well with other information in
> > struct bpf_link_info. Other information there are more like stable-ish
> > information; while missed is a stat that changes over time. Given we
> > have prog_id in bpf_link_info, do we really need "missed" here?
>
> right, OTOH there's recursion_misses/run_time_ns/run_cnt in bpf_prog_info
>
> the bpf link has access to its attach layer, like perf event for kprobe
> in perf_link or fprobe for kprobe_multi link... so it's convenient to
> reach out from link for these stats and make them available through
> bpf_link_info

but what's confusing to me is that missed counter is per-program (at
least in your patch #1), but you report it on  a link. But the same
BPF program can be attached multiple times through independent links.
So each link will report a shared misses count, which is quite
confusing.

Have you thought about counting misses per link instead of per
program? Is it possible?

>
> also there's no other way to get these data for some links
>
> like we could perhaps add some perf event specific interface to retrieve
> these stats for kprobes, because we have access to the perf event in user
> space, but that's not the case for kprobe_multi link, because there's no
> other way to reach the fprobe object
>
> jirka
>
Song Liu Sept. 8, 2023, 11:32 p.m. UTC | #5
On Fri, Sep 8, 2023 at 4:22 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Fri, Sep 8, 2023 at 4:44 AM Jiri Olsa <olsajiri@gmail.com> wrote:
> >
> > On Thu, Sep 07, 2023 at 11:40:46AM -0700, Song Liu wrote:
> > > On Thu, Sep 7, 2023 at 12:13 AM Jiri Olsa <jolsa@kernel.org> wrote:
> > > >
> > > > Add missed value to kprobe attached through perf link info to
> > > > hold the stats of missed kprobe handler execution.
> > > >
> > > > The kprobe's missed counter gets incremented when kprobe handler
> > > > is not executed due to another kprobe running on the same cpu.
> > > >
> > > > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > >
> > > [...]
> > >
> > > The code looks good to me. But I have two thoughts on this (and 2/9).
> > >
> > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > > index e5216420ec73..e824b0c50425 100644
> > > > --- a/include/uapi/linux/bpf.h
> > > > +++ b/include/uapi/linux/bpf.h
> > > > @@ -6546,6 +6546,7 @@ struct bpf_link_info {
> > > >                                         __u32 name_len;
> > > >                                         __u32 offset; /* offset from func_name */
> > > >                                         __u64 addr;
> > > > +                                       __u64 missed;
> > > >                                 } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
> > > >                                 struct {
> > > >                                         __aligned_u64 tp_name;   /* in/out */
> > >
> > > 1) Shall we add missed for all bpf_link_info? Something like:
> > >
> > > diff --git i/include/uapi/linux/bpf.h w/include/uapi/linux/bpf.h
> > > index 5a39c7a13499..cf0b8b2a8b39 100644
> > > --- i/include/uapi/linux/bpf.h
> > > +++ w/include/uapi/linux/bpf.h
> > > @@ -6465,6 +6465,7 @@ struct bpf_link_info {
> > >         __u32 type;
> > >         __u32 id;
> > >         __u32 prog_id;
> > > +       __u64 missed;
> > >         union {
> > >                 struct {
> > >                         __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */
> >
> > hm, there's lot of links under bpf_link_info, can't really tell if
> > all could gather 'missed' data.. like I don't think we have any for
> > standard perf event or perf tracepoint
>
> even if missed for all link types would make sense, we can't add any
> field before union, this would be a breaking change

Right...

It is also tricky to add it to the union, right? We cannot tell whether
the kernel supports missed stats based on sizeof(struct bpf_link_info).
I guess this is also problematic?

Thanks,
Song

[...]
Andrii Nakryiko Sept. 8, 2023, 11:44 p.m. UTC | #6
On Fri, Sep 8, 2023 at 4:33 PM Song Liu <song@kernel.org> wrote:
>
> On Fri, Sep 8, 2023 at 4:22 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Fri, Sep 8, 2023 at 4:44 AM Jiri Olsa <olsajiri@gmail.com> wrote:
> > >
> > > On Thu, Sep 07, 2023 at 11:40:46AM -0700, Song Liu wrote:
> > > > On Thu, Sep 7, 2023 at 12:13 AM Jiri Olsa <jolsa@kernel.org> wrote:
> > > > >
> > > > > Add missed value to kprobe attached through perf link info to
> > > > > hold the stats of missed kprobe handler execution.
> > > > >
> > > > > The kprobe's missed counter gets incremented when kprobe handler
> > > > > is not executed due to another kprobe running on the same cpu.
> > > > >
> > > > > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > > >
> > > > [...]
> > > >
> > > > The code looks good to me. But I have two thoughts on this (and 2/9).
> > > >
> > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > > > index e5216420ec73..e824b0c50425 100644
> > > > > --- a/include/uapi/linux/bpf.h
> > > > > +++ b/include/uapi/linux/bpf.h
> > > > > @@ -6546,6 +6546,7 @@ struct bpf_link_info {
> > > > >                                         __u32 name_len;
> > > > >                                         __u32 offset; /* offset from func_name */
> > > > >                                         __u64 addr;
> > > > > +                                       __u64 missed;
> > > > >                                 } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
> > > > >                                 struct {
> > > > >                                         __aligned_u64 tp_name;   /* in/out */
> > > >
> > > > 1) Shall we add missed for all bpf_link_info? Something like:
> > > >
> > > > diff --git i/include/uapi/linux/bpf.h w/include/uapi/linux/bpf.h
> > > > index 5a39c7a13499..cf0b8b2a8b39 100644
> > > > --- i/include/uapi/linux/bpf.h
> > > > +++ w/include/uapi/linux/bpf.h
> > > > @@ -6465,6 +6465,7 @@ struct bpf_link_info {
> > > >         __u32 type;
> > > >         __u32 id;
> > > >         __u32 prog_id;
> > > > +       __u64 missed;
> > > >         union {
> > > >                 struct {
> > > >                         __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */
> > >
> > > hm, there's lot of links under bpf_link_info, can't really tell if
> > > all could gather 'missed' data.. like I don't think we have any for
> > > standard perf event or perf tracepoint
> >
> > even if missed for all link types would make sense, we can't add any
> > field before union, this would be a breaking change
>
> Right...
>
> It is also tricky to add it to the union, right? We cannot tell whether
> the kernel supports missed stats based on sizeof(struct bpf_link_info).
> I guess this is also problematic?

right, just checking size won't be reliable (it would be if missed is
added to largest substruct of a union). If it's important to know if
kernel reports missed, one would need to do a more proper feature
detection


>
> Thanks,
> Song
>
> [...]
Jiri Olsa Sept. 10, 2023, 6:54 p.m. UTC | #7
On Fri, Sep 08, 2023 at 04:22:05PM -0700, Andrii Nakryiko wrote:
> On Fri, Sep 8, 2023 at 4:44 AM Jiri Olsa <olsajiri@gmail.com> wrote:
> >
> > On Thu, Sep 07, 2023 at 11:40:46AM -0700, Song Liu wrote:
> > > On Thu, Sep 7, 2023 at 12:13 AM Jiri Olsa <jolsa@kernel.org> wrote:
> > > >
> > > > Add missed value to kprobe attached through perf link info to
> > > > hold the stats of missed kprobe handler execution.
> > > >
> > > > The kprobe's missed counter gets incremented when kprobe handler
> > > > is not executed due to another kprobe running on the same cpu.
> > > >
> > > > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > >
> > > [...]
> > >
> > > The code looks good to me. But I have two thoughts on this (and 2/9).
> > >
> > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > > index e5216420ec73..e824b0c50425 100644
> > > > --- a/include/uapi/linux/bpf.h
> > > > +++ b/include/uapi/linux/bpf.h
> > > > @@ -6546,6 +6546,7 @@ struct bpf_link_info {
> > > >                                         __u32 name_len;
> > > >                                         __u32 offset; /* offset from func_name */
> > > >                                         __u64 addr;
> > > > +                                       __u64 missed;
> > > >                                 } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
> > > >                                 struct {
> > > >                                         __aligned_u64 tp_name;   /* in/out */
> > >
> > > 1) Shall we add missed for all bpf_link_info? Something like:
> > >
> > > diff --git i/include/uapi/linux/bpf.h w/include/uapi/linux/bpf.h
> > > index 5a39c7a13499..cf0b8b2a8b39 100644
> > > --- i/include/uapi/linux/bpf.h
> > > +++ w/include/uapi/linux/bpf.h
> > > @@ -6465,6 +6465,7 @@ struct bpf_link_info {
> > >         __u32 type;
> > >         __u32 id;
> > >         __u32 prog_id;
> > > +       __u64 missed;
> > >         union {
> > >                 struct {
> > >                         __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */
> >
> > hm, there's lot of links under bpf_link_info, can't really tell if
> > all could gather 'missed' data.. like I don't think we have any for
> > standard perf event or perf tracepoint
> 
> even if missed for all link types would make sense, we can't add any
> field before union, this would be a breaking change
> 
> >
> > >
> > > 2) "missed" doesn't seem to fit well with other information in
> > > struct bpf_link_info. Other information there are more like stable-ish
> > > information; while missed is a stat that changes over time. Given we
> > > have prog_id in bpf_link_info, do we really need "missed" here?
> >
> > right, OTOH there's recursion_misses/run_time_ns/run_cnt in bpf_prog_info
> >
> > the bpf link has access to its attach layer, like perf event for kprobe
> > in perf_link or fprobe for kprobe_multi link... so it's convenient to
> > reach out from link for these stats and make them available through
> > bpf_link_info
> 
> but what's confusing to me is that missed counter is per-program (at
> least in your patch #1), but you report it on  a link. But the same
> BPF program can be attached multiple times through independent links.
> So each link will report a shared misses count, which is quite
> confusing.
> 
> Have you thought about counting misses per link instead of per
> program? Is it possible?

I think recursion_misses makes sense for both program and link

currently we have recursion_misses per program which I think is
still useful even if the program is attached to multiple links

if the program is attached to multiple links it'd be useful to
have perf link stat as well

it is definitely possible for kprobe_multi link, which IIUC might
not be the main user here, because you can already attach program
to multiple functions

I guess perf_link would benefit more from this stats, but it
looks bit harder to add.. we'd need to add link pointer to
bpf_prog_array_item and add some extra logic, will check

jirka
Jiri Olsa Sept. 10, 2023, 6:54 p.m. UTC | #8
On Fri, Sep 08, 2023 at 09:49:17AM -0700, Song Liu wrote:

SNIP

> > the bpf link has access to its attach layer, like perf event for kprobe
> > in perf_link or fprobe for kprobe_multi link... so it's convenient to
> > reach out from link for these stats and make them available through
> > bpf_link_info
> >
> > also there's no other way to get these data for some links
> >
> > like we could perhaps add some perf event specific interface to retrieve
> > these stats for kprobes, because we have access to the perf event in user
> > space, but that's not the case for kprobe_multi link, because there's no
> > other way to reach the fprobe object
> 
> Fair enough. I guess this is a good stat to have for the bpf link.
> 
> More question about kprobe_multi: Shall we (or can we) collect "missed" for each
> individual function we attach to?

I think it's possible, but we'd need to keep/lookup stats for each
function/addr same way we do for cookies ... so it'd mean bsearch
on each missed path.. which might be not that bad, considering it's
error path instead of executing bpf program.. also it can be optional

jirka
diff mbox series

Patch

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index c1a0a19d80fb..8e12c63a9db7 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -761,7 +761,8 @@  struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name);
 void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp);
 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
 			    u32 *fd_type, const char **buf,
-			    u64 *probe_offset, u64 *probe_addr);
+			    u64 *probe_offset, u64 *probe_addr,
+			    unsigned long *missed);
 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 #else
@@ -801,7 +802,7 @@  static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
 static inline int bpf_get_perf_event_info(const struct perf_event *event,
 					  u32 *prog_id, u32 *fd_type,
 					  const char **buf, u64 *probe_offset,
-					  u64 *probe_addr)
+					  u64 *probe_addr, unsigned long *missed)
 {
 	return -EOPNOTSUPP;
 }
@@ -876,6 +877,7 @@  extern void perf_kprobe_destroy(struct perf_event *event);
 extern int bpf_get_kprobe_info(const struct perf_event *event,
 			       u32 *fd_type, const char **symbol,
 			       u64 *probe_offset, u64 *probe_addr,
+			       unsigned long *missed,
 			       bool perf_type_tracepoint);
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e5216420ec73..e824b0c50425 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6546,6 +6546,7 @@  struct bpf_link_info {
 					__u32 name_len;
 					__u32 offset; /* offset from func_name */
 					__u64 addr;
+					__u64 missed;
 				} kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
 				struct {
 					__aligned_u64 tp_name;   /* in/out */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e25e7e059d7d..0c0c93c87927 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3374,7 +3374,7 @@  static void bpf_perf_link_dealloc(struct bpf_link *link)
 static int bpf_perf_link_fill_common(const struct perf_event *event,
 				     char __user *uname, u32 ulen,
 				     u64 *probe_offset, u64 *probe_addr,
-				     u32 *fd_type)
+				     u32 *fd_type, unsigned long *missed)
 {
 	const char *buf;
 	u32 prog_id;
@@ -3385,7 +3385,7 @@  static int bpf_perf_link_fill_common(const struct perf_event *event,
 		return -EINVAL;
 
 	err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf,
-				      probe_offset, probe_addr);
+				      probe_offset, probe_addr, missed);
 	if (err)
 		return err;
 	if (!uname)
@@ -3408,6 +3408,7 @@  static int bpf_perf_link_fill_common(const struct perf_event *event,
 static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
 				     struct bpf_link_info *info)
 {
+	unsigned long missed;
 	char __user *uname;
 	u64 addr, offset;
 	u32 ulen, type;
@@ -3416,7 +3417,7 @@  static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
 	uname = u64_to_user_ptr(info->perf_event.kprobe.func_name);
 	ulen = info->perf_event.kprobe.name_len;
 	err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
-					&type);
+					&type, &missed);
 	if (err)
 		return err;
 	if (type == BPF_FD_TYPE_KRETPROBE)
@@ -3425,6 +3426,7 @@  static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
 		info->perf_event.type = BPF_PERF_EVENT_KPROBE;
 
 	info->perf_event.kprobe.offset = offset;
+	info->perf_event.kprobe.missed = missed;
 	if (!kallsyms_show_value(current_cred()))
 		addr = 0;
 	info->perf_event.kprobe.addr = addr;
@@ -3444,7 +3446,7 @@  static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
 	uname = u64_to_user_ptr(info->perf_event.uprobe.file_name);
 	ulen = info->perf_event.uprobe.name_len;
 	err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr,
-					&type);
+					&type, NULL);
 	if (err)
 		return err;
 
@@ -3480,7 +3482,7 @@  static int bpf_perf_link_fill_tracepoint(const struct perf_event *event,
 	uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name);
 	ulen = info->perf_event.tracepoint.name_len;
 	info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT;
-	return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL);
+	return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL);
 }
 
 static int bpf_perf_link_fill_perf_event(const struct perf_event *event,
@@ -4813,7 +4815,7 @@  static int bpf_task_fd_query(const union bpf_attr *attr,
 
 		err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
 					      &buf, &probe_offset,
-					      &probe_addr);
+					      &probe_addr, NULL);
 		if (!err)
 			err = bpf_task_fd_query_copy(attr, uattr, prog_id,
 						     fd_type, buf,
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index aec52938c703..a9d8634b503c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2384,7 +2384,8 @@  int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
 
 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
 			    u32 *fd_type, const char **buf,
-			    u64 *probe_offset, u64 *probe_addr)
+			    u64 *probe_offset, u64 *probe_addr,
+			    unsigned long *missed)
 {
 	bool is_tracepoint, is_syscall_tp;
 	struct bpf_prog *prog;
@@ -2419,7 +2420,7 @@  int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
 #ifdef CONFIG_KPROBE_EVENTS
 		if (flags & TRACE_EVENT_FL_KPROBE)
 			err = bpf_get_kprobe_info(event, fd_type, buf,
-						  probe_offset, probe_addr,
+						  probe_offset, probe_addr, missed,
 						  event->attr.type == PERF_TYPE_TRACEPOINT);
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 17c21c0b2dd1..5264fd26f8c7 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1188,6 +1188,12 @@  static const struct file_operations kprobe_events_ops = {
 	.write		= probes_write,
 };
 
+static unsigned long trace_kprobe_missed(struct trace_kprobe *tk)
+{
+	return trace_kprobe_is_return(tk) ?
+		tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed;
+}
+
 /* Probes profiling interfaces */
 static int probes_profile_seq_show(struct seq_file *m, void *v)
 {
@@ -1199,8 +1205,7 @@  static int probes_profile_seq_show(struct seq_file *m, void *v)
 		return 0;
 
 	tk = to_trace_kprobe(ev);
-	nmissed = trace_kprobe_is_return(tk) ?
-		tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed;
+	nmissed = trace_kprobe_missed(tk);
 	seq_printf(m, "  %-44s %15lu %15lu\n",
 		   trace_probe_name(&tk->tp),
 		   trace_kprobe_nhit(tk),
@@ -1546,7 +1551,8 @@  NOKPROBE_SYMBOL(kretprobe_perf_func);
 
 int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
 			const char **symbol, u64 *probe_offset,
-			u64 *probe_addr, bool perf_type_tracepoint)
+			u64 *probe_addr, unsigned long *missed,
+			bool perf_type_tracepoint)
 {
 	const char *pevent = trace_event_name(event->tp_event);
 	const char *group = event->tp_event->class->system;
@@ -1565,6 +1571,8 @@  int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
 	*probe_addr = kallsyms_show_value(current_cred()) ?
 		      (unsigned long)tk->rp.kp.addr : 0;
 	*symbol = tk->symbol;
+	if (missed)
+		*missed = trace_kprobe_missed(tk);
 	return 0;
 }
 #endif	/* CONFIG_PERF_EVENTS */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e5216420ec73..e824b0c50425 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6546,6 +6546,7 @@  struct bpf_link_info {
 					__u32 name_len;
 					__u32 offset; /* offset from func_name */
 					__u64 addr;
+					__u64 missed;
 				} kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
 				struct {
 					__aligned_u64 tp_name;   /* in/out */