[bpf-next,v2,1/2] bpf: add runtime stats, max cost

Message ID 20221215043217.81368-1-xiangxia.m.yue@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Series [bpf-next,v2,1/2] bpf: add runtime stats, max cost

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1719 this patch: 1719
netdev/cc_maintainers success CCed 12 of 12 maintainers
netdev/build_clang success Errors and warnings before: 162 this patch: 162
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1716 this patch: 1716
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 138 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-19 fail Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-36 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on s390x with gcc

Commit Message

Tonghao Zhang Dec. 15, 2022, 4:32 a.m. UTC
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

Users can now enable the sysctl kernel.bpf_stats_enabled to fetch
run_time_ns and run_cnt, which makes it easy to calculate a bpf prog's
average runtime.

In some cases, however, the max cost of a single bpf prog invocation
is more useful: it helps answer whether there was a burst of system
load or high CPU usage. This patch introduces an update-stats helper.

$ bpftool --json --pretty p s
   ...
   "run_max_cost_ns": 313367

Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Song Liu <song@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Hou Tao <houtao1@huawei.com>
---
v2: fix build warning
---
 include/linux/filter.h   | 29 ++++++++++++++++++++++-------
 include/uapi/linux/bpf.h |  1 +
 kernel/bpf/syscall.c     | 10 +++++++++-
 kernel/bpf/trampoline.c  | 10 +---------
 4 files changed, 33 insertions(+), 17 deletions(-)

Comments

Yonghong Song Dec. 16, 2022, 5:39 a.m. UTC | #1
On 12/14/22 8:32 PM, xiangxia.m.yue@gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> 
> Users can now enable the sysctl kernel.bpf_stats_enabled to fetch
> run_time_ns and run_cnt, which makes it easy to calculate a bpf prog's
> average runtime.
>
> In some cases, however, the max cost of a single bpf prog invocation
> is more useful: it helps answer whether there was a burst of system
> load or high CPU usage. This patch introduces an update-stats helper.

I am not 100% sure how this single max value will be useful
in general. A particular max_run_time_ns, if much bigger than the
average, could be an outlier due to preemption/softirq etc.
What you really need might be a trend of run_time over time
to capture the burst. You could do this by taking a snapshot of
run_time_ns/run_cnt periodically and plotting the trend of the average
run_time_ns, which might correlate with other system activity.
Maybe I missed some use cases for max_run_time_ns...
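
For illustration, a minimal sketch of such a snapshot loop (the prog
id 92 is hypothetical and jq is assumed to be available):

    while sleep 1; do
        bpftool --json prog show id 92 | \
            jq -r '"\(now) \(.run_time_ns) \(.run_cnt)"'
    done >> samples.txt

Deltas between successive run_time_ns/run_cnt samples then give the
per-interval average run_time_ns trend described above.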

> 
> $ bpftool --json --pretty p s
>     ...
>     "run_max_cost_ns": 313367
> 
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Andrii Nakryiko <andrii@kernel.org>
> Cc: Martin KaFai Lau <martin.lau@linux.dev>
> Cc: Song Liu <song@kernel.org>
> Cc: Yonghong Song <yhs@fb.com>
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: KP Singh <kpsingh@kernel.org>
> Cc: Stanislav Fomichev <sdf@google.com>
> Cc: Hao Luo <haoluo@google.com>
> Cc: Jiri Olsa <jolsa@kernel.org>
> Cc: Hou Tao <houtao1@huawei.com>
> ---
> v2: fix build warning
> ---
>   include/linux/filter.h   | 29 ++++++++++++++++++++++-------
>   include/uapi/linux/bpf.h |  1 +
>   kernel/bpf/syscall.c     | 10 +++++++++-
>   kernel/bpf/trampoline.c  | 10 +---------
>   4 files changed, 33 insertions(+), 17 deletions(-)
> 
[...]
Tonghao Zhang Dec. 16, 2022, 9:05 a.m. UTC | #2
On Fri, Dec 16, 2022 at 1:40 PM Yonghong Song <yhs@meta.com> wrote:
>
>
>
> On 12/14/22 8:32 PM, xiangxia.m.yue@gmail.com wrote:
> > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> > Users can now enable the sysctl kernel.bpf_stats_enabled to fetch
> > run_time_ns and run_cnt, which makes it easy to calculate a bpf prog's
> > average runtime.
> >
> > In some cases, however, the max cost of a single bpf prog invocation
> > is more useful: it helps answer whether there was a burst of system
> > load or high CPU usage. This patch introduces an update-stats helper.
>
> I am not 100% sure how this single max value will be useful
> in general. A particular max_run_time_ns, if much bigger than the
> average, could be an outlier due to preemption/softirq etc.
> What you really need might be a trend of run_time over time
> to capture the burst. You could do this by taking a snapshot of
Hi
If the bpf prog is invoked frequently, the average run_time_ns/run_cnt
may not increase much even when there are expensive invocations. The
max cost value shows that there was at least one high-cost run of the
bpf prog. We should pay attention to the worst-case cost of a bpf
prog, especially when it is much higher than the average.
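
(As an illustrative calculation: a single 300us outlier among one
million runs that average 1us raises the overall average by only about
0.3ns, invisible in run_time_ns/run_cnt, while run_max_cost_ns records
it directly.)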

> run_time_ns/run_cnt periodically and plotting the trend of the average
> run_time_ns, which might correlate with other system activity.
> Maybe I missed some use cases for max_run_time_ns...
>
> >
> > $ bpftool --json --pretty p s
> >     ...
> >     "run_max_cost_ns": 313367
> >
> > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > Cc: Alexei Starovoitov <ast@kernel.org>
> > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > Cc: Andrii Nakryiko <andrii@kernel.org>
> > Cc: Martin KaFai Lau <martin.lau@linux.dev>
> > Cc: Song Liu <song@kernel.org>
> > Cc: Yonghong Song <yhs@fb.com>
> > Cc: John Fastabend <john.fastabend@gmail.com>
> > Cc: KP Singh <kpsingh@kernel.org>
> > Cc: Stanislav Fomichev <sdf@google.com>
> > Cc: Hao Luo <haoluo@google.com>
> > Cc: Jiri Olsa <jolsa@kernel.org>
> > Cc: Hou Tao <houtao1@huawei.com>
> > ---
> > v2: fix build warning
> > ---
> >   include/linux/filter.h   | 29 ++++++++++++++++++++++-------
> >   include/uapi/linux/bpf.h |  1 +
> >   kernel/bpf/syscall.c     | 10 +++++++++-
> >   kernel/bpf/trampoline.c  | 10 +---------
> >   4 files changed, 33 insertions(+), 17 deletions(-)
> >
> [...]
Daniel Borkmann Dec. 16, 2022, 4:06 p.m. UTC | #3
On 12/16/22 10:05 AM, Tonghao Zhang wrote:
> On Fri, Dec 16, 2022 at 1:40 PM Yonghong Song <yhs@meta.com> wrote:
>> On 12/14/22 8:32 PM, xiangxia.m.yue@gmail.com wrote:
>>> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>>>
>>> Users can now enable the sysctl kernel.bpf_stats_enabled to fetch
>>> run_time_ns and run_cnt, which makes it easy to calculate a bpf prog's
>>> average runtime.
>>>
>>> In some cases, however, the max cost of a single bpf prog invocation
>>> is more useful: it helps answer whether there was a burst of system
>>> load or high CPU usage. This patch introduces an update-stats helper.
>>
>> I am not 100% sure how this single max value will be useful
>> in general. A particular max_run_time_ns, if much bigger than the
>> average, could be an outlier due to preemption/softirq etc.
>> What you really need might be a trend of run_time over time
>> to capture the burst. You could do this by taking a snapshot of
> Hi
> If the bpf prog is invoked frequently, the average run_time_ns/run_cnt
> may not increase much even when there are expensive invocations. The
> max cost value shows that there was at least one high-cost run of the
> bpf prog. We should pay attention to the worst-case cost of a bpf
> prog, especially when it is much higher than the average.

But then again, see Yonghong's comment with regard to outliers. I
think what you are probably asking for is something like tracking
p50/p90/p99 run_time_ns numbers over time to get a better picture. Not
sure how a single max cost would help, really...
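
(As a rough sketch of that idea: given one run-time sample per line in
a file, approximate percentiles can be computed with standard tools;
durations.txt is hypothetical and the samples would have to be
collected separately, e.g. via fentry/fexit on the prog:

    sort -n durations.txt | awk '{ a[NR] = $1 } END {
        print "p50", a[int(NR * 0.50)];
        print "p90", a[int(NR * 0.90)];
        print "p99", a[int(NR * 0.99)] }'
)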

>> run_time_ns/run_cnt periodically and plotting the trend of the average
>> run_time_ns, which might correlate with other system activity.
>> Maybe I missed some use cases for max_run_time_ns...
>>
>>>
>>> $ bpftool --json --pretty p s
>>>      ...
>>>      "run_max_cost_ns": 313367
>>>
>>> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>>> Cc: Alexei Starovoitov <ast@kernel.org>
>>> Cc: Daniel Borkmann <daniel@iogearbox.net>
>>> Cc: Andrii Nakryiko <andrii@kernel.org>
>>> Cc: Martin KaFai Lau <martin.lau@linux.dev>
>>> Cc: Song Liu <song@kernel.org>
>>> Cc: Yonghong Song <yhs@fb.com>
>>> Cc: John Fastabend <john.fastabend@gmail.com>
>>> Cc: KP Singh <kpsingh@kernel.org>
>>> Cc: Stanislav Fomichev <sdf@google.com>
>>> Cc: Hao Luo <haoluo@google.com>
>>> Cc: Jiri Olsa <jolsa@kernel.org>
>>> Cc: Hou Tao <houtao1@huawei.com>
>>> ---
>>> v2: fix build warning
>>> ---
>>>    include/linux/filter.h   | 29 ++++++++++++++++++++++-------
>>>    include/uapi/linux/bpf.h |  1 +
>>>    kernel/bpf/syscall.c     | 10 +++++++++-
>>>    kernel/bpf/trampoline.c  | 10 +---------
>>>    4 files changed, 33 insertions(+), 17 deletions(-)
>>>
>> [...]
> 
> 
>
Tonghao Zhang Dec. 17, 2022, 3:37 p.m. UTC | #4
On Sat, Dec 17, 2022 at 12:07 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 12/16/22 10:05 AM, Tonghao Zhang wrote:
> > On Fri, Dec 16, 2022 at 1:40 PM Yonghong Song <yhs@meta.com> wrote:
> >> On 12/14/22 8:32 PM, xiangxia.m.yue@gmail.com wrote:
> >>> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >>>
> >>> Users can now enable the sysctl kernel.bpf_stats_enabled to fetch
> >>> run_time_ns and run_cnt, which makes it easy to calculate a bpf prog's
> >>> average runtime.
> >>>
> >>> In some cases, however, the max cost of a single bpf prog invocation
> >>> is more useful: it helps answer whether there was a burst of system
> >>> load or high CPU usage. This patch introduces an update-stats helper.
> >>
> >> I am not 100% sure how this single max value will be useful
> >> in general. A particular max_run_time_ns, if much bigger than the
> >> average, could be an outlier due to preemption/softirq etc.
> >> What you really need might be a trend of run_time over time
> >> to capture the burst. You could do this by taking a snapshot of
> > Hi
> > If the bpf prog is invoked frequently, the average run_time_ns/run_cnt
> > may not increase much even when there are expensive invocations. The
> > max cost value shows that there was at least one high-cost run of the
> > bpf prog. We should pay attention to the worst-case cost of a bpf
> > prog, especially when it is much higher than the average.
>
> But then again, see Yonghong's comment with regard to outliers. I
> think what you are probably asking for is something like tracking
> p50/p90/p99 run_time_ns numbers over time to get a better picture. Not
> sure how a single max cost would help, really...
What I am asking is: is there a high CPU cost in the bpf prog? If the
bpf prog runs frequently, run_time_ns/cnt is not what we want, because
fetching the bpf runtime stats via syscall at a high rate would itself
add CPU load, so we can't poll them frequently. That is why I need
this max cost value. Yonghong is right that a value much bigger than
the average could be an outlier due to preemption/softirq etc. But I
think there is another possible reason: the bpf prog itself may be bad
enough to cause the issue, or it may invoke a bpf helper that takes a
lot of CPU. Either way, this can help us debug the bpf prog and know
the max cost it takes. If needed, we can update the commit message and
send a v3.

> >> run_time_ns/run_cnt periodically and plotting the trend of the average
> >> run_time_ns, which might correlate with other system activity.
> >> Maybe I missed some use cases for max_run_time_ns...
> >>
> >>>
> >>> $ bpftool --json --pretty p s
> >>>      ...
> >>>      "run_max_cost_ns": 313367
> >>>
> >>> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >>> Cc: Alexei Starovoitov <ast@kernel.org>
> >>> Cc: Daniel Borkmann <daniel@iogearbox.net>
> >>> Cc: Andrii Nakryiko <andrii@kernel.org>
> >>> Cc: Martin KaFai Lau <martin.lau@linux.dev>
> >>> Cc: Song Liu <song@kernel.org>
> >>> Cc: Yonghong Song <yhs@fb.com>
> >>> Cc: John Fastabend <john.fastabend@gmail.com>
> >>> Cc: KP Singh <kpsingh@kernel.org>
> >>> Cc: Stanislav Fomichev <sdf@google.com>
> >>> Cc: Hao Luo <haoluo@google.com>
> >>> Cc: Jiri Olsa <jolsa@kernel.org>
> >>> Cc: Hou Tao <houtao1@huawei.com>
> >>> ---
> >>> v2: fix build warning
> >>> ---
> >>>    include/linux/filter.h   | 29 ++++++++++++++++++++++-------
> >>>    include/uapi/linux/bpf.h |  1 +
> >>>    kernel/bpf/syscall.c     | 10 +++++++++-
> >>>    kernel/bpf/trampoline.c  | 10 +---------
> >>>    4 files changed, 33 insertions(+), 17 deletions(-)
> >>>
> >> [...]
> >
> >
> >
>
Song Liu Dec. 19, 2022, 8:04 a.m. UTC | #5
On Sat, Dec 17, 2022 at 7:38 AM Tonghao Zhang <xiangxia.m.yue@gmail.com> wrote:
>
> On Sat, Dec 17, 2022 at 12:07 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
> >
> > On 12/16/22 10:05 AM, Tonghao Zhang wrote:
> > > On Fri, Dec 16, 2022 at 1:40 PM Yonghong Song <yhs@meta.com> wrote:
> > >> On 12/14/22 8:32 PM, xiangxia.m.yue@gmail.com wrote:
> > >>> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > >>>
> > >>> Users can now enable the sysctl kernel.bpf_stats_enabled to fetch
> > >>> run_time_ns and run_cnt, which makes it easy to calculate a bpf prog's
> > >>> average runtime.
> > >>>
> > >>> In some cases, however, the max cost of a single bpf prog invocation
> > >>> is more useful: it helps answer whether there was a burst of system
> > >>> load or high CPU usage. This patch introduces an update-stats helper.
> > >>
> > >> I am not 100% sure how this single max value will be useful
> > >> in general. A particular max_run_time_ns, if much bigger than the
> > >> average, could be an outlier due to preemption/softirq etc.
> > >> What you really need might be a trend of run_time over time
> > >> to capture the burst. You could do this by taking a snapshot of
> > > Hi
> > > If the bpf prog is invoked frequently, the average run_time_ns/run_cnt
> > > may not increase much even when there are expensive invocations. The
> > > max cost value shows that there was at least one high-cost run of the
> > > bpf prog. We should pay attention to the worst-case cost of a bpf
> > > prog, especially when it is much higher than the average.
> >
> > But then again, see Yonghong's comment with regard to outliers. I
> > think what you are probably asking for is something like tracking
> > p50/p90/p99 run_time_ns numbers over time to get a better picture. Not
> > sure how a single max cost would help, really...
> What I am asking is: is there a high CPU cost in the bpf prog? If the
> bpf prog runs frequently, run_time_ns/cnt is not what we want, because
> fetching the bpf runtime stats via syscall at a high rate would itself
> add CPU load, so we can't poll them frequently. That is why I need
> this max cost value. Yonghong is right that a value much bigger than
> the average could be an outlier due to preemption/softirq etc. But I
> think there is another possible reason: the bpf prog itself may be bad
> enough to cause the issue, or it may invoke a bpf helper that takes a
> lot of CPU. Either way, this can help us debug the bpf prog and know
> the max cost it takes. If needed, we can update the commit message and
> send a v3.

kernel.bpf_stats_enabled is a relatively lightweight monitoring interface.
One of the use cases is to enable it for a few seconds periodically, so
we can get an overview of all BPF programs in the system.

While max time cost might be useful in some debugging, I don't think
we should add it with kernel.bpf_stats_enabled. Otherwise, we can
argue p50/p90/p99 are also useful in some cases, and some other
metrics are useful in some other cases.  These metrics together will
make kernel.bpf_stats_enabled too expensive for the use case above.

Since the use case is for debugging, have you considered using
some other BPF programs to profile the target BPF program?
Please refer to "bpftool prog profile" or "perf stat -b" for
examples of similar solutions. We may need to revise the following
check in bpf_check_attach_target() to make this work for some
scenarios:

                if (tgt_prog->type == prog->type) {
                        /* Cannot fentry/fexit another fentry/fexit program.
                         * Cannot attach program extension to another extension.
                         * It's ok to attach fentry/fexit to extension program.
                         */
                        bpf_log(log, "Cannot recursively attach\n");
                        return -EINVAL;
                }

Thanks,
Song
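
(For reference, minimal sketches of the suggested commands; the prog
id 92 is hypothetical:

    # count cycles/instructions spent in one bpf prog for 10 seconds
    bpftool prog profile id 92 duration 10 cycles instructions

    # or sample it with perf stat, printing counts every second
    perf stat -b 92 -e cycles -I 1000
)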
Martin KaFai Lau Dec. 19, 2022, 6:24 p.m. UTC | #6
On 12/19/22 12:04 AM, Song Liu wrote:
> While max time cost might be useful in some debugging, I don't think
> we should add it with kernel.bpf_stats_enabled. Otherwise, we can
> argue p50/p90/p99 are also useful in some cases, and some other
> metrics are useful in some other cases.  These metrics together will
> make kernel.bpf_stats_enabled too expensive for the use case above.
> 
> Since the use case is for debugging, have you considered using
> some other BPF programs to profile the target BPF program?
> Please refer to "bpftool prog profile" or "perf stat -b" for
> examples of similar solutions.
+1.  Please consider the other suggested solutions, which can achieve
the same in a more flexible way.

Patch

diff --git a/include/linux/filter.h b/include/linux/filter.h
index bf701976056e..886b65fcd4ac 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -556,6 +556,7 @@  struct bpf_prog_stats {
 	u64_stats_t cnt;
 	u64_stats_t nsecs;
 	u64_stats_t misses;
+	u64_stats_t max_cost;
 	struct u64_stats_sync syncp;
 } __aligned(2 * sizeof(u64));
 
@@ -578,6 +579,26 @@  typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx,
 					  unsigned int (*bpf_func)(const void *,
 								   const struct bpf_insn *));
 
+static inline void bpf_prog_update_stats(const struct bpf_prog *prog, u64 start)
+{
+	struct bpf_prog_stats *stats;
+	unsigned long flags;
+	u64 run_time, max_cost;
+
+	stats = this_cpu_ptr(prog->stats);
+	flags = u64_stats_update_begin_irqsave(&stats->syncp);
+
+	run_time =  sched_clock() - start;
+	u64_stats_inc(&stats->cnt);
+	u64_stats_add(&stats->nsecs, run_time);
+
+	max_cost = u64_stats_read(&stats->max_cost);
+	if (max_cost < run_time)
+		u64_stats_set(&stats->max_cost, run_time);
+
+	u64_stats_update_end_irqrestore(&stats->syncp, flags);
+}
+
 static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
 					  const void *ctx,
 					  bpf_dispatcher_fn dfunc)
@@ -586,16 +607,10 @@  static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
 
 	cant_migrate();
 	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
-		struct bpf_prog_stats *stats;
 		u64 start = sched_clock();
-		unsigned long flags;
 
 		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
-		stats = this_cpu_ptr(prog->stats);
-		flags = u64_stats_update_begin_irqsave(&stats->syncp);
-		u64_stats_inc(&stats->cnt);
-		u64_stats_add(&stats->nsecs, sched_clock() - start);
-		u64_stats_update_end_irqrestore(&stats->syncp, flags);
+		bpf_prog_update_stats(prog, start);
 	} else {
 		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
 	}
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 464ca3f01fe7..da4d1f2d7bc2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6259,6 +6259,7 @@  struct bpf_prog_info {
 	__u32 verified_insns;
 	__u32 attach_btf_obj_id;
 	__u32 attach_btf_id;
+	__u64 run_max_cost_ns;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 64131f88c553..06439b09863d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2105,6 +2105,7 @@  struct bpf_prog_kstats {
 	u64 nsecs;
 	u64 cnt;
 	u64 misses;
+	u64 max_cost;
 };
 
 void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog)
@@ -2122,12 +2123,13 @@  static void bpf_prog_get_stats(const struct bpf_prog *prog,
 			       struct bpf_prog_kstats *stats)
 {
 	u64 nsecs = 0, cnt = 0, misses = 0;
+	u64 max_cost = 0;
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
 		const struct bpf_prog_stats *st;
 		unsigned int start;
-		u64 tnsecs, tcnt, tmisses;
+		u64 tnsecs, tcnt, tmisses, tmax_cost;
 
 		st = per_cpu_ptr(prog->stats, cpu);
 		do {
@@ -2135,14 +2137,17 @@  static void bpf_prog_get_stats(const struct bpf_prog *prog,
 			tnsecs = u64_stats_read(&st->nsecs);
 			tcnt = u64_stats_read(&st->cnt);
 			tmisses = u64_stats_read(&st->misses);
+			tmax_cost = u64_stats_read(&st->max_cost);
 		} while (u64_stats_fetch_retry(&st->syncp, start));
 		nsecs += tnsecs;
 		cnt += tcnt;
 		misses += tmisses;
+		max_cost = max(max_cost, tmax_cost);
 	}
 	stats->nsecs = nsecs;
 	stats->cnt = cnt;
 	stats->misses = misses;
+	stats->max_cost = max_cost;
 }
 
 #ifdef CONFIG_PROC_FS
@@ -2162,6 +2167,7 @@  static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 		   "prog_id:\t%u\n"
 		   "run_time_ns:\t%llu\n"
 		   "run_cnt:\t%llu\n"
+		   "run_max_cost_ns:\t%llu\n"
 		   "recursion_misses:\t%llu\n"
 		   "verified_insns:\t%u\n",
 		   prog->type,
@@ -2171,6 +2177,7 @@  static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 		   prog->aux->id,
 		   stats.nsecs,
 		   stats.cnt,
+		   stats.max_cost,
 		   stats.misses,
 		   prog->aux->verified_insns);
 }
@@ -3962,6 +3969,7 @@  static int bpf_prog_get_info_by_fd(struct file *file,
 	info.run_time_ns = stats.nsecs;
 	info.run_cnt = stats.cnt;
 	info.recursion_misses = stats.misses;
+	info.run_max_cost_ns = stats.max_cost;
 
 	info.verified_insns = prog->aux->verified_insns;
 
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index d6395215b849..4ddad462562e 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -882,8 +882,6 @@  static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tram
 static void notrace update_prog_stats(struct bpf_prog *prog,
 				      u64 start)
 {
-	struct bpf_prog_stats *stats;
-
 	if (static_branch_unlikely(&bpf_stats_enabled_key) &&
 	    /* static_key could be enabled in __bpf_prog_enter*
 	     * and disabled in __bpf_prog_exit*.
@@ -891,13 +889,7 @@  static void notrace update_prog_stats(struct bpf_prog *prog,
 	     * Hence check that 'start' is valid.
 	     */
 	    start > NO_START_TIME) {
-		unsigned long flags;
-
-		stats = this_cpu_ptr(prog->stats);
-		flags = u64_stats_update_begin_irqsave(&stats->syncp);
-		u64_stats_inc(&stats->cnt);
-		u64_stats_add(&stats->nsecs, sched_clock() - start);
-		u64_stats_update_end_irqrestore(&stats->syncp, flags);
+		bpf_prog_update_stats(prog, start);
 	}
 }