diff mbox series

[bpf-next,v2] bpf: Cache the last valid build_id.

Message ID 20220224000531.1265030-1-haoluo@google.com (mailing list archive)
State Accepted
Commit ceac059ed4fd8abc0940f799dd5133275011e244
Delegated to: BPF
Headers show
Series [bpf-next,v2] bpf: Cache the last valid build_id. | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2 this patch: 2
netdev/cc_maintainers warning 5 maintainers not CCed: kpsingh@kernel.org john.fastabend@gmail.com kafai@fb.com yhs@fb.com netdev@vger.kernel.org
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 7 this patch: 7
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 33 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next success VM_Test

Commit Message

Hao Luo Feb. 24, 2022, 12:05 a.m. UTC
For binaries that are statically linked, consecutive stack frames are
likely to be in the same VMA and therefore have the same build id.
As an optimization for this case, we can cache the previous frame's
VMA, if the new frame has the same VMA as the previous one, reuse the
previous one's build id. We are holding the MM locks as reader across
the entire loop, so we don't need to worry about VMA going away.

Tested through "stacktrace_build_id" and "stacktrace_build_id_nmi" in
test_progs.

Suggested-by: Greg Thelen <gthelen@google.com>
Signed-off-by: Hao Luo <haoluo@google.com>
---
 kernel/bpf/stackmap.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

Comments

Andrii Nakryiko Feb. 24, 2022, 12:10 a.m. UTC | #1
On Wed, Feb 23, 2022 at 4:05 PM Hao Luo <haoluo@google.com> wrote:
>
> For binaries that are statically linked, consecutive stack frames are
> likely to be in the same VMA and therefore have the same build id.
> As an optimization for this case, we can cache the previous frame's
> VMA, if the new frame has the same VMA as the previous one, reuse the
> previous one's build id. We are holding the MM locks as reader across
> the entire loop, so we don't need to worry about VMA going away.
>
> Tested through "stacktrace_build_id" and "stacktrace_build_id_nmi" in
> test_progs.
>
> Suggested-by: Greg Thelen <gthelen@google.com>
> Signed-off-by: Hao Luo <haoluo@google.com>
> ---

LGTM. Can you share performance numbers before and after?

Acked-by: Andrii Nakryiko <andrii@kernel.org>

>  kernel/bpf/stackmap.c | 12 +++++++++++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index 22c8ae94e4c1..38bdfcd06f55 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -132,7 +132,8 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>         int i;
>         struct mmap_unlock_irq_work *work = NULL;
>         bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
> -       struct vm_area_struct *vma;
> +       struct vm_area_struct *vma, *prev_vma = NULL;
> +       const char *prev_build_id;
>
>         /* If the irq_work is in use, fall back to report ips. Same
>          * fallback is used for kernel stack (!user) on a stackmap with
> @@ -150,6 +151,12 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>         }
>
>         for (i = 0; i < trace_nr; i++) {
> +               if (range_in_vma(prev_vma, ips[i], ips[i])) {
> +                       vma = prev_vma;
> +                       memcpy(id_offs[i].build_id, prev_build_id,
> +                              BUILD_ID_SIZE_MAX);
> +                       goto build_id_valid;
> +               }
>                 vma = find_vma(current->mm, ips[i]);
>                 if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
>                         /* per entry fall back to ips */
> @@ -158,9 +165,12 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
>                         memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
>                         continue;
>                 }
> +build_id_valid:
>                 id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
>                         - vma->vm_start;
>                 id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
> +               prev_vma = vma;
> +               prev_build_id = id_offs[i].build_id;
>         }
>         bpf_mmap_unlock_mm(work, current->mm);
>  }
> --
> 2.35.1.473.g83b2b277ed-goog
>
Hao Luo Feb. 24, 2022, 12:33 a.m. UTC | #2
On Wed, Feb 23, 2022 at 4:11 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Wed, Feb 23, 2022 at 4:05 PM Hao Luo <haoluo@google.com> wrote:
> >
> > For binaries that are statically linked, consecutive stack frames are
> > likely to be in the same VMA and therefore have the same build id.
> > As an optimization for this case, we can cache the previous frame's
> > VMA, if the new frame has the same VMA as the previous one, reuse the
> > previous one's build id. We are holding the MM locks as reader across
> > the entire loop, so we don't need to worry about VMA going away.
> >
> > Tested through "stacktrace_build_id" and "stacktrace_build_id_nmi" in
> > test_progs.
> >
> > Suggested-by: Greg Thelen <gthelen@google.com>
> > Signed-off-by: Hao Luo <haoluo@google.com>
> > ---
>
> LGTM. Can you share performance numbers before and after?
>
> Acked-by: Andrii Nakryiko <andrii@kernel.org>
>

Thanks Andrii.

On a real-world workload, we observed that 66% of cpu cycles in
__bpf_get_stackid() were spent on build_id_parse() and find_vma().
This was before.

We haven't evaluated the performance with this patch yet. This
optimization seems straightforward, so we plan to upstream it first
and then retest.
Andrii Nakryiko Feb. 24, 2022, 1:10 a.m. UTC | #3
On Wed, Feb 23, 2022 at 4:33 PM Hao Luo <haoluo@google.com> wrote:
>
> On Wed, Feb 23, 2022 at 4:11 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Wed, Feb 23, 2022 at 4:05 PM Hao Luo <haoluo@google.com> wrote:
> > >
> > > For binaries that are statically linked, consecutive stack frames are
> > > likely to be in the same VMA and therefore have the same build id.
> > > As an optimization for this case, we can cache the previous frame's
> > > VMA, if the new frame has the same VMA as the previous one, reuse the
> > > previous one's build id. We are holding the MM locks as reader across
> > > the entire loop, so we don't need to worry about VMA going away.
> > >
> > > Tested through "stacktrace_build_id" and "stacktrace_build_id_nmi" in
> > > test_progs.
> > >
> > > Suggested-by: Greg Thelen <gthelen@google.com>
> > > Signed-off-by: Hao Luo <haoluo@google.com>
> > > ---
> >
> > LGTM. Can you share performance numbers before and after?
> >
> > Acked-by: Andrii Nakryiko <andrii@kernel.org>
> >
>
> Thanks Andrii.
>
> On a real-world workload, we observed that 66% of cpu cycles in
> __bpf_get_stackid() were spent on build_id_parse() and find_vma().
> This was before.
>
> We haven't evaluated the performance with this patch yet. This
> optimization seems straightforward, so we plan to upstream it first
> and then retest.

Ok, once it lands upstream, I'd really appreciate if you can retest
and update us with numbers. Thanks!
Hao Luo Feb. 24, 2022, 1:11 a.m. UTC | #4
On Wed, Feb 23, 2022 at 5:10 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Wed, Feb 23, 2022 at 4:33 PM Hao Luo <haoluo@google.com> wrote:
> >
> > On Wed, Feb 23, 2022 at 4:11 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Wed, Feb 23, 2022 at 4:05 PM Hao Luo <haoluo@google.com> wrote:
> > > >
> > > > For binaries that are statically linked, consecutive stack frames are
> > > > likely to be in the same VMA and therefore have the same build id.
> > > > As an optimization for this case, we can cache the previous frame's
> > > > VMA, if the new frame has the same VMA as the previous one, reuse the
> > > > previous one's build id. We are holding the MM locks as reader across
> > > > the entire loop, so we don't need to worry about VMA going away.
> > > >
> > > > Tested through "stacktrace_build_id" and "stacktrace_build_id_nmi" in
> > > > test_progs.
> > > >
> > > > Suggested-by: Greg Thelen <gthelen@google.com>
> > > > Signed-off-by: Hao Luo <haoluo@google.com>
> > > > ---
> > >
> > > LGTM. Can you share performance numbers before and after?
> > >
> > > Acked-by: Andrii Nakryiko <andrii@kernel.org>
> > >
> >
> > Thanks Andrii.
> >
> > On a real-world workload, we observed that 66% of cpu cycles in
> > __bpf_get_stackid() were spent on build_id_parse() and find_vma().
> > This was before.
> >
> > We haven't evaluated the performance with this patch yet. This
> > optimization seems straightforward, so we plan to upstream it first
> > and then retest.
>
> Ok, once it lands upstream, I'd really appreciate if you can retest
> and update us with numbers. Thanks!

Sure, will do that.
Song Liu Feb. 24, 2022, 2:31 a.m. UTC | #5
On Wed, Feb 23, 2022 at 4:05 PM Hao Luo <haoluo@google.com> wrote:
>
> For binaries that are statically linked, consecutive stack frames are
> likely to be in the same VMA and therefore have the same build id.
> As an optimization for this case, we can cache the previous frame's
> VMA, if the new frame has the same VMA as the previous one, reuse the
> previous one's build id. We are holding the MM locks as reader across
> the entire loop, so we don't need to worry about VMA going away.
>
> Tested through "stacktrace_build_id" and "stacktrace_build_id_nmi" in
> test_progs.
>
> Suggested-by: Greg Thelen <gthelen@google.com>
> Signed-off-by: Hao Luo <haoluo@google.com>

Nice optimization!

Acked-by: Song Liu <songliubraving@fb.com>
Namhyung Kim Feb. 24, 2022, 6:13 p.m. UTC | #6
Hello,

On Wed, Feb 23, 2022 at 6:31 PM Song Liu <song@kernel.org> wrote:
>
> On Wed, Feb 23, 2022 at 4:05 PM Hao Luo <haoluo@google.com> wrote:
> >
> > For binaries that are statically linked, consecutive stack frames are
> > likely to be in the same VMA and therefore have the same build id.
> > As an optimization for this case, we can cache the previous frame's
> > VMA, if the new frame has the same VMA as the previous one, reuse the
> > previous one's build id. We are holding the MM locks as reader across
> > the entire loop, so we don't need to worry about VMA going away.
> >
> > Tested through "stacktrace_build_id" and "stacktrace_build_id_nmi" in
> > test_progs.
> >
> > Suggested-by: Greg Thelen <gthelen@google.com>
> > Signed-off-by: Hao Luo <haoluo@google.com>

Acked-by: Namhyung Kim <namhyung@kernel.org>

Thanks,
Namhyung
Pasha Tatashin Feb. 25, 2022, 8:43 p.m. UTC | #7
On 2/23/22 19:05, Hao Luo wrote:
> For binaries that are statically linked, consecutive stack frames are
> likely to be in the same VMA and therefore have the same build id.
> As an optimization for this case, we can cache the previous frame's
> VMA, if the new frame has the same VMA as the previous one, reuse the
> previous one's build id. We are holding the MM locks as reader across
> the entire loop, so we don't need to worry about VMA going away.
> 
> Tested through "stacktrace_build_id" and "stacktrace_build_id_nmi" in
> test_progs.
> 
> Suggested-by: Greg Thelen <gthelen@google.com>
> Signed-off-by: Hao Luo <haoluo@google.com>

Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>

Thanks,
Pasha
patchwork-bot+netdevbpf@kernel.org Feb. 28, 2022, 5:20 p.m. UTC | #8
Hello:

This patch was applied to bpf/bpf-next.git (master)
by Daniel Borkmann <daniel@iogearbox.net>:

On Wed, 23 Feb 2022 16:05:31 -0800 you wrote:
> For binaries that are statically linked, consecutive stack frames are
> likely to be in the same VMA and therefore have the same build id.
> As an optimization for this case, we can cache the previous frame's
> VMA, if the new frame has the same VMA as the previous one, reuse the
> previous one's build id. We are holding the MM locks as reader across
> the entire loop, so we don't need to worry about VMA going away.
> 
> [...]

Here is the summary with links:
  - [bpf-next,v2] bpf: Cache the last valid build_id.
    https://git.kernel.org/bpf/bpf-next/c/ceac059ed4fd

You are awesome, thank you!
Hao Luo March 11, 2022, 1:16 a.m. UTC | #9
On Fri, Feb 25, 2022 at 12:43 PM Pasha Tatashin
<pasha.tatashin@soleen.com> wrote:
>
> On 2/23/22 19:05, Hao Luo wrote:
> > For binaries that are statically linked, consecutive stack frames are
> > likely to be in the same VMA and therefore have the same build id.
> > As an optimization for this case, we can cache the previous frame's
> > VMA, if the new frame has the same VMA as the previous one, reuse the
> > previous one's build id. We are holding the MM locks as reader across
> > the entire loop, so we don't need to worry about VMA going away.
> >
> > Tested through "stacktrace_build_id" and "stacktrace_build_id_nmi" in
> > test_progs.
> >
> > Suggested-by: Greg Thelen <gthelen@google.com>
> > Signed-off-by: Hao Luo <haoluo@google.com>
>
> Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
>

An update with performance numbers. Thanks to Blake Jones for
collecting the stats:

In a production workload, with BPF probes sampling stack trace, we see
the following changes:

 - stack_map_get_build_id_offset() is taking 70% of the time of
__bpf_get_stackid(); it was 80% before.

 - find_get_page() and find_vma() together are taking 75% of the time
of stack_map_get_build_id_offset(); it was 83% before.

Note the call chain is

__bpf_get_stackid()
  -> stack_map_get_build_id_offset()
    -> find_get_page()
    -> find_vma()

> Thanks,
> Pasha
Andrii Nakryiko March 11, 2022, 7:11 p.m. UTC | #10
On Thu, Mar 10, 2022 at 5:16 PM Hao Luo <haoluo@google.com> wrote:
>
> On Fri, Feb 25, 2022 at 12:43 PM Pasha Tatashin
> <pasha.tatashin@soleen.com> wrote:
> >
> > On 2/23/22 19:05, Hao Luo wrote:
> > > For binaries that are statically linked, consecutive stack frames are
> > > likely to be in the same VMA and therefore have the same build id.
> > > As an optimization for this case, we can cache the previous frame's
> > > VMA, if the new frame has the same VMA as the previous one, reuse the
> > > previous one's build id. We are holding the MM locks as reader across
> > > the entire loop, so we don't need to worry about VMA going away.
> > >
> > > Tested through "stacktrace_build_id" and "stacktrace_build_id_nmi" in
> > > test_progs.
> > >
> > > Suggested-by: Greg Thelen <gthelen@google.com>
> > > Signed-off-by: Hao Luo <haoluo@google.com>
> >
> > Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> >
>
> An update with performance numbers. Thanks to Blake Jones for
> collecting the stats:
>
> In a production workload, with BPF probes sampling stack trace, we see
> the following changes:
>
>  - stack_map_get_build_id_offset() is taking 70% of the time of
> __bpf_get_stackid(); it was 80% before.

Great, thanks for following up with updated numbers!

>
>  - find_get_page() and find_vma() together are taking 75% of the time
> of stack_map_get_build_id_offset(); it was 83% before.
>
> Note the call chain is
>
> __bpf_get_stackid()
>   -> stack_map_get_build_id_offset()
>     -> find_get_page()
>     -> find_vma()
>
> > Thanks,
> > Pasha
diff mbox series

Patch

diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 22c8ae94e4c1..38bdfcd06f55 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -132,7 +132,8 @@  static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 	int i;
 	struct mmap_unlock_irq_work *work = NULL;
 	bool irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma, *prev_vma = NULL;
+	const char *prev_build_id;
 
 	/* If the irq_work is in use, fall back to report ips. Same
 	 * fallback is used for kernel stack (!user) on a stackmap with
@@ -150,6 +151,12 @@  static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 	}
 
 	for (i = 0; i < trace_nr; i++) {
+		if (range_in_vma(prev_vma, ips[i], ips[i])) {
+			vma = prev_vma;
+			memcpy(id_offs[i].build_id, prev_build_id,
+			       BUILD_ID_SIZE_MAX);
+			goto build_id_valid;
+		}
 		vma = find_vma(current->mm, ips[i]);
 		if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
 			/* per entry fall back to ips */
@@ -158,9 +165,12 @@  static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 			memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
 			continue;
 		}
+build_id_valid:
 		id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
 			- vma->vm_start;
 		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
+		prev_vma = vma;
+		prev_build_id = id_offs[i].build_id;
 	}
 	bpf_mmap_unlock_mm(work, current->mm);
 }