diff mbox series

[bpf-next,2/3] bpf: inline bpf_get_branch_snapshot() helper

Message ID 20240321180501.734779-3-andrii@kernel.org (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series Inline two LBR-related helpers | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 951 this patch: 951
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 8 maintainers not CCed: haoluo@google.com john.fastabend@gmail.com eddyz87@gmail.com sdf@google.com kpsingh@kernel.org yonghong.song@linux.dev martin.lau@linux.dev jolsa@kernel.org
netdev/build_clang success Errors and warnings before: 957 this patch: 957
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 968 this patch: 968
netdev/checkpatch warning CHECK: multiple assignments should be avoided WARNING: line length of 102 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 99 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc

Commit Message

Andrii Nakryiko March 21, 2024, 6:05 p.m. UTC
Inline bpf_get_branch_snapshot() helper using architecture-agnostic
inline BPF code which calls directly into underlying callback of
perf_snapshot_branch_stack static call. This callback is set early
during kernel initialization and is never updated or reset, so it's ok
to fetch actual implementation using static_call_query() and call
directly into it.

This change eliminates a full function call and saves one LBR entry
in PERF_SAMPLE_BRANCH_ANY LBR mode.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 kernel/bpf/verifier.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

Comments

Jiri Olsa March 21, 2024, 9:08 p.m. UTC | #1
On Thu, Mar 21, 2024 at 11:05:00AM -0700, Andrii Nakryiko wrote:
> Inline bpf_get_branch_snapshot() helper using architecture-agnostic
> inline BPF code which calls directly into underlying callback of
> perf_snapshot_branch_stack static call. This callback is set early
> during kernel initialization and is never updated or reset, so it's ok
> to fetch actual implementation using static_call_query() and call
> directly into it.
> 
> This change eliminates a full function call and saves one LBR entry
> in PERF_SAMPLE_BRANCH_ANY LBR mode.
> 
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>  kernel/bpf/verifier.c | 37 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 37 insertions(+)
> 
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index de7813947981..4fb6c468e199 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -20130,6 +20130,43 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
>  			goto next_insn;
>  		}
>  
> +		/* Implement bpf_get_branch_snapshot inline. */
> +		if (prog->jit_requested && BITS_PER_LONG == 64 &&
> +		    insn->imm == BPF_FUNC_get_branch_snapshot) {
> +			/* We are dealing with the following func protos:
> +			 * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags);
> +			 * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt);
> +			 */
> +			const u32 br_entry_size = sizeof(struct perf_branch_entry);
> +
> +			/* if (unlikely(flags)) return -EINVAL */
> +			insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 5);

nit, you moved the flags check on top, which I think makes sense and
we should do it in bpf_get_branch_snapshot as well to keep it same

jirka

> +			/* transform size (bytes) into entry_cnt */
> +			insn_buf[1] = BPF_ALU32_IMM(BPF_DIV, BPF_REG_2, br_entry_size);
> +			/* call perf_snapshot_branch_stack implementation */
> +			insn_buf[2] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack));
> +			/* if (entry_cnt == 0) return -ENOENT */
> +			insn_buf[3] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4);
> +			/* return entry_cnt * sizeof(struct perf_branch_entry) */
> +			insn_buf[4] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size);
> +			insn_buf[5] = BPF_JMP_A(3);
> +			/* return -EINVAL; */
> +			insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
> +			insn_buf[7] = BPF_JMP_A(1);
> +			/* return -ENOENT; */
> +			insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT);
> +			cnt = 9;
> +
> +			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
> +			if (!new_prog)
> +				return -ENOMEM;
> +
> +			delta    += cnt - 1;
> +			env->prog = prog = new_prog;
> +			insn      = new_prog->insnsi + i + delta;
> +			continue;
> +		}
> +
>  		/* Implement bpf_kptr_xchg inline */
>  		if (prog->jit_requested && BITS_PER_LONG == 64 &&
>  		    insn->imm == BPF_FUNC_kptr_xchg &&
> -- 
> 2.43.0
> 
>
Andrii Nakryiko March 21, 2024, 9:27 p.m. UTC | #2
On Thu, Mar 21, 2024 at 2:08 PM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Thu, Mar 21, 2024 at 11:05:00AM -0700, Andrii Nakryiko wrote:
> > Inline bpf_get_branch_snapshot() helper using architecture-agnostic
> > inline BPF code which calls directly into underlying callback of
> > perf_snapshot_branch_stack static call. This callback is set early
> > during kernel initialization and is never updated or reset, so it's ok
> > to fetch actual implementation using static_call_query() and call
> > directly into it.
> >
> > This change eliminates a full function call and saves one LBR entry
> > in PERF_SAMPLE_BRANCH_ANY LBR mode.
> >
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
> >  kernel/bpf/verifier.c | 37 +++++++++++++++++++++++++++++++++++++
> >  1 file changed, 37 insertions(+)
> >
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index de7813947981..4fb6c468e199 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -20130,6 +20130,43 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
> >                       goto next_insn;
> >               }
> >
> > +             /* Implement bpf_get_branch_snapshot inline. */
> > +             if (prog->jit_requested && BITS_PER_LONG == 64 &&
> > +                 insn->imm == BPF_FUNC_get_branch_snapshot) {
> > +                     /* We are dealing with the following func protos:
> > +                      * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags);
> > +                      * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt);
> > +                      */
> > +                     const u32 br_entry_size = sizeof(struct perf_branch_entry);
> > +
> > +                     /* if (unlikely(flags)) return -EINVAL */
> > +                     insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 5);
>
> nit, you moved the flags check on top, which I think makes sense and
> we should do it in bpf_get_branch_snapshot as well to keep it same
>

Here I could ensure that the branch won't be taken in the common case,
so if we are to do the same in the BPF helper itself, we should use
unlikely() and check that the compiler actually honored it. I can add
that in the next revision.

> jirka
>
> > +                     /* transform size (bytes) into entry_cnt */
> > +                     insn_buf[1] = BPF_ALU32_IMM(BPF_DIV, BPF_REG_2, br_entry_size);
> > +                     /* call perf_snapshot_branch_stack implementation */
> > +                     insn_buf[2] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack));
> > +                     /* if (entry_cnt == 0) return -ENOENT */
> > +                     insn_buf[3] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4);
> > +                     /* return entry_cnt * sizeof(struct perf_branch_entry) */
> > +                     insn_buf[4] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size);
> > +                     insn_buf[5] = BPF_JMP_A(3);
> > +                     /* return -EINVAL; */
> > +                     insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
> > +                     insn_buf[7] = BPF_JMP_A(1);
> > +                     /* return -ENOENT; */
> > +                     insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT);
> > +                     cnt = 9;
> > +
> > +                     new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
> > +                     if (!new_prog)
> > +                             return -ENOMEM;
> > +
> > +                     delta    += cnt - 1;
> > +                     env->prog = prog = new_prog;
> > +                     insn      = new_prog->insnsi + i + delta;
> > +                     continue;
> > +             }
> > +
> >               /* Implement bpf_kptr_xchg inline */
> >               if (prog->jit_requested && BITS_PER_LONG == 64 &&
> >                   insn->imm == BPF_FUNC_kptr_xchg &&
> > --
> > 2.43.0
> >
> >
diff mbox series

Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index de7813947981..4fb6c468e199 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20130,6 +20130,43 @@  static int do_misc_fixups(struct bpf_verifier_env *env)
 			goto next_insn;
 		}
 
+		/* Implement bpf_get_branch_snapshot inline. */
+		if (prog->jit_requested && BITS_PER_LONG == 64 &&
+		    insn->imm == BPF_FUNC_get_branch_snapshot) {
+			/* We are dealing with the following func protos:
+			 * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags);
+			 * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt);
+			 */
+			const u32 br_entry_size = sizeof(struct perf_branch_entry);
+
+			/* if (unlikely(flags)) return -EINVAL */
+			insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 5);
+			/* transform size (bytes) into entry_cnt */
+			insn_buf[1] = BPF_ALU32_IMM(BPF_DIV, BPF_REG_2, br_entry_size);
+			/* call perf_snapshot_branch_stack implementation */
+			insn_buf[2] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack));
+			/* if (entry_cnt == 0) return -ENOENT */
+			insn_buf[3] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4);
+			/* return entry_cnt * sizeof(struct perf_branch_entry) */
+			insn_buf[4] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size);
+			insn_buf[5] = BPF_JMP_A(3);
+			/* return -EINVAL; */
+			insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
+			insn_buf[7] = BPF_JMP_A(1);
+			/* return -ENOENT; */
+			insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT);
+			cnt = 9;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			continue;
+		}
+
 		/* Implement bpf_kptr_xchg inline */
 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
 		    insn->imm == BPF_FUNC_kptr_xchg &&