diff mbox series

[bpf-next] selftests/bpf: emit top frequent C code lines in veristat

Message ID 20240820152433.777663-1-yatsenko@meta.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series [bpf-next] selftests/bpf: emit top frequent C code lines in veristat | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 7 this patch: 7
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 12 maintainers not CCed: sdf@fomichev.me eddyz87@gmail.com mykolal@fb.com haoluo@google.com jolsa@kernel.org shuah@kernel.org linux-kselftest@vger.kernel.org song@kernel.org yonghong.song@linux.dev kpsingh@kernel.org martin.lau@linux.dev john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 7 this patch: 7
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 7 this patch: 7
netdev/checkpatch warning WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns WARNING: line length of 97 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Mykyta Yatsenko mykyta.yatsenko5@gmail.com Aug. 20, 2024, 3:24 p.m. UTC
From: Mykyta Yatsenko <yatsenko@meta.com>

Production BPF programs are increasing in number of instructions and states
to the point, where optimising verification process for them is necessary
to avoid running into instruction limit. Authors of those BPF programs
need to analyze verifier output, for example, collecting the most
frequent C source code lines to understand which part of the program has
the biggest verification cost.

This patch introduces `--top-lines` and `--include-instructions` flags in
veristat.
`--top-lines=N` makes veristat output N the most popular C sorce code
lines, parsed from verification log. `--include-instructions` enables
printing BPF instructions along with C source code.

Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
---
 tools/testing/selftests/bpf/veristat.c | 160 +++++++++++++++++++++++++
 1 file changed, 160 insertions(+)

Comments

Andrii Nakryiko Aug. 22, 2024, 10:29 p.m. UTC | #1
On Tue, Aug 20, 2024 at 8:24 AM Mykyta Yatsenko
<mykyta.yatsenko5@gmail.com> wrote:
>
> From: Mykyta Yatsenko <yatsenko@meta.com>
>
> Production BPF programs are increasing in number of instructions and states
> to the point, where optimising verification process for them is necessary
> to avoid running into instruction limit. Authors of those BPF programs
> need to analyze verifier output, for example, collecting the most
> frequent C source code lines to understand which part of the program has
> the biggest verification cost.
>
> This patch introduces `--top-lines` and `--include-instructions` flags in
> veristat.
> `--top-lines=N` makes veristat output N the most popular C sorce code
> lines, parsed from verification log. `--include-instructions` enables
> printing BPF instructions along with C source code.

Hm... I think --include-instructions needs a bit more thought to be
useful. Just one assembly instruction isn't all that useful, we should
be thinking in terms of blocks of assembly instruction, probably...
But then not sure how to take that into account when calculating top N
frequencies...

Not sure about all that. For v2, let's drop the assembly instructions
parts and try to get --top-lines logic right. We can then see how it
works in practice and adjust and extend as necessary.

>
> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
> ---
>  tools/testing/selftests/bpf/veristat.c | 160 +++++++++++++++++++++++++
>  1 file changed, 160 insertions(+)
>

Ok, #1 problem is that --top-lines is useless without -vl2, so we
should either check that this is specified. Or maybe better force
verbose log internally without actually emitting it, probably it's
better.

> diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
> index 1ec5c4c47235..977ab54cba83 100644
> --- a/tools/testing/selftests/bpf/veristat.c
> +++ b/tools/testing/selftests/bpf/veristat.c
> @@ -16,10 +16,12 @@
>  #include <sys/stat.h>
>  #include <bpf/libbpf.h>
>  #include <bpf/btf.h>
> +#include <bpf/hashmap.h>

well, great for veristat in kernel repo, but this is libbpf-internal
thing and I'd like to avoid relying on it in veristat to make Github
sync simple.

>  #include <libelf.h>
>  #include <gelf.h>
>  #include <float.h>
>  #include <math.h>
> +#include <linux/err.h>
>
>  #ifndef ARRAY_SIZE
>  #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
> @@ -179,8 +181,16 @@ static struct env {
>         int files_skipped;
>         int progs_processed;
>         int progs_skipped;
> +       int top_lines;
> +       bool include_insn;
>  } env;
>
> +struct line_cnt {
> +       long cnt;
> +       char *line;
> +       char *insn;
> +};
> +
>  static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
>  {
>         if (!env.verbose)
> @@ -206,6 +216,8 @@ const char argp_program_doc[] =
>  enum {
>         OPT_LOG_FIXED = 1000,
>         OPT_LOG_SIZE = 1001,
> +       OPT_TOP_LINES = 1002,
> +       OPT_INCLUDE_INSN = 1003,
>  };
>
>  static const struct argp_option opts[] = {
> @@ -228,6 +240,9 @@ static const struct argp_option opts[] = {
>           "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
>         { "test-reg-invariants", 'r', NULL, 0,
>           "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
> +       { "top-lines", OPT_TOP_LINES, "N", 0, "Emit N the most frequent C source code lines." },

"Emit N most frequent source code lines."

Doesn't have to be C, in general.

maybe let's call it "--top-src-lines" to be a bit more specific?

> +       { "include-instructions", OPT_INCLUDE_INSN, NULL, OPTION_HIDDEN,
> +         "If emitting the most frequent C source code lines, include their BPF instructions." },
>         {},
>  };
>
> @@ -337,6 +352,17 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
>                         return -ENOMEM;
>                 env.filename_cnt++;
>                 break;
> +       case OPT_TOP_LINES:
> +               errno = 0;
> +               env.top_lines = strtol(arg, NULL, 10);
> +               if (errno) {
> +                       fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
> +                       argp_usage(state);
> +               }
> +               break;
> +       case OPT_INCLUDE_INSN:
> +               env.include_insn = true;
> +               break;
>         default:
>                 return ARGP_ERR_UNKNOWN;
>         }
> @@ -817,6 +843,24 @@ static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
>         free(stats);
>  }
>
> +static int line_cnt_cmp(const void *a, const void *b)
> +{
> +       const struct line_cnt **a_cnt = (const struct line_cnt **)a;
> +       const struct line_cnt **b_cnt = (const struct line_cnt **)b;
> +
> +       return (*b_cnt)->cnt - (*a_cnt)->cnt;
> +}
> +
> +static size_t str_hash_fn(long key, void *ctx)
> +{
> +       return str_hash((void *)key);
> +}
> +
> +static bool str_equal_fn(long a, long b, void *ctx)
> +{
> +       return strcmp((void *)a, (void *)b) == 0;
> +}
> +
>  static char verif_log_buf[64 * 1024];
>
>  #define MAX_PARSED_LOG_LINES 100
> @@ -854,6 +898,120 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *
>         return 0;
>  }
>
> +static char *parse_instructions(char *buf, char *buf_end)
> +{
> +       char *start = buf;
> +
> +       while (buf && buf < buf_end && *buf && *buf != ';') {
> +               char *num_end = NULL;
> +
> +               strtol(buf, &num_end, 10);
> +               if (!num_end || *num_end != ':')
> +                       break;
> +
> +               buf = strchr(num_end, '\n');
> +       }
> +
> +       return start == buf ? NULL : strndup(start, buf - start);
> +}
> +
> +static int print_top_lines(char * const buf, size_t buf_sz)
> +{
> +       struct hashmap *lines_map;
> +       struct line_cnt **lines_cnt = NULL;
> +       struct hashmap_entry *entry;
> +       char *buf_end;
> +       char *line;
> +       int err = 0;
> +       int unique_lines;
> +       int bkt;
> +       int i;
> +
> +       if (!buf || !buf_sz)
> +               return -EINVAL;
> +

let's make sure we don't call print_top_lines with not buffer instead ?

> +       buf_end = buf + buf_sz - 1;
> +       *buf_end = '\0';

and buffer should be valid, so no need to zero-terminate it (verifier
guarantees that)

> +       lines_map = hashmap__new(str_hash_fn, str_equal_fn, NULL);
> +       if (IS_ERR(lines_map))
> +               return PTR_ERR(lines_map);
> +
> +       for (char *line_start = buf; line_start < buf_end;) {
> +               char *line_end = strchr(line_start, '\n');

any reason we can't use strtok_r() for this?

> +
> +               if (!line_end)
> +                       line_end = buf_end;
> +
> +               if (*line_start == ';') {

let's check that it starts with "; " with strncmp() and skip space as well?

> +                       struct line_cnt *line_cnt;
> +
> +                       line_start++; /* skip semicolon */
> +                       *line_end = '\0';
> +                       if (hashmap__find(lines_map, line_start, &line_cnt)) {
> +                               line_cnt->cnt++;

so as I mentioned, I'd like to avoid the use of libbpf's hashmap. How
about we just add each string's offset within the buffer into a
(rather long sometimes) array of u32s. Then implement custom
comparator that would compare actual strings within log buffer by its
offset. Sort such indices this way, and then (reusing this comparator)
implement "unique" operation just like std::unique. Then we'll only
need to re-sort indices (but now taking their total counts), and emit
first/last N items.

Basically, keep it cheap by using offsets, but otherwise rely on NlogN
sorting to avoid hashmaps.

> +                       } else {
> +                               char *insn = NULL;
> +
> +                               line_cnt = malloc(sizeof(struct line_cnt));
> +                               if (!line_cnt) {
> +                                       *line_end = '\n';
> +                                       goto cleanup;
> +                               }
> +                               line = strdup(line_start);
> +                               if (!line) {
> +                                       *line_end = '\n';
> +                                       free(line_cnt);
> +                                       goto cleanup;
> +                               }
> +                               if (env.include_insn)
> +                                       insn = parse_instructions(line_end + 1, buf_end);
> +                               line_cnt->insn = insn;
> +                               line_cnt->line = line;
> +                               line_cnt->cnt = 1;
> +                               err = hashmap__add(lines_map, line, line_cnt);
> +                       }
> +                       *line_end = '\n';
> +
> +                       if (err)
> +                               goto cleanup;
> +               }
> +               line_start = line_end + 1;
> +       }
> +       unique_lines = hashmap__size(lines_map);
> +       if (!unique_lines)
> +               goto cleanup;
> +
> +       lines_cnt = calloc(unique_lines, sizeof(struct line_cnt *));
> +       if (!lines_cnt)
> +               goto cleanup;
> +
> +       i = 0;
> +       hashmap__for_each_entry(lines_map, entry, bkt)
> +               lines_cnt[i++] = (struct line_cnt *)entry->value;
> +
> +       qsort(lines_cnt, unique_lines, sizeof(struct line_cnt *), line_cnt_cmp);
> +
> +       printf("Top C source code lines:\n");

nit: there is no need to say "C source code", it's just "source code"

> +       for (i = 0; i  < min(unique_lines, env.top_lines); i++) {
> +               printf("; [Count: %ld] %s\n", lines_cnt[i]->cnt, lines_cnt[i]->line);

[Count: %ld] prefix is super verbose. Let's just emit a number of
occurrences without any extra "Count" text.

BTW, newer verifiers emit file location information now which looks
like " @ test_vmlinux.c:82", it would be nice if we could detect that,
parse it out separately (at the very last moment, during output) and
reformat everything to something like:

123: (test_vmlinux.c:82) <the rest of source code line>

Make sure to use something like %5d for frequency, so at least that
part is nicely aligned

> +               if (env.include_insn)
> +                       printf("%s\n", lines_cnt[i]->insn);
> +       }
> +       printf("\n");
> +
> +cleanup:
> +       hashmap__for_each_entry(lines_map, entry, bkt) {
> +               struct line_cnt *line_cnt = (struct line_cnt *)entry->value;
> +
> +               free(line_cnt->insn);
> +               free(line_cnt->line);
> +               free(line_cnt);
> +       }

we really shouldn't be allocating so much for strings, we have one
huge string and we should be dealing with indices into the buffer

pw-bot: cr

> +       hashmap__free(lines_map);
> +       free(lines_cnt);
> +       return err;
> +}
> +
>  static int guess_prog_type_by_ctx_name(const char *ctx_name,
>                                        enum bpf_prog_type *prog_type,
>                                        enum bpf_attach_type *attach_type)
> @@ -1048,6 +1206,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
>                        filename, prog_name, stats->stats[DURATION],
>                        err ? "failure" : "success", buf);
>         }
> +       if (env.top_lines)
> +               print_top_lines(buf, buf_sz);
>
>         if (verif_log_buf != buf)
>                 free(buf);
> --
> 2.46.0
>
Mykyta Yatsenko Aug. 30, 2024, 8:31 p.m. UTC | #2
On 22/08/2024 23:29, Andrii Nakryiko wrote:
> On Tue, Aug 20, 2024 at 8:24 AM Mykyta Yatsenko
> <mykyta.yatsenko5@gmail.com> wrote:
>> From: Mykyta Yatsenko <yatsenko@meta.com>
>>
>> Production BPF programs are increasing in number of instructions and states
>> to the point, where optimising verification process for them is necessary
>> to avoid running into instruction limit. Authors of those BPF programs
>> need to analyze verifier output, for example, collecting the most
>> frequent C source code lines to understand which part of the program has
>> the biggest verification cost.
>>
>> This patch introduces `--top-lines` and `--include-instructions` flags in
>> veristat.
>> `--top-lines=N` makes veristat output N the most popular C sorce code
>> lines, parsed from verification log. `--include-instructions` enables
>> printing BPF instructions along with C source code.
> Hm... I think --include-instructions needs a bit more thought to be
> useful. Just one assembly instruction isn't all that useful, we should
> be thinking in terms of blocks of assembly instruction, probably...
> But then not sure how to take that into account when calculating top N
> frequencies...
>
> Not sure about all that. For v2, let's drop the assembly instructions
> parts and try to get --top-lines logic right. We can then see how it
> works in practice and adjust and extend as necessary.
--include-instructions outputs a block of the instructions that go after
the source code line in the verifier buffer, an example output:
```
processed 5549 insns (limit 1000000) max_states_per_insn 8 total_states 
226 peak_states 198 mark_read 29

Top C source code lines:
; [Count: 114]  ctx->off = off + *opsize; @ xdp_synproxy_kern.c:260
1033: (71) r5 = *(u8 *)(r5 +0)        ; frame1: 
R5_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 
0xff)) cb
1034: (0c) w4 += w5                   ; frame1: 
R4_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0x100fd,var_off=(0x0; 
0x1ffff)) 
R5_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 
0xff)) cb
1035: (63) *(u32 *)(r2 +28) = r4      ; frame1: R2=fp[0]-64 
R4_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=0x100fd,var_off=(0x0; 
0x1ffff)) cb
1036: (bc) w7 = w4                    ; frame1: 
R4=scalar(id=75,smin=smin32=0,smax=umax=smax32=umax32=0x100fd,var_off=(0x0; 
0x1ffff)) 
R7=scalar(id=75,smin=smin32=0,smax=umax=smax32=umax32=0x100fd,var_off=(0x0; 
0x1ffff)) cb
1037: (b4) w0 = 0                     ; frame1: R0_w=0 cb
; [Count: 108]  for (i = 0; i < 7; i++) @ xdp_synproxy_kern.c:269
1038: (04) w1 += -1                   ; frame1: R1_w=6 cb
1039: (bc) w4 = w7                    ; frame1: 
R4_w=scalar(id=75,smin=smin32=0,smax=umax=smax32=umax32=0x100fd,var_off=(0x0; 
0x1ffff)) 
R7=scalar(id=75,smin=smin32=0,smax=umax=smax32=umax32=0x100fd,var_off=(0x0; 
0x1ffff)) cb
1040: (56) if w1 != 0x0 goto pc+1
; [Count: 106]  if (data + sz >= ctx->data_end) @ xdp_synproxy_kern.c:211
1046: (bf) r5 = r6                    ; frame1: 
R5_w=pkt(id=72,r=0,smin=smin32=0,smax=umax=smax32=umax32=0xfffe,var_off=(0x0; 
0xffff)) 
R6_w=pkt(id=72,r=0,smin=smin32=0,smax=umax=smax32=umax32=0xfffe,var_off=(0x0; 
0xffff)) cb
1047: (07) r5 += 1                    ; frame1: 
R5_w=pkt(id=72,off=1,r=0,smin=smin32=0,smax=umax=smax32=umax32=0xfffe,var_off=(0x0; 
0xffff)) cb
1048: (79) r7 = *(u64 *)(r2 +8)       ; frame1: R2=fp[0]-64 
R7_w=pkt_end() cb
1049: (3d) if r5 >= r7 goto pc-9      ; frame1: 
R5_w=pkt(id=72,off=1,r=2,smin=smin32=0,smax=umax=smax32=umax32=0xfffe,var_off=(0x0; 
0xffff)) R7_w=pkt_end() cb

File                     Program Verdict  Duration (us)  Insns  States  
Peak states
-----------------------  ------------- -------  -------------  -----  
------  -----------
xdp_synproxy_kern.bpf.o  syncookie_tc success          37106   5549     
226          198
xdp_synproxy_kern.bpf.o  syncookie_xdp success          40206   5826     
228          201
-----------------------  ------------- -------  -------------  -----  
------  -----------
Done. Processed 1 files, 0 programs. Skipped 2 files, 0 programs.
```

Thanks for all the findings, I'll apply them for v2.

>> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
>> ---
>>   tools/testing/selftests/bpf/veristat.c | 160 +++++++++++++++++++++++++
>>   1 file changed, 160 insertions(+)
>>
> Ok, #1 problem is that --top-lines is useless without -vl2, so we
> should either check that this is specified. Or maybe better force
> verbose log internally without actually emitting it, probably it's
> better.
>
>> diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
>> index 1ec5c4c47235..977ab54cba83 100644
>> --- a/tools/testing/selftests/bpf/veristat.c
>> +++ b/tools/testing/selftests/bpf/veristat.c
>> @@ -16,10 +16,12 @@
>>   #include <sys/stat.h>
>>   #include <bpf/libbpf.h>
>>   #include <bpf/btf.h>
>> +#include <bpf/hashmap.h>
> well, great for veristat in kernel repo, but this is libbpf-internal
> thing and I'd like to avoid relying on it in veristat to make Github
> sync simple.
>
>>   #include <libelf.h>
>>   #include <gelf.h>
>>   #include <float.h>
>>   #include <math.h>
>> +#include <linux/err.h>
>>
>>   #ifndef ARRAY_SIZE
>>   #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
>> @@ -179,8 +181,16 @@ static struct env {
>>          int files_skipped;
>>          int progs_processed;
>>          int progs_skipped;
>> +       int top_lines;
>> +       bool include_insn;
>>   } env;
>>
>> +struct line_cnt {
>> +       long cnt;
>> +       char *line;
>> +       char *insn;
>> +};
>> +
>>   static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
>>   {
>>          if (!env.verbose)
>> @@ -206,6 +216,8 @@ const char argp_program_doc[] =
>>   enum {
>>          OPT_LOG_FIXED = 1000,
>>          OPT_LOG_SIZE = 1001,
>> +       OPT_TOP_LINES = 1002,
>> +       OPT_INCLUDE_INSN = 1003,
>>   };
>>
>>   static const struct argp_option opts[] = {
>> @@ -228,6 +240,9 @@ static const struct argp_option opts[] = {
>>            "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
>>          { "test-reg-invariants", 'r', NULL, 0,
>>            "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
>> +       { "top-lines", OPT_TOP_LINES, "N", 0, "Emit N the most frequent C source code lines." },
> "Emit N most frequent source code lines."
>
> Doesn't have to be C, in general.
>
> maybe let's call it "--top-src-lines" to be a bit more specific?
>
>> +       { "include-instructions", OPT_INCLUDE_INSN, NULL, OPTION_HIDDEN,
>> +         "If emitting the most frequent C source code lines, include their BPF instructions." },
>>          {},
>>   };
>>
>> @@ -337,6 +352,17 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
>>                          return -ENOMEM;
>>                  env.filename_cnt++;
>>                  break;
>> +       case OPT_TOP_LINES:
>> +               errno = 0;
>> +               env.top_lines = strtol(arg, NULL, 10);
>> +               if (errno) {
>> +                       fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
>> +                       argp_usage(state);
>> +               }
>> +               break;
>> +       case OPT_INCLUDE_INSN:
>> +               env.include_insn = true;
>> +               break;
>>          default:
>>                  return ARGP_ERR_UNKNOWN;
>>          }
>> @@ -817,6 +843,24 @@ static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
>>          free(stats);
>>   }
>>
>> +static int line_cnt_cmp(const void *a, const void *b)
>> +{
>> +       const struct line_cnt **a_cnt = (const struct line_cnt **)a;
>> +       const struct line_cnt **b_cnt = (const struct line_cnt **)b;
>> +
>> +       return (*b_cnt)->cnt - (*a_cnt)->cnt;
>> +}
>> +
>> +static size_t str_hash_fn(long key, void *ctx)
>> +{
>> +       return str_hash((void *)key);
>> +}
>> +
>> +static bool str_equal_fn(long a, long b, void *ctx)
>> +{
>> +       return strcmp((void *)a, (void *)b) == 0;
>> +}
>> +
>>   static char verif_log_buf[64 * 1024];
>>
>>   #define MAX_PARSED_LOG_LINES 100
>> @@ -854,6 +898,120 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *
>>          return 0;
>>   }
>>
>> +static char *parse_instructions(char *buf, char *buf_end)
>> +{
>> +       char *start = buf;
>> +
>> +       while (buf && buf < buf_end && *buf && *buf != ';') {
>> +               char *num_end = NULL;
>> +
>> +               strtol(buf, &num_end, 10);
>> +               if (!num_end || *num_end != ':')
>> +                       break;
>> +
>> +               buf = strchr(num_end, '\n');
>> +       }
>> +
>> +       return start == buf ? NULL : strndup(start, buf - start);
>> +}
>> +
>> +static int print_top_lines(char * const buf, size_t buf_sz)
>> +{
>> +       struct hashmap *lines_map;
>> +       struct line_cnt **lines_cnt = NULL;
>> +       struct hashmap_entry *entry;
>> +       char *buf_end;
>> +       char *line;
>> +       int err = 0;
>> +       int unique_lines;
>> +       int bkt;
>> +       int i;
>> +
>> +       if (!buf || !buf_sz)
>> +               return -EINVAL;
>> +
> let's make sure we don't call print_top_lines with not buffer instead ?
>
>> +       buf_end = buf + buf_sz - 1;
>> +       *buf_end = '\0';
> and buffer should be valid, so no need to zero-terminate it (verifier
> guarantees that)
>
>> +       lines_map = hashmap__new(str_hash_fn, str_equal_fn, NULL);
>> +       if (IS_ERR(lines_map))
>> +               return PTR_ERR(lines_map);
>> +
>> +       for (char *line_start = buf; line_start < buf_end;) {
>> +               char *line_end = strchr(line_start, '\n');
> any reason we can't use strtok_r() for this?
>
>> +
>> +               if (!line_end)
>> +                       line_end = buf_end;
>> +
>> +               if (*line_start == ';') {
> let's check that it starts with "; " with strncmp() and skip space as well?
>
>> +                       struct line_cnt *line_cnt;
>> +
>> +                       line_start++; /* skip semicolon */
>> +                       *line_end = '\0';
>> +                       if (hashmap__find(lines_map, line_start, &line_cnt)) {
>> +                               line_cnt->cnt++;
> so as I mentioned, I'd like to avoid the use of libbpf's hashmap. How
> about we just add each string's offset within the buffer into a
> (rather long sometimes) array of u32s. Then implement custom
> comparator that would compare actual strings within log buffer by its
> offset. Sort such indices this way, and then (reusing this comparator)
> implement "unique" operation just like std::unique. Then we'll only
> need to re-sort indices (but now taking their total counts), and emit
> first/last N items.
>
> Basically, keep it cheap by using offsets, but otherwise rely on NlogN
> sorting to avoid hashmaps.
>
>> +                       } else {
>> +                               char *insn = NULL;
>> +
>> +                               line_cnt = malloc(sizeof(struct line_cnt));
>> +                               if (!line_cnt) {
>> +                                       *line_end = '\n';
>> +                                       goto cleanup;
>> +                               }
>> +                               line = strdup(line_start);
>> +                               if (!line) {
>> +                                       *line_end = '\n';
>> +                                       free(line_cnt);
>> +                                       goto cleanup;
>> +                               }
>> +                               if (env.include_insn)
>> +                                       insn = parse_instructions(line_end + 1, buf_end);
>> +                               line_cnt->insn = insn;
>> +                               line_cnt->line = line;
>> +                               line_cnt->cnt = 1;
>> +                               err = hashmap__add(lines_map, line, line_cnt);
>> +                       }
>> +                       *line_end = '\n';
>> +
>> +                       if (err)
>> +                               goto cleanup;
>> +               }
>> +               line_start = line_end + 1;
>> +       }
>> +       unique_lines = hashmap__size(lines_map);
>> +       if (!unique_lines)
>> +               goto cleanup;
>> +
>> +       lines_cnt = calloc(unique_lines, sizeof(struct line_cnt *));
>> +       if (!lines_cnt)
>> +               goto cleanup;
>> +
>> +       i = 0;
>> +       hashmap__for_each_entry(lines_map, entry, bkt)
>> +               lines_cnt[i++] = (struct line_cnt *)entry->value;
>> +
>> +       qsort(lines_cnt, unique_lines, sizeof(struct line_cnt *), line_cnt_cmp);
>> +
>> +       printf("Top C source code lines:\n");
> nit: there is no need to say "C source code", it's just "source code"
>
>> +       for (i = 0; i  < min(unique_lines, env.top_lines); i++) {
>> +               printf("; [Count: %ld] %s\n", lines_cnt[i]->cnt, lines_cnt[i]->line);
> [Count: %ld] prefix is super verbose. Let's just emit a number of
> occurrences without any extra "Count" text.
>
> BTW, newer verifiers emit file location information now which looks
> like " @ test_vmlinux.c:82", it would be nice if we could detect that,
> parse it out separately (at the very last moment, during output) and
> reformat everything to something like:
>
> 123: (test_vmlinux.c:82) <the rest of source code line>
>
> Make sure to use something like %5d for frequency, so at least that
> part is nicely aligned
>
>> +               if (env.include_insn)
>> +                       printf("%s\n", lines_cnt[i]->insn);
>> +       }
>> +       printf("\n");
>> +
>> +cleanup:
>> +       hashmap__for_each_entry(lines_map, entry, bkt) {
>> +               struct line_cnt *line_cnt = (struct line_cnt *)entry->value;
>> +
>> +               free(line_cnt->insn);
>> +               free(line_cnt->line);
>> +               free(line_cnt);
>> +       }
> we really shouldn't be allocating so much for strings, we have one
> huge string and we should be dealing with indices into the buffer
>
> pw-bot: cr
>
>> +       hashmap__free(lines_map);
>> +       free(lines_cnt);
>> +       return err;
>> +}
>> +
>>   static int guess_prog_type_by_ctx_name(const char *ctx_name,
>>                                         enum bpf_prog_type *prog_type,
>>                                         enum bpf_attach_type *attach_type)
>> @@ -1048,6 +1206,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
>>                         filename, prog_name, stats->stats[DURATION],
>>                         err ? "failure" : "success", buf);
>>          }
>> +       if (env.top_lines)
>> +               print_top_lines(buf, buf_sz);
>>
>>          if (verif_log_buf != buf)
>>                  free(buf);
>> --
>> 2.46.0
>>
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 1ec5c4c47235..977ab54cba83 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -16,10 +16,12 @@ 
 #include <sys/stat.h>
 #include <bpf/libbpf.h>
 #include <bpf/btf.h>
+#include <bpf/hashmap.h>
 #include <libelf.h>
 #include <gelf.h>
 #include <float.h>
 #include <math.h>
+#include <linux/err.h>
 
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
@@ -179,8 +181,16 @@  static struct env {
 	int files_skipped;
 	int progs_processed;
 	int progs_skipped;
+	int top_lines;
+	bool include_insn;
 } env;
 
+struct line_cnt {
+	long cnt;
+	char *line;
+	char *insn;
+};
+
 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
 {
 	if (!env.verbose)
@@ -206,6 +216,8 @@  const char argp_program_doc[] =
 enum {
 	OPT_LOG_FIXED = 1000,
 	OPT_LOG_SIZE = 1001,
+	OPT_TOP_LINES = 1002,
+	OPT_INCLUDE_INSN = 1003,
 };
 
 static const struct argp_option opts[] = {
@@ -228,6 +240,9 @@  static const struct argp_option opts[] = {
 	  "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
 	{ "test-reg-invariants", 'r', NULL, 0,
 	  "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
+	{ "top-lines", OPT_TOP_LINES, "N", 0, "Emit N the most frequent C source code lines." },
+	{ "include-instructions", OPT_INCLUDE_INSN, NULL, OPTION_HIDDEN,
+	  "If emitting the most frequent C source code lines, include their BPF instructions." },
 	{},
 };
 
@@ -337,6 +352,17 @@  static error_t parse_arg(int key, char *arg, struct argp_state *state)
 			return -ENOMEM;
 		env.filename_cnt++;
 		break;
+	case OPT_TOP_LINES:
+		errno = 0;
+		env.top_lines = strtol(arg, NULL, 10);
+		if (errno) {
+			fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
+	case OPT_INCLUDE_INSN:
+		env.include_insn = true;
+		break;
 	default:
 		return ARGP_ERR_UNKNOWN;
 	}
@@ -817,6 +843,24 @@  static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
 	free(stats);
 }
 
+static int line_cnt_cmp(const void *a, const void *b)
+{
+	const struct line_cnt **a_cnt = (const struct line_cnt **)a;
+	const struct line_cnt **b_cnt = (const struct line_cnt **)b;
+
+	return (*b_cnt)->cnt - (*a_cnt)->cnt;
+}
+
+static size_t str_hash_fn(long key, void *ctx)
+{
+	return str_hash((void *)key);
+}
+
+static bool str_equal_fn(long a, long b, void *ctx)
+{
+	return strcmp((void *)a, (void *)b) == 0;
+}
+
 static char verif_log_buf[64 * 1024];
 
 #define MAX_PARSED_LOG_LINES 100
@@ -854,6 +898,120 @@  static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *
 	return 0;
 }
 
+static char *parse_instructions(char *buf, char *buf_end)
+{
+	char *start = buf;
+
+	while (buf && buf < buf_end && *buf && *buf != ';') {
+		char *num_end = NULL;
+
+		strtol(buf, &num_end, 10);
+		if (!num_end || *num_end != ':')
+			break;
+
+		buf = strchr(num_end, '\n');
+	}
+
+	return start == buf ? NULL : strndup(start, buf - start);
+}
+
+static int print_top_lines(char * const buf, size_t buf_sz)
+{
+	struct hashmap *lines_map;
+	struct line_cnt **lines_cnt = NULL;
+	struct hashmap_entry *entry;
+	char *buf_end;
+	char *line;
+	int err = 0;
+	int unique_lines;
+	int bkt;
+	int i;
+
+	if (!buf || !buf_sz)
+		return -EINVAL;
+
+	buf_end = buf + buf_sz - 1;
+	*buf_end = '\0';
+	lines_map = hashmap__new(str_hash_fn, str_equal_fn, NULL);
+	if (IS_ERR(lines_map))
+		return PTR_ERR(lines_map);
+
+	for (char *line_start = buf; line_start < buf_end;) {
+		char *line_end = strchr(line_start, '\n');
+
+		if (!line_end)
+			line_end = buf_end;
+
+		if (*line_start == ';') {
+			struct line_cnt *line_cnt;
+
+			line_start++; /* skip semicolon */
+			*line_end = '\0';
+			if (hashmap__find(lines_map, line_start, &line_cnt)) {
+				line_cnt->cnt++;
+			} else {
+				char *insn = NULL;
+
+				line_cnt = malloc(sizeof(struct line_cnt));
+				if (!line_cnt) {
+					*line_end = '\n';
+					goto cleanup;
+				}
+				line = strdup(line_start);
+				if (!line) {
+					*line_end = '\n';
+					free(line_cnt);
+					goto cleanup;
+				}
+				if (env.include_insn)
+					insn = parse_instructions(line_end + 1, buf_end);
+				line_cnt->insn = insn;
+				line_cnt->line = line;
+				line_cnt->cnt = 1;
+				err = hashmap__add(lines_map, line, line_cnt);
+			}
+			*line_end = '\n';
+
+			if (err)
+				goto cleanup;
+		}
+		line_start = line_end + 1;
+	}
+	unique_lines = hashmap__size(lines_map);
+	if (!unique_lines)
+		goto cleanup;
+
+	lines_cnt = calloc(unique_lines, sizeof(struct line_cnt *));
+	if (!lines_cnt)
+		goto cleanup;
+
+	i = 0;
+	hashmap__for_each_entry(lines_map, entry, bkt)
+		lines_cnt[i++] = (struct line_cnt *)entry->value;
+
+	qsort(lines_cnt, unique_lines, sizeof(struct line_cnt *), line_cnt_cmp);
+
+	printf("Top C source code lines:\n");
+	for (i = 0; i  < min(unique_lines, env.top_lines); i++) {
+		printf("; [Count: %ld] %s\n", lines_cnt[i]->cnt, lines_cnt[i]->line);
+		if (env.include_insn)
+			printf("%s\n", lines_cnt[i]->insn);
+	}
+	printf("\n");
+
+cleanup:
+	hashmap__for_each_entry(lines_map, entry, bkt) {
+		struct line_cnt *line_cnt = (struct line_cnt *)entry->value;
+
+		free(line_cnt->insn);
+		free(line_cnt->line);
+		free(line_cnt);
+	}
+	hashmap__free(lines_map);
+	free(lines_cnt);
+	return err;
+}
+
 static int guess_prog_type_by_ctx_name(const char *ctx_name,
 				       enum bpf_prog_type *prog_type,
 				       enum bpf_attach_type *attach_type)
@@ -1048,6 +1206,8 @@  static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
 		       filename, prog_name, stats->stats[DURATION],
 		       err ? "failure" : "success", buf);
 	}
+	if (env.top_lines)
+		print_top_lines(buf, buf_sz);
 
 	if (verif_log_buf != buf)
 		free(buf);