diff mbox series

[bpf-next,v3] selftests/bpf: emit top frequent code lines in veristat

Message ID 20240930231522.58650-1-mykyta.yatsenko5@gmail.com (mailing list archive)
State Accepted
Commit 9502a7de5a61bec3bda841a830560c5d6d40ecac
Delegated to: BPF
Headers show
Series [bpf-next,v3] selftests/bpf: emit top frequent code lines in veristat | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 9 this patch: 9
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 12 maintainers not CCed: sdf@fomichev.me eddyz87@gmail.com mykolal@fb.com haoluo@google.com jolsa@kernel.org shuah@kernel.org linux-kselftest@vger.kernel.org song@kernel.org yonghong.song@linux.dev kpsingh@kernel.org martin.lau@linux.dev john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 7 this patch: 7
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 7 this patch: 7
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 97 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc

Commit Message

Mykyta Yatsenko Sept. 30, 2024, 11:15 p.m. UTC
From: Mykyta Yatsenko <yatsenko@meta.com>

Production BPF programs are increasing in number of instructions and states
to the point, where optimising verification process for them is necessary
to avoid running into instruction limit. Authors of those BPF programs
need to analyze verifier output, for example, collecting the most
frequent source code lines to understand which part of the program has
the biggest verification cost.

This patch introduces `--top-src-lines` flag in veristat.
`--top-src-lines=N` makes veristat output N the most popular sorce code
lines, parsed from verification log.

An example of output:
```
sudo ./veristat  --top-src-lines=2   bpf_flow.bpf.o
Processing 'bpf_flow.bpf.o'...
Top source lines (_dissect):
    4: (bpf_helpers.h:161)	asm volatile("r1 = %[ctx]\n\t"
    4: (bpf_flow.c:155)	if (iph && iph->ihl == 5 &&
...
```

Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
---
 tools/testing/selftests/bpf/veristat.c | 127 ++++++++++++++++++++++++-
 1 file changed, 125 insertions(+), 2 deletions(-)

Comments

Andrii Nakryiko Oct. 1, 2024, 5:38 p.m. UTC | #1
On Mon, Sep 30, 2024 at 4:15 PM Mykyta Yatsenko
<mykyta.yatsenko5@gmail.com> wrote:
>
> From: Mykyta Yatsenko <yatsenko@meta.com>
>
> Production BPF programs are increasing in number of instructions and states
> to the point, where optimising verification process for them is necessary
> to avoid running into instruction limit. Authors of those BPF programs
> need to analyze verifier output, for example, collecting the most
> frequent source code lines to understand which part of the program has
> the biggest verification cost.
>
> This patch introduces `--top-src-lines` flag in veristat.
> `--top-src-lines=N` makes veristat output N the most popular sorce code
> lines, parsed from verification log.
>
> An example of output:
> ```
> sudo ./veristat  --top-src-lines=2   bpf_flow.bpf.o
> Processing 'bpf_flow.bpf.o'...
> Top source lines (_dissect):
>     4: (bpf_helpers.h:161)      asm volatile("r1 = %[ctx]\n\t"
>     4: (bpf_flow.c:155) if (iph && iph->ihl == 5 &&
> ...
> ```
>
> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
> ---
>  tools/testing/selftests/bpf/veristat.c | 127 ++++++++++++++++++++++++-
>  1 file changed, 125 insertions(+), 2 deletions(-)
>

LGTM, I played with it locally, works nicely, thanks!

I did a few tweaks before applying, see below for details.

> diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
> index 1ec5c4c47235..977c5eca56f7 100644
> --- a/tools/testing/selftests/bpf/veristat.c
> +++ b/tools/testing/selftests/bpf/veristat.c
> @@ -179,6 +179,7 @@ static struct env {
>         int files_skipped;
>         int progs_processed;
>         int progs_skipped;
> +       int top_src_lines;
>  } env;
>
>  static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
> @@ -206,6 +207,7 @@ const char argp_program_doc[] =
>  enum {
>         OPT_LOG_FIXED = 1000,
>         OPT_LOG_SIZE = 1001,
> +       OPT_TOP_SRC_LINES = 1002,
>  };
>
>  static const struct argp_option opts[] = {
> @@ -228,6 +230,7 @@ static const struct argp_option opts[] = {
>           "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
>         { "test-reg-invariants", 'r', NULL, 0,
>           "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
> +       { "top-src-lines", OPT_TOP_SRC_LINES, "N", 0, "Emit N most frequent source code lines" },

I added 'S' as a short name, I found --top-src-lines too much pain to type :)

>         {},
>  };
>
> @@ -337,6 +340,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
>                         return -ENOMEM;
>                 env.filename_cnt++;
>                 break;
> +       case OPT_TOP_SRC_LINES:
> +               errno = 0;
> +               env.top_src_lines = strtol(arg, NULL, 10);
> +               if (errno) {
> +                       fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
> +                       argp_usage(state);
> +               }
> +               break;
>         default:
>                 return ARGP_ERR_UNKNOWN;
>         }
> @@ -854,6 +865,115 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *
>         return 0;
>  }
>
> +struct line_cnt {
> +       char *line;
> +       int cnt;
> +};
> +
> +static int str_cmp(const void *a, const void *b)
> +{
> +       const char **str1 = (const char **)a;
> +       const char **str2 = (const char **)b;
> +
> +       return strcmp(*str1, *str2);
> +}
> +
> +static int line_cnt_cmp(const void *a, const void *b)
> +{
> +       const struct line_cnt *a_cnt = (const struct line_cnt *)a;
> +       const struct line_cnt *b_cnt = (const struct line_cnt *)b;
> +
> +       return b_cnt->cnt - a_cnt->cnt;

for ordering stability, I added a fallback to line_cnt->line string
comparison, if counts are equal

> +}
> +

[...]

> +       printf("Top source lines (%s):\n", prog_name);
> +       for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
> +               const char *src_code = freq[i].line;
> +               const char *src_line = NULL;
> +               char *split = strrchr(freq[i].line, '@');
> +
> +               if (split) {
> +                       src_line = split + 1;
> +
> +                       while (*src_line && isspace(*src_line))
> +                               src_line++;
> +
> +                       while (split > src_code && isspace(*split))
> +                               split--;
> +                       *split = '\0';
> +               }
> +
> +               if (src_line)
> +                       printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line, src_code);
> +               else
> +                       printf("%5d: %s\n", freq[i].cnt, src_code);
> +       }
> +

added printf("\n") to visually separate the output a bit

> +cleanup:
> +       free(freq);
> +       free(lines);
> +       return err;
> +}
> +
>  static int guess_prog_type_by_ctx_name(const char *ctx_name,
>                                        enum bpf_prog_type *prog_type,
>                                        enum bpf_attach_type *attach_type)
> @@ -1009,13 +1129,14 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
>         stats = &env.prog_stats[env.prog_stat_cnt++];
>         memset(stats, 0, sizeof(*stats));
>
> -       if (env.verbose) {
> +       if (env.verbose || env.top_src_lines > 0) {
>                 buf_sz = env.log_size ? env.log_size : 16 * 1024 * 1024;
>                 buf = malloc(buf_sz);
>                 if (!buf)
>                         return -ENOMEM;
>                 /* ensure we always request stats */
> -               log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0);
> +               log_level = (env.top_src_lines > 0 && env.log_level == 0 ? 2 : env.log_level)
> +                       | 4 | (env.log_fixed ? 8 : 0);

this felt a bit too crowded, so I switched this to one extra if, and
left original log_level assignment untouched

>         } else {
>                 buf = verif_log_buf;
>                 buf_sz = sizeof(verif_log_buf);
> @@ -1048,6 +1169,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
>                        filename, prog_name, stats->stats[DURATION],
>                        err ? "failure" : "success", buf);
>         }
> +       if (env.top_src_lines > 0)
> +               print_top_src_lines(buf, buf_sz, stats->prog_name);
>
>         if (verif_log_buf != buf)
>                 free(buf);
> --
> 2.46.2
>
patchwork-bot+netdevbpf@kernel.org Oct. 1, 2024, 5:40 p.m. UTC | #2
Hello:

This patch was applied to bpf/bpf-next.git (master)
by Andrii Nakryiko <andrii@kernel.org>:

On Tue,  1 Oct 2024 00:15:22 +0100 you wrote:
> From: Mykyta Yatsenko <yatsenko@meta.com>
> 
> Production BPF programs are increasing in number of instructions and states
> to the point, where optimising verification process for them is necessary
> to avoid running into instruction limit. Authors of those BPF programs
> need to analyze verifier output, for example, collecting the most
> frequent source code lines to understand which part of the program has
> the biggest verification cost.
> 
> [...]

Here is the summary with links:
  - [bpf-next,v3] selftests/bpf: emit top frequent code lines in veristat
    https://git.kernel.org/bpf/bpf-next/c/9502a7de5a61

You are awesome, thank you!
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 1ec5c4c47235..977c5eca56f7 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -179,6 +179,7 @@  static struct env {
 	int files_skipped;
 	int progs_processed;
 	int progs_skipped;
+	int top_src_lines;
 } env;
 
 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
@@ -206,6 +207,7 @@  const char argp_program_doc[] =
 enum {
 	OPT_LOG_FIXED = 1000,
 	OPT_LOG_SIZE = 1001,
+	OPT_TOP_SRC_LINES = 1002,
 };
 
 static const struct argp_option opts[] = {
@@ -228,6 +230,7 @@  static const struct argp_option opts[] = {
 	  "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
 	{ "test-reg-invariants", 'r', NULL, 0,
 	  "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
+	{ "top-src-lines", OPT_TOP_SRC_LINES, "N", 0, "Emit N most frequent source code lines" },
 	{},
 };
 
@@ -337,6 +340,14 @@  static error_t parse_arg(int key, char *arg, struct argp_state *state)
 			return -ENOMEM;
 		env.filename_cnt++;
 		break;
+	case OPT_TOP_SRC_LINES:
+		errno = 0;
+		env.top_src_lines = strtol(arg, NULL, 10);
+		if (errno) {
+			fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
 	default:
 		return ARGP_ERR_UNKNOWN;
 	}
@@ -854,6 +865,115 @@  static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *
 	return 0;
 }
 
+struct line_cnt {
+	char *line;
+	int cnt;
+};
+
+static int str_cmp(const void *a, const void *b)
+{
+	const char **str1 = (const char **)a;
+	const char **str2 = (const char **)b;
+
+	return strcmp(*str1, *str2);
+}
+
+static int line_cnt_cmp(const void *a, const void *b)
+{
+	const struct line_cnt *a_cnt = (const struct line_cnt *)a;
+	const struct line_cnt *b_cnt = (const struct line_cnt *)b;
+
+	return b_cnt->cnt - a_cnt->cnt;
+}
+
+static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name)
+{
+	int lines_cap = 0;
+	int lines_size = 0;
+	char **lines = NULL;
+	char *line = NULL;
+	char *state;
+	struct line_cnt *freq = NULL;
+	struct line_cnt *cur;
+	int unique_lines;
+	int err = 0;
+	int i;
+
+	while ((line = strtok_r(line ? NULL : buf, "\n", &state))) {
+		if (strncmp(line, "; ", 2) != 0)
+			continue;
+		line += 2;
+
+		if (lines_size == lines_cap) {
+			char **tmp;
+
+			lines_cap = max(16, lines_cap * 2);
+			tmp = realloc(lines, lines_cap * sizeof(*tmp));
+			if (!tmp) {
+				err = -ENOMEM;
+				goto cleanup;
+			}
+			lines = tmp;
+		}
+		lines[lines_size] = line;
+		lines_size++;
+	}
+
+	if (lines_size == 0)
+		goto cleanup;
+
+	qsort(lines, lines_size, sizeof(*lines), str_cmp);
+
+	freq = calloc(lines_size, sizeof(*freq));
+	if (!freq) {
+		err = -ENOMEM;
+		goto cleanup;
+	}
+
+	cur = freq;
+	cur->line = lines[0];
+	cur->cnt = 1;
+	for (i = 1; i < lines_size; ++i) {
+		if (strcmp(lines[i], cur->line) != 0) {
+			cur++;
+			cur->line = lines[i];
+			cur->cnt = 0;
+		}
+		cur->cnt++;
+	}
+	unique_lines = cur - freq + 1;
+
+	qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp);
+
+	printf("Top source lines (%s):\n", prog_name);
+	for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
+		const char *src_code = freq[i].line;
+		const char *src_line = NULL;
+		char *split = strrchr(freq[i].line, '@');
+
+		if (split) {
+			src_line = split + 1;
+
+			while (*src_line && isspace(*src_line))
+				src_line++;
+
+			while (split > src_code && isspace(*split))
+				split--;
+			*split = '\0';
+		}
+
+		if (src_line)
+			printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line, src_code);
+		else
+			printf("%5d: %s\n", freq[i].cnt, src_code);
+	}
+
+cleanup:
+	free(freq);
+	free(lines);
+	return err;
+}
+
 static int guess_prog_type_by_ctx_name(const char *ctx_name,
 				       enum bpf_prog_type *prog_type,
 				       enum bpf_attach_type *attach_type)
@@ -1009,13 +1129,14 @@  static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
 	stats = &env.prog_stats[env.prog_stat_cnt++];
 	memset(stats, 0, sizeof(*stats));
 
-	if (env.verbose) {
+	if (env.verbose || env.top_src_lines > 0) {
 		buf_sz = env.log_size ? env.log_size : 16 * 1024 * 1024;
 		buf = malloc(buf_sz);
 		if (!buf)
 			return -ENOMEM;
 		/* ensure we always request stats */
-		log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0);
+		log_level = (env.top_src_lines > 0 && env.log_level == 0 ? 2 : env.log_level)
+			| 4 | (env.log_fixed ? 8 : 0);
 	} else {
 		buf = verif_log_buf;
 		buf_sz = sizeof(verif_log_buf);
@@ -1048,6 +1169,8 @@  static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
 		       filename, prog_name, stats->stats[DURATION],
 		       err ? "failure" : "success", buf);
 	}
+	if (env.top_src_lines > 0)
+		print_top_src_lines(buf, buf_sz, stats->prog_name);
 
 	if (verif_log_buf != buf)
 		free(buf);