diff mbox series

[bpf-next,v2] selftests/bpf: emit top frequent code lines in veristat

Message ID 20240918203925.150231-1-mykyta.yatsenko5@gmail.com (mailing list archive)
State New
Delegated to: BPF
Headers show
Series [bpf-next,v2] selftests/bpf: emit top frequent code lines in veristat | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 7 this patch: 7
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 12 maintainers not CCed: sdf@fomichev.me mykolal@fb.com eddyz87@gmail.com haoluo@google.com jolsa@kernel.org shuah@kernel.org linux-kselftest@vger.kernel.org song@kernel.org yonghong.song@linux.dev kpsingh@kernel.org martin.lau@linux.dev john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 7 this patch: 7
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 7 this patch: 7
netdev/checkpatch warning WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 97 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 fail Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-27 fail Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-18 / test
bpf/vmtest-bpf-next-VM_Test-32 fail Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-31 fail Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc

Commit Message

Mykyta Yatsenko Sept. 18, 2024, 8:39 p.m. UTC
From: Mykyta Yatsenko <yatsenko@meta.com>

Production BPF programs are increasing in number of instructions and states
to the point, where optimising verification process for them is necessary
to avoid running into instruction limit. Authors of those BPF programs
need to analyze verifier output, for example, collecting the most
frequent source code lines to understand which part of the program has
the biggest verification cost.

This patch introduces `--top-src-lines` flag in veristat.
`--top-src-lines=N` makes veristat output N the most popular sorce code
lines, parsed from verification log.

An example:
```
$ sudo ./veristat --log-size=1000000000 --top-src-lines=4  pyperf600.bpf.o
Processing 'pyperf600.bpf.o'...
Top source lines (on_event):
 4697: (pyperf.h:0)	 
 2334: (pyperf.h:326)	event->stack[i] = *symbol_id; 
 2334: (pyperf.h:118)	pidData->offsets.String_data); 
 1176: (pyperf.h:92)	bpf_probe_read_user(&frame->f_back, 
...
```

Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
---
 tools/testing/selftests/bpf/veristat.c | 132 ++++++++++++++++++++++++-
 1 file changed, 127 insertions(+), 5 deletions(-)

Comments

Eduard Zingerman Sept. 19, 2024, 2:15 a.m. UTC | #1
On Wed, 2024-09-18 at 21:39 +0100, Mykyta Yatsenko wrote:
> From: Mykyta Yatsenko <yatsenko@meta.com>
> 
> Production BPF programs are increasing in number of instructions and states
> to the point, where optimising verification process for them is necessary
> to avoid running into instruction limit. Authors of those BPF programs
> need to analyze verifier output, for example, collecting the most
> frequent source code lines to understand which part of the program has
> the biggest verification cost.
> 
> This patch introduces `--top-src-lines` flag in veristat.
> `--top-src-lines=N` makes veristat output N the most popular sorce code
> lines, parsed from verification log.
> 
> An example:
> ```
> $ sudo ./veristat --log-size=1000000000 --top-src-lines=4  pyperf600.bpf.o
> Processing 'pyperf600.bpf.o'...
> Top source lines (on_event):
>  4697: (pyperf.h:0)	 
>  2334: (pyperf.h:326)	event->stack[i] = *symbol_id; 
>  2334: (pyperf.h:118)	pidData->offsets.String_data); 
>  1176: (pyperf.h:92)	bpf_probe_read_user(&frame->f_back, 
> ...
> ```
> 
> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>

I think this is a cool feature!
It's a bit of a shame that we don't collect information like this in
the verifier itself, where it would be simpler to do (e.g. associate a
counter with each instruction, or with each jump target).

[...]

> +static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name)
> +{
> +	int lines_cap = 1;
> +	int lines_size = 0;
> +	char **lines;
> +	char *line = NULL;
> +	char *state;
> +	struct line_cnt *freq = NULL;
> +	struct line_cnt *cur;
> +	int unique_lines;
> +	int err;

Note:
  when compiling with clang 20.0.0git the following warning is reported:

veristat.c:957:14: error: variable 'err' is used uninitialized whenever 'for' loop exits because its condition is false [-Werror,-Wsometimes-uninitialized]
  957 |         for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
      |                     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
veristat.c:972:9: note: uninitialized use occurs here
  972 |         return err;
      |
  ...
veristat.c:903:9: note: initialize the variable 'err' to silence this warning
  903 |         int err;
      |                ^
      |                 = 0

Also, a nitpick: declarations should be sorted in a "reverse Christmas
tree" order (at-least that's what Andrii enforces :).

> +	int i;
> +
> +	lines = calloc(lines_cap, sizeof(char *));

Nitpick: here and in a few places below use sizeof(*<array>), e.g.:
         calloc(lines_cap, sizeof(*lines))

> +	if (!lines)
> +		return -ENOMEM;
> +
> +	while ((line = strtok_r(line ? NULL : buf, "\n", &state))) {
> +		if (strncmp(line, "; ", 2))
> +			continue;
> +		line += 2;
> +
> +		if (lines_size == lines_cap) {
> +			char **tmp;
> +
> +			lines_cap *= 2;
> +			tmp = realloc(lines, lines_cap * sizeof(char *));
> +			if (!tmp) {
> +				err = -ENOMEM;
> +				goto cleanup;
> +			}
> +			lines = tmp;
> +		}
> +		lines[lines_size] = line;
> +		lines_size++;
> +	}
> +
> +	if (!lines_size)
> +		goto cleanup;
> +
> +	qsort(lines, lines_size, sizeof(char *), str_cmp);
> +
> +	freq = calloc(lines_size, sizeof(struct line_cnt));
> +	if (!freq) {
> +		err = -ENOMEM;
> +		goto cleanup;
> +	}
> +
> +	cur = freq;
> +	cur->line = lines[0];
> +	cur->cnt = 1;
> +	for (i = 1; i < lines_size; ++i) {
> +		if (strcmp(lines[i], cur->line)) {
> +			cur++;
> +			cur->line = lines[i];
> +			cur->cnt = 0;
> +		}
> +		cur->cnt++;
> +	}
> +	unique_lines = cur - freq + 1;
> +
> +	qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp);
> +
> +	printf("Top source lines (%s):\n", prog_name);
> +	for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
> +		char *src_code;
> +		char *src_line;
> +
> +		src_code = strtok_r(freq[i].line, "@", &state);

Does verifier guarantee presence of '@' for each source comment line?

> +		src_line = strtok_r(NULL, "\0", &state);
> +		if (src_line)

The '.line' string is null-terminated, can 'src_line' ever be NULL?

> +			printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line + 1, src_code);
> +		else
> +			printf("%5d: %s\n", freq[i].cnt, src_code);
> +	}
> +
> +cleanup:
> +	free(freq);
> +	free(lines);
> +	return err;
> +}
> +

[...]
Philo Lu Sept. 19, 2024, 2:51 a.m. UTC | #2
Hi Mykyta,

On 2024/9/19 04:39, Mykyta Yatsenko wrote:
> From: Mykyta Yatsenko <yatsenko@meta.com>
> 
> Production BPF programs are increasing in number of instructions and states
> to the point, where optimising verification process for them is necessary
> to avoid running into instruction limit. Authors of those BPF programs
> need to analyze verifier output, for example, collecting the most
> frequent source code lines to understand which part of the program has
> the biggest verification cost.
> 
> This patch introduces `--top-src-lines` flag in veristat.
> `--top-src-lines=N` makes veristat output N the most popular sorce code
> lines, parsed from verification log.
> 
> An example:
> ```
> $ sudo ./veristat --log-size=1000000000 --top-src-lines=4  pyperf600.bpf.o
> Processing 'pyperf600.bpf.o'...
> Top source lines (on_event):
>   4697: (pyperf.h:0)	
>   2334: (pyperf.h:326)	event->stack[i] = *symbol_id;
>   2334: (pyperf.h:118)	pidData->offsets.String_data);
>   1176: (pyperf.h:92)	bpf_probe_read_user(&frame->f_back,
> ...
> ```

I think this is useful and wonder how can I use it. In particular, is it 
possible to know the corresponding instruction number contributed by the 
source lines?

Assume a prog is rejected due to instruction limit. I can optimize the 
prog with `--top-src-lines`, but have to check the result with another 
"load" to see the total instruction number (because I don't know how 
many instructions reduced with the optimized src lines).

Am I right? or is there any better method?

Thanks.
Mykyta Yatsenko Sept. 19, 2024, 12:03 p.m. UTC | #3
On 19/09/2024 03:15, Eduard Zingerman wrote:
> On Wed, 2024-09-18 at 21:39 +0100, Mykyta Yatsenko wrote:
>> From: Mykyta Yatsenko <yatsenko@meta.com>
>>
>> Production BPF programs are increasing in number of instructions and states
>> to the point, where optimising verification process for them is necessary
>> to avoid running into instruction limit. Authors of those BPF programs
>> need to analyze verifier output, for example, collecting the most
>> frequent source code lines to understand which part of the program has
>> the biggest verification cost.
>>
>> This patch introduces `--top-src-lines` flag in veristat.
>> `--top-src-lines=N` makes veristat output N the most popular sorce code
>> lines, parsed from verification log.
>>
>> An example:
>> ```
>> $ sudo ./veristat --log-size=1000000000 --top-src-lines=4  pyperf600.bpf.o
>> Processing 'pyperf600.bpf.o'...
>> Top source lines (on_event):
>>   4697: (pyperf.h:0)	
>>   2334: (pyperf.h:326)	event->stack[i] = *symbol_id;
>>   2334: (pyperf.h:118)	pidData->offsets.String_data);
>>   1176: (pyperf.h:92)	bpf_probe_read_user(&frame->f_back,
>> ...
>> ```
>>
>> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
> I think this is a cool feature!
> It's a bit of a shame that we don't collect information like this in
> the verifier itself, where it would be simpler to do (e.g. associate a
> counter with each instruction, or with each jump target).
Thanks for looking at this patch.
> [...]
>
>> +static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name)
>> +{
>> +	int lines_cap = 1;
>> +	int lines_size = 0;
>> +	char **lines;
>> +	char *line = NULL;
>> +	char *state;
>> +	struct line_cnt *freq = NULL;
>> +	struct line_cnt *cur;
>> +	int unique_lines;
>> +	int err;
> Note:
>    when compiling with clang 20.0.0git the following warning is reported:
>
> veristat.c:957:14: error: variable 'err' is used uninitialized whenever 'for' loop exits because its condition is false [-Werror,-Wsometimes-uninitialized]
>    957 |         for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
>        |                     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> veristat.c:972:9: note: uninitialized use occurs here
>    972 |         return err;
>        |
>    ...
> veristat.c:903:9: note: initialize the variable 'err' to silence this warning
>    903 |         int err;
>        |                ^
>        |                 = 0
yes, I've got a mail from CI regarding this, going to fix in the next 
version.
> Also, a nitpick: declarations should be sorted in a "reverse Christmas
> tree" order (at-least that's what Andrii enforces :).
>
>> +	int i;
>> +
>> +	lines = calloc(lines_cap, sizeof(char *));
> Nitpick: here and in a few places below use sizeof(*<array>), e.g.:
>           calloc(lines_cap, sizeof(*lines))
>
>> +	if (!lines)
>> +		return -ENOMEM;
>> +
>> +	while ((line = strtok_r(line ? NULL : buf, "\n", &state))) {
>> +		if (strncmp(line, "; ", 2))
>> +			continue;
>> +		line += 2;
>> +
>> +		if (lines_size == lines_cap) {
>> +			char **tmp;
>> +
>> +			lines_cap *= 2;
>> +			tmp = realloc(lines, lines_cap * sizeof(char *));
>> +			if (!tmp) {
>> +				err = -ENOMEM;
>> +				goto cleanup;
>> +			}
>> +			lines = tmp;
>> +		}
>> +		lines[lines_size] = line;
>> +		lines_size++;
>> +	}
>> +
>> +	if (!lines_size)
>> +		goto cleanup;
>> +
>> +	qsort(lines, lines_size, sizeof(char *), str_cmp);
>> +
>> +	freq = calloc(lines_size, sizeof(struct line_cnt));
>> +	if (!freq) {
>> +		err = -ENOMEM;
>> +		goto cleanup;
>> +	}
>> +
>> +	cur = freq;
>> +	cur->line = lines[0];
>> +	cur->cnt = 1;
>> +	for (i = 1; i < lines_size; ++i) {
>> +		if (strcmp(lines[i], cur->line)) {
>> +			cur++;
>> +			cur->line = lines[i];
>> +			cur->cnt = 0;
>> +		}
>> +		cur->cnt++;
>> +	}
>> +	unique_lines = cur - freq + 1;
>> +
>> +	qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp);
>> +
>> +	printf("Top source lines (%s):\n", prog_name);
>> +	for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
>> +		char *src_code;
>> +		char *src_line;
>> +
>> +		src_code = strtok_r(freq[i].line, "@", &state);
> Does verifier guarantee presence of '@' for each source comment line?
It does not, if there is no @ character in the source line, I just print 
a full source line.
`if (src_line)` should check for that.

>> +		src_line = strtok_r(NULL, "\0", &state);
>> +		if (src_line)
> The '.line' string is null-terminated, can 'src_line' ever be NULL?
>
>> +			printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line + 1, src_code);
>> +		else
>> +			printf("%5d: %s\n", freq[i].cnt, src_code);
>> +	}
>> +
>> +cleanup:
>> +	free(freq);
>> +	free(lines);
>> +	return err;
>> +}
>> +
> [...]
>
>
Mykyta Yatsenko Sept. 19, 2024, 12:08 p.m. UTC | #4
On 19/09/2024 03:51, Philo Lu wrote:
>
> Hi Mykyta,
>
> On 2024/9/19 04:39, Mykyta Yatsenko wrote:
>> From: Mykyta Yatsenko <yatsenko@meta.com>
>>
>> Production BPF programs are increasing in number of instructions and 
>> states
>> to the point, where optimising verification process for them is 
>> necessary
>> to avoid running into instruction limit. Authors of those BPF programs
>> need to analyze verifier output, for example, collecting the most
>> frequent source code lines to understand which part of the program has
>> the biggest verification cost.
>>
>> This patch introduces `--top-src-lines` flag in veristat.
>> `--top-src-lines=N` makes veristat output N the most popular sorce code
>> lines, parsed from verification log.
>>
>> An example:
>> ```
>> $ sudo ./veristat --log-size=1000000000 --top-src-lines=4 
>> pyperf600.bpf.o
>> Processing 'pyperf600.bpf.o'...
>> Top source lines (on_event):
>>   4697: (pyperf.h:0)
>>   2334: (pyperf.h:326)    event->stack[i] = *symbol_id;
>>   2334: (pyperf.h:118)    pidData->offsets.String_data);
>>   1176: (pyperf.h:92) bpf_probe_read_user(&frame->f_back,
>> ...
>> ```
>
> I think this is useful and wonder how can I use it. In particular, is 
> it possible to know the corresponding instruction number contributed 
> by the source lines?
>
No, as far as I know, we don't have that info, so we just use number of 
source lines as a proxy for number of instructions. Eduard suggested to 
collect
instruction count per source line in verifier, maybe that actually what 
we should do.
> Assume a prog is rejected due to instruction limit. I can optimize the 
> prog with `--top-src-lines`, but have to check the result with another 
> "load" to see the total instruction number (because I don't know how 
> many instructions reduced with the optimized src lines).
>
> Am I right? or is there any better method?
Yes, you are right.
>
> Thanks.
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 1ec5c4c47235..854fa4459b77 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -143,6 +143,7 @@  static struct env {
 	char **filenames;
 	int filename_cnt;
 	bool verbose;
+	bool print_verbose;
 	bool debug;
 	bool quiet;
 	bool force_checkpoints;
@@ -179,11 +180,12 @@  static struct env {
 	int files_skipped;
 	int progs_processed;
 	int progs_skipped;
+	int top_src_lines;
 } env;
 
 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
 {
-	if (!env.verbose)
+	if (!env.print_verbose)
 		return 0;
 	if (level == LIBBPF_DEBUG  && !env.debug)
 		return 0;
@@ -206,6 +208,7 @@  const char argp_program_doc[] =
 enum {
 	OPT_LOG_FIXED = 1000,
 	OPT_LOG_SIZE = 1001,
+	OPT_TOP_SRC_LINES = 1002,
 };
 
 static const struct argp_option opts[] = {
@@ -228,6 +231,7 @@  static const struct argp_option opts[] = {
 	  "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
 	{ "test-reg-invariants", 'r', NULL, 0,
 	  "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
+	{ "top-src-lines", OPT_TOP_SRC_LINES, "N", 0, "Emit N most frequent source code lines" },
 	{},
 };
 
@@ -249,10 +253,12 @@  static error_t parse_arg(int key, char *arg, struct argp_state *state)
 		break;
 	case 'v':
 		env.verbose = true;
+		env.print_verbose = true;
 		break;
 	case 'd':
 		env.debug = true;
 		env.verbose = true;
+		env.print_verbose = true;
 		break;
 	case 'q':
 		env.quiet = true;
@@ -337,6 +343,15 @@  static error_t parse_arg(int key, char *arg, struct argp_state *state)
 			return -ENOMEM;
 		env.filename_cnt++;
 		break;
+	case OPT_TOP_SRC_LINES:
+		errno = 0;
+		env.verbose = true;
+		env.top_src_lines = strtol(arg, NULL, 10);
+		if (errno) {
+			fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
 	default:
 		return ARGP_ERR_UNKNOWN;
 	}
@@ -854,6 +869,109 @@  static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *
 	return 0;
 }
 
+struct line_cnt {
+	char *line;
+	int cnt;
+};
+
+static int str_cmp(const void *a, const void *b)
+{
+	const char **str1 = (const char **)a;
+	const char **str2 = (const char **)b;
+
+	return strcmp(*str1, *str2);
+}
+
+static int line_cnt_cmp(const void *a, const void *b)
+{
+	const struct line_cnt *a_cnt = (const struct line_cnt *)a;
+	const struct line_cnt *b_cnt = (const struct line_cnt *)b;
+
+	return b_cnt->cnt - a_cnt->cnt;
+}
+
+static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name)
+{
+	int lines_cap = 1;
+	int lines_size = 0;
+	char **lines;
+	char *line = NULL;
+	char *state;
+	struct line_cnt *freq = NULL;
+	struct line_cnt *cur;
+	int unique_lines;
+	int err;
+	int i;
+
+	lines = calloc(lines_cap, sizeof(char *));
+	if (!lines)
+		return -ENOMEM;
+
+	while ((line = strtok_r(line ? NULL : buf, "\n", &state))) {
+		if (strncmp(line, "; ", 2))
+			continue;
+		line += 2;
+
+		if (lines_size == lines_cap) {
+			char **tmp;
+
+			lines_cap *= 2;
+			tmp = realloc(lines, lines_cap * sizeof(char *));
+			if (!tmp) {
+				err = -ENOMEM;
+				goto cleanup;
+			}
+			lines = tmp;
+		}
+		lines[lines_size] = line;
+		lines_size++;
+	}
+
+	if (!lines_size)
+		goto cleanup;
+
+	qsort(lines, lines_size, sizeof(char *), str_cmp);
+
+	freq = calloc(lines_size, sizeof(struct line_cnt));
+	if (!freq) {
+		err = -ENOMEM;
+		goto cleanup;
+	}
+
+	cur = freq;
+	cur->line = lines[0];
+	cur->cnt = 1;
+	for (i = 1; i < lines_size; ++i) {
+		if (strcmp(lines[i], cur->line)) {
+			cur++;
+			cur->line = lines[i];
+			cur->cnt = 0;
+		}
+		cur->cnt++;
+	}
+	unique_lines = cur - freq + 1;
+
+	qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp);
+
+	printf("Top source lines (%s):\n", prog_name);
+	for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
+		char *src_code;
+		char *src_line;
+
+		src_code = strtok_r(freq[i].line, "@", &state);
+		src_line = strtok_r(NULL, "\0", &state);
+		if (src_line)
+			printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line + 1, src_code);
+		else
+			printf("%5d: %s\n", freq[i].cnt, src_code);
+	}
+
+cleanup:
+	free(freq);
+	free(lines);
+	return err;
+}
+
 static int guess_prog_type_by_ctx_name(const char *ctx_name,
 				       enum bpf_prog_type *prog_type,
 				       enum bpf_attach_type *attach_type)
@@ -1043,11 +1161,13 @@  static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
 	stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
 	parse_verif_log(buf, buf_sz, stats);
 
-	if (env.verbose) {
+	if (env.print_verbose) {
 		printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
 		       filename, prog_name, stats->stats[DURATION],
 		       err ? "failure" : "success", buf);
 	}
+	if (env.top_src_lines)
+		print_top_src_lines(buf, buf_sz, stats->prog_name);
 
 	if (verif_log_buf != buf)
 		free(buf);
@@ -1065,13 +1185,13 @@  static int process_obj(const char *filename)
 	int err = 0, prog_cnt = 0;
 
 	if (!should_process_file_prog(base_filename, NULL)) {
-		if (env.verbose)
+		if (env.print_verbose)
 			printf("Skipping '%s' due to filters...\n", filename);
 		env.files_skipped++;
 		return 0;
 	}
 	if (!is_bpf_obj_file(filename)) {
-		if (env.verbose)
+		if (env.print_verbose)
 			printf("Skipping '%s' as it's not a BPF object file...\n", filename);
 		env.files_skipped++;
 		return 0;
@@ -2116,13 +2236,15 @@  int main(int argc, char **argv)
 		return 0;
 	}
 
-	if (env.verbose && env.quiet) {
+	if (env.print_verbose && env.quiet) {
 		fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
 		return 1;
 	}
 	if (env.verbose && env.log_level == 0)
 		env.log_level = 1;
+	if (env.top_src_lines && env.log_level < 2)
+		env.log_level = 2;
 
 	if (env.output_spec.spec_cnt == 0) {
 		if (env.out_fmt == RESFMT_CSV)