perf trace: Implement syscall summary in BPF

Message ID	20250221183207.560262-1-namhyung@kernel.org (mailing list archive)
State	New
Headers	show Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E505A252909; Fri, 21 Feb 2025 18:32:08 +0000 (UTC) From: Namhyung Kim <namhyung@kernel.org> To: Arnaldo Carvalho de Melo <acme@kernel.org>, Ian Rogers <irogers@google.com>, Kan Liang <kan.liang@linux.intel.com> Cc: Jiri Olsa <jolsa@kernel.org>, Adrian Hunter <adrian.hunter@intel.com>, Peter Zijlstra <peterz@infradead.org>, Ingo Molnar <mingo@kernel.org>, LKML <linux-kernel@vger.kernel.org>, linux-perf-users@vger.kernel.org, bpf@vger.kernel.org, Song Liu <song@kernel.org>, Howard Chu <howardchu95@gmail.com> Subject: [PATCH] perf trace: Implement syscall summary in BPF Date: Fri, 21 Feb 2025 10:32:07 -0800 Message-ID: <20250221183207.560262-1-namhyung@kernel.org> Precedence: bulk MIME-Version: 1.0 Content-Transfer-Encoding: 8bit
Series	perf trace: Implement syscall summary in BPF \| expand perf trace: Implement syscall summary in BPF

Context	Check	Description
netdev/tree_selection	success	Not a local patch

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 55d6ce9ea52fb2a5..3b68a57682ca991d 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1198,7 +1198,7 @@ SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_OUT)/sample_filter.skel.h -SKELETONS += $(SKEL_OUT)/kwork_top.skel.h +SKELETONS += $(SKEL_OUT)/kwork_top.skel.h $(SKEL_OUT)/syscall_summary.skel.h SKELETONS += $(SKEL_OUT)/bench_uprobe.skel.h SKELETONS += $(SKEL_OUT)/augmented_raw_syscalls.skel.h diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f55a8a6481f27f99..1c7af7583bc560c4 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -55,6 +55,7 @@ #include "util/thread_map.h" #include "util/stat.h" #include "util/tool.h" +#include "util/trace.h" #include "util/util.h" #include "trace/beauty/beauty.h" #include "trace-event.h" @@ -222,6 +223,7 @@ struct trace { bool force; bool vfs_getname; bool force_btf; + bool summary_bpf; int trace_pgfaults; char *perfconfig_events; struct { @@ -4276,6 +4278,13 @@ static int trace__run(struct trace *trace, int argc, const char **argv) trace->live = true; + if (trace->summary_bpf) { + if (trace_prepare_bpf_summary() < 0) + goto out_delete_evlist; + + goto create_maps; + } + if (!trace->raw_augmented_syscalls) { if (trace->trace_syscalls && trace__add_syscall_newtp(trace)) goto out_error_raw_syscalls; @@ -4334,6 +4343,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (trace->cgroup) evlist__set_default_cgroup(trace->evlist, trace->cgroup); +create_maps: err = evlist__create_maps(evlist, &trace->opts.target); if (err < 0) { fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); @@ -4426,9 +4436,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_apply_filters; - err = evlist__mmap(evlist, trace->opts.mmap_pages); - if (err < 0) - goto out_error_mmap; + if (!trace->summary_bpf) { + err = evlist__mmap(evlist, trace->opts.mmap_pages); + if (err < 0) + goto out_error_mmap; + } if (!target__none(&trace->opts.target) && !trace->opts.target.initial_delay) evlist__enable(evlist); @@ -4441,6 +4453,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) evlist__enable(evlist); } + if (trace->summary_bpf) + trace_start_bpf_summary(); + trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 || perf_thread_map__nr(evlist->core.threads) > 1 || evlist__first(evlist)->core.attr.inherit; @@ -4508,12 +4523,17 @@ static int trace__run(struct trace *trace, int argc, const char **argv) evlist__disable(evlist); + if (trace->summary_bpf) + trace_end_bpf_summary(); + if (trace->sort_events) ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL); if (!err) { if (trace->summary) { - if (trace->summary_mode == SUMMARY__BY_TOTAL) + if (trace->summary_bpf) + trace_print_bpf_summary(trace->sctbl, trace->output); + else if (trace->summary_mode == SUMMARY__BY_TOTAL) trace__fprintf_total_summary(trace, trace->output); else trace__fprintf_thread_summary(trace, trace->output); @@ -4529,6 +4549,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) } out_delete_evlist: + trace_cleanup_bpf_summary(); delete_syscall_stats(trace->syscall_stats); trace__symbols__exit(trace); evlist__free_syscall_tp_fields(evlist); @@ -5446,6 +5467,15 @@ int cmd_trace(int argc, const char **argv) goto skip_augmentation; } + if (!argc && target__none(&trace.opts.target)) + trace.opts.target.system_wide = true; + + if (trace.summary_only && trace.opts.target.system_wide && + trace.summary_mode == SUMMARY__BY_TOTAL && !trace.trace_pgfaults) { + trace.summary_bpf = true; + goto skip_augmentation; + } + trace.skel = augmented_raw_syscalls_bpf__open(); if (!trace.skel) { pr_debug("Failed to open augmented syscalls BPF skeleton"); @@ -5649,9 +5679,6 @@ int cmd_trace(int argc, const char **argv) goto out_close; } - if (!argc && target__none(&trace.opts.target)) - trace.opts.target.system_wide = true; - if (input_name) err = trace__replay(&trace); else diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 034a6603d5a8e8b0..ba4201a6f3c69753 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -171,6 +171,7 @@ perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter.o perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-flex.o perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-bison.o +perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-trace-summary.o perf-util-$(CONFIG_PERF_BPF_SKEL) += btf.o ifeq ($(CONFIG_LIBTRACEEVENT),y) diff --git a/tools/perf/util/bpf-trace-summary.c b/tools/perf/util/bpf-trace-summary.c new file mode 100644 index 0000000000000000..7e8b1c9b3faeee4f --- /dev/null +++ b/tools/perf/util/bpf-trace-summary.c @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> + +#include "syscalltbl.h" +#include "util/trace.h" +#include "util/util.h" +#include <bpf/bpf.h> +#include <linux/time64.h> + +#include "bpf_skel/syscall_summary.h" +#include "bpf_skel/syscall_summary.skel.h" + + +static struct syscall_summary_bpf *skel; + +int trace_prepare_bpf_summary(void) +{ + skel = syscall_summary_bpf__open_and_load(); + if (skel == NULL) { + fprintf(stderr, "failed to load syscall summary bpf skeleton\n"); + return -1; + } + + if (syscall_summary_bpf__attach(skel) < 0) { + fprintf(stderr, "failed to attach syscall summary bpf skeleton\n"); + return -1; + } + + return 0; +} + +void trace_start_bpf_summary(void) +{ + skel->bss->enabled = 1; +} + +void trace_end_bpf_summary(void) +{ + skel->bss->enabled = 0; +} + +struct syscall_data { + int syscall_nr; + struct syscall_stats stats; +}; + +static int datacmp(const void *a, const void *b) +{ + const struct syscall_data *sa = a; + const struct syscall_data *sb = b; + + return sa->stats.total_time > sb->stats.total_time ? -1 : 1; +} + +int trace_print_bpf_summary(struct syscalltbl *sctbl, FILE *fp) +{ + struct syscall_key *prev_key, key; + struct syscall_data *data = NULL; + struct bpf_map *map = skel->maps.syscall_stats_map; + int nr_data = 0; + int printed = 0; + + /* get stats from the bpf map */ + prev_key = NULL; + while (!bpf_map__get_next_key(map, prev_key, &key, sizeof(key))) { + struct syscall_stats stat; + + if (!bpf_map__lookup_elem(map, &key, sizeof(key), &stat, sizeof(stat), 0)) { + struct syscall_data *tmp, *pos; + + tmp = realloc(data, sizeof(*data) * (nr_data + 1)); + if (tmp == NULL) + break; + + data = tmp; + pos = &data[nr_data++]; + + pos->syscall_nr = key.nr; + memcpy(&pos->stats, &stat, sizeof(stat)); + } + + prev_key = &key; + } + + qsort(data, nr_data, sizeof(*data), datacmp); + + printed += fprintf(fp, "\n"); + + printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); + + for (int i = 0; i < nr_data; i++) { + struct syscall_data *pos = &data[i]; + double total = (double)(pos->stats.total_time) / NSEC_PER_MSEC; + double min = (double)(pos->stats.min_time) / NSEC_PER_MSEC; + double max = (double)(pos->stats.max_time) / NSEC_PER_MSEC; + double avg = total / pos->stats.count; + + printed += fprintf(fp, " %-15s", syscalltbl__name(sctbl, pos->syscall_nr)); + printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n", + pos->stats.count, pos->stats.error, total, min, avg, max, + /*stddev=*/0.0); + } + + printed += fprintf(fp, "\n\n"); + free(data); + + return printed; +} + +void trace_cleanup_bpf_summary(void) +{ + syscall_summary_bpf__destroy(skel); +} diff --git a/tools/perf/util/bpf_skel/syscall_summary.bpf.c b/tools/perf/util/bpf_skel/syscall_summary.bpf.c new file mode 100644 index 0000000000000000..e573ce39de73eaf3 --- /dev/null +++ b/tools/perf/util/bpf_skel/syscall_summary.bpf.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Trace raw_syscalls tracepoints to collect system call statistics. + */ + +#include "vmlinux.h" +#include "syscall_summary.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +/* This is to calculate a delta between sys-enter and sys-exit for each thread */ +struct syscall_trace { + int nr; /* syscall number is only available at sys-enter */ + int unused; + u64 timestamp; +}; + +#define MAX_ENTRIES (16 * 1024) + +struct syscall_trace_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); /* tid */ + __type(value, struct syscall_trace); + __uint(max_entries, MAX_ENTRIES); +} syscall_trace_map SEC(".maps"); + +struct syscall_stats_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct syscall_key); + __type(value, struct syscall_stats); + __uint(max_entries, MAX_ENTRIES); +} syscall_stats_map SEC(".maps"); + +int enabled; /* controlled from userspace */ + +static void update_stats(int nr, s64 duration, long ret) +{ + struct syscall_key key = { .nr = nr, }; + struct syscall_stats *stats; + + stats = bpf_map_lookup_elem(&syscall_stats_map, &key); + if (stats == NULL) { + struct syscall_stats zero = {}; + + bpf_map_update_elem(&syscall_stats_map, &key, &zero, BPF_NOEXIST); + stats = bpf_map_lookup_elem(&syscall_stats_map, &key); + if (stats == NULL) + return; + } + + __sync_fetch_and_add(&stats->count, 1); + if (ret < 0) + __sync_fetch_and_add(&stats->error, 1); + + if (duration > 0) { + __sync_fetch_and_add(&stats->total_time, duration); + if (stats->max_time < duration) + stats->max_time = duration; + if (stats->min_time > duration || stats->min_time == 0) + stats->min_time = duration; + } + + return; +} + +SEC("tp_btf/sys_enter") +int sys_enter(u64 *ctx) +{ + int tid; + struct syscall_trace st; + + if (!enabled) + return 0; + + st.nr = ctx[1]; /* syscall number */ + st.unused = 0; + st.timestamp = bpf_ktime_get_ns(); + + tid = bpf_get_current_pid_tgid(); + bpf_map_update_elem(&syscall_trace_map, &tid, &st, BPF_ANY); + + return 0; +} + +SEC("tp_btf/sys_exit") +int sys_exit(u64 *ctx) +{ + int tid; + long ret = ctx[1]; /* return value of the syscall */ + struct syscall_trace *st; + s64 delta; + + if (!enabled) + return 0; + + tid = bpf_get_current_pid_tgid(); + st = bpf_map_lookup_elem(&syscall_trace_map, &tid); + if (st == NULL) + return 0; + + delta = bpf_ktime_get_ns() - st->timestamp; + update_stats(st->nr, delta, ret); + + bpf_map_delete_elem(&syscall_trace_map, &tid); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/util/bpf_skel/syscall_summary.h b/tools/perf/util/bpf_skel/syscall_summary.h new file mode 100644 index 0000000000000000..644dc7049377147e --- /dev/null +++ b/tools/perf/util/bpf_skel/syscall_summary.h @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Data structures shared between BPF and tools. */ +#ifndef UTIL_BPF_SKEL_SYSCALL_SUMMARY_H +#define UTIL_BPF_SKEL_SYSCALL_SUMMARY_H + +struct syscall_key { + int nr; +}; + +struct syscall_stats { + u64 total_time; + u64 max_time; + u64 min_time; + u32 count; + u32 error; +}; + +#endif /* UTIL_BPF_SKEL_SYSCALL_SUMMARY_H */ diff --git a/tools/perf/util/trace.h b/tools/perf/util/trace.h new file mode 100644 index 0000000000000000..4d7a7c4544d94caf --- /dev/null +++ b/tools/perf/util/trace.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef UTIL_TRACE_H +#define UTIL_TRACE_H + +#include <stdio.h> /* for FILE */ + +struct syscalltbl; + +#ifdef HAVE_BPF_SKEL + +int trace_prepare_bpf_summary(void); +void trace_start_bpf_summary(void); +void trace_end_bpf_summary(void); +int trace_print_bpf_summary(struct syscalltbl *sctbl, FILE *fp); +void trace_cleanup_bpf_summary(void); + +#else /* !HAVE_BPF_SKEL */ + +static inline int trace_prepare_bpf_summary(void) { return -1; } +static inline void trace_start_bpf_summary(void) {} +static inline void trace_end_bpf_summary(void) {} +static inline int trace_print_bpf_summary(struct syscalltbl *sctbl __maybe_unused, + FILE *fp __maybe_unused) +{ + return 0; +} +static inline void trace_cleanup_bpf_summary(void) {} + +#endif /* HAVE_BPF_SKEL */ + +#endif /* UTIL_TRACE_H */

perf trace: Implement syscall summary in BPF

Checks

Commit Message

Patch