@@ -533,7 +533,9 @@ $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
 $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h
 $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
 $(OUTPUT)/bench: LDLIBS += -lm
-$(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
+$(OUTPUT)/bench: $(OUTPUT)/bench.o \
+		 $(OUTPUT)/testing_helpers.o \
+		 $(OUTPUT)/trace_helpers.o \
 		 $(OUTPUT)/bench_count.o \
 		 $(OUTPUT)/bench_rename.o \
 		 $(OUTPUT)/bench_trigger.o \
@@ -359,6 +359,9 @@ extern const struct bench bench_trig_kprobe;
 extern const struct bench bench_trig_fentry;
 extern const struct bench bench_trig_fentry_sleep;
 extern const struct bench bench_trig_fmodret;
+extern const struct bench bench_trig_uprobe_base;
+extern const struct bench bench_trig_uprobe;
+extern const struct bench bench_trig_uretprobe;
 extern const struct bench bench_rb_libbpf;
 extern const struct bench bench_rb_custom;
 extern const struct bench bench_pb_libbpf;
@@ -385,6 +388,9 @@ static const struct bench *benchs[] = {
 	&bench_trig_fentry,
 	&bench_trig_fentry_sleep,
 	&bench_trig_fmodret,
+	&bench_trig_uprobe_base,
+	&bench_trig_uprobe,
+	&bench_trig_uretprobe,
 	&bench_rb_libbpf,
 	&bench_rb_custom,
 	&bench_pb_libbpf,
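
Registering the three new struct bench definitions in the benchs[] array
is all that bench.c needs, since benchmarks are selected by name at
startup. For readers unfamiliar with the harness, the selection amounts
to a linear name lookup along these lines (illustrative sketch only;
find_bench() is a hypothetical name, not the actual bench.c code):

	/* hypothetical helper: pick a benchmark by its .name field */
	static const struct bench *find_bench(const char *name)
	{
		size_t i;

		for (i = 0; i < ARRAY_SIZE(benchs); i++) {
			if (strcmp(benchs[i]->name, name) == 0)
				return benchs[i];
		}
		return NULL;
	}

With that, ./bench trig-uprobe and friends resolve to the definitions
added in bench_trigger.c below.
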
@@ -2,6 +2,7 @@
 /* Copyright (c) 2020 Facebook */
 #include "bench.h"
 #include "trigger_bench.skel.h"
+#include "trace_helpers.h"
 
 /* BPF triggering benchmarks */
 static struct trigger_ctx {
@@ -107,6 +108,69 @@ static void *trigger_consumer(void *input)
 	return NULL;
 }
 
+/* make sure the call is not inlined and not optimized away by the compiler;
+ * hence the __weak attribute and the inline asm volatile barrier in the
+ * function body
+ */
+__weak void uprobe_target(void)
+{
+	asm volatile ("");
+}
+
+static void *uprobe_base_producer(void *input)
+{
+	while (true) {
+		uprobe_target();
+		atomic_inc(&base_hits.value);
+	}
+	return NULL;
+}
+
+static void *uprobe_producer(void *input)
+{
+	while (true)
+		uprobe_target();
+	return NULL;
+}
+
+static void usetup(bool use_retprobe)
+{
+	size_t uprobe_offset;
+	ssize_t base_addr;
+	struct bpf_link *link;
+
+	setup_libbpf();
+
+	ctx.skel = trigger_bench__open_and_load();
+	if (!ctx.skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	base_addr = get_base_addr();
+	if (base_addr < 0) {
+		fprintf(stderr, "failed to find base address!\n");
+		exit(1);
+	}
+	uprobe_offset = get_uprobe_offset(&uprobe_target, base_addr);
+
+	link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
+					  use_retprobe,
+					  -1 /* all PIDs */,
+					  "/proc/self/exe",
+					  uprobe_offset);
+	if (!link) {
+		fprintf(stderr, "failed to attach uprobe!\n");
+		exit(1);
+	}
+	ctx.skel->links.bench_trigger_uprobe = link;
+}
+
+static void uprobe_setup(void)
+{
+	usetup(false);
+}
+
+static void uretprobe_setup(void)
+{
+	usetup(true);
+}
+
 const struct bench bench_trig_base = {
 	.name = "trig-base",
 	.validate = trigger_validate,
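
The key step in usetup() above is translating uprobe_target()'s virtual
address into a file offset within /proc/self/exe, which is the form the
uprobe attach API expects. Conceptually, the get_base_addr() +
get_uprobe_offset() pair from trace_helpers reduces to something like
the following (simplified sketch under my reading of the helpers, not
their actual implementation; usual stdio/stdint includes and corner
cases omitted):

	/* simplified: find the executable mapping of our own binary in
	 * /proc/self/maps and turn a function's virtual address into an
	 * ELF file offset
	 */
	static ssize_t func_to_file_offset(const void *func)
	{
		unsigned long start, end, file_off;
		uintptr_t addr = (uintptr_t)func;
		char perms[5], buf[256];
		ssize_t ret = -1;
		FILE *f;

		f = fopen("/proc/self/maps", "r");
		if (!f)
			return -1;
		while (fgets(buf, sizeof(buf), f)) {
			if (sscanf(buf, "%lx-%lx %4s %lx",
				   &start, &end, perms, &file_off) != 4)
				continue;
			if (addr >= start && addr < end && perms[2] == 'x') {
				ret = addr - start + file_off;
				break;
			}
		}
		fclose(f);
		return ret;
	}
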
@@ -182,3 +246,33 @@ const struct bench bench_trig_fmodret = {
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
 };
+
+const struct bench bench_trig_uprobe_base = {
+	.name = "trig-uprobe-base",
+	.setup = NULL, /* no uprobe/uretprobe is attached */
+	.producer_thread = uprobe_base_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_base_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uprobe = {
+	.name = "trig-uprobe",
+	.setup = uprobe_setup,
+	.producer_thread = uprobe_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uretprobe = {
+	.name = "trig-uretprobe",
+	.setup = uretprobe_setup,
+	.producer_thread = uprobe_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
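
Note the asymmetry between the baseline and the instrumented variants:
trig-uprobe-base attaches nothing (.setup is NULL) and counts calls from
user space via atomic_inc(&base_hits.value) in uprobe_base_producer(),
while trig-uprobe and trig-uretprobe rely on the attached BPF program
bumping the BPF-side hits counter. Accordingly, the two measure
callbacks differ only in which counter they drain; roughly (illustrative
sketch, the existing trigger_measure()/trigger_base_measure() code in
bench_trigger.c may differ in details):

	/* sketch: drain the BPF-side counter through the skeleton's bss */
	static void trigger_measure_sketch(struct bench_res *res)
	{
		res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
	}

	/* sketch: drain the user-space counter bumped by the producer */
	static void trigger_base_measure_sketch(struct bench_res *res)
	{
		res->hits = atomic_swap(&base_hits.value, 0);
	}
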
@@ -52,3 +52,10 @@ int bench_trigger_fmodret(void *ctx)
 	__sync_add_and_fetch(&hits, 1);
 	return -22;
 }
+
+SEC("uprobe/self/uprobe_target")
+int bench_trigger_uprobe(void *ctx)
+{
+	__sync_add_and_fetch(&hits, 1);
+	return 0;
+}
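
On the BPF side a single program is enough for both modes: whether it
fires on function entry (uprobe) or on function return (uretprobe) is
decided purely at attach time by the use_retprobe flag passed to
bpf_program__attach_uprobe() in usetup(). For reference, a libbpf
version with uprobe auto-attach support would also permit a declarative
spelling along these lines (hypothetical sketch, not part of this patch,
which attaches programmatically instead):

	/* hypothetical: let libbpf resolve the binary path and symbol */
	SEC("uprobe//proc/self/exe:uprobe_target")
	int bench_trigger_uprobe_auto(void *ctx)
	{
		__sync_add_and_fetch(&hits, 1);
		return 0;
	}
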
Add a benchmark to measure the overhead of uprobes and uretprobes. Also
have a baseline (no uprobe attached) benchmark.

On my dev machine, the baseline benchmark can trigger 130M
uprobe_target() invocations per second. When a uprobe is attached, this
falls to just 700K. With a uretprobe, we get down to 520K:

  $ sudo ./bench trig-uprobe-base -a
  Summary: hits  131.289 ± 2.872M/s

  $ sudo ./bench trig-uprobe -a
  Summary: hits    0.700 ± 0.008M/s

  $ sudo ./bench trig-uretprobe -a
  Summary: hits    0.518 ± 0.006M/s

This means the overhead is around 1.4 microseconds per uprobe trigger
(1 / 0.700M/s is about 1.43us, and the ~8ns cost of the baseline call
itself is negligible) and around 2 microseconds per uretprobe trigger.
For comparison, just doing a very low-overhead syscall (with no BPF
programs attached anywhere) gives:

  $ sudo ./bench trig-base -a
  Summary: hits    4.830 ± 0.036M/s

So a uprobe is about 7x more expensive than a bare syscall's kernel/user
mode switch.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 tools/testing/selftests/bpf/Makefile          |  4 +-
 tools/testing/selftests/bpf/bench.c           |  6 ++
 .../selftests/bpf/benchs/bench_trigger.c      | 94 +++++++++++++++++++
 .../selftests/bpf/progs/trigger_bench.c       |  7 ++
 4 files changed, 110 insertions(+), 1 deletion(-)