Message ID | 20211006222103.3631981-5-joannekoong@fb.com
---|---
State | Changes Requested
Delegated to | BPF
Series | Implement bitset maps, with bloom filter
On 10/6/21 3:21 PM, Joanne Koong wrote: > This patch adds benchmark tests for the throughput (for lookups + updates) > and the false positive rate of bloom filter lookups, as well as some > minor refactoring of the bash script for running the benchmarks. > > These benchmarks show that as the number of hash functions increases, > the throughput and the false positive rate of the bloom filter decreases. > From the benchmark data, the approximate average false-positive rates for > 8-byte values are roughly as follows: > > 1 hash function = ~30% > 2 hash functions = ~15% > 3 hash functions = ~5% > 4 hash functions = ~2.5% > 5 hash functions = ~1% > 6 hash functions = ~0.5% > 7 hash functions = ~0.35% > 8 hash functions = ~0.15% > 9 hash functions = ~0.1% > 10 hash functions = ~0% > > Signed-off-by: Joanne Koong <joannekoong@fb.com> > --- > tools/testing/selftests/bpf/Makefile | 6 +- > tools/testing/selftests/bpf/bench.c | 37 ++ > tools/testing/selftests/bpf/bench.h | 3 + > .../bpf/benchs/bench_bloom_filter_map.c | 411 ++++++++++++++++++ > .../bpf/benchs/run_bench_bloom_filter_map.sh | 28 ++ > .../bpf/benchs/run_bench_ringbufs.sh | 30 +- > .../selftests/bpf/benchs/run_common.sh | 48 ++ > tools/testing/selftests/bpf/bpf_util.h | 11 + > .../selftests/bpf/progs/bloom_filter_bench.c | 146 +++++++ > 9 files changed, 690 insertions(+), 30 deletions(-) > create mode 100644 tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c > create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh > create mode 100644 tools/testing/selftests/bpf/benchs/run_common.sh > create mode 100644 tools/testing/selftests/bpf/progs/bloom_filter_bench.c > > diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile > index c5c9a9f50d8d..66e1ad17acef 100644 > --- a/tools/testing/selftests/bpf/Makefile > +++ b/tools/testing/selftests/bpf/Makefile > @@ -517,18 +517,20 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) > # Benchmark runner > $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) > $(call msg,CC,,$@) > - $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ > + $(Q)$(CC) $(CFLAGS) -O2 -c $(filter %.c,$^) $(LDLIBS) -o $@ > $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h > $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h > $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ > $(OUTPUT)/perfbuf_bench.skel.h > +$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h > $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) > $(OUTPUT)/bench: LDLIBS += -lm > $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ > $(OUTPUT)/bench_count.o \ > $(OUTPUT)/bench_rename.o \ > $(OUTPUT)/bench_trigger.o \ > - $(OUTPUT)/bench_ringbufs.o > + $(OUTPUT)/bench_ringbufs.o \ > + $(OUTPUT)/bench_bloom_filter_map.o > $(call msg,BINARY,,$@) > $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) > > diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c > index 6ea15b93a2f8..e836bacd6f9d 100644 > --- a/tools/testing/selftests/bpf/bench.c > +++ b/tools/testing/selftests/bpf/bench.c > @@ -51,6 +51,35 @@ void setup_libbpf() > fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err); > } > > +void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns) > +{ > + long total = res->false_hits + res->hits + res->drops; > + > + printf("Iter %3d (%7.3lfus): ", > + iter, (delta_ns - 1000000000) / 1000.0); > + > + printf("%ld false hits of %ld 
total operations. Percentage = %2.2f %%\n", > + res->false_hits, total, ((float)res->false_hits / total) * 100); > +} > + > +void false_hits_report_final(struct bench_res res[], int res_cnt) > +{ > + long total_hits = 0, total_drops = 0, total_false_hits = 0, total_ops = 0; > + int i; > + > + for (i = 0; i < res_cnt; i++) { > + total_hits += res[i].hits; > + total_false_hits += res[i].false_hits; > + total_drops += res[i].drops; > + } > + total_ops = total_hits + total_false_hits + total_drops; > + > + printf("Summary: %ld false hits of %ld total operations. ", > + total_false_hits, total_ops); > + printf("Percentage = %2.2f %%\n", > + ((float)total_false_hits / total_ops) * 100); > +} > + > void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) > { > double hits_per_sec, drops_per_sec; > @@ -132,9 +161,11 @@ static const struct argp_option opts[] = { > }; > > extern struct argp bench_ringbufs_argp; > +extern struct argp bench_bloom_filter_map_argp; > > static const struct argp_child bench_parsers[] = { > { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, > + { &bench_bloom_filter_map_argp, 0, "Bloom filter map benchmark", 0 }, > {}, > }; > > @@ -323,6 +354,9 @@ extern const struct bench bench_rb_libbpf; > extern const struct bench bench_rb_custom; > extern const struct bench bench_pb_libbpf; > extern const struct bench bench_pb_custom; > +extern const struct bench bench_bloom_filter_lookup; > +extern const struct bench bench_bloom_filter_update; > +extern const struct bench bench_bloom_filter_false_positive; > > static const struct bench *benchs[] = { > &bench_count_global, > @@ -344,6 +378,9 @@ static const struct bench *benchs[] = { > &bench_rb_custom, > &bench_pb_libbpf, > &bench_pb_custom, > + &bench_bloom_filter_lookup, > + &bench_bloom_filter_update, > + &bench_bloom_filter_false_positive, > }; > > static void setup_benchmark() > diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h > index c1f48a473b02..624c6b11501f 100644 > --- a/tools/testing/selftests/bpf/bench.h > +++ b/tools/testing/selftests/bpf/bench.h > @@ -33,6 +33,7 @@ struct env { > struct bench_res { > long hits; > long drops; > + long false_hits; > }; > > struct bench { > @@ -56,6 +57,8 @@ extern const struct bench *bench; > void setup_libbpf(); > void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns); > void hits_drops_report_final(struct bench_res res[], int res_cnt); > +void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); > +void false_hits_report_final(struct bench_res res[], int res_cnt); > > static inline __u64 get_time_ns() { > struct timespec t; > diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c > new file mode 100644 > index 000000000000..25d6b8179c8e > --- /dev/null > +++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c > @@ -0,0 +1,411 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2021 Facebook */ > + > +#include <argp.h> > +#include <linux/log2.h> > +#include <pthread.h> > +#include "bench.h" > +#include "bloom_filter_bench.skel.h" > +#include "bpf_util.h" > + > +static struct ctx { > + struct bloom_filter_bench *skel; > + > + int bloom_filter_fd; > + int hashmap_fd; > + int array_map_fd; > + > + pthread_mutex_t map_done_mtx; > + pthread_cond_t map_done; > + bool map_prepare_err; > + > + __u32 next_map_idx; > + > +} ctx = { > + .map_done_mtx = PTHREAD_MUTEX_INITIALIZER, > + 
.map_done = PTHREAD_COND_INITIALIZER, > +}; > + > +struct stat { > + __u32 stats[3]; > +}; > + > +static struct { > + __u32 nr_entries; > + __u8 nr_hash_funcs; > + __u32 value_size; > +} args = { > + .nr_entries = 1000, > + .nr_hash_funcs = 3, > + .value_size = 8, > +}; > + > +enum { > + ARG_NR_ENTRIES = 3000, > + ARG_NR_HASH_FUNCS = 3001, > + ARG_VALUE_SIZE = 3002, > +}; > + > +static const struct argp_option opts[] = { > + { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0, > + "Set number of expected unique entries in the bloom filter"}, > + { "nr_hash_funcs", ARG_NR_HASH_FUNCS, "NR_HASH_FUNCS", 0, > + "Set number of hash functions in the bloom filter"}, > + { "value_size", ARG_VALUE_SIZE, "VALUE_SIZE", 0, > + "Set value size (in bytes) of bloom filter entries"}, > + {}, > +}; > + > +static error_t parse_arg(int key, char *arg, struct argp_state *state) > +{ > + switch (key) { > + case ARG_NR_ENTRIES: > + args.nr_entries = strtol(arg, NULL, 10); > + if (args.nr_entries == 0) { > + fprintf(stderr, "Invalid nr_entries count."); > + argp_usage(state); > + } > + break; > + case ARG_NR_HASH_FUNCS: > + args.nr_hash_funcs = strtol(arg, NULL, 10); > + if (args.nr_hash_funcs == 0 || args.nr_hash_funcs > 15) { > + fprintf(stderr, > + "The bloom filter must use 1 to 15 hash functions."); > + argp_usage(state); > + } > + break; > + case ARG_VALUE_SIZE: > + args.value_size = strtol(arg, NULL, 10); > + if (args.value_size < 2 || args.value_size > 256) { > + fprintf(stderr, > + "Invalid value size. Must be between 2 and 256 bytes"); > + argp_usage(state); > + } > + break; > + default: > + return ARGP_ERR_UNKNOWN; > + } > + > + return 0; > +} > + > +/* exported into benchmark runner */ > +const struct argp bench_bloom_filter_map_argp = { > + .options = opts, > + .parser = parse_arg, > +}; > + > +static void validate(void) > +{ > + if (env.consumer_cnt != 1) { > + fprintf(stderr, > + "The bloom filter benchmarks do not support multi-consumer use\n"); > + exit(1); > + } > +} > + > +static inline void trigger_bpf_program(void) > +{ > + syscall(__NR_getpgid); > +} > + > +static void *producer(void *input) > +{ > + while (true) > + trigger_bpf_program(); > + > + return NULL; > +} > + > +static void *map_prepare_thread(void *arg) > +{ > + __u32 val_size, i; > + void *val = NULL; > + int err; > + > + val_size = args.value_size; > + val = malloc(val_size); > + if (!val) { > + ctx.map_prepare_err = true; > + goto done; > + } > + > + while (true) { > + i = __atomic_add_fetch(&ctx.next_map_idx, 1, __ATOMIC_RELAXED); > + if (i > args.nr_entries) > + break; > + > +again: > + /* Populate hashmap, bloom filter map, and array map with the same > + * random values > + */ > + err = syscall(__NR_getrandom, val, val_size, 0); > + if (err != val_size) { > + ctx.map_prepare_err = true; > + fprintf(stderr, "failed to get random value: %d\n", -errno); > + break; > + } > + > + err = bpf_map_update_elem(ctx.hashmap_fd, val, val, BPF_NOEXIST); > + if (err) { > + if (err != -EEXIST) { > + ctx.map_prepare_err = true; > + fprintf(stderr, "failed to add elem to hashmap: %d\n", > + -errno); > + break; > + } > + goto again; > + } > + > + i--; > + > + err = bpf_map_update_elem(ctx.array_map_fd, &i, val, 0); > + if (err) { > + ctx.map_prepare_err = true; > + fprintf(stderr, "failed to add elem to array map: %d\n", -errno); > + break; > + } > + > + err = bpf_map_update_elem(ctx.bloom_filter_fd, NULL, val, 0); > + if (err) { > + ctx.map_prepare_err = true; > + fprintf(stderr, > + "failed to add elem to bloom filter map: %d\n", -errno); > + 
break; > + } > + } > +done: > + pthread_mutex_lock(&ctx.map_done_mtx); > + pthread_cond_signal(&ctx.map_done); > + pthread_mutex_unlock(&ctx.map_done_mtx); > + > + if (val) > + free(val); > + > + return NULL; > +} > + > +static void populate_maps(void) > +{ > + unsigned int nr_cpus = bpf_num_possible_cpus(); > + pthread_t map_thread; > + int i, err; > + > + ctx.bloom_filter_fd = bpf_map__fd(ctx.skel->maps.bloom_filter_map); > + ctx.hashmap_fd = bpf_map__fd(ctx.skel->maps.hashmap); > + ctx.array_map_fd = bpf_map__fd(ctx.skel->maps.array_map); > + > + for (i = 0; i < nr_cpus; i++) { > + err = pthread_create(&map_thread, NULL, map_prepare_thread, > + NULL); > + if (err) { > + fprintf(stderr, "failed to create pthread: %d\n", -errno); > + exit(1); > + } > + } > + > + pthread_mutex_lock(&ctx.map_done_mtx); > + pthread_cond_wait(&ctx.map_done, &ctx.map_done_mtx); > + pthread_mutex_unlock(&ctx.map_done_mtx); > + > + if (ctx.map_prepare_err) > + exit(1); > +} > + > +static struct bloom_filter_bench *setup_skeleton(bool hashmap_use_bloom_filter) > +{ > + struct bloom_filter_bench *skel; > + int err; > + > + setup_libbpf(); > + > + skel = bloom_filter_bench__open(); > + if (!skel) { > + fprintf(stderr, "failed to open skeleton\n"); > + exit(1); > + } > + > + skel->rodata->hashmap_use_bloom_filter = hashmap_use_bloom_filter; > + > + /* Resize number of entries */ > + err = bpf_map__resize(skel->maps.hashmap, args.nr_entries); > + if (err) { > + fprintf(stderr, "failed to resize hashmap\n"); > + exit(1); > + } > + > + err = bpf_map__resize(skel->maps.array_map, args.nr_entries); > + if (err) { > + fprintf(stderr, "failed to resize array map\n"); > + exit(1); > + } > + > + err = bpf_map__resize(skel->maps.bloom_filter_map, > + BPF_BLOOM_FILTER_BITSET_SZ(args.nr_entries, > + args.nr_hash_funcs)); > + if (err) { > + fprintf(stderr, "failed to resize bloom filter\n"); > + exit(1); > + } > + > + /* Set value size */ > + err = bpf_map__set_value_size(skel->maps.array_map, args.value_size); > + if (err) { > + fprintf(stderr, "failed to set array map value size\n"); > + exit(1); > + } > + > + err = bpf_map__set_value_size(skel->maps.bloom_filter_map, args.value_size); > + if (err) { > + fprintf(stderr, "failed to set bloom filter map value size\n"); > + exit(1); > + } > + > + err = bpf_map__set_value_size(skel->maps.hashmap, args.value_size); > + if (err) { > + fprintf(stderr, "failed to set hashmap value size\n"); > + exit(1); > + } > + /* For the hashmap, we use the value as the key as well */ > + err = bpf_map__set_key_size(skel->maps.hashmap, args.value_size); > + if (err) { > + fprintf(stderr, "failed to set hashmap value size\n"); > + exit(1); > + } > + > + skel->bss->value_sz_nr_u32s = (args.value_size + sizeof(__u32) - 1) > + / sizeof(__u32); > + > + /* Set number of hash functions */ > + err = bpf_map__set_map_extra(skel->maps.bloom_filter_map, > + args.nr_hash_funcs); > + if (err) { > + fprintf(stderr, "failed to set bloom filter nr_hash_funcs\n"); > + exit(1); > + } > + > + if (bloom_filter_bench__load(skel)) { > + fprintf(stderr, "failed to load skeleton\n"); > + exit(1); > + } > + > + return skel; > +} > + > +static void bench_setup_lookup(void) > +{ > + struct bpf_link *link; > + > + ctx.skel = setup_skeleton(true); > + > + populate_maps(); > + > + link = bpf_program__attach(ctx.skel->progs.prog_bloom_filter_lookup); > + if (!link) { > + fprintf(stderr, "failed to attach program!\n"); > + exit(1); > + } > +} > + > +static void bench_setup_update(void) > +{ > + struct bpf_link *link; > + > + 
ctx.skel = setup_skeleton(true); > + > + populate_maps(); > + > + link = bpf_program__attach(ctx.skel->progs.prog_bloom_filter_update); > + if (!link) { > + fprintf(stderr, "failed to attach program!\n"); > + exit(1); > + } > +} > + > +static void hashmap_lookup_setup(void) > +{ > + struct bpf_link *link; > + > + ctx.skel = setup_skeleton(true); > + > + populate_maps(); > + > + link = bpf_program__attach(ctx.skel->progs.prog_bloom_filter_hashmap_lookup); > + if (!link) { > + fprintf(stderr, "failed to attach program!\n"); > + exit(1); > + } > +} > + > +static void measure(struct bench_res *res) > +{ > + long total_hits = 0, total_drops = 0, total_false_hits = 0; > + static long last_hits, last_drops, last_false_hits; > + unsigned int nr_cpus = bpf_num_possible_cpus(); > + int hit_key, drop_key, false_hit_key; > + int i; > + > + hit_key = ctx.skel->rodata->hit_key; > + drop_key = ctx.skel->rodata->drop_key; > + false_hit_key = ctx.skel->rodata->false_hit_key; > + > + if (ctx.skel->bss->error != 0) { > + fprintf(stderr, "error (%d) when searching the bitset\n", > + ctx.skel->bss->error); > + exit(1); > + } > + > + for (i = 0; i < nr_cpus; i++) { > + struct stat *s = (void *)&ctx.skel->bss->percpu_stats[i]; > + > + total_hits += s->stats[hit_key]; > + total_drops += s->stats[drop_key]; > + total_false_hits += s->stats[false_hit_key]; > + } > + > + res->hits = total_hits - last_hits; > + res->drops = total_drops - last_drops; > + res->false_hits = total_false_hits - last_false_hits; > + > + last_hits = total_hits; > + last_drops = total_drops; > + last_false_hits = total_false_hits; > +} > + > +static void *consumer(void *input) > +{ > + return NULL; > +} > + > +const struct bench bench_bloom_filter_lookup = { > + .name = "bloom-filter-lookup", > + .validate = validate, > + .setup = bench_setup_lookup, > + .producer_thread = producer, > + .consumer_thread = consumer, > + .measure = measure, > + .report_progress = hits_drops_report_progress, > + .report_final = hits_drops_report_final, > +}; > + > +const struct bench bench_bloom_filter_update = { > + .name = "bloom-filter-update", > + .validate = validate, > + .setup = bench_setup_update, > + .producer_thread = producer, > + .consumer_thread = consumer, > + .measure = measure, > + .report_progress = hits_drops_report_progress, > + .report_final = hits_drops_report_final, > +}; > + > +const struct bench bench_bloom_filter_false_positive = { > + .name = "bloom-filter-false-positive", > + .validate = validate, > + .setup = hashmap_lookup_setup, > + .producer_thread = producer, > + .consumer_thread = consumer, > + .measure = measure, > + .report_progress = false_hits_report_progress, > + .report_final = false_hits_report_final, > +}; > diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh > new file mode 100755 > index 000000000000..5d4f84ad9973 > --- /dev/null > +++ b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh > @@ -0,0 +1,28 @@ > +#!/bin/bash > +# SPDX-License-Identifier: GPL-2.0 > + > +source ./benchs/run_common.sh > + > +set -eufo pipefail > + > +header "Bitset map" > +for v in 2, 4, 8, 16, 40; do > +for t in 1 4 8 12 16; do > +for h in {1..10}; do > +subtitle "value_size: $v bytes, # threads: $t, # hashes: $h" > + for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do > + printf "%'d entries -\n" $e > + printf "\t" > + summarize "Lookups, total operations: " \ > + "$($RUN_BENCH -p $t --nr_hash_funcs $h 
--nr_entries $e --value_size $v bloom-filter-lookup)" > + printf "\t" > + summarize "Updates, total operations: " \ > + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-filter-update)" > + printf "\t" > + summarize_percentage "False positive rate: " \ > + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-filter-false-positive)" > + done > + printf "\n" > +done > +done > +done > diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh > index af4aa04caba6..ada028aa9007 100755 > --- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh > +++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh > @@ -1,34 +1,8 @@ > #!/bin/bash > > -set -eufo pipefail > - > -RUN_BENCH="sudo ./bench -w3 -d10 -a" > - > -function hits() > -{ > - echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" > -} > - > -function drops() > -{ > - echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" > -} > +source ./benchs/run_common.sh > > -function header() > -{ > - local len=${#1} > - > - printf "\n%s\n" "$1" > - for i in $(seq 1 $len); do printf '='; done > - printf '\n' > -} > - > -function summarize() > -{ > - bench="$1" > - summary=$(echo $2 | tail -n1) > - printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" > -} > +set -eufo pipefail > > header "Single-producer, parallel producer" > for b in rb-libbpf rb-custom pb-libbpf pb-custom; do > diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh > new file mode 100644 > index 000000000000..670f23b037c4 > --- /dev/null > +++ b/tools/testing/selftests/bpf/benchs/run_common.sh > @@ -0,0 +1,48 @@ > +#!/bin/bash > +# SPDX-License-Identifier: GPL-2.0 > + > +RUN_BENCH="sudo ./bench -w3 -d10 -a" > + > +function header() > +{ > + local len=${#1} > + > + printf "\n%s\n" "$1" > + for i in $(seq 1 $len); do printf '='; done > + printf '\n' > +} > + > +function subtitle() > +{ > + local len=${#1} > + printf "\t%s\n" "$1" > +} > + > +function hits() > +{ > + echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" > +} > + > +function drops() > +{ > + echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" > +} > + > +function percentage() > +{ > + echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/" > +} > + > +function summarize() > +{ > + bench="$1" > + summary=$(echo $2 | tail -n1) > + printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" > +} > + > +function summarize_percentage() > +{ > + bench="$1" > + summary=$(echo $2 | tail -n1) > + printf "%-20s %s%%\n" "$bench" "$(percentage $summary)" > +} > diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h > index a3352a64c067..a260a963efda 100644 > --- a/tools/testing/selftests/bpf/bpf_util.h > +++ b/tools/testing/selftests/bpf/bpf_util.h > @@ -40,4 +40,15 @@ static inline unsigned int bpf_num_possible_cpus(void) > (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) > #endif > > +/* Helper macro for computing the optimal number of bits for a > + * bloom filter map. > + * > + * Mathematically, the optimal bitset size that minimizes the > + * false positive probability is n * k / ln(2) where n is the expected > + * number of unique entries in the bloom filter and k is the number of > + * hash functions. We use 7 / 5 to approximate 1 / ln(2). 
> + */ > +#define BPF_BLOOM_FILTER_BITSET_SZ(nr_uniq_entries, nr_hash_funcs) \ > + ((nr_uniq_entries) * (nr_hash_funcs) / 5 * 7) > + > #endif /* __BPF_UTIL__ */ > diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c > new file mode 100644 > index 000000000000..a44a47ddc4d7 > --- /dev/null > +++ b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c > @@ -0,0 +1,146 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2021 Facebook */ > + > +#include <errno.h> > +#include <linux/bpf.h> > +#include <stdbool.h> > +#include <bpf/bpf_helpers.h> > + > +char _license[] SEC("license") = "GPL"; > + > +struct bpf_map; > + > +struct { > + __uint(type, BPF_MAP_TYPE_ARRAY); > + __uint(key_size, sizeof(__u32)); > + /* max entries and value_size will be set programmatically. > + * They are configurable from the userspace bench program. > + */ > +} array_map SEC(".maps"); > + > +struct { > + __uint(type, BPF_MAP_TYPE_BITSET); > + /* max entries, value_size, and # of hash functions will be set > + * programmatically. They are configurable from the userspace > + * bench program. > + */ > +} bloom_filter_map SEC(".maps"); > + > +struct { > + __uint(type, BPF_MAP_TYPE_HASH); > + /* max entries, key_size, and value_size, will be set > + * programmatically. They are configurable from the userspace > + * bench program. > + */ > +} hashmap SEC(".maps"); > + > +struct callback_ctx { > + struct bpf_map *map; > + bool update; > +}; > + > +/* Tracks the number of hits, drops, and false hits */ > +struct { > + __u32 stats[3]; > +} __attribute__((__aligned__(256))) percpu_stats[256]; > + > +__u8 value_sz_nr_u32s; > + > +const __u32 hit_key = 0; > +const __u32 drop_key = 1; > +const __u32 false_hit_key = 2; > + > +const volatile bool hashmap_use_bloom_filter = true; > + > +int error = 0; > + > +static __always_inline void log_result(__u32 key) > +{ > + __u32 cpu = bpf_get_smp_processor_id(); > + > + percpu_stats[cpu & 255].stats[key]++; > +} > + > +static __u64 > +bloom_filter_callback(struct bpf_map *map, __u32 *key, void *val, > + struct callback_ctx *data) > +{ > + int err; > + > + if (data->update) > + err = bpf_map_push_elem(data->map, val, 0); > + else > + err = bpf_map_peek_elem(data->map, val); > + > + if (err) { > + error |= 1; > + return 1; /* stop the iteration */ > + } > + > + log_result(hit_key); > + > + return 0; > +} > + > +SEC("fentry/__x64_sys_getpgid") > +int prog_bloom_filter_lookup(void *ctx) > +{ > + struct callback_ctx data; > + > + data.map = (struct bpf_map *)&bloom_filter_map; > + data.update = false; > + > + bpf_for_each_map_elem(&array_map, bloom_filter_callback, &data, 0); > + > + return 0; > +} > + > +SEC("fentry/__x64_sys_getpgid") > +int prog_bloom_filter_update(void *ctx) > +{ > + struct callback_ctx data; > + > + data.map = (struct bpf_map *)&bloom_filter_map; > + data.update = true; > + > + bpf_for_each_map_elem(&array_map, bloom_filter_callback, &data, 0); > + > + return 0; > +} > + > +SEC("fentry/__x64_sys_getpgid") > +int prog_bloom_filter_hashmap_lookup(void *ctx) > +{ > + __u64 *result; > + int i, j, err; > + > + __u32 val[64] = {0}; > + > + for (i = 0; i < 1024; i++) { > + for (j = 0; j < value_sz_nr_u32s && j < 64; j++) > + val[j] = bpf_get_prandom_u32(); > + I tried out prepopulating these random values from the userspace side (where we generate a random sequence of 10000 bytes and put that in a bpf array map, then iterate through the bpf array map in the bpf program; when I tried implementing it 
using a global array, I saw verifier errors when indexing into the array). However, this runs into issues where we do not know ahead of time how many random bytes will be needed (eg on a local qemu server, 1 million may be enough whereas on other servers, 1 million is too little and results in benchmark stats that show there is no discernible difference with vs. without the bloom filter). Additionally, this slows down the bench program as well, since we need to generate all of these random values in the setup() portion of the program. I'm not convinced that prepopulating the values ahead of time nets us anything - if the concern is that this slows down the bpf program, I think this slows down the program in both the hashmap with and without bloom filter cases; since we are mainly only interested in the delta between these two scenarios, I don't think this ends up mattering that much. > + if (hashmap_use_bloom_filter) { > + err = bpf_map_peek_elem(&bloom_filter_map, val); > + if (err) { > + if (err != -ENOENT) { > + error |= 3; > + return 0; > + } > + log_result(hit_key); > + continue; > + } > + } > + > + result = bpf_map_lookup_elem(&hashmap, val); > + if (result) { > + log_result(hit_key); > + } else { > + if (hashmap_use_bloom_filter) > + log_result(false_hit_key); > + log_result(drop_key); > + } > + } > + > + return 0; > +}
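[Editorial note] For reference, the sizing rule behind the patch's BPF_BLOOM_FILTER_BITSET_SZ macro: with n expected unique entries and k hash functions, the bitset size that minimizes the false positive probability is n * k / ln(2), and the macro approximates 1/ln(2) (~1.44) with 7/5. At that size the theoretical false positive rate is roughly 2^-k; the measured rates in the commit message also depend on how the kernel rounds the bit count and on hash behavior. Below is a minimal, self-contained sketch showing what the macro evaluates to for the benchmark defaults; the main() harness is illustrative only and is not part of the patch.

#include <stdio.h>

/* Same approximation as in the patch: optimal bits = n * k / ln(2),
 * with 7/5 used to approximate 1/ln(2) (~1.44).
 */
#define BPF_BLOOM_FILTER_BITSET_SZ(nr_uniq_entries, nr_hash_funcs) \
	((nr_uniq_entries) * (nr_hash_funcs) / 5 * 7)

int main(void)
{
	unsigned int nr_entries = 1000;		/* benchmark default --nr_entries */
	unsigned int nr_hash_funcs = 3;		/* benchmark default --nr_hash_funcs */

	/* 1000 * 3 / 5 * 7 = 4200 bits for the default configuration */
	printf("bitset size: %u bits\n",
	       BPF_BLOOM_FILTER_BITSET_SZ(nr_entries, nr_hash_funcs));
	return 0;
}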
On Wed, Oct 6, 2021 at 3:27 PM Joanne Koong <joannekoong@fb.com> wrote: > > This patch adds benchmark tests for the throughput (for lookups + updates) > and the false positive rate of bloom filter lookups, as well as some > minor refactoring of the bash script for running the benchmarks. > > These benchmarks show that as the number of hash functions increases, > the throughput and the false positive rate of the bloom filter decreases. > From the benchmark data, the approximate average false-positive rates for > 8-byte values are roughly as follows: > > 1 hash function = ~30% > 2 hash functions = ~15% > 3 hash functions = ~5% > 4 hash functions = ~2.5% > 5 hash functions = ~1% > 6 hash functions = ~0.5% > 7 hash functions = ~0.35% > 8 hash functions = ~0.15% > 9 hash functions = ~0.1% > 10 hash functions = ~0% > > Signed-off-by: Joanne Koong <joannekoong@fb.com> > --- > tools/testing/selftests/bpf/Makefile | 6 +- > tools/testing/selftests/bpf/bench.c | 37 ++ > tools/testing/selftests/bpf/bench.h | 3 + > .../bpf/benchs/bench_bloom_filter_map.c | 411 ++++++++++++++++++ > .../bpf/benchs/run_bench_bloom_filter_map.sh | 28 ++ > .../bpf/benchs/run_bench_ringbufs.sh | 30 +- > .../selftests/bpf/benchs/run_common.sh | 48 ++ > tools/testing/selftests/bpf/bpf_util.h | 11 + > .../selftests/bpf/progs/bloom_filter_bench.c | 146 +++++++ > 9 files changed, 690 insertions(+), 30 deletions(-) > create mode 100644 tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c > create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh > create mode 100644 tools/testing/selftests/bpf/benchs/run_common.sh > create mode 100644 tools/testing/selftests/bpf/progs/bloom_filter_bench.c > [...] > +static struct ctx { > + struct bloom_filter_bench *skel; > + > + int bloom_filter_fd; > + int hashmap_fd; > + int array_map_fd; > + > + pthread_mutex_t map_done_mtx; > + pthread_cond_t map_done; > + bool map_prepare_err; > + > + __u32 next_map_idx; > + nit: unnecessary empty line > +} ctx = { > + .map_done_mtx = PTHREAD_MUTEX_INITIALIZER, > + .map_done = PTHREAD_COND_INITIALIZER, > +}; > + [...] > + > +static void populate_maps(void) > +{ > + unsigned int nr_cpus = bpf_num_possible_cpus(); > + pthread_t map_thread; > + int i, err; > + > + ctx.bloom_filter_fd = bpf_map__fd(ctx.skel->maps.bloom_filter_map); > + ctx.hashmap_fd = bpf_map__fd(ctx.skel->maps.hashmap); > + ctx.array_map_fd = bpf_map__fd(ctx.skel->maps.array_map); > + > + for (i = 0; i < nr_cpus; i++) { > + err = pthread_create(&map_thread, NULL, map_prepare_thread, > + NULL); > + if (err) { > + fprintf(stderr, "failed to create pthread: %d\n", -errno); > + exit(1); > + } > + } > + > + pthread_mutex_lock(&ctx.map_done_mtx); > + pthread_cond_wait(&ctx.map_done, &ctx.map_done_mtx); This is a fragile way to use cond_wait. If prepare finishes faster than you get to this cond_wait, you'll be stuck forevere. Also cond_var can spuriously wake up, if I remember correctly. 
So the pattern is usually to do checking of some condition in a loop (inside the locked region) and if the condition doesn't hold, cond_wait on it (I renamed ctx.map_done into ctx.map_done_cv): pthread_mutex_lock(&ctx.map_done_mtx); while (!ctx.map_done /* this is bool now */) pthread_cond_wait(&ctx.map_done_cv, &ctx.map_done_mtx); pthread_mutex_unlock(&ctx.map_done_mtx); > + pthread_mutex_unlock(&ctx.map_done_mtx); > + > + if (ctx.map_prepare_err) > + exit(1); > +} > + > +static struct bloom_filter_bench *setup_skeleton(bool hashmap_use_bloom_filter) > +{ > + struct bloom_filter_bench *skel; > + int err; > + > + setup_libbpf(); > + > + skel = bloom_filter_bench__open(); > + if (!skel) { > + fprintf(stderr, "failed to open skeleton\n"); > + exit(1); > + } > + > + skel->rodata->hashmap_use_bloom_filter = hashmap_use_bloom_filter; > + > + /* Resize number of entries */ > + err = bpf_map__resize(skel->maps.hashmap, args.nr_entries); > + if (err) { These errors can't happen unless args.nr_entries is zero, so I'd just drop them. But please use bpf_map__set_max_entries() instead, bpf_map__resize() is going to be deprecated. > + fprintf(stderr, "failed to resize hashmap\n"); > + exit(1); > + } > + > + err = bpf_map__resize(skel->maps.array_map, args.nr_entries); > + if (err) { > + fprintf(stderr, "failed to resize array map\n"); > + exit(1); > + } > + > + err = bpf_map__resize(skel->maps.bloom_filter_map, > + BPF_BLOOM_FILTER_BITSET_SZ(args.nr_entries, > + args.nr_hash_funcs)); > + if (err) { > + fprintf(stderr, "failed to resize bloom filter\n"); > + exit(1); > + } > + > + /* Set value size */ > + err = bpf_map__set_value_size(skel->maps.array_map, args.value_size); > + if (err) { same here, error can only happen if the map is already created in the kernel, so be pragmatic and skip that (especially in benchmarks) > + fprintf(stderr, "failed to set array map value size\n"); > + exit(1); > + } > + > + err = bpf_map__set_value_size(skel->maps.bloom_filter_map, args.value_size); > + if (err) { > + fprintf(stderr, "failed to set bloom filter map value size\n"); > + exit(1); > + } > + [...] > diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h > index a3352a64c067..a260a963efda 100644 > --- a/tools/testing/selftests/bpf/bpf_util.h > +++ b/tools/testing/selftests/bpf/bpf_util.h > @@ -40,4 +40,15 @@ static inline unsigned int bpf_num_possible_cpus(void) > (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) > #endif > > +/* Helper macro for computing the optimal number of bits for a > + * bloom filter map. > + * > + * Mathematically, the optimal bitset size that minimizes the > + * false positive probability is n * k / ln(2) where n is the expected > + * number of unique entries in the bloom filter and k is the number of > + * hash functions. We use 7 / 5 to approximate 1 / ln(2). > + */ > +#define BPF_BLOOM_FILTER_BITSET_SZ(nr_uniq_entries, nr_hash_funcs) \ > + ((nr_uniq_entries) * (nr_hash_funcs) / 5 * 7) hm.. I thought you were going to add into include/linux/uapi/bpf.h, why did you change your mind? 
> + > #endif /* __BPF_UTIL__ */ > diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c > new file mode 100644 > index 000000000000..a44a47ddc4d7 > --- /dev/null > +++ b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c > @@ -0,0 +1,146 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2021 Facebook */ > + > +#include <errno.h> > +#include <linux/bpf.h> > +#include <stdbool.h> > +#include <bpf/bpf_helpers.h> > + > +char _license[] SEC("license") = "GPL"; > + > +struct bpf_map; > + > +struct { > + __uint(type, BPF_MAP_TYPE_ARRAY); > + __uint(key_size, sizeof(__u32)); > + /* max entries and value_size will be set programmatically. > + * They are configurable from the userspace bench program. > + */ > +} array_map SEC(".maps"); > + > +struct { > + __uint(type, BPF_MAP_TYPE_BITSET); > + /* max entries, value_size, and # of hash functions will be set > + * programmatically. They are configurable from the userspace > + * bench program. > + */ > +} bloom_filter_map SEC(".maps"); > + > +struct { > + __uint(type, BPF_MAP_TYPE_HASH); > + /* max entries, key_size, and value_size, will be set > + * programmatically. They are configurable from the userspace > + * bench program. > + */ > +} hashmap SEC(".maps"); > + > +struct callback_ctx { > + struct bpf_map *map; > + bool update; > +}; > + > +/* Tracks the number of hits, drops, and false hits */ > +struct { > + __u32 stats[3]; > +} __attribute__((__aligned__(256))) percpu_stats[256]; > + > +__u8 value_sz_nr_u32s; > + > +const __u32 hit_key = 0; > +const __u32 drop_key = 1; > +const __u32 false_hit_key = 2; > + > +const volatile bool hashmap_use_bloom_filter = true; > + > +int error = 0; > + > +static __always_inline void log_result(__u32 key) > +{ > + __u32 cpu = bpf_get_smp_processor_id(); > + > + percpu_stats[cpu & 255].stats[key]++; > +} > + > +static __u64 > +bloom_filter_callback(struct bpf_map *map, __u32 *key, void *val, > + struct callback_ctx *data) > +{ > + int err; > + > + if (data->update) > + err = bpf_map_push_elem(data->map, val, 0); > + else > + err = bpf_map_peek_elem(data->map, val); > + > + if (err) { > + error |= 1; > + return 1; /* stop the iteration */ > + } > + > + log_result(hit_key); > + > + return 0; > +} > + > +SEC("fentry/__x64_sys_getpgid") > +int prog_bloom_filter_lookup(void *ctx) > +{ > + struct callback_ctx data; > + > + data.map = (struct bpf_map *)&bloom_filter_map; > + data.update = false; > + > + bpf_for_each_map_elem(&array_map, bloom_filter_callback, &data, 0); > + > + return 0; > +} > + > +SEC("fentry/__x64_sys_getpgid") > +int prog_bloom_filter_update(void *ctx) > +{ > + struct callback_ctx data; > + > + data.map = (struct bpf_map *)&bloom_filter_map; > + data.update = true; > + > + bpf_for_each_map_elem(&array_map, bloom_filter_callback, &data, 0); > + > + return 0; > +} > + > +SEC("fentry/__x64_sys_getpgid") > +int prog_bloom_filter_hashmap_lookup(void *ctx) > +{ > + __u64 *result; > + int i, j, err; > + > + __u32 val[64] = {0}; > + > + for (i = 0; i < 1024; i++) { > + for (j = 0; j < value_sz_nr_u32s && j < 64; j++) > + val[j] = bpf_get_prandom_u32(); > + > + if (hashmap_use_bloom_filter) { this is purely subjective, so take it for what it is worth. Using full "bloom_filter" everywhere is a bit mouthful and causes unnecessarily long identifiers. I think "bloom" itself is very recognizable and doesn't detract from readability (I'd claim it actually improves it). 
When using a bench tool manually, having to type "bloom-filter-update" if the equivalent "bloom-update" is just as good, would get old pretty fast for me. Similarly program names above, why "prog_" prefix? What does it contribute except causes longer identifiers in skeleton? > + err = bpf_map_peek_elem(&bloom_filter_map, val); > + if (err) { > + if (err != -ENOENT) { > + error |= 3; > + return 0; > + } > + log_result(hit_key); > + continue; > + } > + } > + > + result = bpf_map_lookup_elem(&hashmap, val); > + if (result) { > + log_result(hit_key); > + } else { > + if (hashmap_use_bloom_filter) > + log_result(false_hit_key); > + log_result(drop_key); > + } > + } > + > + return 0; > +} > -- > 2.30.2 >
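[Editorial note] To make the condition-variable fix described above concrete, here is a minimal sketch of the predicate-protected wait, split into the signal and wait sides. The function names are made up for illustration and this is not the benchmark code itself. The bool predicate guards against both spurious wakeups and the signal firing before the waiter reaches pthread_cond_wait().

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t map_done_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t map_done_cv = PTHREAD_COND_INITIALIZER;
static bool map_done;

/* signaling side (end of map_prepare_thread) */
static void signal_map_done(void)
{
	pthread_mutex_lock(&map_done_mtx);
	map_done = true;			/* set predicate under the lock */
	pthread_cond_signal(&map_done_cv);
	pthread_mutex_unlock(&map_done_mtx);
}

/* waiting side (populate_maps) */
static void wait_map_done(void)
{
	pthread_mutex_lock(&map_done_mtx);
	while (!map_done)			/* re-check after every wakeup */
		pthread_cond_wait(&map_done_cv, &map_done_mtx);
	pthread_mutex_unlock(&map_done_mtx);
}

populate_maps() would then check ctx.map_prepare_err after the wait returns, as the patch already does.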
On Wed, Oct 6, 2021 at 3:37 PM Joanne Koong <joannekoong@fb.com> wrote: > > On 10/6/21 3:21 PM, Joanne Koong wrote: > > > This patch adds benchmark tests for the throughput (for lookups + updates) > > and the false positive rate of bloom filter lookups, as well as some > > minor refactoring of the bash script for running the benchmarks. > > > > These benchmarks show that as the number of hash functions increases, > > the throughput and the false positive rate of the bloom filter decreases. > > From the benchmark data, the approximate average false-positive rates for > > 8-byte values are roughly as follows: > > > > 1 hash function = ~30% > > 2 hash functions = ~15% > > 3 hash functions = ~5% > > 4 hash functions = ~2.5% > > 5 hash functions = ~1% > > 6 hash functions = ~0.5% > > 7 hash functions = ~0.35% > > 8 hash functions = ~0.15% > > 9 hash functions = ~0.1% > > 10 hash functions = ~0% > > > > Signed-off-by: Joanne Koong <joannekoong@fb.com> > > --- > > tools/testing/selftests/bpf/Makefile | 6 +- > > tools/testing/selftests/bpf/bench.c | 37 ++ > > tools/testing/selftests/bpf/bench.h | 3 + > > .../bpf/benchs/bench_bloom_filter_map.c | 411 ++++++++++++++++++ > > .../bpf/benchs/run_bench_bloom_filter_map.sh | 28 ++ > > .../bpf/benchs/run_bench_ringbufs.sh | 30 +- > > .../selftests/bpf/benchs/run_common.sh | 48 ++ > > tools/testing/selftests/bpf/bpf_util.h | 11 + > > .../selftests/bpf/progs/bloom_filter_bench.c | 146 +++++++ > > 9 files changed, 690 insertions(+), 30 deletions(-) > > create mode 100644 tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c > > create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh > > create mode 100644 tools/testing/selftests/bpf/benchs/run_common.sh > > create mode 100644 tools/testing/selftests/bpf/progs/bloom_filter_bench.c > > [...] (it's a good idea to trim irrelevant parts of email to make it easier to see relevant parts) > > + > > +SEC("fentry/__x64_sys_getpgid") > > +int prog_bloom_filter_hashmap_lookup(void *ctx) > > +{ > > + __u64 *result; > > + int i, j, err; > > + > > + __u32 val[64] = {0}; > > + > > + for (i = 0; i < 1024; i++) { > > + for (j = 0; j < value_sz_nr_u32s && j < 64; j++) > > + val[j] = bpf_get_prandom_u32(); > > + > I tried out prepopulating these random values from the userspace side > (where we generate a random sequence of 10000 bytes and put that > in a bpf array map, then iterate through the bpf array map in the bpf > program; when I tried implementing it using a global array, I saw > verifier errors when indexing into the array). > > Additionally, this slows down the bench program as well, since we need > to generate all of these random values in the setup() portion of the > program. > I'm not convinced that prepopulating the values ahead of time nets us > anything - if the concern is that this slows down the bpf program, > I think this slows down the program in both the hashmap with and without > bloom filter cases; since we are mainly only interested in the delta between > these two scenarios, I don't think this ends up mattering that much. So imagine that a hashmap benchmark takes 10ms per iteration, and bloom filter + hashmap takes 5ms. That's a 2x difference, right? Now imagine that random values generation takes another 5ms, so actually you measure 15ms vs 10ms run time. Now, suddenly, you have measured just a 1.5x difference. But it's ok, feel free to just keep the benchmark as is. 
> > > + if (hashmap_use_bloom_filter) { > > + err = bpf_map_peek_elem(&bloom_filter_map, val); > > + if (err) { > > + if (err != -ENOENT) { > > + error |= 3; > > + return 0; > > + } > > + log_result(hit_key); > > + continue; > > + } > > + } > > + > > + result = bpf_map_lookup_elem(&hashmap, val); > > + if (result) { > > + log_result(hit_key); > > + } else { > > + if (hashmap_use_bloom_filter) > > + log_result(false_hit_key); > > + log_result(drop_key); > > + } > > + } > > + > > + return 0; > > +}
On 10/8/21 7:54 PM, Andrii Nakryiko wrote: > On Wed, Oct 6, 2021 at 3:37 PM Joanne Koong <joannekoong@fb.com> wrote: >> On 10/6/21 3:21 PM, Joanne Koong wrote: >> >>> This patch adds benchmark tests for the throughput (for lookups + updates) >>> and the false positive rate of bloom filter lookups, as well as some >>> minor refactoring of the bash script for running the benchmarks. >>> >>> These benchmarks show that as the number of hash functions increases, >>> the throughput and the false positive rate of the bloom filter decreases. >>> From the benchmark data, the approximate average false-positive rates for >>> 8-byte values are roughly as follows: >>> >>> 1 hash function = ~30% >>> 2 hash functions = ~15% >>> 3 hash functions = ~5% >>> 4 hash functions = ~2.5% >>> 5 hash functions = ~1% >>> 6 hash functions = ~0.5% >>> 7 hash functions = ~0.35% >>> 8 hash functions = ~0.15% >>> 9 hash functions = ~0.1% >>> 10 hash functions = ~0% >>> >>> Signed-off-by: Joanne Koong <joannekoong@fb.com> >>> --- >>> tools/testing/selftests/bpf/Makefile | 6 +- >>> tools/testing/selftests/bpf/bench.c | 37 ++ >>> tools/testing/selftests/bpf/bench.h | 3 + >>> .../bpf/benchs/bench_bloom_filter_map.c | 411 ++++++++++++++++++ >>> .../bpf/benchs/run_bench_bloom_filter_map.sh | 28 ++ >>> .../bpf/benchs/run_bench_ringbufs.sh | 30 +- >>> .../selftests/bpf/benchs/run_common.sh | 48 ++ >>> tools/testing/selftests/bpf/bpf_util.h | 11 + >>> .../selftests/bpf/progs/bloom_filter_bench.c | 146 +++++++ >>> 9 files changed, 690 insertions(+), 30 deletions(-) >>> create mode 100644 tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c >>> create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh >>> create mode 100644 tools/testing/selftests/bpf/benchs/run_common.sh >>> create mode 100644 tools/testing/selftests/bpf/progs/bloom_filter_bench.c >>> [...] >>> + >>> +SEC("fentry/__x64_sys_getpgid") >>> +int prog_bloom_filter_hashmap_lookup(void *ctx) >>> +{ >>> + __u64 *result; >>> + int i, j, err; >>> + >>> + __u32 val[64] = {0}; >>> + >>> + for (i = 0; i < 1024; i++) { >>> + for (j = 0; j < value_sz_nr_u32s && j < 64; j++) >>> + val[j] = bpf_get_prandom_u32(); >>> + >> I tried out prepopulating these random values from the userspace side >> (where we generate a random sequence of 10000 bytes and put that >> in a bpf array map, then iterate through the bpf array map in the bpf >> program; when I tried implementing it using a global array, I saw >> verifier errors when indexing into the array). >> >> Additionally, this slows down the bench program as well, since we need >> to generate all of these random values in the setup() portion of the >> program. >> I'm not convinced that prepopulating the values ahead of time nets us >> anything - if the concern is that this slows down the bpf program, >> I think this slows down the program in both the hashmap with and without >> bloom filter cases; since we are mainly only interested in the delta between >> these two scenarios, I don't think this ends up mattering that much. > So imagine that a hashmap benchmark takes 10ms per iteration, and > bloom filter + hashmap takes 5ms. That's a 2x difference, right? Now > imagine that random values generation takes another 5ms, so actually > you measure 15ms vs 10ms run time. Now, suddenly, you have measured > just a 1.5x difference. Yeah, you're right - in this case, the calls to bpf_get_prandom_u32() are time-intensive enough to have a measurable impact on the difference. 
I guess we could say that the delta is a conservative lower bound estimate - that this shows the bloom filter is at least X% faster on average, but I agree that it'd be great to get a more accurate estimate of the speed improvement. What do you think about expanding the benchmark framework to let the user program control when an iteration is finished? Right now, every iteration runs for 1 sec, and we calculate how many hits+drops have occurred within that 1 sec. This doesn't work great for when we need to prepopulate the data in advance since we don't know how much data is needed (eg 1 million entries might be enough on some servers while on other more powerful servers, it'll finish going through the 1 million entries before the timer is triggered - one option is to keep reusing the same data until the timer triggers, but this runs into potential issues where the hits/drops stats can overflow, especially since they monotonically increase between iterations); we could err on overpopulating to make sure there will always be enough entries, but then this leads to irritatingly long setup times. A more ideal setup would be something where we prepopulate the data to 1 million entries, then in each iteration of the benchmark go through the 1 million entries timing how long it takes to run through it with vs. without the bloom filter. This also leads to slightly more accurate results since now we also don't have to spend time logging each hit/drop in the corresponding per-cpu stats. I'm thinking about this mostly in the context of the bloom filter, but maybe having this option of running benchmarks this way could be useful for other maps in the future as well. What are your thoughts? > > But it's ok, feel free to just keep the benchmark as is. > [...]
On 10/15/21 4:35 PM, Joanne Koong wrote: > On 10/8/21 7:54 PM, Andrii Nakryiko wrote: > >> On Wed, Oct 6, 2021 at 3:37 PM Joanne Koong <joannekoong@fb.com> wrote: >>> On 10/6/21 3:21 PM, Joanne Koong wrote: >>> >>>> This patch adds benchmark tests for the throughput (for lookups + >>>> updates) >>>> and the false positive rate of bloom filter lookups, as well as some >>>> minor refactoring of the bash script for running the benchmarks. >>>> >>>> These benchmarks show that as the number of hash functions increases, >>>> the throughput and the false positive rate of the bloom filter >>>> decreases. >>>> From the benchmark data, the approximate average false-positive >>>> rates for >>>> 8-byte values are roughly as follows: >>>> >>>> 1 hash function = ~30% >>>> 2 hash functions = ~15% >>>> 3 hash functions = ~5% >>>> 4 hash functions = ~2.5% >>>> 5 hash functions = ~1% >>>> 6 hash functions = ~0.5% >>>> 7 hash functions = ~0.35% >>>> 8 hash functions = ~0.15% >>>> 9 hash functions = ~0.1% >>>> 10 hash functions = ~0% >>>> >>>> Signed-off-by: Joanne Koong <joannekoong@fb.com> >>>> --- >>>> tools/testing/selftests/bpf/Makefile | 6 +- >>>> tools/testing/selftests/bpf/bench.c | 37 ++ >>>> tools/testing/selftests/bpf/bench.h | 3 + >>>> .../bpf/benchs/bench_bloom_filter_map.c | 411 >>>> ++++++++++++++++++ >>>> .../bpf/benchs/run_bench_bloom_filter_map.sh | 28 ++ >>>> .../bpf/benchs/run_bench_ringbufs.sh | 30 +- >>>> .../selftests/bpf/benchs/run_common.sh | 48 ++ >>>> tools/testing/selftests/bpf/bpf_util.h | 11 + >>>> .../selftests/bpf/progs/bloom_filter_bench.c | 146 +++++++ >>>> 9 files changed, 690 insertions(+), 30 deletions(-) >>>> create mode 100644 >>>> tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c >>>> create mode 100755 >>>> tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh >>>> create mode 100644 tools/testing/selftests/bpf/benchs/run_common.sh >>>> create mode 100644 >>>> tools/testing/selftests/bpf/progs/bloom_filter_bench.c >>>> > [...] > >>>> + >>>> +SEC("fentry/__x64_sys_getpgid") >>>> +int prog_bloom_filter_hashmap_lookup(void *ctx) >>>> +{ >>>> + __u64 *result; >>>> + int i, j, err; >>>> + >>>> + __u32 val[64] = {0}; >>>> + >>>> + for (i = 0; i < 1024; i++) { >>>> + for (j = 0; j < value_sz_nr_u32s && j < 64; j++) >>>> + val[j] = bpf_get_prandom_u32(); >>>> + >>> I tried out prepopulating these random values from the userspace side >>> (where we generate a random sequence of 10000 bytes and put that >>> in a bpf array map, then iterate through the bpf array map in the bpf >>> program; when I tried implementing it using a global array, I saw >>> verifier errors when indexing into the array). >>> >>> Additionally, this slows down the bench program as well, since we need >>> to generate all of these random values in the setup() portion of the >>> program. >>> I'm not convinced that prepopulating the values ahead of time nets us >>> anything - if the concern is that this slows down the bpf program, >>> I think this slows down the program in both the hashmap with and >>> without >>> bloom filter cases; since we are mainly only interested in the delta >>> between >>> these two scenarios, I don't think this ends up mattering that much. >> So imagine that a hashmap benchmark takes 10ms per iteration, and >> bloom filter + hashmap takes 5ms. That's a 2x difference, right? Now >> imagine that random values generation takes another 5ms, so actually >> you measure 15ms vs 10ms run time. Now, suddenly, you have measured >> just a 1.5x difference. 
> Yeah, you're right - in this case, the calls to bpf_get_prandom_u32() > are time-intensive enough to have a measurable impact on the > difference. I > guess we could say that the delta is a conservative lower bound > estimate - > that this shows the bloom filter is at least X% faster on average, > but I agree that it'd be great to get a more accurate estimate of the > speed improvement. > > What do you think about expanding the benchmark framework to let the > user program control when an iteration is finished? Right now, every > iteration > runs for 1 sec, and we calculate how many hits+drops have occurred within > that 1 sec. This doesn't work great for when we need to prepopulate > the data in > advance since we don't know how much data is needed (eg 1 million entries > might be enough on some servers while on other more powerful servers, > it'll > finish going through the 1 million entries before the timer is > triggered - > one option is to keep reusing the same data until the timer triggers, but > this runs into potential issues where the hits/drops stats can overflow, > especially since they monotonically increase between iterations); we > could err > on overpopulating to make sure there will always be enough entries, but > then this leads to irritatingly long setup times. > > A more ideal setup would be something where we prepopulate the data to > 1 million entries, then in each iteration of the benchmark go through the > 1 million entries timing how long it takes to run through it with vs. > without > the bloom filter. This also leads to slightly more accurate results > since now > we also don't have to spend time logging each hit/drop in the > corresponding > per-cpu stats. I'm thinking about this mostly in the context of the bloom > filter, but maybe having this option of running benchmarks this way > could be > useful for other maps in the future as well. > > What are your thoughts? > Andrii and I had a great discussion about this offline and in the next revision (aka v5), the values will be prepopulated ahead of time and recycled. We don't have to worry about hits/drops overflowing u64; a u64 is large enough to handle this > >> >> But it's ok, feel free to just keep the benchmark as is. >> > [...] >
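[Editorial note] A rough sketch of what "prepopulated ahead of time and recycled" could look like on the BPF side; it is illustrative only (the map name, entry count, and value type are assumptions, and the actual v5 patch may differ). Userspace fills an array map with pre-generated random values once during setup, and the program cycles through them with a wrapping index instead of calling bpf_get_prandom_u32() inside the measured loop.

// SPDX-License-Identifier: GPL-2.0
/* Hypothetical sketch only -- not the v5 patch. Userspace is assumed to
 * fill rand_vals with NR_RAND_VALS pre-generated random values during
 * setup; the program recycles them so no random-number generation
 * happens inside the measured loop.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

#define NR_RAND_VALS 10000

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, NR_RAND_VALS);
	__type(key, __u32);
	__type(value, __u64);
} rand_vals SEC(".maps");

__u32 rand_idx;	/* wraps around the prepopulated entries */

SEC("fentry/__x64_sys_getpgid")
int recycle_rand_vals(void *ctx)
{
	__u64 *val;
	__u32 i, key;

	for (i = 0; i < 1024; i++) {
		key = rand_idx++ % NR_RAND_VALS;
		val = bpf_map_lookup_elem(&rand_vals, &key);
		if (!val)
			continue;
		/* use *val for the bloom filter peek / hashmap lookup */
	}
	return 0;
}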
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index c5c9a9f50d8d..66e1ad17acef 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -517,18 +517,20 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) $(call msg,CC,,$@) - $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) -O2 -c $(filter %.c,$^) $(LDLIBS) -o $@ $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ $(OUTPUT)/perfbuf_bench.skel.h +$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(OUTPUT)/bench_count.o \ $(OUTPUT)/bench_rename.o \ $(OUTPUT)/bench_trigger.o \ - $(OUTPUT)/bench_ringbufs.o + $(OUTPUT)/bench_ringbufs.o \ + $(OUTPUT)/bench_bloom_filter_map.o $(call msg,BINARY,,$@) $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 6ea15b93a2f8..e836bacd6f9d 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -51,6 +51,35 @@ void setup_libbpf() fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err); } +void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns) +{ + long total = res->false_hits + res->hits + res->drops; + + printf("Iter %3d (%7.3lfus): ", + iter, (delta_ns - 1000000000) / 1000.0); + + printf("%ld false hits of %ld total operations. Percentage = %2.2f %%\n", + res->false_hits, total, ((float)res->false_hits / total) * 100); +} + +void false_hits_report_final(struct bench_res res[], int res_cnt) +{ + long total_hits = 0, total_drops = 0, total_false_hits = 0, total_ops = 0; + int i; + + for (i = 0; i < res_cnt; i++) { + total_hits += res[i].hits; + total_false_hits += res[i].false_hits; + total_drops += res[i].drops; + } + total_ops = total_hits + total_false_hits + total_drops; + + printf("Summary: %ld false hits of %ld total operations. 
", + total_false_hits, total_ops); + printf("Percentage = %2.2f %%\n", + ((float)total_false_hits / total_ops) * 100); +} + void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) { double hits_per_sec, drops_per_sec; @@ -132,9 +161,11 @@ static const struct argp_option opts[] = { }; extern struct argp bench_ringbufs_argp; +extern struct argp bench_bloom_filter_map_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, + { &bench_bloom_filter_map_argp, 0, "Bloom filter map benchmark", 0 }, {}, }; @@ -323,6 +354,9 @@ extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; extern const struct bench bench_pb_libbpf; extern const struct bench bench_pb_custom; +extern const struct bench bench_bloom_filter_lookup; +extern const struct bench bench_bloom_filter_update; +extern const struct bench bench_bloom_filter_false_positive; static const struct bench *benchs[] = { &bench_count_global, @@ -344,6 +378,9 @@ static const struct bench *benchs[] = { &bench_rb_custom, &bench_pb_libbpf, &bench_pb_custom, + &bench_bloom_filter_lookup, + &bench_bloom_filter_update, + &bench_bloom_filter_false_positive, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index c1f48a473b02..624c6b11501f 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -33,6 +33,7 @@ struct env { struct bench_res { long hits; long drops; + long false_hits; }; struct bench { @@ -56,6 +57,8 @@ extern const struct bench *bench; void setup_libbpf(); void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns); void hits_drops_report_final(struct bench_res res[], int res_cnt); +void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); +void false_hits_report_final(struct bench_res res[], int res_cnt); static inline __u64 get_time_ns() { struct timespec t; diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c new file mode 100644 index 000000000000..25d6b8179c8e --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <argp.h> +#include <linux/log2.h> +#include <pthread.h> +#include "bench.h" +#include "bloom_filter_bench.skel.h" +#include "bpf_util.h" + +static struct ctx { + struct bloom_filter_bench *skel; + + int bloom_filter_fd; + int hashmap_fd; + int array_map_fd; + + pthread_mutex_t map_done_mtx; + pthread_cond_t map_done; + bool map_prepare_err; + + __u32 next_map_idx; + +} ctx = { + .map_done_mtx = PTHREAD_MUTEX_INITIALIZER, + .map_done = PTHREAD_COND_INITIALIZER, +}; + +struct stat { + __u32 stats[3]; +}; + +static struct { + __u32 nr_entries; + __u8 nr_hash_funcs; + __u32 value_size; +} args = { + .nr_entries = 1000, + .nr_hash_funcs = 3, + .value_size = 8, +}; + +enum { + ARG_NR_ENTRIES = 3000, + ARG_NR_HASH_FUNCS = 3001, + ARG_VALUE_SIZE = 3002, +}; + +static const struct argp_option opts[] = { + { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0, + "Set number of expected unique entries in the bloom filter"}, + { "nr_hash_funcs", ARG_NR_HASH_FUNCS, "NR_HASH_FUNCS", 0, + "Set number of hash functions in the bloom filter"}, + { "value_size", ARG_VALUE_SIZE, "VALUE_SIZE", 0, + "Set value size (in bytes) of bloom filter entries"}, + {}, +}; + +static error_t 
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	switch (key) {
+	case ARG_NR_ENTRIES:
+		args.nr_entries = strtol(arg, NULL, 10);
+		if (args.nr_entries == 0) {
+			fprintf(stderr, "Invalid nr_entries count.");
+			argp_usage(state);
+		}
+		break;
+	case ARG_NR_HASH_FUNCS:
+		args.nr_hash_funcs = strtol(arg, NULL, 10);
+		if (args.nr_hash_funcs == 0 || args.nr_hash_funcs > 15) {
+			fprintf(stderr,
+				"The bloom filter must use 1 to 15 hash functions.");
+			argp_usage(state);
+		}
+		break;
+	case ARG_VALUE_SIZE:
+		args.value_size = strtol(arg, NULL, 10);
+		if (args.value_size < 2 || args.value_size > 256) {
+			fprintf(stderr,
+				"Invalid value size. Must be between 2 and 256 bytes");
+			argp_usage(state);
+		}
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+
+	return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_bloom_filter_map_argp = {
+	.options = opts,
+	.parser = parse_arg,
+};
+
+static void validate(void)
+{
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr,
+			"The bloom filter benchmarks do not support multi-consumer use\n");
+		exit(1);
+	}
+}
+
+static inline void trigger_bpf_program(void)
+{
+	syscall(__NR_getpgid);
+}
+
+static void *producer(void *input)
+{
+	while (true)
+		trigger_bpf_program();
+
+	return NULL;
+}
+
+static void *map_prepare_thread(void *arg)
+{
+	__u32 val_size, i;
+	void *val = NULL;
+	int err;
+
+	val_size = args.value_size;
+	val = malloc(val_size);
+	if (!val) {
+		ctx.map_prepare_err = true;
+		goto done;
+	}
+
+	while (true) {
+		i = __atomic_add_fetch(&ctx.next_map_idx, 1, __ATOMIC_RELAXED);
+		if (i > args.nr_entries)
+			break;
+
+again:
+		/* Populate hashmap, bloom filter map, and array map with the same
+		 * random values
+		 */
+		err = syscall(__NR_getrandom, val, val_size, 0);
+		if (err != val_size) {
+			ctx.map_prepare_err = true;
+			fprintf(stderr, "failed to get random value: %d\n", -errno);
+			break;
+		}
+
+		err = bpf_map_update_elem(ctx.hashmap_fd, val, val, BPF_NOEXIST);
+		if (err) {
+			if (err != -EEXIST) {
+				ctx.map_prepare_err = true;
+				fprintf(stderr, "failed to add elem to hashmap: %d\n",
+					-errno);
+				break;
+			}
+			goto again;
+		}
+
+		i--;
+
+		err = bpf_map_update_elem(ctx.array_map_fd, &i, val, 0);
+		if (err) {
+			ctx.map_prepare_err = true;
+			fprintf(stderr, "failed to add elem to array map: %d\n", -errno);
+			break;
+		}
+
+		err = bpf_map_update_elem(ctx.bloom_filter_fd, NULL, val, 0);
+		if (err) {
+			ctx.map_prepare_err = true;
+			fprintf(stderr,
+				"failed to add elem to bloom filter map: %d\n", -errno);
+			break;
+		}
+	}
+done:
+	pthread_mutex_lock(&ctx.map_done_mtx);
+	pthread_cond_signal(&ctx.map_done);
+	pthread_mutex_unlock(&ctx.map_done_mtx);
+
+	if (val)
+		free(val);
+
+	return NULL;
+}
+
+static void populate_maps(void)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	pthread_t map_thread;
+	int i, err;
+
+	ctx.bloom_filter_fd = bpf_map__fd(ctx.skel->maps.bloom_filter_map);
+	ctx.hashmap_fd = bpf_map__fd(ctx.skel->maps.hashmap);
+	ctx.array_map_fd = bpf_map__fd(ctx.skel->maps.array_map);
+
+	for (i = 0; i < nr_cpus; i++) {
+		err = pthread_create(&map_thread, NULL, map_prepare_thread,
+				     NULL);
+		if (err) {
+			fprintf(stderr, "failed to create pthread: %d\n", -errno);
+			exit(1);
+		}
+	}
+
+	pthread_mutex_lock(&ctx.map_done_mtx);
+	pthread_cond_wait(&ctx.map_done, &ctx.map_done_mtx);
+	pthread_mutex_unlock(&ctx.map_done_mtx);
+
+	if (ctx.map_prepare_err)
+		exit(1);
+}
+
+static struct bloom_filter_bench *setup_skeleton(bool hashmap_use_bloom_filter)
+{
+	struct bloom_filter_bench *skel;
+	int err;
+
+	setup_libbpf();
+
+	skel = bloom_filter_bench__open();
+	if (!skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	skel->rodata->hashmap_use_bloom_filter = hashmap_use_bloom_filter;
+
+	/* Resize number of entries */
+	err = bpf_map__resize(skel->maps.hashmap, args.nr_entries);
+	if (err) {
+		fprintf(stderr, "failed to resize hashmap\n");
+		exit(1);
+	}
+
+	err = bpf_map__resize(skel->maps.array_map, args.nr_entries);
+	if (err) {
+		fprintf(stderr, "failed to resize array map\n");
+		exit(1);
+	}
+
+	err = bpf_map__resize(skel->maps.bloom_filter_map,
+			      BPF_BLOOM_FILTER_BITSET_SZ(args.nr_entries,
+							 args.nr_hash_funcs));
+	if (err) {
+		fprintf(stderr, "failed to resize bloom filter\n");
+		exit(1);
+	}
+
+	/* Set value size */
+	err = bpf_map__set_value_size(skel->maps.array_map, args.value_size);
+	if (err) {
+		fprintf(stderr, "failed to set array map value size\n");
+		exit(1);
+	}
+
+	err = bpf_map__set_value_size(skel->maps.bloom_filter_map, args.value_size);
+	if (err) {
+		fprintf(stderr, "failed to set bloom filter map value size\n");
+		exit(1);
+	}
+
+	err = bpf_map__set_value_size(skel->maps.hashmap, args.value_size);
+	if (err) {
+		fprintf(stderr, "failed to set hashmap value size\n");
+		exit(1);
+	}
+	/* For the hashmap, we use the value as the key as well */
+	err = bpf_map__set_key_size(skel->maps.hashmap, args.value_size);
+	if (err) {
+		fprintf(stderr, "failed to set hashmap key size\n");
+		exit(1);
+	}
+
+	skel->bss->value_sz_nr_u32s = (args.value_size + sizeof(__u32) - 1)
+				      / sizeof(__u32);
+
+	/* Set number of hash functions */
+	err = bpf_map__set_map_extra(skel->maps.bloom_filter_map,
+				     args.nr_hash_funcs);
+	if (err) {
+		fprintf(stderr, "failed to set bloom filter nr_hash_funcs\n");
+		exit(1);
+	}
+
+	if (bloom_filter_bench__load(skel)) {
+		fprintf(stderr, "failed to load skeleton\n");
+		exit(1);
+	}
+
+	return skel;
+}
+
+static void bench_setup_lookup(void)
+{
+	struct bpf_link *link;
+
+	ctx.skel = setup_skeleton(true);
+
+	populate_maps();
+
+	link = bpf_program__attach(ctx.skel->progs.prog_bloom_filter_lookup);
+	if (!link) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void bench_setup_update(void)
+{
+	struct bpf_link *link;
+
+	ctx.skel = setup_skeleton(true);
+
+	populate_maps();
+
+	link = bpf_program__attach(ctx.skel->progs.prog_bloom_filter_update);
+	if (!link) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void hashmap_lookup_setup(void)
+{
+	struct bpf_link *link;
+
+	ctx.skel = setup_skeleton(true);
+
+	populate_maps();
+
+	link = bpf_program__attach(ctx.skel->progs.prog_bloom_filter_hashmap_lookup);
+	if (!link) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void measure(struct bench_res *res)
+{
+	long total_hits = 0, total_drops = 0, total_false_hits = 0;
+	static long last_hits, last_drops, last_false_hits;
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	int hit_key, drop_key, false_hit_key;
+	int i;
+
+	hit_key = ctx.skel->rodata->hit_key;
+	drop_key = ctx.skel->rodata->drop_key;
+	false_hit_key = ctx.skel->rodata->false_hit_key;
+
+	if (ctx.skel->bss->error != 0) {
+		fprintf(stderr, "error (%d) when searching the bitset\n",
+			ctx.skel->bss->error);
+		exit(1);
+	}
+
+	for (i = 0; i < nr_cpus; i++) {
+		struct stat *s = (void *)&ctx.skel->bss->percpu_stats[i];
+
+		total_hits += s->stats[hit_key];
+		total_drops += s->stats[drop_key];
+		total_false_hits += s->stats[false_hit_key];
+	}
+
+	res->hits = total_hits - last_hits;
+	res->drops = total_drops - last_drops;
+	res->false_hits = total_false_hits - last_false_hits;
+
+	last_hits = total_hits;
+	last_drops = total_drops;
+	last_false_hits = total_false_hits;
+}
+
+static void *consumer(void *input)
+{
+	return NULL;
+}
+
+const struct bench bench_bloom_filter_lookup = {
+	.name = "bloom-filter-lookup",
+	.validate = validate,
+	.setup = bench_setup_lookup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_bloom_filter_update = {
+	.name = "bloom-filter-update",
+	.validate = validate,
+	.setup = bench_setup_update,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_bloom_filter_false_positive = {
+	.name = "bloom-filter-false-positive",
+	.validate = validate,
+	.setup = hashmap_lookup_setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = false_hits_report_progress,
+	.report_final = false_hits_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh
new file mode 100755
index 000000000000..5d4f84ad9973
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+header "Bitset map"
+for v in 2 4 8 16 40; do
+for t in 1 4 8 12 16; do
+for h in {1..10}; do
+subtitle "value_size: $v bytes, # threads: $t, # hashes: $h"
+	for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do
+		printf "%'d entries -\n" $e
+		printf "\t"
+		summarize "Lookups, total operations: " \
+			"$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-filter-lookup)"
+		printf "\t"
+		summarize "Updates, total operations: " \
+			"$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-filter-update)"
+		printf "\t"
+		summarize_percentage "False positive rate: " \
+			"$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-filter-false-positive)"
+	done
+	printf "\n"
+done
+done
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
index af4aa04caba6..ada028aa9007 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -1,34 +1,8 @@
 #!/bin/bash
 
-set -eufo pipefail
-
-RUN_BENCH="sudo ./bench -w3 -d10 -a"
-
-function hits()
-{
-	echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
-}
-
-function drops()
-{
-	echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
-}
+source ./benchs/run_common.sh
 
-function header()
-{
-	local len=${#1}
-
-	printf "\n%s\n" "$1"
-	for i in $(seq 1 $len); do printf '='; done
-	printf '\n'
-}
-
-function summarize()
-{
-	bench="$1"
-	summary=$(echo $2 | tail -n1)
-	printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)"
-}
+set -eufo pipefail
 
 header "Single-producer, parallel producer"
 for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh
new file mode 100644
index 000000000000..670f23b037c4
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_common.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+RUN_BENCH="sudo ./bench -w3 -d10 -a"
+
+function header()
+{
+	local len=${#1}
+
+	printf "\n%s\n" "$1"
+	for i in $(seq 1 $len); do printf '='; done
+	printf '\n'
+}
+
+function subtitle()
+{
+	local len=${#1}
+	printf "\t%s\n" "$1"
+}
+
+function hits()
+{
+	echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function drops()
+{
+	echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function percentage()
+{
+	echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/"
+}
+
+function summarize()
+{
+	bench="$1"
+	summary=$(echo $2 | tail -n1)
+	printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)"
+}
+
+function summarize_percentage()
+{
+	bench="$1"
+	summary=$(echo $2 | tail -n1)
+	printf "%-20s %s%%\n" "$bench" "$(percentage $summary)"
+}
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index a3352a64c067..a260a963efda 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -40,4 +40,15 @@ static inline unsigned int bpf_num_possible_cpus(void)
 	(offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
 #endif
 
+/* Helper macro for computing the optimal number of bits for a
+ * bloom filter map.
+ *
+ * Mathematically, the optimal bitset size that minimizes the
+ * false positive probability is n * k / ln(2) where n is the expected
+ * number of unique entries in the bloom filter and k is the number of
+ * hash functions. We use 7 / 5 to approximate 1 / ln(2).
+ */
+#define BPF_BLOOM_FILTER_BITSET_SZ(nr_uniq_entries, nr_hash_funcs) \
+	((nr_uniq_entries) * (nr_hash_funcs) / 5 * 7)
+
 #endif /* __BPF_UTIL__ */
diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
new file mode 100644
index 000000000000..a44a47ddc4d7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_map;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	/* max entries and value_size will be set programmatically.
+	 * They are configurable from the userspace bench program.
+	 */
+} array_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_BITSET);
+	/* max entries, value_size, and # of hash functions will be set
+	 * programmatically. They are configurable from the userspace
+	 * bench program.
+	 */
+} bloom_filter_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	/* max entries, key_size, and value_size, will be set
+	 * programmatically. They are configurable from the userspace
+	 * bench program.
+	 */
+} hashmap SEC(".maps");
+
+struct callback_ctx {
+	struct bpf_map *map;
+	bool update;
+};
+
+/* Tracks the number of hits, drops, and false hits */
+struct {
+	__u32 stats[3];
+} __attribute__((__aligned__(256))) percpu_stats[256];
+
+__u8 value_sz_nr_u32s;
+
+const __u32 hit_key = 0;
+const __u32 drop_key = 1;
+const __u32 false_hit_key = 2;
+
+const volatile bool hashmap_use_bloom_filter = true;
+
+int error = 0;
+
+static __always_inline void log_result(__u32 key)
+{
+	__u32 cpu = bpf_get_smp_processor_id();
+
+	percpu_stats[cpu & 255].stats[key]++;
+}
+
+static __u64
+bloom_filter_callback(struct bpf_map *map, __u32 *key, void *val,
+		      struct callback_ctx *data)
+{
+	int err;
+
+	if (data->update)
+		err = bpf_map_push_elem(data->map, val, 0);
+	else
+		err = bpf_map_peek_elem(data->map, val);
+
+	if (err) {
+		error |= 1;
+		return 1; /* stop the iteration */
+	}
+
+	log_result(hit_key);
+
+	return 0;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int prog_bloom_filter_lookup(void *ctx)
+{
+	struct callback_ctx data;
+
+	data.map = (struct bpf_map *)&bloom_filter_map;
+	data.update = false;
+
+	bpf_for_each_map_elem(&array_map, bloom_filter_callback, &data, 0);
+
+	return 0;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int prog_bloom_filter_update(void *ctx)
+{
+	struct callback_ctx data;
+
+	data.map = (struct bpf_map *)&bloom_filter_map;
+	data.update = true;
+
+	bpf_for_each_map_elem(&array_map, bloom_filter_callback, &data, 0);
+
+	return 0;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int prog_bloom_filter_hashmap_lookup(void *ctx)
+{
+	__u64 *result;
+	int i, j, err;
+
+	__u32 val[64] = {0};
+
+	for (i = 0; i < 1024; i++) {
+		for (j = 0; j < value_sz_nr_u32s && j < 64; j++)
+			val[j] = bpf_get_prandom_u32();
+
+		if (hashmap_use_bloom_filter) {
+			err = bpf_map_peek_elem(&bloom_filter_map, val);
+			if (err) {
+				if (err != -ENOENT) {
+					error |= 3;
+					return 0;
+				}
+				log_result(hit_key);
+				continue;
+			}
+		}
+
+		result = bpf_map_lookup_elem(&hashmap, val);
+		if (result) {
+			log_result(hit_key);
+		} else {
+			if (hashmap_use_bloom_filter)
+				log_result(false_hit_key);
+			log_result(drop_key);
+		}
+	}
+
+	return 0;
+}
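
A side note on the sizing macro added to bpf_util.h: the 7/5 factor only approximates the optimal 1/ln(2) ~= 1.44, so the bitset comes out slightly smaller than the mathematically optimal n * k / ln(2) bits. Below is a minimal standalone userspace sketch (illustrative only, not part of the patch; the entry and hash-function counts are made-up examples, not values used for the quoted results) showing how far off the approximation is for one configuration:

	/* Illustrative sketch: compare BPF_BLOOM_FILTER_BITSET_SZ (copied from
	 * the bpf_util.h hunk above) against the exact n * k / ln(2).
	 * Build with: gcc -O2 -o bitset_sz bitset_sz.c -lm
	 */
	#include <math.h>
	#include <stdio.h>

	#define BPF_BLOOM_FILTER_BITSET_SZ(nr_uniq_entries, nr_hash_funcs) \
		((nr_uniq_entries) * (nr_hash_funcs) / 5 * 7)

	int main(void)
	{
		unsigned long nr_entries = 50000;	/* hypothetical --nr_entries */
		unsigned long nr_hash_funcs = 3;	/* hypothetical --nr_hash_funcs */
		unsigned long approx = BPF_BLOOM_FILTER_BITSET_SZ(nr_entries, nr_hash_funcs);
		double exact = nr_entries * nr_hash_funcs / log(2);

		/* approx = 50000 * 3 / 5 * 7 = 210000 bits, while
		 * exact  = 150000 / 0.6931... ~= 216404 bits,
		 * i.e. the 7/5 approximation undersizes the bitset by roughly 3%.
		 */
		printf("approx = %lu bits, exact = %.0f bits\n", approx, exact);
		return 0;
	}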
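For reference, a single data point can also be collected without the wrapper script by invoking the bench binary directly with the options this patch adds, e.g. something like:

	sudo ./bench -w3 -d10 -a -p 16 --nr_hash_funcs 3 --nr_entries 50000 --value_size 8 bloom-filter-false-positive

(the -w3 -d10 -a flags mirror RUN_BENCH in run_common.sh and -p sets the producer/thread count; the parameter values here are arbitrary examples, not the configurations behind the quoted numbers). run_bench_bloom_filter_map.sh simply loops this invocation over the value sizes, thread counts, hash-function counts, and entry counts listed in the script and feeds the output through summarize/summarize_percentage.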