diff mbox series

[v2,bpf-next,1/4] bpf: Add bloom filter map implementation

Message ID 20210914040433.3184308-2-joannekoong@fb.com (mailing list archive)
State Superseded
Delegated to: BPF
Series Implement bloom filter map

Checks

Context Check Description
netdev/apply fail Patch does not apply to bpf-next
netdev/tree_selection success Clearly marked for bpf-next
bpf/vmtest-bpf-next-PR fail merge-conflict

Commit Message

Joanne Koong Sept. 14, 2021, 4:04 a.m. UTC
Bloom filters are a space-efficient probabilistic data structure
used to quickly test whether an element exists in a set.
In a bloom filter, false positives are possible whereas false
negatives should never be.

This patch adds a bloom filter map for bpf programs.
The bloom filter map supports peek (determining whether an element
is present in the map) and push (adding an element to the map)
operations. These operations are exposed to userspace applications
through the already existing syscalls in the following way:

BPF_MAP_LOOKUP_ELEM -> peek
BPF_MAP_UPDATE_ELEM -> push

The bloom filter map does not have keys, only values. In light of
this, the bloom filter map's API matches that of queue/stack maps:
user applications use BPF_MAP_LOOKUP_ELEM/BPF_MAP_UPDATE_ELEM
which correspond internally to bpf_map_peek_elem/bpf_map_push_elem,
and bpf programs must use the bpf_map_peek_elem and bpf_map_push_elem
APIs to query or add an element to the bloom filter map. When the
bloom filter map is created, it must be created with a key_size of 0.

For updates, the user will pass in the element to add to the map
as the value, with a NULL key. For lookups, the user will pass in the
element to query in the map as the value. In the verifier layer, this
requires us to modify the argument type of a bloom filter's
BPF_FUNC_map_peek_elem call to ARG_PTR_TO_MAP_VALUE; as well, in
the syscall layer, we need to copy over the user value so that in
bpf_map_peek_elem, we know which specific value to query.
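The push/peek flow above can be modeled in plain userspace C. This is only an illustrative sketch: a simple FNV-1a-style function stands in for the kernel's jhash, the bitmap size is an arbitrary power of two, and none of the names below come from the patch itself.

```c
#include <assert.h>
#include <stdint.h>
#include <string.h>

#define NR_BITS   (1u << 16)   /* power of two, so a simple mask works */
#define NR_HASHES 5            /* the patch's default hash count */

static uint32_t bitmap[NR_BITS / 32];

/* Stand-in for jhash: FNV-1a mixed with a per-round seed. */
static uint32_t stand_in_hash(const void *data, uint32_t len, uint32_t seed)
{
	const uint8_t *p = data;
	uint32_t h = 2166136261u ^ seed;

	for (uint32_t i = 0; i < len; i++) {
		h ^= p[i];
		h *= 16777619u;
	}
	return h;
}

/* Models BPF_MAP_UPDATE_ELEM (NULL key) -> push: set one bit per hash. */
static void bloom_push(const void *value, uint32_t size)
{
	for (uint32_t i = 0; i < NR_HASHES; i++) {
		uint32_t bit = stand_in_hash(value, size, i) & (NR_BITS - 1);

		bitmap[bit / 32] |= 1u << (bit % 32);
	}
}

/* Models BPF_MAP_LOOKUP_ELEM -> peek: "not found" (here -1, standing in
 * for -ENOENT) as soon as any probed bit is clear. */
static int bloom_peek(const void *value, uint32_t size)
{
	for (uint32_t i = 0; i < NR_HASHES; i++) {
		uint32_t bit = stand_in_hash(value, size, i) & (NR_BITS - 1);

		if (!(bitmap[bit / 32] & (1u << (bit % 32))))
			return -1;
	}
	return 0;
}
```

Note the structural point the commit message makes: every pushed value must later peek as present (no false negatives), while a peek hit for a never-pushed value is possible (false positive).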

A few things to please take note of:
 * If there are any concurrent lookups + updates, the user is
responsible for synchronizing this to ensure no false negative lookups
occur.
 * The number of hashes to use for the bloom filter is configurable from
userspace. If no number is specified, the default used will be 5 hash
functions. The benchmarks later in this patchset can help compare the
performance of using different numbers of hashes on different entry
sizes. In general, using more hashes slows down a lookup, but also
lowers the false positive rate of an element being wrongly reported as
present in the bloom filter (up to the optimal number of hashes for the
given bitmap size and entry count).
 * Deleting an element in the bloom filter map is not supported.
 * The bloom filter map may be used as an inner map.
 * The "max_entries" size that is specified at map creation time is used to
approximate a reasonable bitmap size for the bloom filter, and is not
otherwise strictly enforced. If the user wishes to insert more entries into
the bloom filter than "max_entries", they may do so but they should be
aware that this may lead to a higher false positive rate.
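For intuition on the max_entries / hash-count tradeoff, the standard bloom filter analysis applies: with m bits, n inserted entries, and k hash functions, a given bit stays clear with probability (1 - 1/m)^(kn), so the false positive rate is roughly (1 - (1 - 1/m)^(kn))^k. A small self-contained check of that formula (standard math, not code from this patch; plain loops are used to avoid a libm dependency):

```c
#include <assert.h>

/* Approximate false positive rate after inserting n entries with k hash
 * functions into an m-bit bitmap: a given bit is still clear with
 * probability (1 - 1/m)^(k*n), and a false positive needs all k probed
 * bits to be set. */
static double bloom_fpr(unsigned int k, unsigned int n, unsigned int m)
{
	double bit_clear = 1.0;
	double fpr = 1.0;
	unsigned int i;

	for (i = 0; i < k * n; i++)
		bit_clear *= 1.0 - 1.0 / m;
	for (i = 0; i < k; i++)
		fpr *= 1.0 - bit_clear;
	return fpr;
}
```

This makes the tradeoff in the notes concrete: below the optimal hash count (about (m/n)·ln 2), adding hashes lowers the false positive rate at the cost of lookup speed; far beyond it, the rate climbs again as the bitmap saturates.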

Signed-off-by: Joanne Koong <joannekoong@fb.com>
---
 include/linux/bpf_types.h      |   1 +
 include/uapi/linux/bpf.h       |  10 ++
 kernel/bpf/Makefile            |   2 +-
 kernel/bpf/bloom_filter.c      | 205 +++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           |  14 ++-
 kernel/bpf/verifier.c          |  19 ++-
 tools/include/uapi/linux/bpf.h |  10 ++
 7 files changed, 255 insertions(+), 6 deletions(-)
 create mode 100644 kernel/bpf/bloom_filter.c

Comments

Alexei Starovoitov Sept. 17, 2021, 5:01 p.m. UTC | #1
On Mon, Sep 13, 2021 at 09:04:30PM -0700, Joanne Koong wrote:
> +
> +/* For bloom filter maps, the next 4 bits represent how many hashes to use.
> + * The maximum number of hash functions supported is 15. If this is not set,
> + * the default number of hash functions used will be 5.
> + */
> +	BPF_F_BLOOM_FILTER_HASH_BIT_1 = (1U << 13),
> +	BPF_F_BLOOM_FILTER_HASH_BIT_2 = (1U << 14),
> +	BPF_F_BLOOM_FILTER_HASH_BIT_3 = (1U << 15),
> +	BPF_F_BLOOM_FILTER_HASH_BIT_4 = (1U << 16),

The bit selection is unintuitive.
Since key_size has to be zero, maybe that could be used instead to indicate the number
of hash functions in the rare case when 5 is not good enough?
Or use inner_map_fd since there is no possibility of having an inner map in bloomfilter.
It could be a union:
    __u32   max_entries;    /* max number of entries in a map */
    __u32   map_flags;      /* BPF_MAP_CREATE related
                             * flags defined above.
                             */
    union {
       __u32  inner_map_fd;   /* fd pointing to the inner map */
       __u32  nr_hash_funcs;  /* or number of hash functions */
    };
    __u32   numa_node;      /* numa node */

> +struct bpf_bloom_filter {
> +	struct bpf_map map;
> +	u32 bit_array_mask;
> +	u32 hash_seed;
> +	/* If the size of the values in the bloom filter is u32 aligned,
> +	 * then it is more performant to use jhash2 as the underlying hash
> +	 * function, else we use jhash. This tracks the number of u32s
> +	 * in an u32-aligned value size. If the value size is not u32 aligned,
> +	 * this will be 0.
> +	 */
> +	u32 aligned_u32_count;

What is the performance difference?
Maybe we should enforce a 4-byte sized value for simplicity?
Andrii Nakryiko Sept. 17, 2021, 9:48 p.m. UTC | #2
On Mon, Sep 13, 2021 at 9:09 PM Joanne Koong <joannekoong@fb.com> wrote:
>
> Bloom filters are a space-efficient probabilistic data structure
> used to quickly test whether an element exists in a set.
> In a bloom filter, false positives are possible whereas false
> negatives should never be.
>
> This patch adds a bloom filter map for bpf programs.
> The bloom filter map supports peek (determining whether an element
> is present in the map) and push (adding an element to the map)
> operations.These operations are exposed to userspace applications
> through the already existing syscalls in the following way:
>
> BPF_MAP_LOOKUP_ELEM -> peek
> BPF_MAP_UPDATE_ELEM -> push
>
> The bloom filter map does not have keys, only values. In light of
> this, the bloom filter map's API matches that of queue stack maps:
> user applications use BPF_MAP_LOOKUP_ELEM/BPF_MAP_UPDATE_ELEM
> which correspond internally to bpf_map_peek_elem/bpf_map_push_elem,
> and bpf programs must use the bpf_map_peek_elem and bpf_map_push_elem
> APIs to query or add an element to the bloom filter map. When the
> bloom filter map is created, it must be created with a key_size of 0.
>
> For updates, the user will pass in the element to add to the map
> as the value, with a NULL key. For lookups, the user will pass in the
> element to query in the map as the value. In the verifier layer, this
> requires us to modify the argument type of a bloom filter's
> BPF_FUNC_map_peek_elem call to ARG_PTR_TO_MAP_VALUE; as well, in
> the syscall layer, we need to copy over the user value so that in
> bpf_map_peek_elem, we know which specific value to query.
>
> A few things to please take note of:
>  * If there are any concurrent lookups + updates, the user is
> responsible for synchronizing this to ensure no false negative lookups
> occur.
>  * The number of hashes to use for the bloom filter is configurable from
> userspace. If no number is specified, the default used will be 5 hash
> functions. The benchmarks later in this patchset can help compare the
> performance of using different number of hashes on different entry
> sizes. In general, using more hashes decreases the speed of a lookup,
> but increases the false positive rate of an element being detected in the
> bloom filter.
>  * Deleting an element in the bloom filter map is not supported.
>  * The bloom filter map may be used as an inner map.
>  * The "max_entries" size that is specified at map creation time is used to
> approximate a reasonable bitmap size for the bloom filter, and is not
> otherwise strictly enforced. If the user wishes to insert more entries into
> the bloom filter than "max_entries", they may do so but they should be
> aware that this may lead to a higher false positive rate.
>
> Signed-off-by: Joanne Koong <joannekoong@fb.com>
> ---
>  include/linux/bpf_types.h      |   1 +
>  include/uapi/linux/bpf.h       |  10 ++
>  kernel/bpf/Makefile            |   2 +-
>  kernel/bpf/bloom_filter.c      | 205 +++++++++++++++++++++++++++++++++
>  kernel/bpf/syscall.c           |  14 ++-
>  kernel/bpf/verifier.c          |  19 ++-
>  tools/include/uapi/linux/bpf.h |  10 ++
>  7 files changed, 255 insertions(+), 6 deletions(-)
>  create mode 100644 kernel/bpf/bloom_filter.c
>
> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> index 9c81724e4b98..c4424ac2fa02 100644
> --- a/include/linux/bpf_types.h
> +++ b/include/linux/bpf_types.h
> @@ -125,6 +125,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
>  BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
>  #endif
>  BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
> +BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
>
>  BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
>  BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 791f31dd0abe..1d82860fd98e 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -906,6 +906,7 @@ enum bpf_map_type {
>         BPF_MAP_TYPE_RINGBUF,
>         BPF_MAP_TYPE_INODE_STORAGE,
>         BPF_MAP_TYPE_TASK_STORAGE,
> +       BPF_MAP_TYPE_BLOOM_FILTER,
>  };
>
>  /* Note that tracing related programs such as
> @@ -1210,6 +1211,15 @@ enum {
>
>  /* Create a map that is suitable to be an inner map with dynamic max entries */
>         BPF_F_INNER_MAP         = (1U << 12),
> +
> +/* For bloom filter maps, the next 4 bits represent how many hashes to use.
> + * The maximum number of hash functions supported is 15. If this is not set,
> + * the default number of hash functions used will be 5.
> + */
> +       BPF_F_BLOOM_FILTER_HASH_BIT_1 = (1U << 13),
> +       BPF_F_BLOOM_FILTER_HASH_BIT_2 = (1U << 14),
> +       BPF_F_BLOOM_FILTER_HASH_BIT_3 = (1U << 15),
> +       BPF_F_BLOOM_FILTER_HASH_BIT_4 = (1U << 16),

I didn't realize all those map_flags are sequentially numbered, but I
guess we are not yet running out of space, so this might be ok. But to
be usable from a BPF program nicely, it would be better to define this as
the offset of the first hash bit and the number of bits:

BPF_F_BLOOM_NR_HASH_OFF = 13,
BPF_F_BLOOM_NR_HASH_CNT = 4,

So that in a BPF map definition in a BPF program we could do

struct {
    __uint(type, BPF_MAP_TYPE_BLOOM_FILTER),
    ...
    __uint(map_flags, 5 << BPF_F_BLOOM_NR_HASH_OFF),
};

It's still quite ugly, but given it's unlikely to be used very
frequently, might be ok.
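With those two constants (hypothetical, as proposed above; they are not part of the UAPI), encoding and decoding the hash count from map_flags reduces to a shift and a mask. A sketch:

```c
#include <assert.h>
#include <stdint.h>

/* Hypothetical constants from the proposal above, not in the UAPI. */
#define BPF_F_BLOOM_NR_HASH_OFF 13
#define BPF_F_BLOOM_NR_HASH_CNT 4   /* 4 bits -> up to 15 hash functions */

/* Pack a hash count into map_flags at the proposed bit offset. */
static uint32_t bloom_flags_set_nr_hashes(uint32_t flags, uint32_t nr)
{
	return flags | (nr << BPF_F_BLOOM_NR_HASH_OFF);
}

/* Recover the hash count the map was created with. */
static uint32_t bloom_flags_get_nr_hashes(uint32_t flags)
{
	uint32_t mask = (1u << BPF_F_BLOOM_NR_HASH_CNT) - 1;

	return (flags >> BPF_F_BLOOM_NR_HASH_OFF) & mask;
}
```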


[...]

> +
> +static int bloom_filter_map_peek_elem(struct bpf_map *map, void *value)
> +{
> +       struct bpf_bloom_filter *bloom_filter =
> +               container_of(map, struct bpf_bloom_filter, map);
> +       u32 hash;
> +       u8 i;
> +
> +       for (i = 0; i < bloom_filter->nr_hashes; i++) {
> +               if (bloom_filter->aligned_u32_count)
> +                       hash = jhash2(value, bloom_filter->aligned_u32_count,
> +                                     bloom_filter->hash_seed + i) &
> +                               bloom_filter->bit_array_mask;
> +               else
> +                       hash = jhash(value, map->value_size,
> +                                    bloom_filter->hash_seed + i) &
> +                               bloom_filter->bit_array_mask;

this looks like a good candidate for helper function used in at least two places

> +
> +               if (!test_bit(hash, bloom_filter->bit_array))
> +                       return -ENOENT;
> +       }
> +
> +       return 0;
> +}
> +

[...]
Andrii Nakryiko Sept. 20, 2021, 8:58 p.m. UTC | #3
On Fri, Sep 17, 2021 at 6:08 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Mon, Sep 13, 2021 at 09:04:30PM -0700, Joanne Koong wrote:
> > +
> > +/* For bloom filter maps, the next 4 bits represent how many hashes to use.
> > + * The maximum number of hash functions supported is 15. If this is not set,
> > + * the default number of hash functions used will be 5.
> > + */
> > +     BPF_F_BLOOM_FILTER_HASH_BIT_1 = (1U << 13),
> > +     BPF_F_BLOOM_FILTER_HASH_BIT_2 = (1U << 14),
> > +     BPF_F_BLOOM_FILTER_HASH_BIT_3 = (1U << 15),
> > +     BPF_F_BLOOM_FILTER_HASH_BIT_4 = (1U << 16),
>
> The bit selection is unintuitive.
> Since key_size has to be zero may be used that instead to indicate the number of hash
> functions in the rare case when 5 is not good enough?

Hm... I was initially thinking about proposing something like that,
but it felt a bit ugly at the time. But now thinking about this a bit
more, I think this would be a bit more meaningful if we change the
terminology a bit. Instead of saying that Bloom filter has values and
no keys, we actually have keys and no values. So all those bytes that
are hashed are treated as keys (which is actually how sets are
implemented on top of maps, where you have keys and no values, or at
least the value is always true).

So with that we'll have key/key_size to specify the number of bytes that
need to be hashed (and their type info). And then we can squint a bit
and say that number of hashes are specified by value_size, as in
values are those nr_hash bits that we set in Bloom filter.

Still a bit of terminology stretch, but won't necessitate those
specialized fields just for the Bloom filter map. But if the default value is
going to be good enough for most cases and most cases won't need to
adjust the number of hashes, this seems pretty clean to me.

> Or use inner_map_fd since there is no possibility of having an inner map in bloomfilter.
> It could be a union:
>     __u32   max_entries;    /* max number of entries in a map */
>     __u32   map_flags;      /* BPF_MAP_CREATE related
>                              * flags defined above.
>                              */
>     union {
>        __u32  inner_map_fd;   /* fd pointing to the inner map */
>        __u32  nr_hash_funcs;  /* or number of hash functions */
>     };

This works as well. A bit more Bloom filter-only terminology
throughout UAPI and libbpf, but I'd be fine with that as well.


>     __u32   numa_node;      /* numa node */
>
> > +struct bpf_bloom_filter {
> > +     struct bpf_map map;
> > +     u32 bit_array_mask;
> > +     u32 hash_seed;
> > +     /* If the size of the values in the bloom filter is u32 aligned,
> > +      * then it is more performant to use jhash2 as the underlying hash
> > +      * function, else we use jhash. This tracks the number of u32s
> > +      * in an u32-aligned value size. If the value size is not u32 aligned,
> > +      * this will be 0.
> > +      */
> > +     u32 aligned_u32_count;
>
> what is the performance difference?
> May be we enforce 4-byte sized value for simplicity?

This might be a bit too surprising, especially if keys are just some
strings, where people might not expect that it has to be a 4-byte-multiple
size. And debugging this without extra tooling (like retsnoop) is
going to be nightmarish.

If the performance diff is huge and that if/else logic is
unacceptable, we can also internally pad with up to 3 zero bytes and
include those into the hash.
Joanne Koong Sept. 20, 2021, 9:03 p.m. UTC | #4
On 9/17/21 10:01 AM, Alexei Starovoitov wrote:

> On Mon, Sep 13, 2021 at 09:04:30PM -0700, Joanne Koong wrote:
>> +
>> +/* For bloom filter maps, the next 4 bits represent how many hashes to use.
>> + * The maximum number of hash functions supported is 15. If this is not set,
>> + * the default number of hash functions used will be 5.
>> + */
>> +	BPF_F_BLOOM_FILTER_HASH_BIT_1 = (1U << 13),
>> +	BPF_F_BLOOM_FILTER_HASH_BIT_2 = (1U << 14),
>> +	BPF_F_BLOOM_FILTER_HASH_BIT_3 = (1U << 15),
>> +	BPF_F_BLOOM_FILTER_HASH_BIT_4 = (1U << 16),
> The bit selection is unintuitive.
> Since key_size has to be zero may be used that instead to indicate the number of hash
> functions in the rare case when 5 is not good enough?
> Or use inner_map_fd since there is no possibility of having an inner map in bloomfilter.
> It could be a union:
>      __u32   max_entries;    /* max number of entries in a map */
>      __u32   map_flags;      /* BPF_MAP_CREATE related
>                               * flags defined above.
>                               */
>      union {
>         __u32  inner_map_fd;   /* fd pointing to the inner map */
>         __u32  nr_hash_funcs;  /* or number of hash functions */
>      };
>      __u32   numa_node;      /* numa node */
I really like the idea of union-ing inner_map_fd with the number of hash
functions (my worry with using key_size is that it might be a confusing /
non-intuitive API quirk for users), but I think this would later require us
to add some bloom filter specific APIs to libbpf (such as
bpf_map__set_nr_hashes).

To make the bit selection more intuitive, Andrii suggested defining some 
helper like

BPF_F_BLOOM_NR_HASH_OFF = 13

where the user could then do something like

struct {
     __uint(type, BPF_MAP_TYPE_BLOOM_FILTER),
     ...
     __uint(map_flags, 5 << BPF_F_BLOOM_NR_HASH_OFF),
};

to set the number of hash functions.

Would this approach address your concerns about the unintuitiveness of 
the bit selection?

>> +struct bpf_bloom_filter {
>> +	struct bpf_map map;
>> +	u32 bit_array_mask;
>> +	u32 hash_seed;
>> +	/* If the size of the values in the bloom filter is u32 aligned,
>> +	 * then it is more performant to use jhash2 as the underlying hash
>> +	 * function, else we use jhash. This tracks the number of u32s
>> +	 * in an u32-aligned value size. If the value size is not u32 aligned,
>> +	 * this will be 0.
>> +	 */
>> +	u32 aligned_u32_count;
> what is the performance difference?

Using results from the hashmap benchmark tests, using jhash2 instead of
jhash for 4-byte aligned value sizes improved the performance by roughly
5% to 15%. For non-4-byte aligned value sizes, there wasn't a noticeable
difference between using jhash2 (and truncating the remainder bits) vs.
using jhash.

> May be we enforce 4-byte sized value for simplicity?
Sounds great! And if in the future this becomes too restrictive, we
could always loosen it.
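The aligned_u32_count bookkeeping the patch describes boils down to a single calculation. A hypothetical userspace restatement (not the kernel code):

```c
#include <assert.h>
#include <stdint.h>

/* Mirrors the patch's description: the number of u32 words in a
 * u32-aligned value size, or 0 when the size is not a multiple of 4
 * (in which case the byte-wise jhash path would be taken). */
static uint32_t aligned_u32_count(uint32_t value_size)
{
	return value_size % 4 == 0 ? value_size / 4 : 0;
}
```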
Joanne Koong Sept. 20, 2021, 10:52 p.m. UTC | #5
My previous email replied to Alexei's email before I saw Andrii's new
email, so please feel free to disregard my previous email.

On 9/20/21 1:58 PM, Andrii Nakryiko wrote:

> On Fri, Sep 17, 2021 at 6:08 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
>> On Mon, Sep 13, 2021 at 09:04:30PM -0700, Joanne Koong wrote:
>>> +
>>> +/* For bloom filter maps, the next 4 bits represent how many hashes to use.
>>> + * The maximum number of hash functions supported is 15. If this is not set,
>>> + * the default number of hash functions used will be 5.
>>> + */
>>> +     BPF_F_BLOOM_FILTER_HASH_BIT_1 = (1U << 13),
>>> +     BPF_F_BLOOM_FILTER_HASH_BIT_2 = (1U << 14),
>>> +     BPF_F_BLOOM_FILTER_HASH_BIT_3 = (1U << 15),
>>> +     BPF_F_BLOOM_FILTER_HASH_BIT_4 = (1U << 16),
>> The bit selection is unintuitive.
>> Since key_size has to be zero may be used that instead to indicate the number of hash
>> functions in the rare case when 5 is not good enough?
> Hm... I was initially thinking about proposing something like that,
> but it felt a bit ugly at the time. But now thinking about this a bit
> more, I think this would be a bit more meaningful if we change the
> terminology a bit. Instead of saying that Bloom filter has values and
> no keys, we actually have keys and no values. So all those bytes that
> are hashed are treated as keys (which is actually how sets are
> implemented on top of maps, where you have keys and no values, or at
> least the value is always true).
>
> So with that we'll have key/key_size to specify number of bytes that
> needs to be hashed (and it's type info). And then we can squint a bit
> and say that number of hashes are specified by value_size, as in
> values are those nr_hash bits that we set in Bloom filter.
>
> Still a bit of terminology stretch, but won't necessitate those
> specialized fields just for Bloom filter map. But if default value is
> going to be good enough for most cases and most cases won't need to
> adjust number of hashes, this seems to be pretty clean to me.

With the bloom filter map having keys instead of values, I think this would
lead to messier code in the kernel for handling map_lookup_elem and
map_update_elem calls, because the bloom filter map is a non-associative
map and the current APIs for non-associative map types
(peek_elem/push_elem/pop_elem) all treat the map data as the value, not
the key.

For example, for map_update_elem, the API from the eBPF program side is

int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 
flags);

This would require us to either

a) Add some custom logic in syscall.c so that we bypass the
copy_from_bpfptr call on bloom filter map values (necessary because
memcpying 0 bytes still requires the src pointer to be valid), which
would allow us to pass in a NULL value

b) Add a new function like

int (*map_push_key)(struct bpf_map *map, void *key, u64 flags)

that eBPF programs can call instead of map_update_elem.

or

c) Try to repurpose the existing map_push_elem API by passing in the key
instead of the value, which would lead to inconsistent use of the API

I think if we could change the non-associative map types (currently only
stack maps and queue maps, I believe) to have their data be a key instead
of a value, and have the pop/peek APIs use keys instead of values, then
this would be cleaner, since we could then just use the existing peek/pop
APIs.

>> Or use inner_map_fd since there is no possibility of having an inner map in bloomfilter.
>> It could be a union:
>>      __u32   max_entries;    /* max number of entries in a map */
>>      __u32   map_flags;      /* BPF_MAP_CREATE related
>>                               * flags defined above.
>>                               */
>>      union {
>>         __u32  inner_map_fd;   /* fd pointing to the inner map */
>>         __u32  nr_hash_funcs;  /* or number of hash functions */
>>      };
> This works as well. A bit more Bloom filter-only terminology
> throughout UAPI and libbpf, but I'd be fine with that as well.
>
Great, it looks like this is the consensus - I will go with this option 
for v3!
>>      __u32   numa_node;      /* numa node */
>>
>>> +struct bpf_bloom_filter {
>>> +     struct bpf_map map;
>>> +     u32 bit_array_mask;
>>> +     u32 hash_seed;
>>> +     /* If the size of the values in the bloom filter is u32 aligned,
>>> +      * then it is more performant to use jhash2 as the underlying hash
>>> +      * function, else we use jhash. This tracks the number of u32s
>>> +      * in an u32-aligned value size. If the value size is not u32 aligned,
>>> +      * this will be 0.
>>> +      */
>>> +     u32 aligned_u32_count;
>> what is the performance difference?
>> May be we enforce 4-byte sized value for simplicity?
> This might be a bit too surprising, especially if keys are just some
> strings, where people might not expect that it has to 4-byte multiple
> size. And debugging this without extra tooling (like retsnoop) is
> going to be nightmarish.
>
> If the performance diff is huge and that if/else logic is
> unacceptable, we can also internally pad with up to 3 zero bytes and
> include those into the hash.
I think the if/else logic is unavoidable if we support non-4-byte-aligned
value sizes, unless we are okay with truncating any remainder bytes of
non-4-byte-aligned values and stipulating that a bloom filter map value
size has to be greater than 4 bytes (these conditions would allow us to
use jhash2 for every value without an if/else check). If we internally
pad, we will have to pad on every update and lookup, which would also
require an if/else.

Thanks for the comments and reviews, Alexei and Andrii. They are much
appreciated!
Andrii Nakryiko Sept. 20, 2021, 11:21 p.m. UTC | #6
On Mon, Sep 20, 2021 at 3:52 PM Joanne Koong <joannekoong@fb.com> wrote:
>
> My previous email replied to Alexei's email before I saw Andrii's new
> email, so please
> feel free to disregard my previous email.

Never got that reply of yours. Alexei's email arrived a few hours
after I'd already replied to you. It was a time warp anomaly :)

>
> On 9/20/21 1:58 PM, Andrii Nakryiko wrote:
>
> > On Fri, Sep 17, 2021 at 6:08 PM Alexei Starovoitov
> > <alexei.starovoitov@gmail.com> wrote:
> >> On Mon, Sep 13, 2021 at 09:04:30PM -0700, Joanne Koong wrote:
> >>> +
> >>> +/* For bloom filter maps, the next 4 bits represent how many hashes to use.
> >>> + * The maximum number of hash functions supported is 15. If this is not set,
> >>> + * the default number of hash functions used will be 5.
> >>> + */
> >>> +     BPF_F_BLOOM_FILTER_HASH_BIT_1 = (1U << 13),
> >>> +     BPF_F_BLOOM_FILTER_HASH_BIT_2 = (1U << 14),
> >>> +     BPF_F_BLOOM_FILTER_HASH_BIT_3 = (1U << 15),
> >>> +     BPF_F_BLOOM_FILTER_HASH_BIT_4 = (1U << 16),
> >> The bit selection is unintuitive.
> >> Since key_size has to be zero may be used that instead to indicate the number of hash
> >> functions in the rare case when 5 is not good enough?
> > Hm... I was initially thinking about proposing something like that,
> > but it felt a bit ugly at the time. But now thinking about this a bit
> > more, I think this would be a bit more meaningful if we change the
> > terminology a bit. Instead of saying that Bloom filter has values and
> > no keys, we actually have keys and no values. So all those bytes that
> > are hashed are treated as keys (which is actually how sets are
> > implemented on top of maps, where you have keys and no values, or at
> > least the value is always true).
> >
> > So with that we'll have key/key_size to specify number of bytes that
> > needs to be hashed (and it's type info). And then we can squint a bit
> > and say that number of hashes are specified by value_size, as in
> > values are those nr_hash bits that we set in Bloom filter.
> >
> > Still a bit of terminology stretch, but won't necessitate those
> > specialized fields just for Bloom filter map. But if default value is
> > going to be good enough for most cases and most cases won't need to
> > adjust number of hashes, this seems to be pretty clean to me.
>
> With having bloom filter map keys instead of values,  I think this would
> lead to messier code in the kernel for handling map_lookup_elem
> and map_update_elem calls, due to the fact that the bloom filter map
> is a non-associative map and the current APIs for non-associative map types
> (peek_elem/push_elem/pop_elem) all have the map data as the value and
> not the key.
>
> For example, for map_update_elem, the API from the eBPF program side is
>
> int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64
> flags);
>
> This would require us to either
>
> a) Add some custom logic in syscall.c so that we bypass the
> copy_from_bpfptr call on
> bloom filter map values (necessary because memcpying 0 bytes still
> requires the src pointer
> to be valid), which would allow us to pass in a NULL value
>
> b) Add a new function like
>
> int (*map_push_key)(struct bpf_map *map, void *key, u64 flags)
>
> that eBPF programs can call instead of map_update_elem.
>
> or
>
> c) Try to repurpose the existing map_push_elem API by passing in the key
> instead of the value,
> which would lead to inconsistent use of the API
>
> I think if we could change the non-associative map types (currently only
> stack maps and queue
> maps, I believe) to have their data be a key instead of a value, and
> have the pop/peek APIs use
> keys instead of values, then this would be cleaner, since we could then
> just use the existing peek/pop
> APIs.

I don't think we can change existing map APIs anymore, unfortunately.

>
> >> Or use inner_map_fd since there is no possibility of having an inner map in bloomfilter.
> >> It could be a union:
> >>      __u32   max_entries;    /* max number of entries in a map */
> >>      __u32   map_flags;      /* BPF_MAP_CREATE related
> >>                               * flags defined above.
> >>                               */
> >>      union {
> >>         __u32  inner_map_fd;   /* fd pointing to the inner map */
> >>         __u32  nr_hash_funcs;  /* or number of hash functions */
> >>      };
> > This works as well. A bit more Bloom filter-only terminology
> > throughout UAPI and libbpf, but I'd be fine with that as well.
> >
> Great, it looks like this is the consensus - I will go with this option
> for v3!
> >>      __u32   numa_node;      /* numa node */
> >>
> >>> +struct bpf_bloom_filter {
> >>> +     struct bpf_map map;
> >>> +     u32 bit_array_mask;
> >>> +     u32 hash_seed;
> >>> +     /* If the size of the values in the bloom filter is u32 aligned,
> >>> +      * then it is more performant to use jhash2 as the underlying hash
> >>> +      * function, else we use jhash. This tracks the number of u32s
> >>> +      * in an u32-aligned value size. If the value size is not u32 aligned,
> >>> +      * this will be 0.
> >>> +      */
> >>> +     u32 aligned_u32_count;
> >> what is the performance difference?
> >> May be we enforce 4-byte sized value for simplicity?
> > This might be a bit too surprising, especially if keys are just some
> > strings, where people might not expect that it has to 4-byte multiple
> > size. And debugging this without extra tooling (like retsnoop) is
> > going to be nightmarish.
> >
> > If the performance diff is huge and that if/else logic is
> > unacceptable, we can also internally pad with up to 3 zero bytes and
> > include those into the hash.
> I think the if/else logic is unavoidable if we support non 4-byte
> aligned value sizes,
> unless we are okay with truncating any remainder bytes of non 4-byte
> aligned values
> and stipulating that a bloom filter map value size has to be greater
> than 4 bytes (these
> conditions would allow us to use jhash2 for every value without an
> if/else check). If we
> internally pad, we will have to pad on every update and lookup, which
> would also
> require an if/else.
> Thanks for the comments and reviews, Alexei and Andrii. They are much
> appreciated!

I don't think truncation is an option. And I also forgot that we don't
really store values, so there is nothing to pad, really. So yeah, I'd
keep it as is, especially if that is not expensive (which I assume
it's not). As I mentioned before, that logic can be encapsulated in a
dedicated helper function and reused in a few places.

Patch

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 9c81724e4b98..c4424ac2fa02 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -125,6 +125,7 @@  BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
 
 BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
 BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 791f31dd0abe..1d82860fd98e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -906,6 +906,7 @@  enum bpf_map_type {
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
 	BPF_MAP_TYPE_TASK_STORAGE,
+	BPF_MAP_TYPE_BLOOM_FILTER,
 };
 
 /* Note that tracing related programs such as
@@ -1210,6 +1211,15 @@  enum {
 
 /* Create a map that is suitable to be an inner map with dynamic max entries */
 	BPF_F_INNER_MAP		= (1U << 12),
+
+/* For bloom filter maps, the next 4 bits represent how many hashes to use.
+ * The maximum number of hash functions supported is 15. If this is not set,
+ * the default number of hash functions used will be 5.
+ */
+	BPF_F_BLOOM_FILTER_HASH_BIT_1 = (1U << 13),
+	BPF_F_BLOOM_FILTER_HASH_BIT_2 = (1U << 14),
+	BPF_F_BLOOM_FILTER_HASH_BIT_3 = (1U << 15),
+	BPF_F_BLOOM_FILTER_HASH_BIT_4 = (1U << 16),
 };
 
 /* Flags for BPF_PROG_QUERY. */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 7f33098ca63f..cf6ca339f3cd 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -7,7 +7,7 @@  endif
 CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
-obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
+obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
 obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
 obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
new file mode 100644
index 000000000000..43a17c5b35ac
--- /dev/null
+++ b/kernel/bpf/bloom_filter.c
@@ -0,0 +1,205 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bitmap.h>
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
+
+#define BLOOM_FILTER_HASH_BITMASK \
+	 (BPF_F_BLOOM_FILTER_HASH_BIT_1 | BPF_F_BLOOM_FILTER_HASH_BIT_2 | \
+	 BPF_F_BLOOM_FILTER_HASH_BIT_3 | BPF_F_BLOOM_FILTER_HASH_BIT_4)
+
+#define BLOOM_FILTER_CREATE_FLAG_MASK \
+	(BPF_F_NUMA_NODE | BPF_F_ZERO_SEED | BPF_F_ACCESS_MASK | \
+	 BLOOM_FILTER_HASH_BITMASK)
+
+struct bpf_bloom_filter {
+	struct bpf_map map;
+	u32 bit_array_mask;
+	u32 hash_seed;
+	/* If the size of the values in the bloom filter is u32 aligned,
+	 * then it is more performant to use jhash2 as the underlying hash
+	 * function, else we use jhash. This tracks the number of u32s
+	 * in an u32-aligned value size. If the value size is not u32 aligned,
+	 * this will be 0.
+	 */
+	u32 aligned_u32_count;
+	u8 nr_hashes;
+	unsigned long bit_array[];
+};
+
+static int bloom_filter_map_peek_elem(struct bpf_map *map, void *value)
+{
+	struct bpf_bloom_filter *bloom_filter =
+		container_of(map, struct bpf_bloom_filter, map);
+	u32 hash;
+	u8 i;
+
+	for (i = 0; i < bloom_filter->nr_hashes; i++) {
+		if (bloom_filter->aligned_u32_count)
+			hash = jhash2(value, bloom_filter->aligned_u32_count,
+				      bloom_filter->hash_seed + i) &
+				bloom_filter->bit_array_mask;
+		else
+			hash = jhash(value, map->value_size,
+				     bloom_filter->hash_seed + i) &
+				bloom_filter->bit_array_mask;
+
+		if (!test_bit(hash, bloom_filter->bit_array))
+			return -ENOENT;
+	}
+
+	return 0;
+}
+
+static u8 get_nr_hashes(u32 map_flags)
+{
+	u8 nr_hashes = (map_flags & BLOOM_FILTER_HASH_BITMASK) >>
+		ilog2(BPF_F_BLOOM_FILTER_HASH_BIT_1);
+
+	/* Default to 5 if no number of hashes was specified */
+	return nr_hashes == 0 ? 5 : nr_hashes;
+}
+
+static struct bpf_map *bloom_filter_map_alloc(union bpf_attr *attr)
+{
+	u32 nr_bits, bit_array_bytes, bit_array_mask;
+	int numa_node = bpf_map_attr_numa_node(attr);
+	struct bpf_bloom_filter *bloom_filter;
+	u8 nr_hashes;
+
+	if (!bpf_capable())
+		return ERR_PTR(-EPERM);
+
+	if (attr->key_size != 0 || attr->value_size == 0 || attr->max_entries == 0 ||
+	    attr->map_flags & ~BLOOM_FILTER_CREATE_FLAG_MASK ||
+	    !bpf_map_flags_access_ok(attr->map_flags))
+		return ERR_PTR(-EINVAL);
+
+	nr_hashes = get_nr_hashes(attr->map_flags);
+
+	/* For the bloom filter, the optimal bit array size that minimizes the
+	 * false positive probability is n * k / ln(2) where n is the number of
+	 * expected entries in the bloom filter and k is the number of hash
+	 * functions. We use 7 / 5 to approximate 1 / ln(2).
+	 *
+	 * We round this up to the nearest power of two to enable more efficient
+	 * hashing using bitmasks. The bitmask will be the bit array size - 1.
+	 *
+	 * If this overflows a u32, the bit array size will have 2^32 (4
+	 * GB) bits.
+	 */
+	if (check_mul_overflow(attr->max_entries, (u32)nr_hashes, &nr_bits) ||
+	    check_mul_overflow(nr_bits / 5, (u32)7, &nr_bits) ||
+	    nr_bits > (1UL << 31)) {
+		/* The bit array size is 2^32 bits but to avoid overflowing the
+		 * u32, we use BITS_TO_BYTES(U32_MAX), which will round up to the
+		 * equivalent number of bytes
+		 */
+		bit_array_bytes = BITS_TO_BYTES(U32_MAX);
+		bit_array_mask = U32_MAX;
+	} else {
+		if (nr_bits <= BITS_PER_LONG)
+			nr_bits = BITS_PER_LONG;
+		else
+			nr_bits = roundup_pow_of_two(nr_bits);
+		bit_array_bytes = BITS_TO_BYTES(nr_bits);
+		bit_array_mask = nr_bits - 1;
+	}
+
+	bit_array_bytes = roundup(bit_array_bytes, sizeof(unsigned long));
+	bloom_filter = bpf_map_area_alloc(sizeof(*bloom_filter) + bit_array_bytes,
+					  numa_node);
+
+	if (!bloom_filter)
+		return ERR_PTR(-ENOMEM);
+
+	bpf_map_init_from_attr(&bloom_filter->map, attr);
+
+	bloom_filter->nr_hashes = nr_hashes;
+	bloom_filter->bit_array_mask = bit_array_mask;
+	if ((attr->value_size & (sizeof(u32) - 1)) == 0)
+		bloom_filter->aligned_u32_count = attr->value_size / sizeof(u32);
+
+	if (!(attr->map_flags & BPF_F_ZERO_SEED))
+		bloom_filter->hash_seed = get_random_int();
+
+	return &bloom_filter->map;
+}
+
+static void bloom_filter_map_free(struct bpf_map *map)
+{
+	struct bpf_bloom_filter *bloom_filter =
+		container_of(map, struct bpf_bloom_filter, map);
+
+	bpf_map_area_free(bloom_filter);
+}
+
+static int bloom_filter_map_push_elem(struct bpf_map *map, void *value,
+				      u64 flags)
+{
+	struct bpf_bloom_filter *bloom_filter =
+		container_of(map, struct bpf_bloom_filter, map);
+	u32 hash;
+	u8 i;
+
+	if (flags != BPF_ANY)
+		return -EINVAL;
+
+	for (i = 0; i < bloom_filter->nr_hashes; i++) {
+		if (bloom_filter->aligned_u32_count)
+			hash = jhash2(value, bloom_filter->aligned_u32_count,
+				      bloom_filter->hash_seed + i) &
+				bloom_filter->bit_array_mask;
+		else
+			hash = jhash(value, map->value_size,
+				     bloom_filter->hash_seed + i) &
+				bloom_filter->bit_array_mask;
+
+		set_bit(hash, bloom_filter->bit_array);
+	}
+
+	return 0;
+}
+
+static void *bloom_filter_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	/* The eBPF program should use map_peek_elem instead */
+	return ERR_PTR(-EINVAL);
+}
+
+static int bloom_filter_map_update_elem(struct bpf_map *map, void *key,
+					void *value, u64 flags)
+{
+	/* The eBPF program should use map_push_elem instead */
+	return -EINVAL;
+}
+
+static int bloom_filter_map_delete_elem(struct bpf_map *map, void *key)
+{
+	return -EOPNOTSUPP;
+}
+
+static int bloom_filter_map_get_next_key(struct bpf_map *map, void *key,
+					 void *next_key)
+{
+	return -EOPNOTSUPP;
+}
+
+static int bloom_filter_map_btf_id;
+const struct bpf_map_ops bloom_filter_map_ops = {
+	.map_meta_equal = bpf_map_meta_equal,
+	.map_alloc = bloom_filter_map_alloc,
+	.map_free = bloom_filter_map_free,
+	.map_push_elem = bloom_filter_map_push_elem,
+	.map_peek_elem = bloom_filter_map_peek_elem,
+	.map_lookup_elem = bloom_filter_map_lookup_elem,
+	.map_update_elem = bloom_filter_map_update_elem,
+	.map_delete_elem = bloom_filter_map_delete_elem,
+	.map_get_next_key = bloom_filter_map_get_next_key,
+	.map_check_btf = map_check_no_btf,
+	.map_btf_name = "bpf_bloom_filter",
+	.map_btf_id = &bloom_filter_map_btf_id,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4e50c0bfdb7d..9865b5b1e667 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -199,7 +199,8 @@  static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
 		err = bpf_fd_reuseport_array_update_elem(map, key, value,
 							 flags);
 	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
-		   map->map_type == BPF_MAP_TYPE_STACK) {
+		   map->map_type == BPF_MAP_TYPE_STACK ||
+		   map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
 		err = map->ops->map_push_elem(map, value, flags);
 	} else {
 		rcu_read_lock();
@@ -238,7 +239,8 @@  static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
 	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
 		err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
-		   map->map_type == BPF_MAP_TYPE_STACK) {
+		   map->map_type == BPF_MAP_TYPE_STACK ||
+		   map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
 		err = map->ops->map_peek_elem(map, value);
 	} else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
 		/* struct_ops map requires directly updating "value" */
@@ -1080,6 +1082,14 @@  static int map_lookup_elem(union bpf_attr *attr)
 	if (!value)
 		goto free_key;
 
+	if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
+		if (copy_from_user(value, uvalue, value_size))
+			err = -EFAULT;
+		else
+			err = bpf_map_copy_value(map, key, value, attr->flags);
+		goto free_value;
+	}
+
 	err = bpf_map_copy_value(map, key, value, attr->flags);
 	if (err)
 		goto free_value;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 047ac4b4703b..5cbcff4c2222 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4813,7 +4813,10 @@  static int resolve_map_arg_type(struct bpf_verifier_env *env,
 			return -EINVAL;
 		}
 		break;
-
+	case BPF_MAP_TYPE_BLOOM_FILTER:
+		if (meta->func_id == BPF_FUNC_map_peek_elem)
+			*arg_type = ARG_PTR_TO_MAP_VALUE;
+		break;
 	default:
 		break;
 	}
@@ -5388,6 +5391,11 @@  static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_task_storage_delete)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_BLOOM_FILTER:
+		if (func_id != BPF_FUNC_map_push_elem &&
+		    func_id != BPF_FUNC_map_peek_elem)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -5455,13 +5463,18 @@  static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
 			goto error;
 		break;
-	case BPF_FUNC_map_peek_elem:
 	case BPF_FUNC_map_pop_elem:
-	case BPF_FUNC_map_push_elem:
 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
 		    map->map_type != BPF_MAP_TYPE_STACK)
 			goto error;
 		break;
+	case BPF_FUNC_map_push_elem:
+	case BPF_FUNC_map_peek_elem:
+		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
+		    map->map_type != BPF_MAP_TYPE_STACK &&
+		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
+			goto error;
+		break;
 	case BPF_FUNC_sk_storage_get:
 	case BPF_FUNC_sk_storage_delete:
 		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 791f31dd0abe..1d82860fd98e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -906,6 +906,7 @@  enum bpf_map_type {
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
 	BPF_MAP_TYPE_TASK_STORAGE,
+	BPF_MAP_TYPE_BLOOM_FILTER,
 };
 
 /* Note that tracing related programs such as
@@ -1210,6 +1211,15 @@  enum {
 
 /* Create a map that is suitable to be an inner map with dynamic max entries */
 	BPF_F_INNER_MAP		= (1U << 12),
+
+/* For bloom filter maps, the next 4 bits represent how many hashes to use.
+ * The maximum number of hash functions supported is 15. If this is not set,
+ * the default number of hash functions used will be 5.
+ */
+	BPF_F_BLOOM_FILTER_HASH_BIT_1 = (1U << 13),
+	BPF_F_BLOOM_FILTER_HASH_BIT_2 = (1U << 14),
+	BPF_F_BLOOM_FILTER_HASH_BIT_3 = (1U << 15),
+	BPF_F_BLOOM_FILTER_HASH_BIT_4 = (1U << 16),
 };
 
 /* Flags for BPF_PROG_QUERY. */