diff mbox series

[RFC,v6,2/5] bpf: Add map in map support to rbtree

Message ID 20221005171709.150520-3-xiyou.wangcong@gmail.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series net_sched: introduce eBPF based Qdisc | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-1 pending Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain }}
bpf/vmtest-bpf-next-VM_Test-2 fail Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-3 fail Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 fail Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-6 success Logs for set-matrix
netdev/tree_selection success Guessed tree name to be net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1693 this patch: 1693
netdev/cc_maintainers warning 10 maintainers not CCed: john.fastabend@gmail.com andrii@kernel.org yhs@fb.com ast@kernel.org haoluo@google.com jolsa@kernel.org kpsingh@kernel.org song@kernel.org daniel@iogearbox.net martin.lau@linux.dev
netdev/build_clang success Errors and warnings before: 171 this patch: 171
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1685 this patch: 1685
netdev/checkpatch warning WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP WARNING: Missing commit description - Add an appropriate one WARNING: line length of 84 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Cong Wang Oct. 5, 2022, 5:17 p.m. UTC
From: Cong Wang <cong.wang@bytedance.com>

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
 include/linux/bpf.h       |   4 +
 include/linux/bpf_types.h |   1 +
 include/uapi/linux/bpf.h  |   1 +
 kernel/bpf/rbtree.c       | 158 ++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c      |   7 ++
 5 files changed, 171 insertions(+)
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9e7d46d16032..d4d85df1e8ea 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1913,6 +1913,10 @@  int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
 				void *key, void *value, u64 map_flags);
 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
+int bpf_fd_rbtree_map_update_elem(struct bpf_map *map, struct file *map_file,
+				  void *key, void *value, u64 map_flags);
+int bpf_fd_rbtree_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
+int bpf_fd_rbtree_map_pop_elem(struct bpf_map *map, void *value);
 
 int bpf_get_file_flag(int flags);
 int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size,
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index c53ba6de1613..d1ef13b08e28 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -128,6 +128,7 @@  BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_RBTREE, rbtree_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_RBTREE_OF_MAPS, rbtree_map_in_map_ops)
 
 BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
 BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9492cd3af701..994a3e42a4fa 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -936,6 +936,7 @@  enum bpf_map_type {
 	BPF_MAP_TYPE_BLOOM_FILTER,
 	BPF_MAP_TYPE_USER_RINGBUF,
 	BPF_MAP_TYPE_RBTREE,
+	BPF_MAP_TYPE_RBTREE_OF_MAPS,
 };
 
 /* Note that tracing related programs such as
diff --git a/kernel/bpf/rbtree.c b/kernel/bpf/rbtree.c
index f1a9b1c40b8b..43d3d4193ce4 100644
--- a/kernel/bpf/rbtree.c
+++ b/kernel/bpf/rbtree.c
@@ -12,6 +12,7 @@ 
 #include <linux/bpf_mem_alloc.h>
 #include <linux/math.h>
 #include <linux/seq_file.h>
+#include "map_in_map.h"
 
 #define RBTREE_CREATE_FLAG_MASK \
 	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
@@ -443,3 +444,160 @@  const struct bpf_map_ops rbtree_map_ops = {
 	.iter_seq_info = &rbtree_map_iter_seq_info,
 };
 
+static struct bpf_map *rbtree_map_in_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_map *map, *inner_map_meta;
+
+	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
+	if (IS_ERR(inner_map_meta))
+		return inner_map_meta;
+
+	map = rbtree_map_alloc(attr);
+	if (IS_ERR(map)) {
+		bpf_map_meta_free(inner_map_meta);
+		return map;
+	}
+
+	map->inner_map_meta = inner_map_meta;
+	return map;
+}
+
+static void *fd_rbtree_map_get_ptr(const struct bpf_map *map, struct rbtree_elem *e)
+{
+	return *(void **)(e->key + roundup(map->key_size, 8));
+}
+
+static void rbtree_map_in_map_purge(struct bpf_map *map)
+{
+	struct rbtree_map *rb = rbtree_map(map);
+	struct rbtree_elem *e, *tmp;
+
+	rbtree_walk_safe(e, tmp, &rb->root) {
+		void *ptr = fd_rbtree_map_get_ptr(map, e);
+
+		map->ops->map_fd_put_ptr(ptr);
+	}
+}
+
+static void rbtree_map_in_map_free(struct bpf_map *map)
+{
+	struct rbtree_map *rb = rbtree_map(map);
+
+	bpf_map_meta_free(map->inner_map_meta);
+	rbtree_map_in_map_purge(map);
+	bpf_map_area_free(rb);
+}
+
+/* Called from eBPF program */
+static void *rbtree_map_in_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_map **inner_map = rbtree_map_lookup_elem(map, key);
+
+	if (!inner_map)
+		return NULL;
+
+	return READ_ONCE(*inner_map);
+}
+
+static int rbtree_map_in_map_alloc_check(union bpf_attr *attr)
+{
+	if (attr->value_size != sizeof(u32))
+		return -EINVAL;
+	return rbtree_map_alloc_check(attr);
+}
+
+/* Called from eBPF program */
+static int rbtree_map_in_map_pop_elem(struct bpf_map *map, void *value)
+{
+	struct rbtree_map *rb = rbtree_map(map);
+	struct rbtree_elem *e = elem_rb_first(&rb->root);
+	struct bpf_map **inner_map;
+	unsigned long flags;
+
+	if (!e)
+		return -ENOENT;
+	raw_spin_lock_irqsave(&rb->lock, flags);
+	rb_erase(&e->rbnode, &rb->root);
+	raw_spin_unlock_irqrestore(&rb->lock, flags);
+	inner_map = fd_rbtree_map_get_ptr(map, e);
+	*(void **)value = *inner_map;
+	bpf_mem_cache_free(&rb->ma, e);
+	atomic_dec(&rb->nr_entries);
+	return 0;
+}
+
+/* only called from syscall */
+int bpf_fd_rbtree_map_pop_elem(struct bpf_map *map, void *value)
+{
+	struct bpf_map *ptr;
+	int ret = 0;
+
+	if (!map->ops->map_fd_sys_lookup_elem)
+		return -ENOTSUPP;
+
+	rcu_read_lock();
+	ret = rbtree_map_in_map_pop_elem(map, &ptr);
+	if (!ret)
+		*(u32 *)value = map->ops->map_fd_sys_lookup_elem(ptr);
+	else
+		ret = -ENOENT;
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/* only called from syscall */
+int bpf_fd_rbtree_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
+{
+	void **ptr;
+	int ret = 0;
+
+	if (!map->ops->map_fd_sys_lookup_elem)
+		return -ENOTSUPP;
+
+	rcu_read_lock();
+	ptr = rbtree_map_lookup_elem(map, key);
+	if (ptr)
+		*value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
+	else
+		ret = -ENOENT;
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/* only called from syscall */
+int bpf_fd_rbtree_map_update_elem(struct bpf_map *map, struct file *map_file,
+				  void *key, void *value, u64 map_flags)
+{
+	void *ptr;
+	int ret;
+	u32 ufd = *(u32 *)value;
+
+	ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
+
+	ret = rbtree_map_update_elem(map, key, &ptr, map_flags);
+	if (ret)
+		map->ops->map_fd_put_ptr(ptr);
+
+	return ret;
+}
+
+const struct bpf_map_ops rbtree_map_in_map_ops = {
+	.map_alloc_check = rbtree_map_in_map_alloc_check,
+	.map_alloc = rbtree_map_in_map_alloc,
+	.map_free = rbtree_map_in_map_free,
+	.map_get_next_key = rbtree_map_get_next_key,
+	.map_lookup_elem = rbtree_map_in_map_lookup_elem,
+	.map_update_elem = rbtree_map_update_elem,
+	.map_pop_elem = rbtree_map_in_map_pop_elem,
+	.map_delete_elem = rbtree_map_delete_elem,
+	.map_fd_get_ptr = bpf_map_fd_get_ptr,
+	.map_fd_put_ptr = bpf_map_fd_put_ptr,
+	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
+	.map_check_btf = map_check_no_btf,
+	.map_btf_id = &rbtree_map_btf_ids[0],
+};
+
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7b373a5e861f..1b968dc38500 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -213,6 +213,11 @@  static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
 		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
 						  flags);
 		rcu_read_unlock();
+	} else if (map->map_type == BPF_MAP_TYPE_RBTREE_OF_MAPS) {
+		rcu_read_lock();
+		err = bpf_fd_rbtree_map_update_elem(map, f.file, key, value,
+						    flags);
+		rcu_read_unlock();
 	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
 		/* rcu_read_lock() is not needed */
 		err = bpf_fd_reuseport_array_update_elem(map, key, value,
@@ -1832,6 +1837,8 @@  static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
 	    map->map_type == BPF_MAP_TYPE_STACK) {
 		err = map->ops->map_pop_elem(map, value);
+	} else if (map->map_type == BPF_MAP_TYPE_RBTREE_OF_MAPS) {
+		bpf_fd_rbtree_map_pop_elem(map, value);
 	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
 		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||